WARNING! Access to this system is limited to authorised users only.
Unauthorised users may be subject to prosecution.
Unauthorised access to this system is a criminal offence under Australian law (Federal Crimes Act 1914 Part VIA)
It is a criminal offence to:
(1) Obtain access to data without authority. -Penalty 2 years imprisonment.
(2) Damage, delete, alter or insert data without authority. -Penalty 10 years imprisonment.
User activity is monitored and recorded. Anyone using this system expressly consents to such monitoring and recording.

To protect your data, the CISO has recommended that users enable 2FA as soon as possible.
Currently, 2.7% of users have enabled 2FA.

Commit c3deb19b authored by John Zhang
Browse files

Merge branch 'version-bump-lusearch' into 'master'

Version bump lusearch

See merge request !9
parents 64b04762 6fb3fa64
Pipeline #2091 failed with stage
in 7 minutes and 13 seconds
......@@ -32,7 +32,7 @@
<target name="bm-build" depends="lucene">
<mkdir dir="${bm-build-dir}"/>
<javac srcdir="${bm-src-dir}" source="1.5"
<javac srcdir="${bm-src-dir}" source="1.8"
classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}"
destdir="${bm-build-dir}"
includes="org/dacapo/luindex/*" debug="true" debuglevel="lines,vars,source"/>
......
benchmark luindex
class org.dacapo.harness.Luindex
thread-model single
jars "dacapo-luindex.jar", "lucene-core-2.4.jar", "lucene-demos-2.4.jar";
jars "dacapo-luindex.jar", "lucene-core-7.1.0-SNAPSHOT.jar", "lucene-demo-7.1.0-SNAPSHOT.jar";
size small args "${SCRATCH}/luindex/william/poetry"
output stdout digest 0xebb11b9e1c56c6ef5620b890cca24ec773301388,
stderr digest 0xda39a3ee5e6b4b0d3255bfef95601890afd80709,
"index/segments.gen" bytes 20,
"index/segments_2" bytes 62;
"index/segments_1" bytes 136;
size default args "${SCRATCH}/luindex/william","${SCRATCH}/luindex/kjv"
output stdout digest 0xc90792fce1594b4b9ea1b01d593aefe801e6e58b,
stderr digest 0xda39a3ee5e6b4b0d3255bfef95601890afd80709,
"index/segments.gen" bytes 20,
"index/segments_2" bytes 62;
"index/segments_1" bytes 136;
description
short "A text indexing tool",
......@@ -22,6 +20,6 @@ description
author "Lucene Project Management Committee",
license "Apache License, Version 2.0",
url "http://lucene.apache.org/",
version "2.4.1",
version "7.1.0",
repeats "Single iteration indexes two multi-file documents",
threads "Externally single-threaded. Limited internal concurrency.";
......@@ -37,13 +37,22 @@ package org.dacapo.luindex;
*/
import java.io.File;
import java.nio.file.Paths;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.demo.FileDocument;
import org.apache.lucene.index.IndexWriterConfig;
/**
* date: $Date: 2009-12-24 11:19:36 +1100 (Thu, 24 Dec 2009) $
......@@ -61,7 +70,10 @@ public class Index {
* Index all text files under a directory.
*/
public void main(final File INDEX_DIR, final String[] args) throws IOException {
IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
IndexWriterConfig IWConfig = new IndexWriterConfig();
IWConfig.setOpenMode (IndexWriterConfig.OpenMode.CREATE);
IWConfig.setMergePolicy (new LogByteSizeMergePolicy());
IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), IWConfig);
for (int arg = 0; arg < args.length; arg++) {
final File docDir = new File(args[arg]);
if (!docDir.exists() || !docDir.canRead()) {
......@@ -71,7 +83,7 @@ public class Index {
indexDocs(writer, docDir);
System.out.println("Optimizing...");
writer.optimize();
writer.forceMerge(1);
}
writer.close();
}
......@@ -102,7 +114,31 @@ public class Index {
} else {
System.out.println("adding " + file.getCanonicalPath().substring(scratchP));
try {
writer.addDocument(FileDocument.Document(file));
Document doc = new Document();
FieldType docFT = new FieldType();
docFT.setTokenized (false);
docFT.setStored (true);
docFT.setIndexOptions (IndexOptions.DOCS);
// Add the path of the file as a field named "path". Use a field that is
// indexed (i.e. searchable), but don't tokenize the field into words.
doc.add(new Field("path", file.getPath(), docFT));
// Add the last modified date of the file as a field named "modified". Use
// a field that is indexed (i.e. searchable), but don't tokenize the field
// into words.
doc.add(new Field("modified",
DateTools.timeToString(file.lastModified(), DateTools.Resolution.MINUTE),
docFT));
// Add the contents of the file to a field named "contents". Specify a Reader,
// so that the text of the file is tokenized and indexed, but not stored.
// Note that FileReader expects the file to be in the system's default encoding.
// If that's not the case searching for special characters will fail.
docFT.setTokenized (true);
docFT.setStored (false);
doc.add(new Field("contents", new FileReader(file), docFT));
writer.addDocument(doc);
}
// at least on windows, some temporary files raise this exception with
// an "access denied" message
......
......@@ -17,7 +17,7 @@
<target name="bm-build" depends="lucene">
<mkdir dir="${bm-build-dir}"/>
<javac srcdir="${bm-src-dir}" source="1.5"
classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}"
classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}:${lucene-jars}/${lucene-queryparser-jar-name}"
destdir="${bm-build-dir}"
includes="org/dacapo/lusearch/*" debug="true" debuglevel="lines,vars,source"/>
</target>
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment