WARNING! Access to this system is limited to authorised users only.
Unauthorised users may be subject to prosecution.
Unauthorised access to this system is a criminal offence under Australian law (Federal Crimes Act 1914 Part VIA)
It is a criminal offence to:
(1) Obtain access to data without authority. -Penalty 2 years imprisonment.
(2) Damage, delete, alter or insert data without authority. -Penalty 10 years imprisonment.
User activity is monitored and recorded. Anyone using this system expressly consents to such monitoring and recording.

To protect your data, the CISO has recommended that users enable 2FA as soon as possible.
Currently 2.7% of users have enabled 2FA.

Commit c3deb19b authored by John Zhang
Browse files

Merge branch 'version-bump-lusearch' into 'master'

Version bump lusearch

See merge request !9
parents 64b04762 6fb3fa64
Pipeline #2091 failed with stage
in 7 minutes and 13 seconds
...@@ -32,7 +32,7 @@ ...@@ -32,7 +32,7 @@
<target name="bm-build" depends="lucene"> <target name="bm-build" depends="lucene">
<mkdir dir="${bm-build-dir}"/> <mkdir dir="${bm-build-dir}"/>
<javac srcdir="${bm-src-dir}" source="1.5" <javac srcdir="${bm-src-dir}" source="1.8"
classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}" classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}"
destdir="${bm-build-dir}" destdir="${bm-build-dir}"
includes="org/dacapo/luindex/*" debug="true" debuglevel="lines,vars,source"/> includes="org/dacapo/luindex/*" debug="true" debuglevel="lines,vars,source"/>
......
benchmark luindex benchmark luindex
class org.dacapo.harness.Luindex class org.dacapo.harness.Luindex
thread-model single thread-model single
jars "dacapo-luindex.jar", "lucene-core-2.4.jar", "lucene-demos-2.4.jar"; jars "dacapo-luindex.jar", "lucene-core-7.1.0-SNAPSHOT.jar", "lucene-demo-7.1.0-SNAPSHOT.jar";
size small args "${SCRATCH}/luindex/william/poetry" size small args "${SCRATCH}/luindex/william/poetry"
output stdout digest 0xebb11b9e1c56c6ef5620b890cca24ec773301388, output stdout digest 0xebb11b9e1c56c6ef5620b890cca24ec773301388,
stderr digest 0xda39a3ee5e6b4b0d3255bfef95601890afd80709, stderr digest 0xda39a3ee5e6b4b0d3255bfef95601890afd80709,
"index/segments.gen" bytes 20, "index/segments_1" bytes 136;
"index/segments_2" bytes 62;
size default args "${SCRATCH}/luindex/william","${SCRATCH}/luindex/kjv" size default args "${SCRATCH}/luindex/william","${SCRATCH}/luindex/kjv"
output stdout digest 0xc90792fce1594b4b9ea1b01d593aefe801e6e58b, output stdout digest 0xc90792fce1594b4b9ea1b01d593aefe801e6e58b,
stderr digest 0xda39a3ee5e6b4b0d3255bfef95601890afd80709, stderr digest 0xda39a3ee5e6b4b0d3255bfef95601890afd80709,
"index/segments.gen" bytes 20, "index/segments_1" bytes 136;
"index/segments_2" bytes 62;
description description
short "A text indexing tool", short "A text indexing tool",
...@@ -22,6 +20,6 @@ description ...@@ -22,6 +20,6 @@ description
author "Lucene Project Management Committee", author "Lucene Project Management Committee",
license "Apache License, Version 2.0", license "Apache License, Version 2.0",
url "http://lucene.apache.org/", url "http://lucene.apache.org/",
version "2.4.1", version "7.1.0",
repeats "Single iteration indexes two multi-file documents", repeats "Single iteration indexes two multi-file documents",
threads "Externally single-threaded. Limited internal concurrency."; threads "Externally single-threaded. Limited internal concurrency.";
...@@ -37,13 +37,22 @@ package org.dacapo.luindex; ...@@ -37,13 +37,22 @@ package org.dacapo.luindex;
*/ */
import java.io.File; import java.io.File;
import java.nio.file.Paths;
import java.io.FileReader;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.util.Arrays; import java.util.Arrays;
import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.demo.FileDocument; import org.apache.lucene.index.IndexWriterConfig;
/** /**
* date: $Date: 2009-12-24 11:19:36 +1100 (Thu, 24 Dec 2009) $ * date: $Date: 2009-12-24 11:19:36 +1100 (Thu, 24 Dec 2009) $
...@@ -61,7 +70,10 @@ public class Index { ...@@ -61,7 +70,10 @@ public class Index {
* Index all text files under a directory. * Index all text files under a directory.
*/ */
public void main(final File INDEX_DIR, final String[] args) throws IOException { public void main(final File INDEX_DIR, final String[] args) throws IOException {
IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); IndexWriterConfig IWConfig = new IndexWriterConfig();
IWConfig.setOpenMode (IndexWriterConfig.OpenMode.CREATE);
IWConfig.setMergePolicy (new LogByteSizeMergePolicy());
IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), IWConfig);
for (int arg = 0; arg < args.length; arg++) { for (int arg = 0; arg < args.length; arg++) {
final File docDir = new File(args[arg]); final File docDir = new File(args[arg]);
if (!docDir.exists() || !docDir.canRead()) { if (!docDir.exists() || !docDir.canRead()) {
...@@ -71,7 +83,7 @@ public class Index { ...@@ -71,7 +83,7 @@ public class Index {
indexDocs(writer, docDir); indexDocs(writer, docDir);
System.out.println("Optimizing..."); System.out.println("Optimizing...");
writer.optimize(); writer.forceMerge(1);
} }
writer.close(); writer.close();
} }
...@@ -102,7 +114,31 @@ public class Index { ...@@ -102,7 +114,31 @@ public class Index {
} else { } else {
System.out.println("adding " + file.getCanonicalPath().substring(scratchP)); System.out.println("adding " + file.getCanonicalPath().substring(scratchP));
try { try {
writer.addDocument(FileDocument.Document(file)); Document doc = new Document();
FieldType docFT = new FieldType();
docFT.setTokenized (false);
docFT.setStored (true);
docFT.setIndexOptions (IndexOptions.DOCS);
// Add the path of the file as a field named "path". Use a field that is
// indexed (i.e. searchable), but don't tokenize the field into words.
doc.add(new Field("path", file.getPath(), docFT));
// Add the last modified date of the file a field named "modified". Use
// a field that is indexed (i.e. searchable), but don't tokenize the field
// into words.
doc.add(new Field("modified",
DateTools.timeToString(file.lastModified(), DateTools.Resolution.MINUTE),
docFT));
// Add the contents of the file to a field named "contents". Specify a Reader,
// so that the text of the file is tokenized and indexed, but not stored.
// Note that FileReader expects the file to be in the system's default encoding.
// If that's not the case searching for special characters will fail.
docFT.setTokenized (true);
docFT.setStored (false);
doc.add(new Field("contents", new FileReader(file), docFT));
writer.addDocument(doc);
} }
// at least on windows, some temporary files raise this exception with // at least on windows, some temporary files raise this exception with
// an "access denied" message // an "access denied" message
......
...@@ -17,7 +17,7 @@ ...@@ -17,7 +17,7 @@
<target name="bm-build" depends="lucene"> <target name="bm-build" depends="lucene">
<mkdir dir="${bm-build-dir}"/> <mkdir dir="${bm-build-dir}"/>
<javac srcdir="${bm-src-dir}" source="1.5" <javac srcdir="${bm-src-dir}" source="1.5"
classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}" classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}:${lucene-jars}/${lucene-queryparser-jar-name}"
destdir="${bm-build-dir}" destdir="${bm-build-dir}"
includes="org/dacapo/lusearch/*" debug="true" debuglevel="lines,vars,source"/> includes="org/dacapo/lusearch/*" debug="true" debuglevel="lines,vars,source"/>
</target> </target>
......
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment