GitLab will be upgraded to version 12.10.14-ce.0 on 28 Sept 2020 from 2.00pm to 2.30pm (AEDT). During the upgrade, GitLab and Mattermost services will not be available. If you have any concerns about this, please talk to us at N110 (b) CSIT building.

...
  View open merge request
Commits (6)
......@@ -32,7 +32,7 @@
<target name="bm-build" depends="lucene">
<mkdir dir="${bm-build-dir}"/>
<javac srcdir="${bm-src-dir}" source="1.5"
<javac srcdir="${bm-src-dir}" source="1.8"
classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}"
destdir="${bm-build-dir}"
includes="org/dacapo/luindex/*" debug="true" debuglevel="lines,vars,source"/>
......
benchmark luindex
class org.dacapo.harness.Luindex
thread-model single
jars "dacapo-luindex.jar", "lucene-core-2.4.jar", "lucene-demos-2.4.jar";
jars "dacapo-luindex.jar", "lucene-core-7.1.0-SNAPSHOT.jar", "lucene-demo-7.1.0-SNAPSHOT.jar";
size small args "${SCRATCH}/luindex/william/poetry"
output stdout digest 0xebb11b9e1c56c6ef5620b890cca24ec773301388,
stderr digest 0xda39a3ee5e6b4b0d3255bfef95601890afd80709,
"index/segments.gen" bytes 20,
"index/segments_2" bytes 62;
"index/segments_1" bytes 136;
size default args "${SCRATCH}/luindex/william","${SCRATCH}/luindex/kjv"
output stdout digest 0xc90792fce1594b4b9ea1b01d593aefe801e6e58b,
stderr digest 0xda39a3ee5e6b4b0d3255bfef95601890afd80709,
"index/segments.gen" bytes 20,
"index/segments_2" bytes 62;
"index/segments_1" bytes 136;
description
short "A text indexing tool",
......@@ -22,6 +20,6 @@ description
author "Lucene Project Management Committee",
license "Apache License, Version 2.0",
url "http://lucene.apache.org/",
version "2.4.1",
version "7.1.0",
repeats "Single iteration indexes two multi-file documents",
threads "Externally single-threaded. Limited internal concurrency.";
......@@ -37,13 +37,22 @@ package org.dacapo.luindex;
*/
import java.io.File;
import java.nio.file.Paths;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.demo.FileDocument;
import org.apache.lucene.index.IndexWriterConfig;
/**
* @date $Date: 2009-12-24 11:19:36 +1100 (Thu, 24 Dec 2009) $
......@@ -61,7 +70,10 @@ public class Index {
* Index all text files under a directory.
*/
public void main(final File INDEX_DIR, final String[] args) throws IOException {
IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
IndexWriterConfig IWConfig = new IndexWriterConfig();
IWConfig.setOpenMode (IndexWriterConfig.OpenMode.CREATE);
IWConfig.setMergePolicy (new LogByteSizeMergePolicy());
IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), IWConfig);
for (int arg = 0; arg < args.length; arg++) {
final File docDir = new File(args[arg]);
if (!docDir.exists() || !docDir.canRead()) {
......@@ -71,7 +83,7 @@ public class Index {
indexDocs(writer, docDir);
System.out.println("Optimizing...");
writer.optimize();
writer.forceMerge(1);
}
writer.close();
}
......@@ -102,7 +114,31 @@ public class Index {
} else {
System.out.println("adding " + file.getCanonicalPath().substring(scratchP));
try {
writer.addDocument(FileDocument.Document(file));
// Build the Lucene document for this file.
//
// Two separate FieldType instances are used on purpose: a Field keeps a
// reference to the FieldType it was constructed with rather than a copy, so
// re-configuring one shared instance for the "contents" field would also
// change how the already-created "path" and "modified" fields are indexed
// (they would end up tokenized and not stored).
Document doc = new Document();

// Type for the metadata fields: indexed (i.e. searchable) as a single
// un-tokenized term, and stored in the index.
FieldType keywordFT = new FieldType();
keywordFT.setTokenized(false);
keywordFT.setStored(true);
keywordFT.setIndexOptions(IndexOptions.DOCS);

// Type for the file body: tokenized into words and indexed, but not stored.
// (A Reader-valued field must be tokenized and must not be stored.)
FieldType contentsFT = new FieldType();
contentsFT.setTokenized(true);
contentsFT.setStored(false);
contentsFT.setIndexOptions(IndexOptions.DOCS);

// Add the path of the file as a field named "path".
doc.add(new Field("path", file.getPath(), keywordFT));

// Add the last modified date of the file as a field named "modified",
// rounded to minute resolution.
doc.add(new Field("modified",
DateTools.timeToString(file.lastModified(), DateTools.Resolution.MINUTE),
keywordFT));

// Add the contents of the file to a field named "contents". Specify a Reader,
// so that the text of the file is tokenized and indexed, but not stored.
// Note that FileReader expects the file to be in the system's default encoding.
// If that's not the case, searching for special characters will fail.
doc.add(new Field("contents", new FileReader(file), contentsFT));

writer.addDocument(doc);
}
// at least on windows, some temporary files raise this exception with
// an "access denied" message
......
......@@ -436,7 +436,10 @@ public abstract class Benchmark {
int refLines = config.getLines(size, file);
int lines;
try {
lines = lineCount(new File(scratch, file));
File tempFile = new File(scratch, file);
if (!tempFile.exists())
throw new FileNotFoundException();
lines = lineCount(tempFile);
} catch (FileNotFoundException e) {
System.err.println("File not found, " + file);
lines = -1;
......@@ -462,7 +465,10 @@ public abstract class Benchmark {
long refBytes = config.getBytes(size, file);
long bytes;
try {
bytes = byteCount(new File(scratch, file));
File genSeg = new File(scratch, file);
if (!genSeg.exists())
throw new FileNotFoundException();
bytes = byteCount(genSeg);
} catch (FileNotFoundException e) {
System.err.println("File not found, " + file);
bytes = -1;
......
......@@ -92,8 +92,8 @@
<!-- lucene -->
<property name="lucene-jars" value="${lib-base}/lucene/dist/jar"/>
<property name="lucene-core-jar-name" value="lucene-core-2.4.jar"/>
<property name="lucene-demos-jar-name" value="lucene-demos-2.4.jar"/>
<property name="lucene-core-jar-name" value="lucene-core-7.1.0-SNAPSHOT.jar"/>
<property name="lucene-demos-jar-name" value="lucene-demo-7.1.0-SNAPSHOT.jar"/>
<!-- Builds the lucene library by delegating to the build file at
${lib-base}/lucene/build.xml. No target is named, so that build file's
default target is what runs. -->
<target name="lucene">
<ant antfile="${lib-base}/lucene/build.xml"/>
</target>
......
......@@ -10,11 +10,11 @@
<description>lucene library, required by lusearch and luindex</description>
<property file="ant/dacapo.properties"/>
<property name="lib-name" value="lucene"/>
<property name="lib-major-version" value="2.4"/>
<property name="lib-version" value="${lib-major-version}.1"/>
<property name="lib-url" value="${apache.dl.url}/lucene/java"/>
<property name="lib-src" value="lucene-${lib-version}-src.tar.gz"/>
<property name="lib-name" value="lucene"/>
<property name="lib-major-version" value="7.1"/>
<property name="lib-version" value="${lib-major-version}.0"/>
<property name="lib-url" value="${apache.dl.url}/lucene/java/${lib-version}"/>
<property name="lib-src" value="lucene-${lib-version}-src.tgz"/>
<import file="../common.xml"/>
......@@ -22,13 +22,22 @@
<target name="unpack" depends="untar"/>
<target name="build">
<ant antfile="build.xml" target="jar-core" dir="${lib-build-top}" inheritall="false"/>
<ant antfile="build.xml" target="jar-demo" dir="${lib-build-top}" inheritall="false"/>
</target>
<!-- Builds the lucene jars inside the unpacked source tree (${lib-build-top}).
Runs three nested Ant builds in order, each with inheritall="false" so that
this project's properties are not passed down:
1. the ivy-bootstrap target of common-build.xml,
2. the jar-core target of the top-level build.xml,
3. the default target of the build.xml in the demo/ sub-directory. -->
<target name="build">
<ant antfile="common-build.xml" target="ivy-bootstrap" dir="${lib-build-top}" inheritall="false"/>
<ant antfile="build.xml" target="jar-core" dir="${lib-build-top}" inheritall="false"/>
<ant antfile="build.xml" target="default" dir="${lib-build-top}/demo/" inheritall="false"/>
</target>
<target name="jar">
<copy file="${lib-build-top}/build/lucene-core-${lib-major-version}.jar" todir="${lib-jars}"/>
<copy file="${lib-build-top}/build/lucene-demos-${lib-major-version}.jar" todir="${lib-jars}"/>
</target>
<!-- Collects the built lucene jars into ${lib-jars}: every lucene-core-*.jar
from build/core and every lucene-demo-*.jar from build/demo. Wildcards are
used so the exact version suffix of the jar names is not hard-coded here. -->
<target name="jar">
<copy todir="${lib-jars}">
<fileset dir="${lib-build-top}/build/core" includes="lucene-core-*.jar"/>
<fileset dir="${lib-build-top}/build/demo" includes="lucene-demo-*.jar"/>
</copy>
</target>
</project>