Commit c3deb19b authored by John Zhang

Merge branch 'version-bump-lusearch' into 'master'

Version bump lusearch

See merge request dacapo/anu-dev/dacapobench!9
parents 64b04762 6fb3fa64
Pipeline #2091 failed in 7 minutes and 13 seconds
......@@ -32,7 +32,7 @@
<target name="bm-build" depends="lucene">
<mkdir dir="${bm-build-dir}"/>
<javac srcdir="${bm-src-dir}" source="1.5"
<javac srcdir="${bm-src-dir}" source="1.8"
classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}"
destdir="${bm-build-dir}"
includes="org/dacapo/luindex/*" debug="true" debuglevel="lines,vars,source"/>
......
benchmark luindex
class org.dacapo.harness.Luindex
thread-model single
jars "dacapo-luindex.jar", "lucene-core-2.4.jar", "lucene-demos-2.4.jar";
jars "dacapo-luindex.jar", "lucene-core-7.1.0-SNAPSHOT.jar", "lucene-demo-7.1.0-SNAPSHOT.jar";
size small args "${SCRATCH}/luindex/william/poetry"
output stdout digest 0xebb11b9e1c56c6ef5620b890cca24ec773301388,
stderr digest 0xda39a3ee5e6b4b0d3255bfef95601890afd80709,
"index/segments.gen" bytes 20,
"index/segments_2" bytes 62;
"index/segments_1" bytes 136;
size default args "${SCRATCH}/luindex/william","${SCRATCH}/luindex/kjv"
output stdout digest 0xc90792fce1594b4b9ea1b01d593aefe801e6e58b,
stderr digest 0xda39a3ee5e6b4b0d3255bfef95601890afd80709,
"index/segments.gen" bytes 20,
"index/segments_2" bytes 62;
"index/segments_1" bytes 136;
description
short "A text indexing tool",
......@@ -22,6 +20,6 @@ description
author "Lucene Project Management Committee",
license "Apache License, Version 2.0",
url "http://lucene.apache.org/",
version "2.4.1",
version "7.1.0",
repeats "Single iteration indexes two multi-file documents",
threads "Externally single-threaded. Limited internal concurrency.";
......@@ -37,13 +37,22 @@ package org.dacapo.luindex;
*/
import java.io.File;
import java.nio.file.Paths;
import java.io.FileReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.Arrays;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.demo.FileDocument;
import org.apache.lucene.index.IndexWriterConfig;
/**
* date: $Date: 2009-12-24 11:19:36 +1100 (Thu, 24 Dec 2009) $
......@@ -61,7 +70,10 @@ public class Index {
* Index all text files under a directory.
*/
public void main(final File INDEX_DIR, final String[] args) throws IOException {
IndexWriter writer = new IndexWriter(INDEX_DIR, new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
IndexWriterConfig IWConfig = new IndexWriterConfig();
IWConfig.setOpenMode (IndexWriterConfig.OpenMode.CREATE);
IWConfig.setMergePolicy (new LogByteSizeMergePolicy());
IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get(INDEX_DIR.getCanonicalPath())), IWConfig);
for (int arg = 0; arg < args.length; arg++) {
final File docDir = new File(args[arg]);
if (!docDir.exists() || !docDir.canRead()) {
......@@ -71,7 +83,7 @@ public class Index {
indexDocs(writer, docDir);
System.out.println("Optimizing...");
writer.optimize();
writer.forceMerge(1);
}
writer.close();
}
......@@ -102,7 +114,31 @@ public class Index {
} else {
System.out.println("adding " + file.getCanonicalPath().substring(scratchP));
try {
writer.addDocument(FileDocument.Document(file));
Document doc = new Document();
FieldType docFT = new FieldType();
docFT.setTokenized (false);
docFT.setStored (true);
docFT.setIndexOptions (IndexOptions.DOCS);
// Add the path of the file as a field named "path". Use a field that is
// indexed (i.e. searchable), but don't tokenize the field into words.
doc.add(new Field("path", file.getPath(), docFT));
// Add the last modified date of the file as a field named "modified". Use
// a field that is indexed (i.e. searchable), but don't tokenize the field
// into words.
doc.add(new Field("modified",
DateTools.timeToString(file.lastModified(), DateTools.Resolution.MINUTE),
docFT));
// Add the contents of the file to a field named "contents". Specify a Reader,
// so that the text of the file is tokenized and indexed, but not stored.
// Note that FileReader expects the file to be in the system's default encoding.
// If that's not the case searching for special characters will fail.
docFT.setTokenized (true);
docFT.setStored (false);
doc.add(new Field("contents", new FileReader(file), docFT));
writer.addDocument(doc);
}
// at least on windows, some temporary files raise this exception with
// an "access denied" message
......
......@@ -17,7 +17,7 @@
<target name="bm-build" depends="lucene">
<mkdir dir="${bm-build-dir}"/>
<javac srcdir="${bm-src-dir}" source="1.5"
classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}"
classpath="${harness-classpath}:${lucene-jars}/${lucene-core-jar-name}:${lucene-jars}/${lucene-demos-jar-name}:${lucene-jars}/${lucene-queryparser-jar-name}"
destdir="${bm-build-dir}"
includes="org/dacapo/lusearch/*" debug="true" debuglevel="lines,vars,source"/>
</target>
......
This diff is collapsed.
......@@ -26,18 +26,22 @@ import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.nio.file.Paths;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.FilterIndexReader;
//import org.apache.lucene.index.FilterIndexReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.search.TopDocsCollector;
import org.apache.lucene.search.TopScoreDocCollector;
/**
* Simple command-line based search demo.
......@@ -57,7 +61,7 @@ public class Search {
* memory. If all of the fields contain only a single token, then the norms
* are all identical, then single norm vector may be shared.
*/
private static class OneNormsReader extends FilterIndexReader {
/*private static class OneNormsReader extends FilterIndexReader {
private String field;
public OneNormsReader(IndexReader in, String field) {
......@@ -68,14 +72,15 @@ public class Search {
public byte[] norms(String field) throws IOException {
return in.norms(this.field);
}
}
}*/
public Search() {
}
/** Simple command-line based search demo. */
public void main(String[] args) throws Exception {
String usage = "Usage:\tjava org.dacapo.lusearch.Search [-index dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
String usage = "Usage:\tjava org.dacapo.lusearch.Search [-ind" +
"ex dir] [-field f] [-repeat n] [-queries file] [-raw] [-norms field] [-paging hitsPerPage]";
usage += "\n\tSpecify 'false' for hitsPerPage to use streaming instead of paging search.";
if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
System.out.println(usage);
......@@ -192,8 +197,8 @@ public class Search {
int hitsPerPage;
boolean raw;
IndexReader reader;
Searcher searcher;
DirectoryReader reader;
IndexSearcher searcher;
BufferedReader in;
PrintWriter out;
......@@ -204,9 +209,9 @@ public class Search {
this.raw = raw;
this.hitsPerPage = hitsPerPage;
try {
reader = IndexReader.open(index);
if (normsField != null)
reader = new OneNormsReader(reader, normsField);
reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
/*if (normsField != null)
reader = new OneNormsReader(reader, normsField);*/
searcher = new IndexSearcher(reader);
String query = queryBase + (id < 10 ? "00" : (id < 100 ? "0" : "")) + id + ".txt";
......@@ -238,7 +243,7 @@ public class Search {
} catch (Exception e) {
e.printStackTrace();
}
searcher.search(query, null, 10);
searcher.search(query, 10);
doPagingSearch(query);
}
......@@ -269,7 +274,7 @@ public class Search {
public void doPagingSearch(Query query) throws IOException {
// Collect enough docs to show 5 pages
TopDocCollector collector = new TopDocCollector(MAX_DOCS_TO_COLLECT);
TopDocsCollector<ScoreDoc> collector = TopScoreDocCollector.create(MAX_DOCS_TO_COLLECT);
searcher.search(query, collector);
ScoreDoc[] hits = collector.topDocs().scoreDocs;
......
......@@ -435,7 +435,10 @@ public abstract class Benchmark {
int refLines = config.getLines(size, file);
int lines;
try {
lines = lineCount(new File(scratch, file));
File tempFile = new File(scratch, file);
if (!tempFile.exists())
throw new FileNotFoundException();
lines = lineCount(tempFile);
} catch (FileNotFoundException e) {
System.err.println("File not found, " + file);
lines = -1;
......@@ -461,7 +464,10 @@ public abstract class Benchmark {
long refBytes = config.getBytes(size, file);
long bytes;
try {
bytes = byteCount(new File(scratch, file));
File genSeg = new File(scratch, file);
if (!genSeg.exists())
throw new FileNotFoundException();
bytes = byteCount(genSeg);
} catch (FileNotFoundException e) {
System.err.println("File not found, " + file);
bytes = -1;
......
......@@ -92,8 +92,9 @@
<!-- lucene -->
<property name="lucene-jars" value="${lib-base}/lucene/dist/jar"/>
<property name="lucene-core-jar-name" value="lucene-core-2.4.jar"/>
<property name="lucene-demos-jar-name" value="lucene-demos-2.4.jar"/>
<property name="lucene-core-jar-name" value="lucene-core-7.1.0-SNAPSHOT.jar"/>
<property name="lucene-demos-jar-name" value="lucene-demo-7.1.0-SNAPSHOT.jar"/>
<property name="lucene-queryparser-jar-name" value="lucene-queryparser-7.1.0-SNAPSHOT.jar"/>
<target name="lucene">
<ant antfile="${lib-base}/lucene/build.xml"/>
</target>
......
......@@ -10,11 +10,11 @@
<description>lucene library, required by lusearch and luindex</description>
<property file="ant/dacapo.properties"/>
<property name="lib-name" value="lucene"/>
<property name="lib-major-version" value="2.4"/>
<property name="lib-version" value="${lib-major-version}.1"/>
<property name="lib-url" value="${apache.dl.url}/lucene/java"/>
<property name="lib-src" value="lucene-${lib-version}-src.tar.gz"/>
<property name="lib-name" value="lucene"/>
<property name="lib-major-version" value="7.1"/>
<property name="lib-version" value="${lib-major-version}.0"/>
<property name="lib-url" value="${apache.dl.url}/lucene/java/${lib-version}"/>
<property name="lib-src" value="lucene-${lib-version}-src.tgz"/>
<import file="../common.xml"/>
......@@ -22,13 +22,28 @@
<target name="unpack" depends="untar"/>
<target name="build">
<ant antfile="build.xml" target="jar-core" dir="${lib-build-top}" inheritall="false"/>
<ant antfile="build.xml" target="jar-demo" dir="${lib-build-top}" inheritall="false"/>
</target>
<!-- Builds the Lucene artifacts inside ${lib-build-top}:
     1. runs the ivy-bootstrap target of common-build.xml,
     2. registers the Ivy Ant tasks from ${user.home}/.ant/lib/ivy-2.3.0.jar,
     3. runs the jar-core target of the top-level build.xml,
     4. runs the default target of the build.xml under demo/.
     Each nested <ant> call uses inheritall="false", so properties defined in
     this file are not passed down to the Lucene build.
     NOTE(review): the Ivy jar version (2.3.0) is hard-coded here; presumably it
     matches what ivy-bootstrap installs. Confirm.
     NOTE(review): the "jar" target in this file also copies a queryparser jar
     that is not built explicitly here; presumably the demo build produces it
     as a dependency. Confirm. -->
<target name="build">
<ant antfile="common-build.xml" target="ivy-bootstrap" dir="${lib-build-top}" inheritall="false"/>
<typedef uri="antlib:org.apache.ivy.ant" resource="org/apache/ivy/ant/antlib.xml" classpath="${user.home}/.ant/lib/ivy-2.3.0.jar"/>
<ant antfile="build.xml" target="jar-core" dir="${lib-build-top}" inheritall="false"/>
<ant antfile="build.xml" target="default" dir="${lib-build-top}/demo/" inheritall="false"/>
</target>
<target name="jar">
<copy file="${lib-build-top}/build/lucene-core-${lib-major-version}.jar" todir="${lib-jars}"/>
<copy file="${lib-build-top}/build/lucene-demos-${lib-major-version}.jar" todir="${lib-jars}"/>
</target>
<!-- Copies the built Lucene jars into ${lib-jars}. Each jar is picked up from
     its own directory under ${lib-build-top}/build (core, demo, queryparser)
     with a wildcard on the version part of the file name, so the copy does not
     depend on the exact version suffix of the produced jars. -->
<target name="jar">
<copy todir="${lib-jars}">
<fileset dir="${lib-build-top}/build/core">
<include name="lucene-core-*.jar"/>
</fileset>
</copy>
<copy todir="${lib-jars}">
<fileset dir="${lib-build-top}/build/demo">
<include name="lucene-demo-*.jar"/>
</fileset>
</copy>
<copy todir="${lib-jars}">
<fileset dir="${lib-build-top}/build/queryparser/">
<include name="lucene-queryparser-*.jar"/>
</fileset>
</copy>
</target>
</project>
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment