changeset 545:ddcd4296107a

clean up lucene search
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 14 Jun 2015 01:34:42 -0600
parents c5a93767cc5c
children eaef1005ab87
files lucene/src/luan/modules/lucene/Ab_testing.luan lucene/src/luan/modules/lucene/Lucene.luan lucene/src/luan/modules/lucene/LuceneIndex.java lucene/src/luan/modules/lucene/LuceneSearcher.java lucene/src/luan/modules/lucene/LuceneWriter.java lucene/src/luan/modules/lucene/Web_search.luan
diffstat 6 files changed, 171 insertions(+), 222 deletions(-) [+]
line wrap: on
line diff
diff -r c5a93767cc5c -r ddcd4296107a lucene/src/luan/modules/lucene/Ab_testing.luan
--- a/lucene/src/luan/modules/lucene/Ab_testing.luan	Fri Jun 12 19:11:44 2015 -0600
+++ b/lucene/src/luan/modules/lucene/Ab_testing.luan	Sun Jun 14 01:34:42 2015 -0600
@@ -41,37 +41,36 @@
 
 		-- returns map of event name to (map of value to result) and "start_date"
 		function test.results()
-			return index.Searcher( function(searcher)
-				local results = {}
-				for name in pairs(test.aggregator_factories) do
-					results[name] = {}
+			local results = {}
+			for name in pairs(test.aggregator_factories) do
+				results[name] = {}
+			end
+			local date_field = test.date_field
+			local start_date = nil
+			for _, value in ipairs(test.values) do
+				local aggregators = {}
+				for name, factory in pairs(test.aggregator_factories) do
+					aggregators[name] = factory()
 				end
-				local date_field = test.date_field
-				local start_date = nil
-				for _, value in ipairs(test.values) do
-					local aggregators = {}
-					for name, factory in pairs(test.aggregator_factories) do
-						aggregators[name] = factory()
+				local query = field..":"..value
+				index.advanced_search(query, function(_,doc_fn)
+					local doc = doc_fn()
+					for _, aggregator in pairs(aggregators) do
+						aggregator.aggregate(doc)
 					end
-					local query = field..":"..value
-					searcher.search(query, function(doc)
-						for _, aggregator in pairs(aggregators) do
-							aggregator.aggregate(doc)
+					if date_field ~= nil then
+						local date = doc[date_field]
+						if date ~= nil and (start_date==nil or start_date > date) then
+							start_date = date
 						end
-						if date_field ~= nil then
-							local date = doc[date_field]
-							if date ~= nil and (start_date==nil or start_date > date) then
-								start_date = date
-							end
-						end
-					end)
-					for name, aggregator in pairs(aggregators) do
-						results[name][value] = aggregator.result
 					end
+				end)
+				for name, aggregator in pairs(aggregators) do
+					results[name][value] = aggregator.result
 				end
-				results.start_date = start_date
-				return results
-			end )
+			end
+			results.start_date = start_date
+			return results
 		end
 
 		function test.fancy_results()
diff -r c5a93767cc5c -r ddcd4296107a lucene/src/luan/modules/lucene/Lucene.luan
--- a/lucene/src/luan/modules/lucene/Lucene.luan	Fri Jun 12 19:11:44 2015 -0600
+++ b/lucene/src/luan/modules/lucene/Lucene.luan	Sun Jun 14 01:34:42 2015 -0600
@@ -16,47 +16,51 @@
 
 function M.index(indexDir)
 	local index = {}
-	local java_index = LuceneIndex.new(indexDir)
+	local java_index = LuceneIndex.new(indexDir,index)
 	index.indexed_fields = java_index.indexedFieldsMeta.newTable()
 	index.to_string = java_index.to_string
 	index.backup = java_index.backup
-	index.Writer = java_index.Writer
-	index.Searcher = java_index.Searcher
+	index.writer = java_index.writer
+	index.advanced_search = java_index.advanced_search
+	index.search_in_transaction = java_index.search_in_transaction
 	index.delete_all = java_index.delete_all
 	index.close = java_index.close
 
 	function index.save_document(doc)
-		index.Writer( function(writer)
+		index.writer( function(writer)
 			writer.save_document(doc)
 		end )
 	end
 
 	function index.delete_documents(terms)
-		index.Writer( function(writer)
+		index.writer( function(writer)
 			writer.delete_documents(terms)
 		end )
 	end
 
-	function index.get_first(query, sort)
-		return index.Searcher( function(searcher)
-			local results, _, total_hits = searcher.search(query,1,sort)
-			return results(), total_hits
-		end )
+	function index.search(query, from, to, sort)  -- returns a list of the documents ranked from..to (1-based) plus the total hit count
+		local results = {}
+		local function fn(i,doc_fn)  -- i is the 1-based position of the hit; doc_fn() loads its document
+			if i >= from then  -- skip hits ranked before "from"; NOTE(review): presumably errors if from is nil - confirm callers always pass it
+				results[#results+1] = doc_fn()
+			end
+		end
+		local total_hits = index.advanced_search(query,fn,to,sort)  -- "to" is passed as the hit limit n
+		return results, total_hits
 	end
 
 	function index.get_document(query)
-		local doc, total_hits = index.get_first(query);
-		if total_hits > 1 then
-			error( "found " .. total_hits .. " documents" )
+		local doc
+		local function fn(_,doc_fn)
+			doc = doc_fn()
 		end
+		local total_hits = index.advanced_search(query,fn,1)
+		total_hits <= 1 or error( "found " .. total_hits .. " documents" )
 		return doc
 	end
 
 	function index.count(query)
-		return index.Searcher( function(searcher)
-			local _, _, total_hits = searcher.search(query,0)
-			return total_hits
-		end )
+		return index.advanced_search(query)
 	end
 
 	return index
diff -r c5a93767cc5c -r ddcd4296107a lucene/src/luan/modules/lucene/LuceneIndex.java
--- a/lucene/src/luan/modules/lucene/LuceneIndex.java	Fri Jun 12 19:11:44 2015 -0600
+++ b/lucene/src/luan/modules/lucene/LuceneIndex.java	Sun Jun 14 01:34:42 2015 -0600
@@ -18,6 +18,7 @@
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.SnapshotDeletionPolicy;
+import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
@@ -27,6 +28,10 @@
 import org.apache.lucene.search.Sort;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.TotalHitCountCollector;
+import org.apache.lucene.search.ScoreDoc;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
 import sane.lucene.queryparser.SaneQueryParser;
 import sane.lucene.queryparser.FieldParser;
 import sane.lucene.queryparser.MultiFieldParser;
@@ -41,6 +46,7 @@
 import luan.LuanJavaFunction;
 import luan.LuanException;
 import luan.LuanMeta;
+import luan.LuanRuntimeException;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -52,16 +58,19 @@
 	private static final Analyzer analyzer = new KeywordAnalyzer();
 	public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer);
 
+	final LuanTable myTable;
 	final Lock writeLock = new ReentrantLock();
 	private final File indexDir;
 	final SnapshotDeletionPolicy snapshotDeletionPolicy;
 	final IndexWriter writer;
 	private DirectoryReader reader;
-	private LuceneSearcher searcher;
+	private IndexSearcher searcher;
+	private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>();
 	private boolean isClosed = false;
 	private final MultiFieldParser mfp = new MultiFieldParser();
 
-	public LuceneIndex(LuanState luan,String indexDirStr) throws LuanException, IOException {
+	public LuceneIndex(LuanState luan,String indexDirStr,LuanTable myTable) throws LuanException, IOException {
+		this.myTable = myTable;
 		mfp.fields.put( "type", STRING_FIELD_PARSER );
 		mfp.fields.put( "id", NumberFieldParser.LONG );
 		File indexDir = new File(indexDirStr);
@@ -75,7 +84,7 @@
 		writer.commit();  // commit index creation
 		reader = DirectoryReader.open(dir);
 		luan.onClose(this);
-		searcher = new LuceneSearcher(this,reader);
+		searcher = new IndexSearcher(reader);
 		initId(luan);
 	}
 
@@ -87,17 +96,22 @@
 		return new LuceneWriter(this);
 	}
 
-	synchronized LuceneSearcher openSearcher() throws IOException {
+	private synchronized IndexSearcher openSearcher() throws IOException {
 		DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
 		if( newReader != null ) {
 			reader.decRef();
 			reader = newReader;
-			searcher = new LuceneSearcher(this,reader);
+			searcher = new IndexSearcher(reader);
 		}
 		reader.incRef();
 		return searcher;
 	}
 
+	// Call in a finally block: drops the reader reference that openSearcher() took with incRef().
+	private static void close(IndexSearcher searcher) throws IOException {
+		searcher.getIndexReader().decRef();
+	}
+
 	LuceneSnapshot openSnapshot() throws IOException {
 		return new LuceneSnapshot(this);
 	}
@@ -119,7 +133,6 @@
 	private final int idBatch = 10;
 
 	private void initId(LuanState luan) throws LuanException, IOException {
-		IndexSearcher searcher = this.searcher.searcher;
 		TopDocs td = searcher.search(new TermQuery(new Term("type","next_id")),1);
 		switch(td.totalHits) {
 		case 0:
@@ -172,7 +185,7 @@
 		return writer.getDirectory().toString();
 	}
 
-	public void Writer(LuanState luan,LuanFunction fn) throws LuanException, IOException {
+	public void writer(LuanState luan,LuanFunction fn) throws LuanException, IOException {
 		LuceneWriter writer = openWriter();
 		try {
 			luan.call( fn, new Object[]{writer.table()} );
@@ -182,15 +195,6 @@
 		}
 	}
 
-	public Object Searcher(LuanState luan,LuanFunction fn) throws LuanException, IOException {
-		LuceneSearcher searcher = openSearcher();
-		try {
-			return luan.call( fn, new Object[]{searcher.table()} );
-		} finally {
-			searcher.close();
-		}
-	}
-
 	public void close() throws IOException {
 		if( !isClosed ) {
 			writer.close();
@@ -209,6 +213,100 @@
 
 
 
+	private static class DocFn extends LuanFunction {  // Luan-callable function that loads the document for docID only when invoked
+		final IndexSearcher searcher;
+		int docID;  // set by advanced_search before each callback; one DocFn instance is reused for every hit
+
+		DocFn(IndexSearcher searcher) {
+			this.searcher = searcher;
+		}
+
+		@Override public Object call(LuanState luan,Object[] args) throws LuanException {  // args are ignored
+			try {
+				return LuceneDocument.toTable(luan,searcher.doc(docID));  // reads whatever docID holds at call time
+			} catch(IOException e) {
+				throw luan.exception(e);
+			}
+		}
+	}
+
+	private static abstract class MyCollector extends Collector {  // base collector: remembers the current docBase and holds a counter
+		int docBase;  // docBase of the reader context most recently passed to setNextReader()
+		int i = 0;  // not modified here; the subclass in advanced_search increments it once per collected hit
+
+		@Override public void setScorer(Scorer scorer) {}  // scorer is not used
+		@Override public void setNextReader(AtomicReaderContext context) {
+			this.docBase = context.docBase;
+		}
+		@Override public boolean acceptsDocsOutOfOrder() {
+			return true;  // tells the searcher that hits may be delivered in any doc-id order
+		}
+	}
+
+	// Runs queryStr and returns the total number of hits. fn set, n nil: fn(i,doc_fn) is called for every
+	// hit in collection order (sort must be nil). fn set, n non-zero: fn is called for the top n hits only.
+	// fn nil or n==0: hits are just counted. Reuses this thread's search_in_transaction searcher if any.
+	public int advanced_search( final LuanState luan, String queryStr, final LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException {
+		Utils.checkNotNull(luan,queryStr);
+		Query query = parseQuery(queryStr);
+		IndexSearcher searcher = threadLocalSearcher.get();
+		boolean inTransaction = searcher != null;
+		if( !inTransaction )  searcher = openSearcher();
+		try {
+			if( fn!=null && n==null ) {
+				if( sortStr != null )  throw luan.exception("sort must be nil when n is nil");
+				final DocFn docFn = new DocFn(searcher);
+				MyCollector col = new MyCollector() {
+					@Override public void collect(int doc) {
+						try {
+							docFn.docID = docBase + doc;  // doc is segment-relative; searcher.doc() needs the index-wide id
+							luan.call(fn,new Object[]{++i,docFn});
+						} catch(LuanException e) {
+							throw new LuanRuntimeException(e);  // tunnelled through collect(); unwrapped after search()
+						}
+					}
+				};
+				try {
+					searcher.search(query,col);
+				} catch(LuanRuntimeException e) {
+					throw (LuanException)e.getCause();
+				}
+				return col.i;
+			}
+			if( fn==null || n==0 ) {
+				TotalHitCountCollector thcc = new TotalHitCountCollector();
+				searcher.search(query,thcc);
+				return thcc.getTotalHits();
+			}
+			Sort sort = sortStr==null ? null : parseSort(sortStr);
+			TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort);
+			final ScoreDoc[] scoreDocs = td.scoreDocs;
+			DocFn docFn = new DocFn(searcher);
+			for( int i=0; i<scoreDocs.length; i++ ) {
+				docFn.docID = scoreDocs[i].doc;  // TopDocs ids are already index-wide
+				luan.call(fn,new Object[]{i+1,docFn});
+			}
+			return td.totalHits;
+		} finally {
+			if( !inTransaction )  close(searcher);  // a transaction's searcher is released by search_in_transaction
+		}
+	}
+
+	public Object search_in_transaction(LuanState luan,LuanFunction fn) throws LuanException, IOException {  // runs fn with one searcher pinned to this thread; returns fn's result
+		if( threadLocalSearcher.get() != null )
+			throw luan.exception("can't nest search_in_transaction calls");
+		IndexSearcher searcher = openSearcher();
+		threadLocalSearcher.set(searcher);  // advanced_search on this thread now reuses this searcher instead of opening its own
+		try {
+			return luan.call(fn);
+		} finally {
+			threadLocalSearcher.set(null);  // unpin first so later searches on this thread open their own searcher
+			close(searcher);
+		}
+	}
+
+
+
 	public final LuanMeta indexedFieldsMeta = new LuanMeta() {
 
 		@Override public boolean canNewindex() {
diff -r c5a93767cc5c -r ddcd4296107a lucene/src/luan/modules/lucene/LuceneSearcher.java
--- a/lucene/src/luan/modules/lucene/LuceneSearcher.java	Fri Jun 12 19:11:44 2015 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,151 +0,0 @@
-package luan.modules.lucene;
-
-import java.io.IOException;
-import java.util.NoSuchElementException;
-import java.util.Map;
-import java.util.List;
-import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.TopDocs;
-import org.apache.lucene.search.TopFieldDocs;
-import org.apache.lucene.search.Sort;
-import org.apache.lucene.search.SortField;
-import org.apache.lucene.search.ScoreDoc;
-import org.apache.lucene.search.TermQuery;
-import org.apache.lucene.search.BooleanQuery;
-import org.apache.lucene.search.BooleanClause;
-import org.apache.lucene.search.Collector;
-import org.apache.lucene.search.TotalHitCountCollector;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.MatchAllDocsQuery;
-import org.apache.lucene.index.AtomicReaderContext;
-import sane.lucene.queryparser.ParseException;
-import luan.Luan;
-import luan.LuanState;
-import luan.LuanTable;
-import luan.LuanFunction;
-import luan.LuanJavaFunction;
-import luan.LuanException;
-import luan.LuanRuntimeException;
-import luan.LuanMethod;
-import luan.modules.Utils;
-
-
-public final class LuceneSearcher {
-	private final LuceneIndex index;
-	final IndexSearcher searcher;
-
-	LuceneSearcher(LuceneIndex index,IndexReader reader) {
-		this.index = index;
-		this.searcher = new IndexSearcher(reader);
-	}
-
-	// call in finally block
-	void close() throws IOException {
-		searcher.getIndexReader().decRef();
-	}
-
-	private LuanTable doc(LuanState luan,int docID) throws LuanException, IOException {
-		return LuceneDocument.toTable(luan,searcher.doc(docID));
-	}
-/*
-	TopDocs search(Query query,int n) throws IOException {
-		return searcher.search(query,n);
-	}
-
-	TopFieldDocs search(Query query,int n,Sort sort) throws IOException {
-		return searcher.search(query,n,sort);
-	}
-*/
-	// luan
-
-	private static final LuanFunction nothingFn = new LuanFunction() {
-		@Override public Object call(LuanState luan,Object[] args) {
-			return LuanFunction.NOTHING;
-		}
-	};
-
-	private static abstract class MyCollector extends Collector {
-		int docBase;
-
-		@Override public void setScorer(Scorer scorer) {}
-		@Override public void setNextReader(AtomicReaderContext context) {
-			this.docBase = context.docBase;
-		}
-		@Override public boolean acceptsDocsOutOfOrder() {
-			return true;
-		}
-	}
-
-	@LuanMethod public Object[] search( final LuanState luan, String queryStr, Object nObj, String sortStr ) throws LuanException, IOException, ParseException {
-		Utils.checkNotNull(luan,queryStr);
-		Query query = index.parseQuery(queryStr);
-		if( nObj instanceof LuanFunction ) {
-			final LuanFunction fn = (LuanFunction)nObj;
-			Collector col = new MyCollector() {
-				@Override public void collect(int doc) {
-					try {
-						try {
-							LuanTable docTbl = doc(luan,docBase+doc);
-							luan.call(fn,new Object[]{docTbl});
-						} catch(IOException e) {
-							throw luan.exception(e);
-						}
-					} catch(LuanException e) {
-						throw new LuanRuntimeException(e);
-					}
-				}
-			};
-			try {
-				searcher.search(query,col);
-			} catch(LuanRuntimeException e) {
-				throw (LuanException)e.getCause();
-			}
-			return LuanFunction.NOTHING;
-		}
-		Integer nI = Luan.asInteger(nObj);
-		if( nI == null )
-			throw luan.exception("bad argument #2 (integer or function expected, got "+Luan.type(nObj)+")");
-		int n = nI;
-		if( n==0 ) {
-			TotalHitCountCollector thcc = new TotalHitCountCollector();
-			searcher.search(query,thcc);
-			return new Object[]{ nothingFn, 0, thcc.getTotalHits() };
-		}
-		Sort sort = sortStr==null ? null : index.parseSort(sortStr);
-		TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort);
-		final ScoreDoc[] scoreDocs = td.scoreDocs;
-		LuanFunction results = new LuanFunction() {
-			int i = 0;
-
-			@Override public Object call(LuanState luan,Object[] args) throws LuanException {
-				if( i >= scoreDocs.length )
-					return LuanFunction.NOTHING;
-				try {
-					LuanTable doc = doc(luan,scoreDocs[i++].doc);
-					return doc;
-				} catch(IOException e) {
-					throw luan.exception(e);
-				}
-			}
-		};
-		return new Object[]{ results, scoreDocs.length, td.totalHits };
-	}
-
-	private void add(LuanTable t,String method,Class<?>... parameterTypes) throws NoSuchMethodException {
-		t.rawPut( method, new LuanJavaFunction(LuceneSearcher.class.getMethod(method,parameterTypes),this) );
-	}
-
-	LuanTable table() {
-		LuanTable tbl = new LuanTable();
-		try {
-			add( tbl, "search", LuanState.class, String.class, Object.class, String.class );
-		} catch(NoSuchMethodException e) {
-			throw new RuntimeException(e);
-		}
-		return tbl;
-	}
-
-}
diff -r c5a93767cc5c -r ddcd4296107a lucene/src/luan/modules/lucene/LuceneWriter.java
--- a/lucene/src/luan/modules/lucene/LuceneWriter.java	Fri Jun 12 19:11:44 2015 -0600
+++ b/lucene/src/luan/modules/lucene/LuceneWriter.java	Sun Jun 14 01:34:42 2015 -0600
@@ -98,6 +98,7 @@
 		} catch(NoSuchMethodException e) {
 			throw new RuntimeException(e);
 		}
+		tbl.rawPut("index",index.myTable);
 		return tbl;
 	}
 
diff -r c5a93767cc5c -r ddcd4296107a lucene/src/luan/modules/lucene/Web_search.luan
--- a/lucene/src/luan/modules/lucene/Web_search.luan	Fri Jun 12 19:11:44 2015 -0600
+++ b/lucene/src/luan/modules/lucene/Web_search.luan	Sun Jun 14 01:34:42 2015 -0600
@@ -133,19 +133,17 @@
 		end
 		local rows = string_to_number(Http.request.parameter.rows)
 		local sort = Http.request.parameter.sort
-		index.Searcher( function(searcher)
-			local results, length, total_hits = searcher.search(query,rows,sort)
-			local headers = {}
-			local table = {}
-			for doc in results do
-				local row = {}
-				for field, value in pairs(doc) do
-					row[index_of(headers,field)] = value
-				end
-				table[#table+1] = row
+		local results = index.search(query,1,rows,sort)
+		local headers = {}
+		local table = {}
+		for _, doc in ipairs(results) do
+			local row = {}
+			for field, value in pairs(doc) do
+				row[index_of(headers,field)] = value
 			end
-			result(query,sort,headers,table)
-		end )
+			table[#table+1] = row
+		end
+		result(query,sort,headers,table)
 	end
 
 end