Mercurial Hosting > luan
changeset 545:ddcd4296107a
clean up lucene search
| author | Franklin Schmidt <fschmidt@gmail.com> | 
|---|---|
| date | Sun, 14 Jun 2015 01:34:42 -0600 | 
| parents | c5a93767cc5c | 
| children | eaef1005ab87 | 
| files | lucene/src/luan/modules/lucene/Ab_testing.luan lucene/src/luan/modules/lucene/Lucene.luan lucene/src/luan/modules/lucene/LuceneIndex.java lucene/src/luan/modules/lucene/LuceneSearcher.java lucene/src/luan/modules/lucene/LuceneWriter.java lucene/src/luan/modules/lucene/Web_search.luan | 
| diffstat | 6 files changed, 171 insertions(+), 222 deletions(-) [+] | 
line wrap: on
 line diff
--- a/lucene/src/luan/modules/lucene/Ab_testing.luan Fri Jun 12 19:11:44 2015 -0600 +++ b/lucene/src/luan/modules/lucene/Ab_testing.luan Sun Jun 14 01:34:42 2015 -0600 @@ -41,37 +41,36 @@ -- returns map of event name to (map of value to result) and "start_date" function test.results() - return index.Searcher( function(searcher) - local results = {} - for name in pairs(test.aggregator_factories) do - results[name] = {} + local results = {} + for name in pairs(test.aggregator_factories) do + results[name] = {} + end + local date_field = test.date_field + local start_date = nil + for _, value in ipairs(test.values) do + local aggregators = {} + for name, factory in pairs(test.aggregator_factories) do + aggregators[name] = factory() end - local date_field = test.date_field - local start_date = nil - for _, value in ipairs(test.values) do - local aggregators = {} - for name, factory in pairs(test.aggregator_factories) do - aggregators[name] = factory() + local query = field..":"..value + index.advanced_search(query, function(_,doc_fn) + local doc = doc_fn() + for _, aggregator in pairs(aggregators) do + aggregator.aggregate(doc) end - local query = field..":"..value - searcher.search(query, function(doc) - for _, aggregator in pairs(aggregators) do - aggregator.aggregate(doc) + if date_field ~= nil then + local date = doc[date_field] + if date ~= nil and (start_date==nil or start_date > date) then + start_date = date end - if date_field ~= nil then - local date = doc[date_field] - if date ~= nil and (start_date==nil or start_date > date) then - start_date = date - end - end - end) - for name, aggregator in pairs(aggregators) do - results[name][value] = aggregator.result end + end) + for name, aggregator in pairs(aggregators) do + results[name][value] = aggregator.result end - results.start_date = start_date - return results - end ) + end + results.start_date = start_date + return results end function test.fancy_results()
--- a/lucene/src/luan/modules/lucene/Lucene.luan Fri Jun 12 19:11:44 2015 -0600 +++ b/lucene/src/luan/modules/lucene/Lucene.luan Sun Jun 14 01:34:42 2015 -0600 @@ -16,47 +16,51 @@ function M.index(indexDir) local index = {} - local java_index = LuceneIndex.new(indexDir) + local java_index = LuceneIndex.new(indexDir,index) index.indexed_fields = java_index.indexedFieldsMeta.newTable() index.to_string = java_index.to_string index.backup = java_index.backup - index.Writer = java_index.Writer - index.Searcher = java_index.Searcher + index.writer = java_index.writer + index.advanced_search = java_index.advanced_search + index.search_in_transaction = java_index.search_in_transaction index.delete_all = java_index.delete_all index.close = java_index.close function index.save_document(doc) - index.Writer( function(writer) + index.writer( function(writer) writer.save_document(doc) end ) end function index.delete_documents(terms) - index.Writer( function(writer) + index.writer( function(writer) writer.delete_documents(terms) end ) end - function index.get_first(query, sort) - return index.Searcher( function(searcher) - local results, _, total_hits = searcher.search(query,1,sort) - return results(), total_hits - end ) + function index.search(query, from, to, sort) + local results = {} + local function fn(i,doc_fn) + if i >= from then + results[#results+1] = doc_fn() + end + end + local total_hits = index.advanced_search(query,fn,to,sort) + return results, total_hits end function index.get_document(query) - local doc, total_hits = index.get_first(query); - if total_hits > 1 then - error( "found " .. total_hits .. " documents" ) + local doc + local function fn(_,doc_fn) + doc = doc_fn() end + local total_hits = index.advanced_search(query,fn,1) + total_hits <= 1 or error( "found " .. total_hits .. " documents" ) return doc end function index.count(query) - return index.Searcher( function(searcher) - local _, _, total_hits = searcher.search(query,0) - return total_hits - end ) + return index.advanced_search(query) end return index
--- a/lucene/src/luan/modules/lucene/LuceneIndex.java Fri Jun 12 19:11:44 2015 -0600 +++ b/lucene/src/luan/modules/lucene/LuceneIndex.java Sun Jun 14 01:34:42 2015 -0600 @@ -18,6 +18,7 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.SnapshotDeletionPolicy; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; @@ -27,6 +28,10 @@ import org.apache.lucene.search.Sort; import org.apache.lucene.search.SortField; import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.TotalHitCountCollector; +import org.apache.lucene.search.ScoreDoc; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; import sane.lucene.queryparser.SaneQueryParser; import sane.lucene.queryparser.FieldParser; import sane.lucene.queryparser.MultiFieldParser; @@ -41,6 +46,7 @@ import luan.LuanJavaFunction; import luan.LuanException; import luan.LuanMeta; +import luan.LuanRuntimeException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -52,16 +58,19 @@ private static final Analyzer analyzer = new KeywordAnalyzer(); public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer); + final LuanTable myTable; final Lock writeLock = new ReentrantLock(); private final File indexDir; final SnapshotDeletionPolicy snapshotDeletionPolicy; final IndexWriter writer; private DirectoryReader reader; - private LuceneSearcher searcher; + private IndexSearcher searcher; + private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); private boolean isClosed = false; private final MultiFieldParser mfp = new MultiFieldParser(); - public LuceneIndex(LuanState luan,String indexDirStr) throws LuanException, IOException { + public LuceneIndex(LuanState luan,String indexDirStr,LuanTable myTable) throws LuanException, IOException { + this.myTable = myTable; mfp.fields.put( "type", STRING_FIELD_PARSER ); mfp.fields.put( "id", NumberFieldParser.LONG ); File indexDir = new File(indexDirStr); @@ -75,7 +84,7 @@ writer.commit(); // commit index creation reader = DirectoryReader.open(dir); luan.onClose(this); - searcher = new LuceneSearcher(this,reader); + searcher = new IndexSearcher(reader); initId(luan); } @@ -87,17 +96,22 @@ return new LuceneWriter(this); } - synchronized LuceneSearcher openSearcher() throws IOException { + private synchronized IndexSearcher openSearcher() throws IOException { DirectoryReader newReader = DirectoryReader.openIfChanged(reader); if( newReader != null ) { reader.decRef(); reader = newReader; - searcher = new LuceneSearcher(this,reader); + searcher = new IndexSearcher(reader); } reader.incRef(); return searcher; } + // call in finally block + private static void close(IndexSearcher searcher) throws IOException { + searcher.getIndexReader().decRef(); + } + LuceneSnapshot openSnapshot() throws IOException { return new LuceneSnapshot(this); } @@ -119,7 +133,6 @@ private final int idBatch = 10; private void initId(LuanState luan) throws LuanException, IOException { - IndexSearcher searcher = this.searcher.searcher; TopDocs td = searcher.search(new TermQuery(new Term("type","next_id")),1); switch(td.totalHits) { case 0: @@ -172,7 +185,7 @@ return writer.getDirectory().toString(); } - public void Writer(LuanState luan,LuanFunction fn) throws LuanException, IOException { + public void writer(LuanState luan,LuanFunction fn) throws LuanException, IOException { LuceneWriter writer = openWriter(); try { luan.call( fn, new Object[]{writer.table()} ); @@ -182,15 +195,6 @@ } } - public Object Searcher(LuanState luan,LuanFunction fn) throws LuanException, IOException { - LuceneSearcher searcher = openSearcher(); - try { - return luan.call( fn, new Object[]{searcher.table()} ); - } finally { - searcher.close(); - } - } - public void close() throws IOException { if( !isClosed ) { writer.close(); @@ -209,6 +213,100 @@ + private static class DocFn extends LuanFunction { + final IndexSearcher searcher; + int docID; + + DocFn(IndexSearcher searcher) { + this.searcher = searcher; + } + + @Override public Object call(LuanState luan,Object[] args) throws LuanException { + try { + return LuceneDocument.toTable(luan,searcher.doc(docID)); + } catch(IOException e) { + throw luan.exception(e); + } + } + } + + private static abstract class MyCollector extends Collector { + int docBase; + int i = 0; + + @Override public void setScorer(Scorer scorer) {} + @Override public void setNextReader(AtomicReaderContext context) { + this.docBase = context.docBase; + } + @Override public boolean acceptsDocsOutOfOrder() { + return true; + } + } + + public int advanced_search( final LuanState luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException { + Utils.checkNotNull(luan,queryStr); + Query query = parseQuery(queryStr); + IndexSearcher searcher = threadLocalSearcher.get(); + boolean inTransaction = searcher != null; + if( !inTransaction ) + searcher = openSearcher(); + try { + if( fn!=null && n==null ) { + if( sortStr != null ) + throw luan.exception("sort must be nil when n is nil"); + final DocFn docFn = new DocFn(searcher); + MyCollector col = new MyCollector() { + @Override public void collect(int doc) { + try { + docFn.docID = doc; + luan.call(fn,new Object[]{++i,docFn}); + } catch(LuanException e) { + throw new LuanRuntimeException(e); + } + } + }; + try { + searcher.search(query,col); + } catch(LuanRuntimeException e) { + throw (LuanException)e.getCause(); + } + return col.i; + } + if( fn==null || n==0 ) { + TotalHitCountCollector thcc = new TotalHitCountCollector(); + searcher.search(query,thcc); + return thcc.getTotalHits(); + } + Sort sort = sortStr==null ? null : parseSort(sortStr); + TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort); + final ScoreDoc[] scoreDocs = td.scoreDocs; + DocFn docFn = new DocFn(searcher); + for( int i=0; i<scoreDocs.length; i++ ) { + docFn.docID = scoreDocs[i].doc; + luan.call(fn,new Object[]{i+1,docFn}); + } + return td.totalHits; + } finally { + if( !inTransaction ) + close(searcher); + } + } + + public Object search_in_transaction(LuanState luan,LuanFunction fn) throws LuanException, IOException { + if( threadLocalSearcher.get() != null ) + throw luan.exception("can't nest search_in_transaction calls"); + IndexSearcher searcher = openSearcher(); + threadLocalSearcher.set(searcher); + try { + return luan.call(fn); + } finally { + threadLocalSearcher.set(null); + close(searcher); + } + } + + + public final LuanMeta indexedFieldsMeta = new LuanMeta() { @Override public boolean canNewindex() {
--- a/lucene/src/luan/modules/lucene/LuceneSearcher.java Fri Jun 12 19:11:44 2015 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,151 +0,0 @@ -package luan.modules.lucene; - -import java.io.IOException; -import java.util.NoSuchElementException; -import java.util.Map; -import java.util.List; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.search.IndexSearcher; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.TopFieldDocs; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.ScoreDoc; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.Collector; -import org.apache.lucene.search.TotalHitCountCollector; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.index.AtomicReaderContext; -import sane.lucene.queryparser.ParseException; -import luan.Luan; -import luan.LuanState; -import luan.LuanTable; -import luan.LuanFunction; -import luan.LuanJavaFunction; -import luan.LuanException; -import luan.LuanRuntimeException; -import luan.LuanMethod; -import luan.modules.Utils; - - -public final class LuceneSearcher { - private final LuceneIndex index; - final IndexSearcher searcher; - - LuceneSearcher(LuceneIndex index,IndexReader reader) { - this.index = index; - this.searcher = new IndexSearcher(reader); - } - - // call in finally block - void close() throws IOException { - searcher.getIndexReader().decRef(); - } - - private LuanTable doc(LuanState luan,int docID) throws LuanException, IOException { - return LuceneDocument.toTable(luan,searcher.doc(docID)); - } -/* - TopDocs search(Query query,int n) throws IOException { - return searcher.search(query,n); - } - - TopFieldDocs search(Query query,int n,Sort sort) throws IOException { - return searcher.search(query,n,sort); - } -*/ - // luan - - private static final LuanFunction nothingFn = new LuanFunction() { - @Override public Object call(LuanState luan,Object[] args) { - return LuanFunction.NOTHING; - } - }; - - private static abstract class MyCollector extends Collector { - int docBase; - - @Override public void setScorer(Scorer scorer) {} - @Override public void setNextReader(AtomicReaderContext context) { - this.docBase = context.docBase; - } - @Override public boolean acceptsDocsOutOfOrder() { - return true; - } - } - - @LuanMethod public Object[] search( final LuanState luan, String queryStr, Object nObj, String sortStr ) throws LuanException, IOException, ParseException { - Utils.checkNotNull(luan,queryStr); - Query query = index.parseQuery(queryStr); - if( nObj instanceof LuanFunction ) { - final LuanFunction fn = (LuanFunction)nObj; - Collector col = new MyCollector() { - @Override public void collect(int doc) { - try { - try { - LuanTable docTbl = doc(luan,docBase+doc); - luan.call(fn,new Object[]{docTbl}); - } catch(IOException e) { - throw luan.exception(e); - } - } catch(LuanException e) { - throw new LuanRuntimeException(e); - } - } - }; - try { - searcher.search(query,col); - } catch(LuanRuntimeException e) { - throw (LuanException)e.getCause(); - } - return LuanFunction.NOTHING; - } - Integer nI = Luan.asInteger(nObj); - if( nI == null ) - throw luan.exception("bad argument #2 (integer or function expected, got "+Luan.type(nObj)+")"); - int n = nI; - if( n==0 ) { - TotalHitCountCollector thcc = new TotalHitCountCollector(); - searcher.search(query,thcc); - return new Object[]{ nothingFn, 0, thcc.getTotalHits() }; - } - Sort sort = sortStr==null ? null : index.parseSort(sortStr); - TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort); - final ScoreDoc[] scoreDocs = td.scoreDocs; - LuanFunction results = new LuanFunction() { - int i = 0; - - @Override public Object call(LuanState luan,Object[] args) throws LuanException { - if( i >= scoreDocs.length ) - return LuanFunction.NOTHING; - try { - LuanTable doc = doc(luan,scoreDocs[i++].doc); - return doc; - } catch(IOException e) { - throw luan.exception(e); - } - } - }; - return new Object[]{ results, scoreDocs.length, td.totalHits }; - } - - private void add(LuanTable t,String method,Class<?>... parameterTypes) throws NoSuchMethodException { - t.rawPut( method, new LuanJavaFunction(LuceneSearcher.class.getMethod(method,parameterTypes),this) ); - } - - LuanTable table() { - LuanTable tbl = new LuanTable(); - try { - add( tbl, "search", LuanState.class, String.class, Object.class, String.class ); - } catch(NoSuchMethodException e) { - throw new RuntimeException(e); - } - return tbl; - } - -}
--- a/lucene/src/luan/modules/lucene/LuceneWriter.java Fri Jun 12 19:11:44 2015 -0600 +++ b/lucene/src/luan/modules/lucene/LuceneWriter.java Sun Jun 14 01:34:42 2015 -0600 @@ -98,6 +98,7 @@ } catch(NoSuchMethodException e) { throw new RuntimeException(e); } + tbl.rawPut("index",index.myTable); return tbl; }
--- a/lucene/src/luan/modules/lucene/Web_search.luan Fri Jun 12 19:11:44 2015 -0600 +++ b/lucene/src/luan/modules/lucene/Web_search.luan Sun Jun 14 01:34:42 2015 -0600 @@ -133,19 +133,17 @@ end local rows = string_to_number(Http.request.parameter.rows) local sort = Http.request.parameter.sort - index.Searcher( function(searcher) - local results, length, total_hits = searcher.search(query,rows,sort) - local headers = {} - local table = {} - for doc in results do - local row = {} - for field, value in pairs(doc) do - row[index_of(headers,field)] = value - end - table[#table+1] = row + local results = index.search(query,1,rows,sort) + local headers = {} + local table = {} + for _, doc in ipairs(results) do + local row = {} + for field, value in pairs(doc) do + row[index_of(headers,field)] = value end - result(query,sort,headers,table) - end ) + table[#table+1] = row + end + result(query,sort,headers,table) end end
