Mercurial Hosting > luan
changeset 546:eaef1005ab87
general lucene cleanup
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sun, 14 Jun 2015 22:17:58 -0600 |
parents | ddcd4296107a |
children | 0be287ab0309 |
files | lucene/src/luan/modules/lucene/Lucene.luan lucene/src/luan/modules/lucene/LuceneDocument.java lucene/src/luan/modules/lucene/LuceneIndex.java lucene/src/luan/modules/lucene/LuceneSnapshot.java lucene/src/luan/modules/lucene/LuceneWriter.java |
diffstat | 5 files changed, 195 insertions(+), 296 deletions(-) [+] |
line wrap: on
line diff
--- a/lucene/src/luan/modules/lucene/Lucene.luan Sun Jun 14 01:34:42 2015 -0600 +++ b/lucene/src/luan/modules/lucene/Lucene.luan Sun Jun 14 22:17:58 2015 -0600 @@ -16,28 +16,18 @@ function M.index(indexDir) local index = {} - local java_index = LuceneIndex.new(indexDir,index) + local java_index = LuceneIndex.new(indexDir) index.indexed_fields = java_index.indexedFieldsMeta.newTable() index.to_string = java_index.to_string index.backup = java_index.backup - index.writer = java_index.writer index.advanced_search = java_index.advanced_search index.search_in_transaction = java_index.search_in_transaction index.delete_all = java_index.delete_all + index.delete_documents = java_index.delete_documents + index.save_document = java_index.save_document + index.update_in_transaction = java_index.update_in_transaction index.close = java_index.close - function index.save_document(doc) - index.writer( function(writer) - writer.save_document(doc) - end ) - end - - function index.delete_documents(terms) - index.writer( function(writer) - writer.delete_documents(terms) - end ) - end - function index.search(query, from, to, sort) local results = {} local function fn(i,doc_fn)
--- a/lucene/src/luan/modules/lucene/LuceneDocument.java Sun Jun 14 01:34:42 2015 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,96 +0,0 @@ -package luan.modules.lucene; - -import java.util.Map; -import java.util.HashMap; -import java.util.Set; -import java.util.HashSet; -import java.util.Arrays; -import org.apache.lucene.document.Document; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.document.Field; -import org.apache.lucene.document.StoredField; -import org.apache.lucene.document.StringField; -import org.apache.lucene.document.IntField; -import org.apache.lucene.document.LongField; -import org.apache.lucene.document.DoubleField; -import org.apache.lucene.util.BytesRef; -import luan.Luan; -import luan.LuanState; -import luan.LuanTable; -import luan.LuanException; - - -public class LuceneDocument { - private LuceneDocument(String a) {} // never - - static Document toLucene(LuanState luan,LuanTable table,Set<String> indexed) throws LuanException { - Document doc = new Document(); - for( Map.Entry<Object,Object> entry : table.iterable(luan) ) { - Object key = entry.getKey(); - if( !(key instanceof String) ) - throw luan.exception("key must be string"); - String name = (String)key; - Object value = entry.getValue(); - if( value instanceof String ) { - String s = (String)value; - if( indexed.contains(name) ) { - doc.add(new StringField(name, s, Field.Store.YES)); - } else { - doc.add(new StoredField(name, s)); - } - } else if( value instanceof Integer ) { - int i = (Integer)value; - if( indexed.contains(name) ) { - doc.add(new IntField(name, i, Field.Store.YES)); - } else { - doc.add(new StoredField(name, i)); - } - } else if( value instanceof Long ) { - long i = (Long)value; - if( indexed.contains(name) ) { - doc.add(new LongField(name, i, Field.Store.YES)); - } else { - doc.add(new StoredField(name, i)); - } - } else if( value instanceof Double ) { - double i = (Double)value; - if( indexed.contains(name) ) { - doc.add(new DoubleField(name, i, Field.Store.YES)); - } else { - doc.add(new StoredField(name, i)); - } - } else if( value instanceof byte[] ) { - byte[] b = (byte[])value; - doc.add(new StoredField(name, b)); - } else - throw luan.exception("invalid value type "+value.getClass()+"' for '"+name+"'"); - } - return doc; - } - - static LuanTable toTable(LuanState luan,Document doc) throws LuanException { - if( doc==null ) - return null; - LuanTable table = new LuanTable(); - for( IndexableField ifld : doc ) { - String name = ifld.name(); - BytesRef br = ifld.binaryValue(); - if( br != null ) { - table.rawPut(name,br.bytes); - continue; - } - Number n = ifld.numericValue(); - if( n != null ) { - table.rawPut(name,n); - continue; - } - String s = ifld.stringValue(); - if( s != null ) { - table.rawPut(name,s); - continue; - } - throw luan.exception("invalid field type for "+ifld); - } - return table; - } -}
--- a/lucene/src/luan/modules/lucene/LuceneIndex.java Sun Jun 14 01:34:42 2015 -0600 +++ b/lucene/src/luan/modules/lucene/LuceneIndex.java Sun Jun 14 22:17:58 2015 -0600 @@ -6,6 +6,10 @@ import java.io.FileInputStream; import java.io.IOException; import java.util.Iterator; +import java.util.Map; +import java.util.List; +import java.util.ArrayList; +import java.util.Set; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import java.util.zip.ZipOutputStream; @@ -13,15 +17,25 @@ import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.LongField; +import org.apache.lucene.document.DoubleField; +import org.apache.lucene.index.IndexableField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.Term; import org.apache.lucene.index.SnapshotDeletionPolicy; +import org.apache.lucene.index.IndexCommit; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.util.Version; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; @@ -58,19 +72,17 @@ private static final Analyzer analyzer = new KeywordAnalyzer(); public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer); - final LuanTable myTable; - final Lock writeLock = new ReentrantLock(); + private final ReentrantLock writeLock = new ReentrantLock(); private final File indexDir; final SnapshotDeletionPolicy snapshotDeletionPolicy; - final IndexWriter writer; + private final IndexWriter writer; private DirectoryReader reader; private IndexSearcher searcher; private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); private boolean isClosed = false; private final MultiFieldParser mfp = new MultiFieldParser(); - public LuceneIndex(LuanState luan,String indexDirStr,LuanTable myTable) throws LuanException, IOException { - this.myTable = myTable; + public LuceneIndex(LuanState luan,String indexDirStr) throws LuanException, IOException { mfp.fields.put( "type", STRING_FIELD_PARSER ); mfp.fields.put( "id", NumberFieldParser.LONG ); File indexDir = new File(indexDirStr); @@ -88,40 +100,93 @@ initId(luan); } - Document toLucene(LuanState luan,LuanTable table) throws LuanException { - return LuceneDocument.toLucene(luan,table,mfp.fields.keySet()); - } - public LuceneWriter openWriter() { - return new LuceneWriter(this); - } - - private synchronized IndexSearcher openSearcher() throws IOException { - DirectoryReader newReader = DirectoryReader.openIfChanged(reader); - if( newReader != null ) { - reader.decRef(); - reader = newReader; - searcher = new IndexSearcher(reader); - } - reader.incRef(); - return searcher; - } - - // call in finally block - private static void close(IndexSearcher searcher) throws IOException { - searcher.getIndexReader().decRef(); - } - - LuceneSnapshot openSnapshot() throws IOException { - return new LuceneSnapshot(this); - } public void delete_all() throws IOException { + boolean commit = !writeLock.isHeldByCurrentThread(); writeLock.lock(); try { writer.deleteAll(); - writer.commit(); id = idLim = 0; + if(commit) writer.commit(); + } finally { + writeLock.unlock(); + } + } + + private static Term term(String key,int value) { + BytesRef br = new BytesRef(); + NumericUtils.intToPrefixCoded(value,0,br); + return new Term(key,br); + } + + private static Term term(String key,long value) { + BytesRef br = new BytesRef(); + NumericUtils.longToPrefixCoded(value,0,br); + return new Term(key,br); + } + + private static Term term(LuanState luan,String key,Object value) throws LuanException { + if( value instanceof String ) + return new Term( key, (String)value ); + if( value instanceof Integer ) + return term( key, (Integer)value ); + if( value instanceof Long ) + return term( key, (Long)value ); + if( value instanceof Float ) + return term( key, NumericUtils.floatToSortableInt((Float)value) ); + if( value instanceof Double ) + return term( key, NumericUtils.doubleToSortableLong((Double)value) ); + throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for key '"+key+"'"); + } + + public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException { + List<Term> list = new ArrayList<Term>(); + for( Map.Entry<Object,Object> entry : tblTerms.iterable(luan) ) { + Object key = entry.getKey(); + Object value = entry.getValue(); + if( !(key instanceof String) ) + throw luan.exception("key must be a string but got "+key.getClass().getSimpleName()); + list.add( term( luan, (String)key, value ) ); + } + + boolean commit = !writeLock.isHeldByCurrentThread(); + writeLock.lock(); + try { + writer.deleteDocuments(list.toArray(new Term[list.size()])); + if(commit) writer.commit(); + } finally { + writeLock.unlock(); + } + } + + public void save_document(LuanState luan,LuanTable doc) throws LuanException, IOException { + if( doc.get(luan,"type")==null ) + throw luan.exception("missing 'type' field"); + Long id = (Long)doc.get(luan,"id"); + + boolean commit = !writeLock.isHeldByCurrentThread(); + writeLock.lock(); + try { + if( id == null ) { + id = nextId(luan); + doc.put(luan,"id",id); + writer.addDocument(toLucene(luan,doc)); + } else { + writer.updateDocument( term("id",id), toLucene(luan,doc) ); + } + if(commit) writer.commit(); + } finally { + writeLock.unlock(); + } + } + + public void update_in_transaction(LuanState luan,LuanFunction fn) throws IOException, LuanException { + boolean commit = !writeLock.isHeldByCurrentThread(); + writeLock.lock(); + try { + luan.call(fn); + if(commit) writer.commit(); } finally { writeLock.unlock(); } @@ -146,7 +211,7 @@ } } - synchronized long nextId(LuanState luan) throws LuanException, IOException { + private synchronized long nextId(LuanState luan) throws LuanException, IOException { if( ++id > idLim ) { idLim += idBatch; LuanTable doc = new LuanTable(); @@ -161,10 +226,10 @@ public void backup(LuanState luan,String zipFile) throws LuanException, IOException { if( !zipFile.endsWith(".zip") ) throw luan.exception("file "+zipFile+" doesn't end with '.zip'"); - LuceneSnapshot snapshot = openSnapshot(); + IndexCommit ic = snapshotDeletionPolicy.snapshot(); try { ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile)); - for( String fileName : snapshot.getFileNames() ) { + for( String fileName : ic.getFileNames() ) { out.putNextEntry(new ZipEntry(fileName)); FileInputStream in = new FileInputStream(new File(indexDir,fileName)); Utils.copyAll(in,out); @@ -173,28 +238,16 @@ } out.close(); } finally { - snapshot.close(); + snapshotDeletionPolicy.release(ic); } } - // luan - public String to_string() { return writer.getDirectory().toString(); } - public void writer(LuanState luan,LuanFunction fn) throws LuanException, IOException { - LuceneWriter writer = openWriter(); - try { - luan.call( fn, new Object[]{writer.table()} ); - writer.commit(); - } finally { - writer.close(); - } - } - public void close() throws IOException { if( !isClosed ) { writer.close(); @@ -223,7 +276,7 @@ @Override public Object call(LuanState luan,Object[] args) throws LuanException { try { - return LuceneDocument.toTable(luan,searcher.doc(docID)); + return toTable(luan,searcher.doc(docID)); } catch(IOException e) { throw luan.exception(e); } @@ -243,9 +296,25 @@ } } + private synchronized IndexSearcher openSearcher() throws IOException { + DirectoryReader newReader = DirectoryReader.openIfChanged(reader); + if( newReader != null ) { + reader.decRef(); + reader = newReader; + searcher = new IndexSearcher(reader); + } + reader.incRef(); + return searcher; + } + + // call in finally block + private static void close(IndexSearcher searcher) throws IOException { + searcher.getIndexReader().decRef(); + } + public int advanced_search( final LuanState luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException { Utils.checkNotNull(luan,queryStr); - Query query = parseQuery(queryStr); + Query query = SaneQueryParser.parseQuery(mfp,queryStr); IndexSearcher searcher = threadLocalSearcher.get(); boolean inTransaction = searcher != null; if( !inTransaction ) @@ -277,7 +346,7 @@ searcher.search(query,thcc); return thcc.getTotalHits(); } - Sort sort = sortStr==null ? null : parseSort(sortStr); + Sort sort = sortStr==null ? null : SaneQueryParser.parseSort(mfp,sortStr); TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort); final ScoreDoc[] scoreDocs = td.scoreDocs; DocFn docFn = new DocFn(searcher); @@ -341,12 +410,79 @@ }; - public Query parseQuery(String s) throws ParseException { - return SaneQueryParser.parseQuery(mfp,s); + + + + private Document toLucene(LuanState luan,LuanTable table) throws LuanException { + Set<String> indexed = mfp.fields.keySet(); + Document doc = new Document(); + for( Map.Entry<Object,Object> entry : table.iterable(luan) ) { + Object key = entry.getKey(); + if( !(key instanceof String) ) + throw luan.exception("key must be string"); + String name = (String)key; + Object value = entry.getValue(); + if( value instanceof String ) { + String s = (String)value; + if( indexed.contains(name) ) { + doc.add(new StringField(name, s, Field.Store.YES)); + } else { + doc.add(new StoredField(name, s)); + } + } else if( value instanceof Integer ) { + int i = (Integer)value; + if( indexed.contains(name) ) { + doc.add(new IntField(name, i, Field.Store.YES)); + } else { + doc.add(new StoredField(name, i)); + } + } else if( value instanceof Long ) { + long i = (Long)value; + if( indexed.contains(name) ) { + doc.add(new LongField(name, i, Field.Store.YES)); + } else { + doc.add(new StoredField(name, i)); + } + } else if( value instanceof Double ) { + double i = (Double)value; + if( indexed.contains(name) ) { + doc.add(new DoubleField(name, i, Field.Store.YES)); + } else { + doc.add(new StoredField(name, i)); + } + } else if( value instanceof byte[] ) { + byte[] b = (byte[])value; + doc.add(new StoredField(name, b)); + } else + throw luan.exception("invalid value type "+value.getClass()+"' for '"+name+"'"); + } + return doc; } - public Sort parseSort(String s) throws ParseException { - return SaneQueryParser.parseSort(mfp,s); + private static LuanTable toTable(LuanState luan,Document doc) throws LuanException { + if( doc==null ) + return null; + LuanTable table = new LuanTable(); + for( IndexableField ifld : doc ) { + String name = ifld.name(); + BytesRef br = ifld.binaryValue(); + if( br != null ) { + table.rawPut(name,br.bytes); + continue; + } + Number n = ifld.numericValue(); + if( n != null ) { + table.rawPut(name,n); + continue; + } + String s = ifld.stringValue(); + if( s != null ) { + table.rawPut(name,s); + continue; + } + throw luan.exception("invalid field type for "+ifld); + } + return table; } }
--- a/lucene/src/luan/modules/lucene/LuceneSnapshot.java Sun Jun 14 01:34:42 2015 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,26 +0,0 @@ -package luan.modules.lucene; - -import java.io.IOException; -import java.util.Collection; -import org.apache.lucene.index.IndexCommit; - - -public final class LuceneSnapshot { - private final LuceneIndex index; - private final IndexCommit ic; - - LuceneSnapshot(LuceneIndex index) throws IOException { - this.index = index; - this.ic = index.snapshotDeletionPolicy.snapshot(); - } - - // call in finally block - public void close() throws IOException { - index.snapshotDeletionPolicy.release(ic); - } - - public Collection<String> getFileNames() throws IOException { - return ic.getFileNames(); - } - -}
--- a/lucene/src/luan/modules/lucene/LuceneWriter.java Sun Jun 14 01:34:42 2015 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,105 +0,0 @@ -package luan.modules.lucene; - -import java.io.IOException; -import java.util.Map; -import java.util.Set; -import java.util.List; -import java.util.ArrayList; -import org.apache.lucene.index.IndexableField; -import org.apache.lucene.index.Term; -import org.apache.lucene.util.BytesRef; -import org.apache.lucene.util.NumericUtils; -import luan.Luan; -import luan.LuanState; -import luan.LuanTable; -import luan.LuanJavaFunction; -import luan.LuanException; - - -public final class LuceneWriter { - private final LuceneIndex index; - - LuceneWriter(LuceneIndex index) { - index.writeLock.lock(); - this.index = index; - } - - // call in finally block - void close() { - index.writeLock.unlock(); - } - - void commit() throws IOException { - index.writer.commit(); - } - - private Term term(String key,int value) { - BytesRef br = new BytesRef(); - NumericUtils.intToPrefixCoded(value,0,br); - return new Term(key,br); - } - - private Term term(String key,long value) { - BytesRef br = new BytesRef(); - NumericUtils.longToPrefixCoded(value,0,br); - return new Term(key,br); - } - - private Term term(LuanState luan,String key,Object value) throws LuanException { - if( value instanceof String ) - return new Term( key, (String)value ); - if( value instanceof Integer ) - return term( key, (Integer)value ); - if( value instanceof Long ) - return term( key, (Long)value ); - if( value instanceof Float ) - return term( key, NumericUtils.floatToSortableInt((Float)value) ); - if( value instanceof Double ) - return term( key, NumericUtils.doubleToSortableLong((Double)value) ); - throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for key '"+key+"'"); - } - - public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException { - List<Term> list = new ArrayList<Term>(); - for( Map.Entry<Object,Object> entry : tblTerms.iterable(luan) ) { - Object key = entry.getKey(); - Object value = entry.getValue(); - if( !(key instanceof String) ) - throw luan.exception("key must be a string but got "+key.getClass().getSimpleName()); - list.add( term( luan, (String)key, value ) ); - } - index.writer.deleteDocuments(list.toArray(new Term[list.size()])); - } - - public void save_document(LuanState luan,LuanTable doc) throws LuanException, IOException { - if( doc.get(luan,"type")==null ) - throw luan.exception("missing 'type' field"); - Long id = (Long)doc.get(luan,"id"); - if( id == null ) { - id = index.nextId(luan); - doc.put(luan,"id",id); - index.writer.addDocument(index.toLucene(luan,doc)); - } else { - index.writer.updateDocument( term("id",id), index.toLucene(luan,doc) ); - } - } - - // luan - - private void add(LuanTable t,String method,Class<?>... parameterTypes) throws NoSuchMethodException { - t.rawPut( method, new LuanJavaFunction(LuceneWriter.class.getMethod(method,parameterTypes),this) ); - } - - LuanTable table() { - LuanTable tbl = new LuanTable(); - try { - add( tbl, "save_document", LuanState.class, LuanTable.class ); - add( tbl, "delete_documents", LuanState.class, LuanTable.class ); - } catch(NoSuchMethodException e) { - throw new RuntimeException(e); - } - tbl.rawPut("index",index.myTable); - return tbl; - } - -}