changeset 546:eaef1005ab87

general lucene cleanup
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 14 Jun 2015 22:17:58 -0600
parents ddcd4296107a
children 0be287ab0309
files lucene/src/luan/modules/lucene/Lucene.luan lucene/src/luan/modules/lucene/LuceneDocument.java lucene/src/luan/modules/lucene/LuceneIndex.java lucene/src/luan/modules/lucene/LuceneSnapshot.java lucene/src/luan/modules/lucene/LuceneWriter.java
diffstat 5 files changed, 195 insertions(+), 296 deletions(-) [+]
line wrap: on
line diff
--- a/lucene/src/luan/modules/lucene/Lucene.luan	Sun Jun 14 01:34:42 2015 -0600
+++ b/lucene/src/luan/modules/lucene/Lucene.luan	Sun Jun 14 22:17:58 2015 -0600
@@ -16,28 +16,18 @@
 
 function M.index(indexDir)
 	local index = {}
-	local java_index = LuceneIndex.new(indexDir,index)
+	local java_index = LuceneIndex.new(indexDir)
 	index.indexed_fields = java_index.indexedFieldsMeta.newTable()
 	index.to_string = java_index.to_string
 	index.backup = java_index.backup
-	index.writer = java_index.writer
 	index.advanced_search = java_index.advanced_search
 	index.search_in_transaction = java_index.search_in_transaction
 	index.delete_all = java_index.delete_all
+	index.delete_documents = java_index.delete_documents
+	index.save_document = java_index.save_document
+	index.update_in_transaction = java_index.update_in_transaction
 	index.close = java_index.close
 
-	function index.save_document(doc)
-		index.writer( function(writer)
-			writer.save_document(doc)
-		end )
-	end
-
-	function index.delete_documents(terms)
-		index.writer( function(writer)
-			writer.delete_documents(terms)
-		end )
-	end
-
 	function index.search(query, from, to, sort)
 		local results = {}
 		local function fn(i,doc_fn)
--- a/lucene/src/luan/modules/lucene/LuceneDocument.java	Sun Jun 14 01:34:42 2015 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,96 +0,0 @@
-package luan.modules.lucene;
-
-import java.util.Map;
-import java.util.HashMap;
-import java.util.Set;
-import java.util.HashSet;
-import java.util.Arrays;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.document.Field;
-import org.apache.lucene.document.StoredField;
-import org.apache.lucene.document.StringField;
-import org.apache.lucene.document.IntField;
-import org.apache.lucene.document.LongField;
-import org.apache.lucene.document.DoubleField;
-import org.apache.lucene.util.BytesRef;
-import luan.Luan;
-import luan.LuanState;
-import luan.LuanTable;
-import luan.LuanException;
-
-
-public class LuceneDocument {
-	private LuceneDocument(String a) {}  // never
-
-	static Document toLucene(LuanState luan,LuanTable table,Set<String> indexed) throws LuanException {
-		Document doc = new Document();
-		for( Map.Entry<Object,Object> entry : table.iterable(luan) ) {
-			Object key = entry.getKey();
-			if( !(key instanceof String) )
-				throw luan.exception("key must be string");
-			String name = (String)key;
-			Object value = entry.getValue();
-			if( value instanceof String ) {
-				String s = (String)value;
-				if( indexed.contains(name) ) {
-					doc.add(new StringField(name, s, Field.Store.YES));
-				} else {
-					doc.add(new StoredField(name, s));
-				}
-			} else if( value instanceof Integer ) {
-				int i = (Integer)value;
-				if( indexed.contains(name) ) {
-					doc.add(new IntField(name, i, Field.Store.YES));
-				} else {
-					doc.add(new StoredField(name, i));
-				}
-			} else if( value instanceof Long ) {
-				long i = (Long)value;
-				if( indexed.contains(name) ) {
-					doc.add(new LongField(name, i, Field.Store.YES));
-				} else {
-					doc.add(new StoredField(name, i));
-				}
-			} else if( value instanceof Double ) {
-				double i = (Double)value;
-				if( indexed.contains(name) ) {
-					doc.add(new DoubleField(name, i, Field.Store.YES));
-				} else {
-					doc.add(new StoredField(name, i));
-				}
-			} else if( value instanceof byte[] ) {
-				byte[] b = (byte[])value;
-				doc.add(new StoredField(name, b));
-			} else
-				throw luan.exception("invalid value type "+value.getClass()+"' for '"+name+"'");
-		}
-		return doc;
-	}
-
-	static LuanTable toTable(LuanState luan,Document doc) throws LuanException {
-		if( doc==null )
-			return null;
-		LuanTable table = new LuanTable();
-		for( IndexableField ifld : doc ) {
-			String name = ifld.name();
-			BytesRef br = ifld.binaryValue();
-			if( br != null ) {
-				table.rawPut(name,br.bytes);
-				continue;
-			}
-			Number n = ifld.numericValue();
-			if( n != null ) {
-				table.rawPut(name,n);
-				continue;
-			}
-			String s = ifld.stringValue();
-			if( s != null ) {
-				table.rawPut(name,s);
-				continue;
-			}
-			throw luan.exception("invalid field type for "+ifld);
-		}
-		return table;
-	}
-}
--- a/lucene/src/luan/modules/lucene/LuceneIndex.java	Sun Jun 14 01:34:42 2015 -0600
+++ b/lucene/src/luan/modules/lucene/LuceneIndex.java	Sun Jun 14 22:17:58 2015 -0600
@@ -6,6 +6,10 @@
 import java.io.FileInputStream;
 import java.io.IOException;
 import java.util.Iterator;
+import java.util.Map;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Set;
 import java.util.concurrent.locks.Lock;
 import java.util.concurrent.locks.ReentrantLock;
 import java.util.zip.ZipOutputStream;
@@ -13,15 +17,25 @@
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.DoubleField;
+import org.apache.lucene.index.IndexableField;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.Term;
 import org.apache.lucene.index.SnapshotDeletionPolicy;
+import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.AtomicReaderContext;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TopDocs;
@@ -58,19 +72,17 @@
 	private static final Analyzer analyzer = new KeywordAnalyzer();
 	public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer);
 
-	final LuanTable myTable;
-	final Lock writeLock = new ReentrantLock();
+	private final ReentrantLock writeLock = new ReentrantLock();
 	private final File indexDir;
 	final SnapshotDeletionPolicy snapshotDeletionPolicy;
-	final IndexWriter writer;
+	private final IndexWriter writer;
 	private DirectoryReader reader;
 	private IndexSearcher searcher;
 	private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>();
 	private boolean isClosed = false;
 	private final MultiFieldParser mfp = new MultiFieldParser();
 
-	public LuceneIndex(LuanState luan,String indexDirStr,LuanTable myTable) throws LuanException, IOException {
-		this.myTable = myTable;
+	public LuceneIndex(LuanState luan,String indexDirStr) throws LuanException, IOException {
 		mfp.fields.put( "type", STRING_FIELD_PARSER );
 		mfp.fields.put( "id", NumberFieldParser.LONG );
 		File indexDir = new File(indexDirStr);
@@ -88,40 +100,93 @@
 		initId(luan);
 	}
 
-	Document toLucene(LuanState luan,LuanTable table) throws LuanException {
-		return LuceneDocument.toLucene(luan,table,mfp.fields.keySet());
-	}
 
-	public LuceneWriter openWriter() {
-		return new LuceneWriter(this);
-	}
-
-	private synchronized IndexSearcher openSearcher() throws IOException {
-		DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
-		if( newReader != null ) {
-			reader.decRef();
-			reader = newReader;
-			searcher = new IndexSearcher(reader);
-		}
-		reader.incRef();
-		return searcher;
-	}
-
-	// call in finally block
-	private static void close(IndexSearcher searcher) throws IOException {
-		searcher.getIndexReader().decRef();
-	}
-
-	LuceneSnapshot openSnapshot() throws IOException {
-		return new LuceneSnapshot(this);
-	}
 
 	public void delete_all() throws IOException {
+		boolean commit = !writeLock.isHeldByCurrentThread();  // false when this thread already holds writeLock, i.e. the call is nested inside a locked section
 		writeLock.lock();
 		try {
 			writer.deleteAll();
-			writer.commit();
 			id = idLim = 0;
+			if(commit) writer.commit();  // commit only when this call took the lock first; a nested call leaves the commit to the outer lock holder
+		} finally {
+			writeLock.unlock();
+		}
+	}
+
+	private static Term term(String key,int value) {
+		BytesRef br = new BytesRef();
+		NumericUtils.intToPrefixCoded(value,0,br);
+		return new Term(key,br);
+	}
+
+	private static Term term(String key,long value) {
+		BytesRef br = new BytesRef();
+		NumericUtils.longToPrefixCoded(value,0,br);
+		return new Term(key,br);
+	}
+
+	private static Term term(LuanState luan,String key,Object value) throws LuanException {  // picks the Term encoding from the runtime type of value; throws LuanException for unsupported types
+		if( value instanceof String )
+			return new Term( key, (String)value );
+		else if( value instanceof Integer )
+			return term( key, ((Integer)value).intValue() );
+		else if( value instanceof Long )
+			return term( key, ((Long)value).longValue() );
+		else if( value instanceof Float )
+			return term( key, NumericUtils.floatToSortableInt( ((Float)value).floatValue() ) );  // floats are encoded through their sortable int form
+		else if( value instanceof Double )
+			return term( key, NumericUtils.doubleToSortableLong( ((Double)value).doubleValue() ) );  // doubles are encoded through their sortable long form
+		throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for key '"+key+"'");
+	}
+
+	public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException {  // deletes the documents containing any of the field/value terms listed in tblTerms
+		final List<Term> terms = new ArrayList<Term>();
+		for( Map.Entry<Object,Object> e : tblTerms.iterable(luan) ) {
+			final Object k = e.getKey();
+			if( !(k instanceof String) )
+				throw luan.exception("key must be a string but got "+k.getClass().getSimpleName());
+			terms.add( term( luan, (String)k, e.getValue() ) );
+		}
+
+		final boolean commit = !writeLock.isHeldByCurrentThread();  // false when this thread already holds writeLock (nested call)
+		writeLock.lock();
+		try {
+			final Term[] termArray = terms.toArray(new Term[terms.size()]);
+			writer.deleteDocuments(termArray);
+			if(commit) writer.commit();  // a nested call leaves the commit to the outer lock holder
+		} finally {
+			writeLock.unlock();
+		}
+	}
+
+	public void save_document(LuanState luan,LuanTable doc) throws LuanException, IOException {  // adds doc when it has no "id" (assigning one and writing it back into doc), otherwise replaces the document with that id
+		if( doc.get(luan,"type")==null ) throw luan.exception("missing 'type' field");
+		Object idObj = doc.get(luan,"id");
+		if( idObj != null && !(idObj instanceof Long) ) throw luan.exception("'id' must be a long but got "+idObj.getClass().getSimpleName());  // report a bad id as a LuanException instead of a raw ClassCastException
+		Long id = (Long)idObj;
+		boolean commit = !writeLock.isHeldByCurrentThread();  // false when this thread already holds writeLock (nested call)
+		writeLock.lock();
+		try {
+			if( id == null ) {
+				id = nextId(luan);
+				doc.put(luan,"id",id);  // the caller's table receives the generated id
+				writer.addDocument(toLucene(luan,doc));
+			} else {
+				writer.updateDocument( term("id",id), toLucene(luan,doc) );  // deletes documents matching the id term, then adds the new version
+			}
+			if(commit) writer.commit();  // a nested call leaves the commit to the outer lock holder
+		} finally {
+			writeLock.unlock();
+		}
+	}
+
+	public void update_in_transaction(LuanState luan,LuanFunction fn) throws IOException, LuanException {
+		boolean commit = !writeLock.isHeldByCurrentThread();
+		writeLock.lock();
+		try {
+			luan.call(fn);
+			if(commit) writer.commit();
 		} finally {
 			writeLock.unlock();
 		}
@@ -146,7 +211,7 @@
 		}
 	}
 
-	synchronized long nextId(LuanState luan) throws LuanException, IOException {
+	private synchronized long nextId(LuanState luan) throws LuanException, IOException {
 		if( ++id > idLim ) {
 			idLim += idBatch;
 			LuanTable doc = new LuanTable();
@@ -161,10 +226,10 @@
 	public void backup(LuanState luan,String zipFile) throws LuanException, IOException {
 		if( !zipFile.endsWith(".zip") )
 			throw luan.exception("file "+zipFile+" doesn't end with '.zip'");
-		LuceneSnapshot snapshot = openSnapshot();
+		IndexCommit ic = snapshotDeletionPolicy.snapshot();
 		try {
 			ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile));
-			for( String fileName : snapshot.getFileNames() ) {
+			for( String fileName : ic.getFileNames() ) {
 				out.putNextEntry(new ZipEntry(fileName));
 				FileInputStream in = new FileInputStream(new File(indexDir,fileName));
 				Utils.copyAll(in,out);
@@ -173,28 +238,16 @@
 			}
 			out.close();
 		} finally {
-			snapshot.close();
+			snapshotDeletionPolicy.release(ic);
 		}
 	}
 
 
 
-	// luan
-
 	public String to_string() {
 		return writer.getDirectory().toString();
 	}
 
-	public void writer(LuanState luan,LuanFunction fn) throws LuanException, IOException {
-		LuceneWriter writer = openWriter();
-		try {
-			luan.call( fn, new Object[]{writer.table()} );
-			writer.commit();
-		} finally {
-			writer.close();
-		}
-	}
-
 	public void close() throws IOException {
 		if( !isClosed ) {
 			writer.close();
@@ -223,7 +276,7 @@
 
 		@Override public Object call(LuanState luan,Object[] args) throws LuanException {
 			try {
-				return LuceneDocument.toTable(luan,searcher.doc(docID));
+				return toTable(luan,searcher.doc(docID));
 			} catch(IOException e) {
 				throw luan.exception(e);
 			}
@@ -243,9 +296,25 @@
 		}
 	}
 
+	private synchronized IndexSearcher openSearcher() throws IOException {  // returns the current searcher, refreshed if the index changed, with its reader's ref count incremented; pair with close(IndexSearcher)
+		DirectoryReader newReader = DirectoryReader.openIfChanged(reader);  // null when the existing reader is still current
+		if( newReader != null ) {
+			reader.decRef();  // release the reference held on the reader being replaced
+			reader = newReader;
+			searcher = new IndexSearcher(reader);
+		}
+		reader.incRef();  // reference taken for the caller; released again by close(IndexSearcher)
+		return searcher;
+	}
+
+	// Decrements the ref count of the searcher's reader (the count openSearcher() increments); call in a finally block.
+	private static void close(IndexSearcher searcher) throws IOException {
+		searcher.getIndexReader().decRef();
+	}
+
 	public int advanced_search( final LuanState luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException {
 		Utils.checkNotNull(luan,queryStr);
-		Query query = parseQuery(queryStr);
+		Query query = SaneQueryParser.parseQuery(mfp,queryStr);
 		IndexSearcher searcher = threadLocalSearcher.get();
 		boolean inTransaction = searcher != null;
 		if( !inTransaction )
@@ -277,7 +346,7 @@
 				searcher.search(query,thcc);
 				return thcc.getTotalHits();
 			}
-			Sort sort = sortStr==null ? null : parseSort(sortStr);
+			Sort sort = sortStr==null ? null : SaneQueryParser.parseSort(mfp,sortStr);
 			TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort);
 			final ScoreDoc[] scoreDocs = td.scoreDocs;
 			DocFn docFn = new DocFn(searcher);
@@ -341,12 +410,79 @@
 
 	};
 
-	public Query parseQuery(String s) throws ParseException {
-		return SaneQueryParser.parseQuery(mfp,s);
+
+
+
+	private Document toLucene(LuanState luan,LuanTable table) throws LuanException {  // converts a Luan table into a Lucene Document; throws LuanException on a non-string key or an unsupported value type
+		Set<String> indexed = mfp.fields.keySet();  // names of the fields that are indexed as well as stored; every other field is stored only
+		Document doc = new Document();
+		for( Map.Entry<Object,Object> entry : table.iterable(luan) ) {
+			Object key = entry.getKey();
+			if( !(key instanceof String) )
+				throw luan.exception("key must be string");
+			String name = (String)key;
+			Object value = entry.getValue();
+			if( value instanceof String ) {
+				String s = (String)value;
+				if( indexed.contains(name) ) {
+					doc.add(new StringField(name, s, Field.Store.YES));
+				} else {
+					doc.add(new StoredField(name, s));
+				}
+			} else if( value instanceof Integer ) {
+				int i = (Integer)value;
+				if( indexed.contains(name) ) {
+					doc.add(new IntField(name, i, Field.Store.YES));
+				} else {
+					doc.add(new StoredField(name, i));
+				}
+			} else if( value instanceof Long ) {
+				long i = (Long)value;
+				if( indexed.contains(name) ) {
+					doc.add(new LongField(name, i, Field.Store.YES));
+				} else {
+					doc.add(new StoredField(name, i));
+				}
+			} else if( value instanceof Double ) {
+				double i = (Double)value;
+				if( indexed.contains(name) ) {
+					doc.add(new DoubleField(name, i, Field.Store.YES));
+				} else {
+					doc.add(new StoredField(name, i));
+				}
+			} else if( value instanceof byte[] ) {  // binary values are always stored only, even when the name is in `indexed`
+				byte[] b = (byte[])value;
+				doc.add(new StoredField(name, b));
+			} else
+				throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for '"+name+"'");  // balanced quotes and simple type name, matching the message built in term()
+		}
+		return doc;
 	}
 
-	public Sort parseSort(String s) throws ParseException {
-		return SaneQueryParser.parseSort(mfp,s);
+	private static LuanTable toTable(LuanState luan,Document doc) throws LuanException {  // converts a Lucene Document into a Luan table keyed by field name; returns null for a null doc
+		if( doc==null )
+			return null;
+		LuanTable table = new LuanTable();
+		for( IndexableField ifld : doc ) {
+			String name = ifld.name();
+			BytesRef br = ifld.binaryValue();  // checked first: binary, then numeric, then string
+			if( br != null ) {
+				table.rawPut(name,java.util.Arrays.copyOfRange(br.bytes,br.offset,br.offset+br.length));  // br.bytes is the whole backing array; only [offset, offset+length) is the field value
+				continue;
+			}
+			Number n = ifld.numericValue();
+			if( n != null ) {
+				table.rawPut(name,n);
+				continue;
+			}
+			String s = ifld.stringValue();
+			if( s != null ) {
+				table.rawPut(name,s);
+				continue;
+			}
+			throw luan.exception("invalid field type for "+ifld);  // the field exposed no binary, numeric or string value
+		}
+		return table;
 	}
 
 }
--- a/lucene/src/luan/modules/lucene/LuceneSnapshot.java	Sun Jun 14 01:34:42 2015 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,26 +0,0 @@
-package luan.modules.lucene;
-
-import java.io.IOException;
-import java.util.Collection;
-import org.apache.lucene.index.IndexCommit;
-
-
-public final class LuceneSnapshot {
-	private final LuceneIndex index;
-	private final IndexCommit ic;
-
-	LuceneSnapshot(LuceneIndex index) throws IOException {
-		this.index = index;
-		this.ic = index.snapshotDeletionPolicy.snapshot();
-	}
-
-	// call in finally block
-	public void close() throws IOException {
-		index.snapshotDeletionPolicy.release(ic);
-	}
-
-	public Collection<String> getFileNames() throws IOException {
-		return ic.getFileNames();
-	}
-
-}
--- a/lucene/src/luan/modules/lucene/LuceneWriter.java	Sun Jun 14 01:34:42 2015 -0600
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,105 +0,0 @@
-package luan.modules.lucene;
-
-import java.io.IOException;
-import java.util.Map;
-import java.util.Set;
-import java.util.List;
-import java.util.ArrayList;
-import org.apache.lucene.index.IndexableField;
-import org.apache.lucene.index.Term;
-import org.apache.lucene.util.BytesRef;
-import org.apache.lucene.util.NumericUtils;
-import luan.Luan;
-import luan.LuanState;
-import luan.LuanTable;
-import luan.LuanJavaFunction;
-import luan.LuanException;
-
-
-public final class LuceneWriter {
-	private final LuceneIndex index;
-
-	LuceneWriter(LuceneIndex index) {
-		index.writeLock.lock();
-		this.index = index;
-	}
-
-	// call in finally block
-	void close() {
-		index.writeLock.unlock();
-	}
-
-	void commit() throws IOException {
-		index.writer.commit();
-	}
-
-	private Term term(String key,int value) {
-		BytesRef br = new BytesRef();
-		NumericUtils.intToPrefixCoded(value,0,br);
-		return new Term(key,br);
-	}
-
-	private Term term(String key,long value) {
-		BytesRef br = new BytesRef();
-		NumericUtils.longToPrefixCoded(value,0,br);
-		return new Term(key,br);
-	}
-
-	private Term term(LuanState luan,String key,Object value) throws LuanException {
-		if( value instanceof String )
-			return new Term( key, (String)value );
-		if( value instanceof Integer )
-			return term( key, (Integer)value );
-		if( value instanceof Long )
-			return term( key, (Long)value );
-		if( value instanceof Float )
-			return term( key, NumericUtils.floatToSortableInt((Float)value) );
-		if( value instanceof Double )
-			return term( key, NumericUtils.doubleToSortableLong((Double)value) );
-		throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for key '"+key+"'");
-	}
-
-	public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException {
-		List<Term> list = new ArrayList<Term>();
-		for( Map.Entry<Object,Object> entry : tblTerms.iterable(luan) ) {
-			Object key = entry.getKey();
-			Object value = entry.getValue();
-			if( !(key instanceof String) )
-				throw luan.exception("key must be a string but got "+key.getClass().getSimpleName());
-			list.add( term( luan, (String)key, value ) );
-		}
-		index.writer.deleteDocuments(list.toArray(new Term[list.size()]));
-	}
-
-	public void save_document(LuanState luan,LuanTable doc) throws LuanException, IOException {
-		if( doc.get(luan,"type")==null )
-			throw luan.exception("missing 'type' field");
-		Long id = (Long)doc.get(luan,"id");
-		if( id == null ) {
-			id = index.nextId(luan);
-			doc.put(luan,"id",id);
-			index.writer.addDocument(index.toLucene(luan,doc));
-		} else {
-			index.writer.updateDocument( term("id",id), index.toLucene(luan,doc) );
-		}
-	}
-
-	// luan
-
-	private void add(LuanTable t,String method,Class<?>... parameterTypes) throws NoSuchMethodException {
-		t.rawPut( method, new LuanJavaFunction(LuceneWriter.class.getMethod(method,parameterTypes),this) );
-	}
-
-	LuanTable table() {
-		LuanTable tbl = new LuanTable();
-		try {
-			add( tbl, "save_document", LuanState.class, LuanTable.class );
-			add( tbl, "delete_documents", LuanState.class, LuanTable.class );
-		} catch(NoSuchMethodException e) {
-			throw new RuntimeException(e);
-		}
-		tbl.rawPut("index",index.myTable);
-		return tbl;
-	}
-
-}