changeset 230:4438cb2e04d0

start lucene git-svn-id: https://luan-java.googlecode.com/svn/trunk@231 21e917c8-12df-6dd8-5cb6-c86387c605b9
author fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
date Tue, 30 Sep 2014 20:03:56 +0000
parents 2a54cb7d1cf4
children a35417bf493a
files lucene/ext/lucene-analyzers-common-4.9.0.jar lucene/ext/lucene-core-4.9.0.jar lucene/src/luan/modules/lucene/Lucene.luan lucene/src/luan/modules/lucene/LuceneDocument.java lucene/src/luan/modules/lucene/LuceneIndex.java lucene/src/luan/modules/lucene/LuceneSearcher.java lucene/src/luan/modules/lucene/LuceneSnapshot.java lucene/src/luan/modules/lucene/LuceneWriter.java
diffstat 8 files changed, 537 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
Binary file lucene/ext/lucene-analyzers-common-4.9.0.jar has changed
Binary file lucene/ext/lucene-core-4.9.0.jar has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lucene/src/luan/modules/lucene/Lucene.luan	Tue Sep 30 20:03:56 2014 +0000
@@ -0,0 +1,25 @@
+import "Java"
+import "luan.modules.lucene.LuceneIndex"
+
+standard_fields = {  -- short name -> field name with flags; same strings as LuceneWriter.FLD_TYPE / FLD_ID
+	"type" = "type index";
+	"id" = "id index";
+}
+
+-- Opens the Lucene index in indexDir and returns its table with two convenience functions added.
+function Index(indexDir)
+	local index = LuceneIndex.new(indexDir).table()
+	-- saves doc inside its own index.Writer call
+	function index.save_document(doc)
+		index.Writer( function(writer)
+			writer.save_document(doc)
+		end )
+	end
+	-- deletes the documents matching terms inside its own index.Writer call
+	function index.delete_documents(terms)
+		index.Writer( function(writer)
+			writer.delete_documents(terms)
+		end )
+	end
+	return index
+end
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lucene/src/luan/modules/lucene/LuceneDocument.java	Tue Sep 30 20:03:56 2014 +0000
@@ -0,0 +1,98 @@
+package luan.modules.lucene;
+
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.Arrays;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.util.BytesRef;
+import luan.Luan;
+import luan.LuanTable;
+
+
+public class LuceneDocument {
+	// Flag that may follow the field name in a key.  I assume there will be more flags later
+	public static final String INDEX = "index";
+
+	private LuceneDocument(String a) {}  // never
+
+	/**
+	 * Converts a Luan table to a Lucene document with one stored field per entry.
+	 * The whole key is used as the field name; the space-separated words after its
+	 * first word are flags.  With the "index" flag a String, Integer or Long value
+	 * is indexed as well as stored.  Entries whose value is null are skipped.
+	 *
+	 * @throws IllegalArgumentException for a non-string key, an unsupported value
+	 *         type, or a flag that the value's type does not use
+	 */
+	static Document toLucene(LuanTable table) {
+		Document doc = new Document();
+		for( Map.Entry<Object,Object> entry : table ) {
+			Object key = entry.getKey();
+			if( !(key instanceof String) )
+				throw new IllegalArgumentException("key must be string");
+			String name = (String)key;
+			Object value = entry.getValue();
+			if( value == null )
+				continue;
+			Set<String> flags = new HashSet<String>();
+			String[] a = name.split(" +");
+			for( int i=1; i<a.length; i++ ) {  // a[0] is the name without its flags
+				flags.add(a[i]);
+			}
+			if( value instanceof String ) {
+				String s = (String)value;
+				doc.add( flags.remove(INDEX) ? new StringField(name, s, Field.Store.YES) : new StoredField(name, s) );
+			} else if( value instanceof Integer ) {
+				int i = (Integer)value;
+				doc.add( flags.remove(INDEX) ? new IntField(name, i, Field.Store.YES) : new StoredField(name, i) );
+			} else if( value instanceof Long ) {
+				long i = (Long)value;
+				doc.add( flags.remove(INDEX) ? new LongField(name, i, Field.Store.YES) : new StoredField(name, i) );
+			} else if( value instanceof byte[] ) {
+				doc.add(new StoredField(name, (byte[])value));
+			} else
+				throw new IllegalArgumentException("invalid value type '"+value.getClass()+"' for '"+name+"'");
+			// any flag still present was not used by the value's type
+			if( !flags.isEmpty() )
+				throw new IllegalArgumentException("invalid flags "+flags+" in '"+name+"'");
+		}
+		return doc;
+	}
+
+	/**
+	 * Converts a Lucene document to a Luan table that maps each field name to the
+	 * field's binary, numeric or string value, tried in that order.
+	 *
+	 * @return the table, or null if doc is null
+	 * @throws RuntimeException if a field has none of these values
+	 */
+	static LuanTable toTable(Document doc) {
+		if( doc==null )
+			return null;
+		LuanTable table = Luan.newTable();
+		for( IndexableField ifld : doc ) {
+			String name = ifld.name();
+			BytesRef br = ifld.binaryValue();
+			if( br != null ) {
+				// a BytesRef is a slice of its array, so copy only the bytes it covers
+				table.put(name,Arrays.copyOfRange(br.bytes,br.offset,br.offset+br.length));
+				continue;
+			}
+			Object value = ifld.numericValue();
+			if( value == null )
+				value = ifld.stringValue();
+			if( value == null )
+				throw new RuntimeException("invalid field type for "+ifld);
+			table.put(name,value);
+		}
+		return table;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lucene/src/luan/modules/lucene/LuceneIndex.java	Tue Sep 30 20:03:56 2014 +0000
@@ -0,0 +1,198 @@
+package luan.modules.lucene;
+
+import java.io.File;
+import java.io.FileOutputStream;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.concurrent.locks.Lock;
+import java.util.concurrent.locks.ReentrantLock;
+import java.util.zip.ZipOutputStream;
+import java.util.zip.ZipEntry;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.standard.StandardAnalyzer;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.SnapshotDeletionPolicy;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.util.Version;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import luan.modules.Utils;
+import luan.Luan;
+import luan.LuanState;
+import luan.LuanTable;
+import luan.LuanFunction;
+import luan.LuanJavaFunction;
+import luan.LuanException;
+
+
+public final class LuceneIndex {
+	private static final String FLD_TYPE = LuceneWriter.FLD_TYPE;
+	private static final String FLD_NEXT_ID = "nextId";
+
+	final Lock writeLock = new ReentrantLock();
+	private final File indexDir;
+	final SnapshotDeletionPolicy snapshotDeletionPolicy;
+	final IndexWriter writer;
+	private DirectoryReader reader;
+	private LuceneSearcher searcher;
+
+	public LuceneIndex(String indexDirStr) {
+		try {
+			File indexDir = new File(indexDirStr);
+			this.indexDir = indexDir;
+			Directory dir = FSDirectory.open(indexDir);
+			Version version = Version.LUCENE_4_9;
+			Analyzer analyzer = new StandardAnalyzer(version);
+			IndexWriterConfig conf = new IndexWriterConfig(version,analyzer);
+			snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy());
+			conf.setIndexDeletionPolicy(snapshotDeletionPolicy);
+			writer = new IndexWriter(dir,conf);
+			writer.commit();  // commit index creation
+			reader = DirectoryReader.open(dir);
+			searcher = new LuceneSearcher(reader);
+			initId();
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	public LuceneWriter openWriter() {
+		return new LuceneWriter(this);
+	}
+
+	public synchronized LuceneSearcher openSearcher() {
+		try {
+			DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
+			if( newReader != null ) {
+				reader.decRef();
+				reader = newReader;
+				searcher = new LuceneSearcher(reader);
+			}
+			reader.incRef();
+			return searcher;
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	public LuceneSnapshot openSnapshot() {
+		return new LuceneSnapshot(this);
+	}
+
+
+	private long id = 0;
+	private long idLim = 0;
+	private final int idBatch = 10;
+
+	private void initId() {
+		TopDocs td = searcher.search(new TermQuery(new Term(FLD_TYPE,"next_id")),1);
+		switch(td.totalHits) {
+		case 0:
+			break;  // do nothing
+		case 1:
+			LuanTable doc = searcher.doc(td.scoreDocs[0].doc);
+			idLim = (Long)doc.get(FLD_NEXT_ID);
+			id = idLim;
+			break;
+		default:
+			throw new RuntimeException();
+		}
+	}
+
+	synchronized String nextId() {
+		try {
+			String rtn = Long.toString(++id);
+			if( id > idLim ) {
+				idLim += idBatch;
+				LuanTable doc = Luan.newTable();
+				doc.put( FLD_TYPE, "next_id" );
+				doc.put( FLD_NEXT_ID, idLim );
+				writer.updateDocument(new Term(FLD_TYPE,"next_id"),LuceneDocument.toLucene(doc));
+			}
+			return rtn;
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	public LuanTable getDocument(String id) {
+		return getDocument(new Term(LuceneWriter.FLD_ID,id));
+	}
+
+	public LuanTable getDocument(Term term) {
+		LuceneSearcher searcher = openSearcher();
+		try {
+			TopDocs td = searcher.search(new TermQuery(term),1);
+			switch(td.totalHits) {
+			case 0:
+				return null;
+			case 1:
+				return searcher.doc(td.scoreDocs[0].doc);
+			default:
+				throw new RuntimeException();
+			}
+		} finally {
+			searcher.close();
+		}
+	}
+
+
+	public void backup(String zipFile) {
+		if( !zipFile.endsWith(".zip") )
+			throw new RuntimeException("file "+zipFile+" doesn't end with '.zip'");
+		LuceneSnapshot snapshot = openSnapshot();
+		try {
+			ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile));
+			for( String fileName : snapshot.getFileNames() ) {
+				out.putNextEntry(new ZipEntry(fileName));
+				FileInputStream in = new FileInputStream(new File(indexDir,fileName));
+				Utils.copyAll(in,out);
+				in.close();
+				out.closeEntry();
+			}
+			out.close();
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		} finally {
+			snapshot.close();
+		}
+	}
+
+
+
+	// luan
+
+	public String to_string() {
+		return writer.getDirectory().toString();
+	}
+
+	public void Writer(LuanState luan,LuanFunction fn) throws LuanException, IOException {
+		LuceneWriter writer = openWriter();
+		try {
+			luan.call( fn, new Object[]{writer.table()} );
+			writer.commit();
+		} finally {
+			writer.close();
+		}
+	}
+
+	private void add(LuanTable t,String method,Class<?>... parameterTypes) throws NoSuchMethodException {
+		t.put( method, new LuanJavaFunction(LuceneIndex.class.getMethod(method,parameterTypes),this) );
+	}
+
+	public LuanTable table() {
+		LuanTable tbl = Luan.newTable();
+		try {
+			add( tbl, "to_string" );
+			add( tbl, "backup", String.class );
+		} catch(NoSuchMethodException e) {
+			throw new RuntimeException(e);
+		}
+		return tbl;
+	}
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lucene/src/luan/modules/lucene/LuceneSearcher.java	Tue Sep 30 20:03:56 2014 +0000
@@ -0,0 +1,85 @@
+package luan.modules.lucene;
+
+import java.io.IOException;
+import java.util.Iterator;
+import java.util.NoSuchElementException;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.TopFieldDocs;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.ScoreDoc;
+import luan.LuanTable;
+
+
+public final class LuceneSearcher {
+	private final IndexSearcher indexSearcher;
+
+	LuceneSearcher(IndexReader reader) {
+		indexSearcher = new IndexSearcher(reader);
+	}
+
+	/** Releases one reference to the reader being searched; call in a finally block. */
+	public void close() {
+		try {
+			indexSearcher.getIndexReader().decRef();
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/** Loads document docID and converts it to a Luan table. */
+	public LuanTable doc(int docID) {
+		final Document stored;
+		try {
+			stored = indexSearcher.doc(docID);
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+		return LuceneDocument.toTable(stored);
+	}
+
+	/** Searches for at most n hits of query; an IOException is rethrown unchecked. */
+	public TopDocs search(Query query,int n) {
+		try {
+			return indexSearcher.search(query,n);
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/** Like search(query,n), with the hits ordered by sort. */
+	public TopFieldDocs search(Query query,int n,Sort sort) {
+		try {
+			return indexSearcher.search(query,n,sort);
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/** Returns the hits of td as Luan tables; each one is loaded when the iterator reaches it. */
+	public Iterable<LuanTable> docs(TopDocs td) {
+		final ScoreDoc[] hits = td.scoreDocs;
+		return new Iterable<LuanTable>() {
+			public Iterator<LuanTable> iterator() {
+				return new Iterator<LuanTable>() {
+					private int pos = 0;  // index of the hit that next() returns
+
+					public boolean hasNext() {
+						return pos < hits.length;
+					}
+					public LuanTable next() {
+						if( pos >= hits.length )
+							throw new NoSuchElementException();
+						return doc(hits[pos++].doc);
+					}
+					public void remove() {
+						throw new UnsupportedOperationException();
+					}
+				};
+			}
+		};
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lucene/src/luan/modules/lucene/LuceneSnapshot.java	Tue Sep 30 20:03:56 2014 +0000
@@ -0,0 +1,38 @@
+package luan.modules.lucene;
+
+import java.io.IOException;
+import java.util.Collection;
+import org.apache.lucene.index.IndexCommit;
+
+
+public final class LuceneSnapshot {
+	private final LuceneIndex index;
+	private final IndexCommit commit;  // the commit this snapshot was taken of
+
+	LuceneSnapshot(LuceneIndex index) {
+		try {
+			commit = index.snapshotDeletionPolicy.snapshot();
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+		this.index = index;
+	}
+
+	/** Releases the snapshot; call in a finally block. */
+	public void close() {
+		try {
+			index.snapshotDeletionPolicy.release(commit);
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/** Returns the names of the index files that belong to the snapshot. */
+	public Collection<String> getFileNames() {
+		try {
+			return commit.getFileNames();
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/lucene/src/luan/modules/lucene/LuceneWriter.java	Tue Sep 30 20:03:56 2014 +0000
@@ -0,0 +1,93 @@
+package luan.modules.lucene;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.Set;
+import java.util.List;
+import java.util.ArrayList;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Term;
+import luan.Luan;
+import luan.LuanState;
+import luan.LuanTable;
+import luan.LuanJavaFunction;
+import luan.LuanException;
+
+
+public final class LuceneWriter {
+	public static final String FLD_TYPE = "type index";
+	public static final String FLD_ID = "id index";
+
+	private final LuceneIndex index;  // its write lock is held from construction until close()
+
+	LuceneWriter(LuceneIndex index) {
+		index.writeLock.lock();
+		this.index = index;
+	}
+
+	/** Releases the write lock; call in a finally block. */
+	void close() {
+		index.writeLock.unlock();
+	}
+
+	void commit() throws IOException {
+		index.writer.commit();
+	}
+
+	void addDocument(LuanTable doc) throws IOException {
+		index.writer.addDocument(LuceneDocument.toLucene(doc));
+	}
+
+	void updateDocument(Term term,LuanTable doc) throws IOException {
+		index.writer.updateDocument(term,LuceneDocument.toLucene(doc));
+	}
+
+	/** Deletes the documents that match the terms in tblTerms, a table of field name to value. */
+	public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException {
+		List<Term> terms = new ArrayList<Term>();
+		for( Map.Entry<Object,Object> pair : tblTerms ) {
+			Object field = pair.getKey();
+			if( !(field instanceof String) )
+				throw luan.exception("key must be a string but got "+field.getClass().getSimpleName());
+			Object text = pair.getValue();
+			if( !(text instanceof String) )
+				throw luan.exception("value must be a string but got "+text.getClass().getSimpleName());
+			terms.add( new Term( (String)field, (String)text ) );
+		}
+		index.writer.deleteDocuments(terms.toArray(new Term[terms.size()]));
+	}
+
+	String nextId() {
+		return index.nextId();
+	}
+
+	/** Saves doc, which needs a type: added under a new id if it has no id, else replacing that id. */
+	public void save_document(LuanTable doc) throws IOException {
+		if( doc.get(FLD_TYPE)==null )
+			throw new RuntimeException("missing '"+FLD_TYPE+"'");
+		String id = (String)doc.get(FLD_ID);
+		if( id != null ) {
+			updateDocument(new Term(FLD_ID,id),doc);
+			return;
+		}
+		doc.put(FLD_ID,nextId());
+		addDocument(doc);
+	}
+
+	// luan
+
+	private void add(LuanTable t,String method,Class<?>... parameterTypes) throws NoSuchMethodException {
+		t.put( method, new LuanJavaFunction(LuceneWriter.class.getMethod(method,parameterTypes),this) );
+	}
+
+	LuanTable table() {  // functions of this writer that are callable from Luan
+		LuanTable tbl = Luan.newTable();
+		try {
+			add( tbl, "save_document", LuanTable.class );
+			add( tbl, "delete_documents", LuanState.class, LuanTable.class );
+		} catch(NoSuchMethodException e) {
+			throw new RuntimeException(e);
+		}
+		return tbl;
+	}
+}