Mercurial Hosting > luan
changeset 230:4438cb2e04d0
start lucene
git-svn-id: https://luan-java.googlecode.com/svn/trunk@231 21e917c8-12df-6dd8-5cb6-c86387c605b9
author | fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9> |
---|---|
date | Tue, 30 Sep 2014 20:03:56 +0000 |
parents | 2a54cb7d1cf4 |
children | a35417bf493a |
files | lucene/ext/lucene-analyzers-common-4.9.0.jar lucene/ext/lucene-core-4.9.0.jar lucene/src/luan/modules/lucene/Lucene.luan lucene/src/luan/modules/lucene/LuceneDocument.java lucene/src/luan/modules/lucene/LuceneIndex.java lucene/src/luan/modules/lucene/LuceneSearcher.java lucene/src/luan/modules/lucene/LuceneSnapshot.java lucene/src/luan/modules/lucene/LuceneWriter.java |
diffstat | 8 files changed, 537 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
diff -r 2a54cb7d1cf4 -r 4438cb2e04d0 lucene/ext/lucene-analyzers-common-4.9.0.jar Binary file lucene/ext/lucene-analyzers-common-4.9.0.jar has changed
diff -r 2a54cb7d1cf4 -r 4438cb2e04d0 lucene/ext/lucene-core-4.9.0.jar Binary file lucene/ext/lucene-core-4.9.0.jar has changed
-- diff -r 2a54cb7d1cf4 -r 4438cb2e04d0 lucene/src/luan/modules/lucene/Lucene.luan (new file)
import "Java"
import "luan.modules.lucene.LuceneIndex"

-- Short name -> actual field name for the fields every saved document uses.
-- The values mirror LuceneWriter.FLD_TYPE and LuceneWriter.FLD_ID; the
-- trailing " index" is the flag that LuceneDocument.toLucene treats as
-- "index this field".
standard_fields = {
	"type" = "type index";
	"id" = "id index";
}

-- Opens the Lucene index stored in directory `indexDir` and returns its Luan
-- table, extended with two convenience functions that each run inside their
-- own index.Writer(...) call.
-- NOTE(review): index.Writer must be provided by LuceneIndex.table() -- confirm
-- it is registered there.
function Index(indexDir)
	-- Fix: the original line had no '=', so `index` was never bound to the table.
	local index = LuceneIndex.new(indexDir).table()

	-- Saves (adds or updates) a single document table.
	function index.save_document(doc)
		index.Writer( function(writer)
			writer.save_document(doc)
		end )
	end

	-- Deletes all documents matching the given field-name -> value terms.
	function index.delete_documents(terms)
		index.Writer( function(writer)
			writer.delete_documents(terms)
		end )
	end

	return index
end
diff -r 2a54cb7d1cf4 -r 4438cb2e04d0 lucene/src/luan/modules/lucene/LuceneDocument.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/luan/modules/lucene/LuceneDocument.java Tue Sep 30 20:03:56 2014 +0000 @@ -0,0 +1,98 @@ +package luan.modules.lucene; + +import java.util.Map; +import java.util.HashMap; +import java.util.Set; +import java.util.HashSet; +import java.util.Arrays; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.LongField; +import org.apache.lucene.util.BytesRef; +import luan.Luan; +import luan.LuanTable; + + +public class LuceneDocument { + // I assume there will be more flags later + public static final String INDEX = "index"; + + private LuceneDocument(String a) {} // never + + static Document toLucene(LuanTable table) { + Document doc = new Document(); + for( Map.Entry<Object,Object> entry : table ) { + Object key = entry.getKey(); + if( !(key instanceof String) ) + throw new IllegalArgumentException("key must be string"); + String name = (String)key; + Object value = entry.getValue(); + if( value == null ) + continue; + Set<String> flags = new HashSet<String>(); + String[] a = name.split(" +"); + for( int i=1; i<a.length; i++ ) { + flags.add(a[i]); + } + if( value instanceof String ) { + String s = (String)value; + if( flags.remove(INDEX) ) { + doc.add(new StringField(name, s, Field.Store.YES)); + } else { + doc.add(new StoredField(name, s)); + } + } else if( value instanceof Integer ) { + int i = (Integer)value; + if( flags.remove(INDEX) ) { + doc.add(new IntField(name, i, Field.Store.YES)); + } else { + doc.add(new StoredField(name, i)); + } + } else if( value instanceof Long ) { + long i = (Long)value; + if( flags.remove(INDEX) ) { + doc.add(new LongField(name, i, 
Field.Store.YES)); + } else { + doc.add(new StoredField(name, i)); + } + } else if( value instanceof byte[] ) { + byte[] b = (byte[])value; + doc.add(new StoredField(name, b)); + } else + throw new IllegalArgumentException("invalid value type "+value.getClass()+"' for '"+name+"'"); + if( !flags.isEmpty() ) + throw new IllegalArgumentException("invalid flags "+flags+" in '"+name+"'"); + } + return doc; + } + + static LuanTable toTable(Document doc) { + if( doc==null ) + return null; + LuanTable table = Luan.newTable(); + for( IndexableField ifld : doc ) { + String name = ifld.name(); + BytesRef br = ifld.binaryValue(); + if( br != null ) { + table.put(name,br.bytes); + continue; + } + Number n = ifld.numericValue(); + if( n != null ) { + table.put(name,n); + continue; + } + String s = ifld.stringValue(); + if( s != null ) { + table.put(name,s); + continue; + } + throw new RuntimeException("invalid field type for "+ifld); + } + return table; + } +}
diff -r 2a54cb7d1cf4 -r 4438cb2e04d0 lucene/src/luan/modules/lucene/LuceneIndex.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/luan/modules/lucene/LuceneIndex.java Tue Sep 30 20:03:56 2014 +0000 @@ -0,0 +1,198 @@ +package luan.modules.lucene; + +import java.io.File; +import java.io.FileOutputStream; +import java.io.FileInputStream; +import java.io.IOException; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import java.util.zip.ZipOutputStream; +import java.util.zip.ZipEntry; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.standard.StandardAnalyzer; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.SnapshotDeletionPolicy; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.util.Version; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import luan.modules.Utils; +import luan.Luan; +import luan.LuanState; +import luan.LuanTable; +import luan.LuanFunction; +import luan.LuanJavaFunction; +import luan.LuanException; + + +public final class LuceneIndex { + private static final String FLD_TYPE = LuceneWriter.FLD_TYPE; + private static final String FLD_NEXT_ID = "nextId"; + + final Lock writeLock = new ReentrantLock(); + private final File indexDir; + final SnapshotDeletionPolicy snapshotDeletionPolicy; + final IndexWriter writer; + private DirectoryReader reader; + private LuceneSearcher searcher; + + public LuceneIndex(String indexDirStr) { + try { + File indexDir = new File(indexDirStr); + this.indexDir = indexDir; + Directory dir = FSDirectory.open(indexDir); + Version version = Version.LUCENE_4_9; + Analyzer analyzer = new StandardAnalyzer(version); + IndexWriterConfig conf = new 
IndexWriterConfig(version,analyzer); + snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy()); + conf.setIndexDeletionPolicy(snapshotDeletionPolicy); + writer = new IndexWriter(dir,conf); + writer.commit(); // commit index creation + reader = DirectoryReader.open(dir); + searcher = new LuceneSearcher(reader); + initId(); + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + public LuceneWriter openWriter() { + return new LuceneWriter(this); + } + + public synchronized LuceneSearcher openSearcher() { + try { + DirectoryReader newReader = DirectoryReader.openIfChanged(reader); + if( newReader != null ) { + reader.decRef(); + reader = newReader; + searcher = new LuceneSearcher(reader); + } + reader.incRef(); + return searcher; + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + public LuceneSnapshot openSnapshot() { + return new LuceneSnapshot(this); + } + + + private long id = 0; + private long idLim = 0; + private final int idBatch = 10; + + private void initId() { + TopDocs td = searcher.search(new TermQuery(new Term(FLD_TYPE,"next_id")),1); + switch(td.totalHits) { + case 0: + break; // do nothing + case 1: + LuanTable doc = searcher.doc(td.scoreDocs[0].doc); + idLim = (Long)doc.get(FLD_NEXT_ID); + id = idLim; + break; + default: + throw new RuntimeException(); + } + } + + synchronized String nextId() { + try { + String rtn = Long.toString(++id); + if( id > idLim ) { + idLim += idBatch; + LuanTable doc = Luan.newTable(); + doc.put( FLD_TYPE, "next_id" ); + doc.put( FLD_NEXT_ID, idLim ); + writer.updateDocument(new Term(FLD_TYPE,"next_id"),LuceneDocument.toLucene(doc)); + } + return rtn; + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + public LuanTable getDocument(String id) { + return getDocument(new Term(LuceneWriter.FLD_ID,id)); + } + + public LuanTable getDocument(Term term) { + LuceneSearcher searcher = openSearcher(); + try { + TopDocs td = searcher.search(new 
TermQuery(term),1); + switch(td.totalHits) { + case 0: + return null; + case 1: + return searcher.doc(td.scoreDocs[0].doc); + default: + throw new RuntimeException(); + } + } finally { + searcher.close(); + } + } + + + public void backup(String zipFile) { + if( !zipFile.endsWith(".zip") ) + throw new RuntimeException("file "+zipFile+" doesn't end with '.zip'"); + LuceneSnapshot snapshot = openSnapshot(); + try { + ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile)); + for( String fileName : snapshot.getFileNames() ) { + out.putNextEntry(new ZipEntry(fileName)); + FileInputStream in = new FileInputStream(new File(indexDir,fileName)); + Utils.copyAll(in,out); + in.close(); + out.closeEntry(); + } + out.close(); + } catch(IOException e) { + throw new RuntimeException(e); + } finally { + snapshot.close(); + } + } + + + + // luan + + public String to_string() { + return writer.getDirectory().toString(); + } + + public void Writer(LuanState luan,LuanFunction fn) throws LuanException, IOException { + LuceneWriter writer = openWriter(); + try { + luan.call( fn, new Object[]{writer.table()} ); + writer.commit(); + } finally { + writer.close(); + } + } + + private void add(LuanTable t,String method,Class<?>... parameterTypes) throws NoSuchMethodException { + t.put( method, new LuanJavaFunction(LuceneIndex.class.getMethod(method,parameterTypes),this) ); + } + + public LuanTable table() { + LuanTable tbl = Luan.newTable(); + try { + add( tbl, "to_string" ); + add( tbl, "backup", String.class ); + } catch(NoSuchMethodException e) { + throw new RuntimeException(e); + } + return tbl; + } + +}
diff -r 2a54cb7d1cf4 -r 4438cb2e04d0 lucene/src/luan/modules/lucene/LuceneSearcher.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/luan/modules/lucene/LuceneSearcher.java Tue Sep 30 20:03:56 2014 +0000 @@ -0,0 +1,85 @@ +package luan.modules.lucene; + +import java.io.IOException; +import java.util.Iterator; +import java.util.NoSuchElementException; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.document.Document; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.TopFieldDocs; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.ScoreDoc; +import luan.LuanTable; + + +public final class LuceneSearcher { + private final IndexSearcher searcher; + + LuceneSearcher(IndexReader reader) { + this.searcher = new IndexSearcher(reader); + } + + // call in finally block + public void close() { + try { + searcher.getIndexReader().decRef(); + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + private Document rawDoc(int docID) { + try { + return searcher.doc(docID); + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + public LuanTable doc(int docID) { + return LuceneDocument.toTable(rawDoc(docID)); + } + + public TopDocs search(Query query,int n) { + try { + return searcher.search(query,n); + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + public TopFieldDocs search(Query query,int n,Sort sort) { + try { + return searcher.search(query,n,sort); + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + public Iterable<LuanTable> docs(TopDocs td) { + final ScoreDoc[] scoreDocs = td.scoreDocs; + return new Iterable<LuanTable>() { + public Iterator<LuanTable> iterator() { + return new Iterator<LuanTable>() { + private int i = 0; + + public boolean hasNext() { + return i < scoreDocs.length; + } + + public LuanTable next() { + if( !hasNext() ) 
+ throw new NoSuchElementException(); + return doc(scoreDocs[i++].doc); + } + + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + }; + } +}
diff -r 2a54cb7d1cf4 -r 4438cb2e04d0 lucene/src/luan/modules/lucene/LuceneSnapshot.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/luan/modules/lucene/LuceneSnapshot.java Tue Sep 30 20:03:56 2014 +0000 @@ -0,0 +1,38 @@ +package luan.modules.lucene; + +import java.io.IOException; +import java.util.Collection; +import org.apache.lucene.index.IndexCommit; + + +public final class LuceneSnapshot { + private final LuceneIndex index; + private final IndexCommit ic; + + LuceneSnapshot(LuceneIndex index) { + this.index = index; + try { + this.ic = index.snapshotDeletionPolicy.snapshot(); + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + // call in finally block + public void close() { + try { + index.snapshotDeletionPolicy.release(ic); + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + public Collection<String> getFileNames() { + try { + return ic.getFileNames(); + } catch(IOException e) { + throw new RuntimeException(e); + } + } + +}
diff -r 2a54cb7d1cf4 -r 4438cb2e04d0 lucene/src/luan/modules/lucene/LuceneWriter.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lucene/src/luan/modules/lucene/LuceneWriter.java Tue Sep 30 20:03:56 2014 +0000 @@ -0,0 +1,93 @@ +package luan.modules.lucene; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; +import java.util.List; +import java.util.ArrayList; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import luan.Luan; +import luan.LuanState; +import luan.LuanTable; +import luan.LuanJavaFunction; +import luan.LuanException; + + +public final class LuceneWriter { + public static final String FLD_TYPE = "type index"; + public static final String FLD_ID = "id index"; + + private final LuceneIndex index; + + LuceneWriter(LuceneIndex index) { + index.writeLock.lock(); + this.index = index; + } + + // call in finally block + void close() { + index.writeLock.unlock(); + } + + void commit() throws IOException { + index.writer.commit(); + } + + void addDocument(LuanTable doc) throws IOException { + index.writer.addDocument(LuceneDocument.toLucene(doc)); + } + + void updateDocument(Term term,LuanTable doc) throws IOException { + index.writer.updateDocument(term,LuceneDocument.toLucene(doc)); + } + + public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException { + List<Term> list = new ArrayList<Term>(); + for( Map.Entry<Object,Object> entry : tblTerms ) { + Object key = entry.getKey(); + Object value = entry.getValue(); + if( !(key instanceof String) ) + throw luan.exception("key must be a string but got "+key.getClass().getSimpleName()); + if( !(value instanceof String) ) + throw luan.exception("value must be a string but got "+value.getClass().getSimpleName()); + list.add( new Term( (String)key, (String)value ) ); + } + index.writer.deleteDocuments(list.toArray(new Term[list.size()])); + } + + String nextId() { + return index.nextId(); + } + + public void 
save_document(LuanTable doc) throws IOException { + if( doc.get(FLD_TYPE)==null ) + throw new RuntimeException("missing '"+FLD_TYPE+"'"); + String id = (String)doc.get(FLD_ID); + if( id == null ) { + id = nextId(); + doc.put(FLD_ID,id); + addDocument(doc); + } else { + updateDocument(new Term(FLD_ID,id),doc); + } + } + + // luan + + private void add(LuanTable t,String method,Class<?>... parameterTypes) throws NoSuchMethodException { + t.put( method, new LuanJavaFunction(LuceneWriter.class.getMethod(method,parameterTypes),this) ); + } + + LuanTable table() { + LuanTable tbl = Luan.newTable(); + try { + add( tbl, "save_document", LuanTable.class ); + add( tbl, "delete_documents", LuanState.class, LuanTable.class ); + } catch(NoSuchMethodException e) { + throw new RuntimeException(e); + } + return tbl; + } + +}