comparison lucene/src/luan/modules/lucene/LuceneIndex.java @ 546:eaef1005ab87

general lucene cleanup
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 14 Jun 2015 22:17:58 -0600
parents ddcd4296107a
children 0be287ab0309
comparison
equal deleted inserted replaced
545:ddcd4296107a 546:eaef1005ab87
4 import java.io.File; 4 import java.io.File;
5 import java.io.FileOutputStream; 5 import java.io.FileOutputStream;
6 import java.io.FileInputStream; 6 import java.io.FileInputStream;
7 import java.io.IOException; 7 import java.io.IOException;
8 import java.util.Iterator; 8 import java.util.Iterator;
9 import java.util.Map;
10 import java.util.List;
11 import java.util.ArrayList;
12 import java.util.Set;
9 import java.util.concurrent.locks.Lock; 13 import java.util.concurrent.locks.Lock;
10 import java.util.concurrent.locks.ReentrantLock; 14 import java.util.concurrent.locks.ReentrantLock;
11 import java.util.zip.ZipOutputStream; 15 import java.util.zip.ZipOutputStream;
12 import java.util.zip.ZipEntry; 16 import java.util.zip.ZipEntry;
13 import org.apache.lucene.analysis.Analyzer; 17 import org.apache.lucene.analysis.Analyzer;
14 import org.apache.lucene.analysis.core.KeywordAnalyzer; 18 import org.apache.lucene.analysis.core.KeywordAnalyzer;
15 import org.apache.lucene.document.Document; 19 import org.apache.lucene.document.Document;
20 import org.apache.lucene.document.Field;
21 import org.apache.lucene.document.StoredField;
22 import org.apache.lucene.document.StringField;
23 import org.apache.lucene.document.IntField;
24 import org.apache.lucene.document.LongField;
25 import org.apache.lucene.document.DoubleField;
26 import org.apache.lucene.index.IndexableField;
16 import org.apache.lucene.index.IndexWriter; 27 import org.apache.lucene.index.IndexWriter;
17 import org.apache.lucene.index.IndexWriterConfig; 28 import org.apache.lucene.index.IndexWriterConfig;
18 import org.apache.lucene.index.DirectoryReader; 29 import org.apache.lucene.index.DirectoryReader;
19 import org.apache.lucene.index.Term; 30 import org.apache.lucene.index.Term;
20 import org.apache.lucene.index.SnapshotDeletionPolicy; 31 import org.apache.lucene.index.SnapshotDeletionPolicy;
32 import org.apache.lucene.index.IndexCommit;
21 import org.apache.lucene.index.AtomicReaderContext; 33 import org.apache.lucene.index.AtomicReaderContext;
22 import org.apache.lucene.store.Directory; 34 import org.apache.lucene.store.Directory;
23 import org.apache.lucene.store.FSDirectory; 35 import org.apache.lucene.store.FSDirectory;
24 import org.apache.lucene.util.Version; 36 import org.apache.lucene.util.Version;
37 import org.apache.lucene.util.BytesRef;
38 import org.apache.lucene.util.NumericUtils;
25 import org.apache.lucene.search.Query; 39 import org.apache.lucene.search.Query;
26 import org.apache.lucene.search.TermQuery; 40 import org.apache.lucene.search.TermQuery;
27 import org.apache.lucene.search.TopDocs; 41 import org.apache.lucene.search.TopDocs;
28 import org.apache.lucene.search.Sort; 42 import org.apache.lucene.search.Sort;
29 import org.apache.lucene.search.SortField; 43 import org.apache.lucene.search.SortField;
56 70
57 private static final String FLD_NEXT_ID = "nextId"; 71 private static final String FLD_NEXT_ID = "nextId";
58 private static final Analyzer analyzer = new KeywordAnalyzer(); 72 private static final Analyzer analyzer = new KeywordAnalyzer();
59 public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer); 73 public static final FieldParser STRING_FIELD_PARSER = new StringFieldParser(analyzer);
60 74
61 final LuanTable myTable; 75 private final ReentrantLock writeLock = new ReentrantLock();
62 final Lock writeLock = new ReentrantLock();
63 private final File indexDir; 76 private final File indexDir;
64 final SnapshotDeletionPolicy snapshotDeletionPolicy; 77 final SnapshotDeletionPolicy snapshotDeletionPolicy;
65 final IndexWriter writer; 78 private final IndexWriter writer;
66 private DirectoryReader reader; 79 private DirectoryReader reader;
67 private IndexSearcher searcher; 80 private IndexSearcher searcher;
68 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); 81 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>();
69 private boolean isClosed = false; 82 private boolean isClosed = false;
70 private final MultiFieldParser mfp = new MultiFieldParser(); 83 private final MultiFieldParser mfp = new MultiFieldParser();
71 84
72 public LuceneIndex(LuanState luan,String indexDirStr,LuanTable myTable) throws LuanException, IOException { 85 public LuceneIndex(LuanState luan,String indexDirStr) throws LuanException, IOException {
73 this.myTable = myTable;
74 mfp.fields.put( "type", STRING_FIELD_PARSER ); 86 mfp.fields.put( "type", STRING_FIELD_PARSER );
75 mfp.fields.put( "id", NumberFieldParser.LONG ); 87 mfp.fields.put( "id", NumberFieldParser.LONG );
76 File indexDir = new File(indexDirStr); 88 File indexDir = new File(indexDirStr);
77 this.indexDir = indexDir; 89 this.indexDir = indexDir;
78 Directory dir = FSDirectory.open(indexDir); 90 Directory dir = FSDirectory.open(indexDir);
86 luan.onClose(this); 98 luan.onClose(this);
87 searcher = new IndexSearcher(reader); 99 searcher = new IndexSearcher(reader);
88 initId(luan); 100 initId(luan);
89 } 101 }
90 102
91 Document toLucene(LuanState luan,LuanTable table) throws LuanException { 103
92 return LuceneDocument.toLucene(luan,table,mfp.fields.keySet());
93 }
94
95 public LuceneWriter openWriter() {
96 return new LuceneWriter(this);
97 }
98
99 private synchronized IndexSearcher openSearcher() throws IOException {
100 DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
101 if( newReader != null ) {
102 reader.decRef();
103 reader = newReader;
104 searcher = new IndexSearcher(reader);
105 }
106 reader.incRef();
107 return searcher;
108 }
109
110 // call in finally block
111 private static void close(IndexSearcher searcher) throws IOException {
112 searcher.getIndexReader().decRef();
113 }
114
115 LuceneSnapshot openSnapshot() throws IOException {
116 return new LuceneSnapshot(this);
117 }
118 104
119 public void delete_all() throws IOException { 105 public void delete_all() throws IOException {
106 boolean commit = !writeLock.isHeldByCurrentThread();
120 writeLock.lock(); 107 writeLock.lock();
121 try { 108 try {
122 writer.deleteAll(); 109 writer.deleteAll();
123 writer.commit();
124 id = idLim = 0; 110 id = idLim = 0;
111 if(commit) writer.commit();
112 } finally {
113 writeLock.unlock();
114 }
115 }
116
117 private static Term term(String key,int value) {
118 BytesRef br = new BytesRef();
119 NumericUtils.intToPrefixCoded(value,0,br);
120 return new Term(key,br);
121 }
122
123 private static Term term(String key,long value) {
124 BytesRef br = new BytesRef();
125 NumericUtils.longToPrefixCoded(value,0,br);
126 return new Term(key,br);
127 }
128
129 private static Term term(LuanState luan,String key,Object value) throws LuanException {
130 if( value instanceof String )
131 return new Term( key, (String)value );
132 if( value instanceof Integer )
133 return term( key, (Integer)value );
134 if( value instanceof Long )
135 return term( key, (Long)value );
136 if( value instanceof Float )
137 return term( key, NumericUtils.floatToSortableInt((Float)value) );
138 if( value instanceof Double )
139 return term( key, NumericUtils.doubleToSortableLong((Double)value) );
140 throw luan.exception("invalid value type '"+value.getClass().getSimpleName()+"' for key '"+key+"'");
141 }
142
143 public void delete_documents(LuanState luan,LuanTable tblTerms) throws LuanException, IOException {
144 List<Term> list = new ArrayList<Term>();
145 for( Map.Entry<Object,Object> entry : tblTerms.iterable(luan) ) {
146 Object key = entry.getKey();
147 Object value = entry.getValue();
148 if( !(key instanceof String) )
149 throw luan.exception("key must be a string but got "+key.getClass().getSimpleName());
150 list.add( term( luan, (String)key, value ) );
151 }
152
153 boolean commit = !writeLock.isHeldByCurrentThread();
154 writeLock.lock();
155 try {
156 writer.deleteDocuments(list.toArray(new Term[list.size()]));
157 if(commit) writer.commit();
158 } finally {
159 writeLock.unlock();
160 }
161 }
162
163 public void save_document(LuanState luan,LuanTable doc) throws LuanException, IOException {
164 if( doc.get(luan,"type")==null )
165 throw luan.exception("missing 'type' field");
166 Long id = (Long)doc.get(luan,"id");
167
168 boolean commit = !writeLock.isHeldByCurrentThread();
169 writeLock.lock();
170 try {
171 if( id == null ) {
172 id = nextId(luan);
173 doc.put(luan,"id",id);
174 writer.addDocument(toLucene(luan,doc));
175 } else {
176 writer.updateDocument( term("id",id), toLucene(luan,doc) );
177 }
178 if(commit) writer.commit();
179 } finally {
180 writeLock.unlock();
181 }
182 }
183
184 public void update_in_transaction(LuanState luan,LuanFunction fn) throws IOException, LuanException {
185 boolean commit = !writeLock.isHeldByCurrentThread();
186 writeLock.lock();
187 try {
188 luan.call(fn);
189 if(commit) writer.commit();
125 } finally { 190 } finally {
126 writeLock.unlock(); 191 writeLock.unlock();
127 } 192 }
128 } 193 }
129 194
144 default: 209 default:
145 throw new RuntimeException(); 210 throw new RuntimeException();
146 } 211 }
147 } 212 }
148 213
149 synchronized long nextId(LuanState luan) throws LuanException, IOException { 214 private synchronized long nextId(LuanState luan) throws LuanException, IOException {
150 if( ++id > idLim ) { 215 if( ++id > idLim ) {
151 idLim += idBatch; 216 idLim += idBatch;
152 LuanTable doc = new LuanTable(); 217 LuanTable doc = new LuanTable();
153 doc.rawPut( "type", "next_id" ); 218 doc.rawPut( "type", "next_id" );
154 doc.rawPut( FLD_NEXT_ID, idLim ); 219 doc.rawPut( FLD_NEXT_ID, idLim );
159 224
160 225
161 public void backup(LuanState luan,String zipFile) throws LuanException, IOException { 226 public void backup(LuanState luan,String zipFile) throws LuanException, IOException {
162 if( !zipFile.endsWith(".zip") ) 227 if( !zipFile.endsWith(".zip") )
163 throw luan.exception("file "+zipFile+" doesn't end with '.zip'"); 228 throw luan.exception("file "+zipFile+" doesn't end with '.zip'");
164 LuceneSnapshot snapshot = openSnapshot(); 229 IndexCommit ic = snapshotDeletionPolicy.snapshot();
165 try { 230 try {
166 ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile)); 231 ZipOutputStream out = new ZipOutputStream(new FileOutputStream(zipFile));
167 for( String fileName : snapshot.getFileNames() ) { 232 for( String fileName : ic.getFileNames() ) {
168 out.putNextEntry(new ZipEntry(fileName)); 233 out.putNextEntry(new ZipEntry(fileName));
169 FileInputStream in = new FileInputStream(new File(indexDir,fileName)); 234 FileInputStream in = new FileInputStream(new File(indexDir,fileName));
170 Utils.copyAll(in,out); 235 Utils.copyAll(in,out);
171 in.close(); 236 in.close();
172 out.closeEntry(); 237 out.closeEntry();
173 } 238 }
174 out.close(); 239 out.close();
175 } finally { 240 } finally {
176 snapshot.close(); 241 snapshotDeletionPolicy.release(ic);
177 } 242 }
178 } 243 }
179 244
180 245
181
182 // luan
183 246
184 public String to_string() { 247 public String to_string() {
185 return writer.getDirectory().toString(); 248 return writer.getDirectory().toString();
186 }
187
188 public void writer(LuanState luan,LuanFunction fn) throws LuanException, IOException {
189 LuceneWriter writer = openWriter();
190 try {
191 luan.call( fn, new Object[]{writer.table()} );
192 writer.commit();
193 } finally {
194 writer.close();
195 }
196 } 249 }
197 250
198 public void close() throws IOException { 251 public void close() throws IOException {
199 if( !isClosed ) { 252 if( !isClosed ) {
200 writer.close(); 253 writer.close();
221 this.searcher = searcher; 274 this.searcher = searcher;
222 } 275 }
223 276
224 @Override public Object call(LuanState luan,Object[] args) throws LuanException { 277 @Override public Object call(LuanState luan,Object[] args) throws LuanException {
225 try { 278 try {
226 return LuceneDocument.toTable(luan,searcher.doc(docID)); 279 return toTable(luan,searcher.doc(docID));
227 } catch(IOException e) { 280 } catch(IOException e) {
228 throw luan.exception(e); 281 throw luan.exception(e);
229 } 282 }
230 } 283 }
231 } 284 }
241 @Override public boolean acceptsDocsOutOfOrder() { 294 @Override public boolean acceptsDocsOutOfOrder() {
242 return true; 295 return true;
243 } 296 }
244 } 297 }
245 298
299 private synchronized IndexSearcher openSearcher() throws IOException {
300 DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
301 if( newReader != null ) {
302 reader.decRef();
303 reader = newReader;
304 searcher = new IndexSearcher(reader);
305 }
306 reader.incRef();
307 return searcher;
308 }
309
310 // call in finally block
311 private static void close(IndexSearcher searcher) throws IOException {
312 searcher.getIndexReader().decRef();
313 }
314
246 public int advanced_search( final LuanState luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException { 315 public int advanced_search( final LuanState luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException {
247 Utils.checkNotNull(luan,queryStr); 316 Utils.checkNotNull(luan,queryStr);
248 Query query = parseQuery(queryStr); 317 Query query = SaneQueryParser.parseQuery(mfp,queryStr);
249 IndexSearcher searcher = threadLocalSearcher.get(); 318 IndexSearcher searcher = threadLocalSearcher.get();
250 boolean inTransaction = searcher != null; 319 boolean inTransaction = searcher != null;
251 if( !inTransaction ) 320 if( !inTransaction )
252 searcher = openSearcher(); 321 searcher = openSearcher();
253 try { 322 try {
275 if( fn==null || n==0 ) { 344 if( fn==null || n==0 ) {
276 TotalHitCountCollector thcc = new TotalHitCountCollector(); 345 TotalHitCountCollector thcc = new TotalHitCountCollector();
277 searcher.search(query,thcc); 346 searcher.search(query,thcc);
278 return thcc.getTotalHits(); 347 return thcc.getTotalHits();
279 } 348 }
280 Sort sort = sortStr==null ? null : parseSort(sortStr); 349 Sort sort = sortStr==null ? null : SaneQueryParser.parseSort(mfp,sortStr);
281 TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort); 350 TopDocs td = sort==null ? searcher.search(query,n) : searcher.search(query,n,sort);
282 final ScoreDoc[] scoreDocs = td.scoreDocs; 351 final ScoreDoc[] scoreDocs = td.scoreDocs;
283 DocFn docFn = new DocFn(searcher); 352 DocFn docFn = new DocFn(searcher);
284 for( int i=0; i<scoreDocs.length; i++ ) { 353 for( int i=0; i<scoreDocs.length; i++ ) {
285 docFn.docID = scoreDocs[i].doc; 354 docFn.docID = scoreDocs[i].doc;
339 return "lucene-indexed-fields"; 408 return "lucene-indexed-fields";
340 } 409 }
341 410
342 }; 411 };
343 412
344 public Query parseQuery(String s) throws ParseException { 413
345 return SaneQueryParser.parseQuery(mfp,s); 414
346 } 415
347 416 private Document toLucene(LuanState luan,LuanTable table) throws LuanException {
348 public Sort parseSort(String s) throws ParseException { 417 Set<String> indexed = mfp.fields.keySet();
349 return SaneQueryParser.parseSort(mfp,s); 418 Document doc = new Document();
419 for( Map.Entry<Object,Object> entry : table.iterable(luan) ) {
420 Object key = entry.getKey();
421 if( !(key instanceof String) )
422 throw luan.exception("key must be string");
423 String name = (String)key;
424 Object value = entry.getValue();
425 if( value instanceof String ) {
426 String s = (String)value;
427 if( indexed.contains(name) ) {
428 doc.add(new StringField(name, s, Field.Store.YES));
429 } else {
430 doc.add(new StoredField(name, s));
431 }
432 } else if( value instanceof Integer ) {
433 int i = (Integer)value;
434 if( indexed.contains(name) ) {
435 doc.add(new IntField(name, i, Field.Store.YES));
436 } else {
437 doc.add(new StoredField(name, i));
438 }
439 } else if( value instanceof Long ) {
440 long i = (Long)value;
441 if( indexed.contains(name) ) {
442 doc.add(new LongField(name, i, Field.Store.YES));
443 } else {
444 doc.add(new StoredField(name, i));
445 }
446 } else if( value instanceof Double ) {
447 double i = (Double)value;
448 if( indexed.contains(name) ) {
449 doc.add(new DoubleField(name, i, Field.Store.YES));
450 } else {
451 doc.add(new StoredField(name, i));
452 }
453 } else if( value instanceof byte[] ) {
454 byte[] b = (byte[])value;
455 doc.add(new StoredField(name, b));
456 } else
457 throw luan.exception("invalid value type "+value.getClass()+"' for '"+name+"'");
458 }
459 return doc;
460 }
461
462 private static LuanTable toTable(LuanState luan,Document doc) throws LuanException {
463 if( doc==null )
464 return null;
465 LuanTable table = new LuanTable();
466 for( IndexableField ifld : doc ) {
467 String name = ifld.name();
468 BytesRef br = ifld.binaryValue();
469 if( br != null ) {
470 table.rawPut(name,br.bytes);
471 continue;
472 }
473 Number n = ifld.numericValue();
474 if( n != null ) {
475 table.rawPut(name,n);
476 continue;
477 }
478 String s = ifld.stringValue();
479 if( s != null ) {
480 table.rawPut(name,s);
481 continue;
482 }
483 throw luan.exception("invalid field type for "+ifld);
484 }
485 return table;
350 } 486 }
351 487
352 } 488 }