comparison src/luan/modules/lucene/LuceneIndex.java @ 1341:a015a0b5c388

add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
author Franklin Schmidt <fschmidt@gmail.com>
date Tue, 19 Feb 2019 08:14:40 -0700
parents 8b61c8c4e07a
children 60599adc27b8
comparison
equal deleted inserted replaced
1340:b3c4fcf29a53 1341:a015a0b5c388
18 import java.util.concurrent.locks.Lock; 18 import java.util.concurrent.locks.Lock;
19 import java.util.concurrent.locks.ReentrantLock; 19 import java.util.concurrent.locks.ReentrantLock;
20 import java.util.zip.ZipOutputStream; 20 import java.util.zip.ZipOutputStream;
21 import java.util.zip.ZipEntry; 21 import java.util.zip.ZipEntry;
22 import org.apache.lucene.analysis.Analyzer; 22 import org.apache.lucene.analysis.Analyzer;
23 import org.apache.lucene.analysis.TokenStream;
23 import org.apache.lucene.analysis.core.KeywordAnalyzer; 24 import org.apache.lucene.analysis.core.KeywordAnalyzer;
24 import org.apache.lucene.document.Document; 25 import org.apache.lucene.document.Document;
25 import org.apache.lucene.document.Field; 26 import org.apache.lucene.document.Field;
26 import org.apache.lucene.document.StoredField; 27 import org.apache.lucene.document.StoredField;
27 import org.apache.lucene.document.StringField; 28 import org.apache.lucene.document.StringField;
164 BytesRef br = new BytesRef(); 165 BytesRef br = new BytesRef();
165 NumericUtils.longToPrefixCoded(value,0,br); 166 NumericUtils.longToPrefixCoded(value,0,br);
166 return new Term(key,br); 167 return new Term(key,br);
167 } 168 }
168 169
169 public void delete(Luan luan,String queryStr) throws LuanException, IOException, ParseException { 170 public void delete(Luan luan,String queryStr)
171 throws LuanException, IOException, ParseException
172 {
170 Query query = SaneQueryParser.parseQuery(mfp,queryStr); 173 Query query = SaneQueryParser.parseQuery(mfp,queryStr);
171 174
172 boolean commit = !writeLock.isHeldByCurrentThread(); 175 boolean commit = !writeLock.isHeldByCurrentThread();
173 writeLock.lock(); 176 writeLock.lock();
174 try { 177 try {
185 indexedOnlyFields.putIfAbsent(type,new ConcurrentHashMap<String,LuanFunction>()); 188 indexedOnlyFields.putIfAbsent(type,new ConcurrentHashMap<String,LuanFunction>());
186 Map<String,LuanFunction> map = indexedOnlyFields.get(type); 189 Map<String,LuanFunction> map = indexedOnlyFields.get(type);
187 map.put(field,fn); 190 map.put(field,fn);
188 } 191 }
189 192
190 public void save(Luan luan,LuanTable doc) throws LuanException, IOException { 193 public void save(Luan luan,LuanTable doc,LuanTable boosts)
194 throws LuanException, IOException
195 {
191 Set indexedOnlySet = new HashSet(); 196 Set indexedOnlySet = new HashSet();
192 Object typeObj = doc.get("type"); 197 Object typeObj = doc.get("type");
193 if( typeObj==null ) 198 if( typeObj==null )
194 throw new LuanException("missing 'type' field"); 199 throw new LuanException("missing 'type' field");
195 if( !(typeObj instanceof String) ) 200 if( !(typeObj instanceof String) )
217 writeLock.lock(); 222 writeLock.lock();
218 try { 223 try {
219 if( id == null ) { 224 if( id == null ) {
220 id = nextId(luan); 225 id = nextId(luan);
221 doc.put("id",id); 226 doc.put("id",id);
222 writer.addDocument(toLucene(doc,indexedOnlySet)); 227 writer.addDocument(toLucene(doc,indexedOnlySet,boosts));
223 } else { 228 } else {
224 writer.updateDocument( term("id",id), toLucene(doc,indexedOnlySet) ); 229 writer.updateDocument( term("id",id), toLucene(doc,indexedOnlySet,boosts) );
225 } 230 }
226 if(commit) writer.commit(); 231 if(commit) writer.commit();
227 } finally { 232 } finally {
228 wrote(); 233 wrote();
229 writeLock.unlock(); 234 writeLock.unlock();
281 if( ++id > idLim ) { 286 if( ++id > idLim ) {
282 idLim += idBatch; 287 idLim += idBatch;
283 LuanTable doc = new LuanTable(luan); 288 LuanTable doc = new LuanTable(luan);
284 doc.rawPut( "type", "next_id" ); 289 doc.rawPut( "type", "next_id" );
285 doc.rawPut( FLD_NEXT_ID, idLim ); 290 doc.rawPut( FLD_NEXT_ID, idLim );
286 writer.updateDocument(new Term("type","next_id"),toLucene(doc,Collections.EMPTY_SET)); 291 writer.updateDocument(new Term("type","next_id"),toLucene(doc,Collections.EMPTY_SET,null));
287 wrote(); 292 wrote();
288 } 293 }
289 return id; 294 return id;
290 } 295 }
291 296
401 406
402 public void ensure_open() throws IOException { 407 public void ensure_open() throws IOException {
403 close(openSearcher()); 408 close(openSearcher());
404 } 409 }
405 410
406 public int advanced_search( final Luan luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException { 411 public int advanced_search( final Luan luan, String queryStr, LuanFunction fn, Integer n, String sortStr )
412 throws LuanException, IOException, ParseException
413 {
407 Utils.checkNotNull(queryStr); 414 Utils.checkNotNull(queryStr);
408 Query query = SaneQueryParser.parseQuery(mfp,queryStr); 415 Query query = SaneQueryParser.parseQuery(mfp,queryStr);
409 IndexSearcher searcher = threadLocalSearcher.get(); 416 IndexSearcher searcher = threadLocalSearcher.get();
410 boolean inTransaction = searcher != null; 417 boolean inTransaction = searcher != null;
411 if( !inTransaction ) 418 if( !inTransaction )
477 } 484 }
478 mfp.fields.put( field, fp ); 485 mfp.fields.put( field, fp );
479 } 486 }
480 487
481 488
482 private IndexableField newField(String name,Object value,Field.Store store,Set<String> indexed) 489 private IndexableField newField(String name,Object value,Field.Store store,Set<String> indexed,Float boost)
490 throws LuanException
491 {
492 IndexableField fld = newField2(name,value,store,indexed);
493 if( boost != null )
494 ((Field)fld).setBoost(boost);
495 return fld;
496 }
497
498 private IndexableField newField2(String name,Object value,Field.Store store,Set<String> indexed)
483 throws LuanException 499 throws LuanException
484 { 500 {
485 if( value instanceof String ) { 501 if( value instanceof String ) {
486 String s = (String)value; 502 String s = (String)value;
487 FieldParser fp = mfp.fields.get(name); 503 FieldParser fp = mfp.fields.get(name);
520 return new StoredField(name, b); 536 return new StoredField(name, b);
521 } else 537 } else
522 throw new LuanException("invalid value type "+value.getClass()+"' for '"+name+"'"); 538 throw new LuanException("invalid value type "+value.getClass()+"' for '"+name+"'");
523 } 539 }
524 540
525 private Document toLucene(LuanTable table,Set indexOnly) throws LuanException { 541 private Document toLucene(LuanTable table,Set indexOnly,LuanTable boosts) throws LuanException {
526 Set<String> indexed = mfp.fields.keySet(); 542 Set<String> indexed = mfp.fields.keySet();
527 Document doc = new Document(); 543 Document doc = new Document();
528 for( Map.Entry<Object,Object> entry : table.iterable() ) { 544 for( Map.Entry<Object,Object> entry : table.iterable() ) {
529 Object key = entry.getKey(); 545 Object key = entry.getKey();
530 if( !(key instanceof String) ) 546 if( !(key instanceof String) )
531 throw new LuanException("key must be string"); 547 throw new LuanException("key must be string");
532 String name = (String)key; 548 String name = (String)key;
533 Object value = entry.getValue(); 549 Object value = entry.getValue();
534 Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES; 550 Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES;
551 Float boost = null;
552 if( boosts != null ) {
553 Object obj = boosts.get(name);
554 if( obj != null ) {
555 if( !(obj instanceof Number) )
556 throw new LuanException("boost '"+name+"' must be number");
557 boost = ((Number)obj).floatValue();
558 }
559 }
535 if( !(value instanceof LuanTable) ) { 560 if( !(value instanceof LuanTable) ) {
536 doc.add(newField(name, value, store, indexed)); 561 doc.add(newField( name, value, store, indexed, boost ));
537 } else { // list 562 } else { // list
538 LuanTable list = (LuanTable)value; 563 LuanTable list = (LuanTable)value;
539 for( Object el : list.asList() ) { 564 for( Object el : list.asList() ) {
540 doc.add(newField(name, el, store, indexed)); 565 doc.add(newField( name, el, store, indexed, boost ));
541 } 566 }
542 } 567 }
543 } 568 }
544 return doc; 569 return doc;
545 } 570 }
640 throw new RuntimeException(e); 665 throw new RuntimeException(e);
641 } 666 }
642 } 667 }
643 }; 668 };
644 } 669 }
670
671 public int count_tokens(String text)
672 throws IOException
673 {
674 int n = 0;
675 TokenStream ts = analyzer.tokenStream(null,text);
676 ts.reset();
677 while( ts.incrementToken() ) {
678 n++;
679 }
680 ts.close();
681 return n;
682 }
683
645 } 684 }