Mercurial Hosting > luan
comparison src/luan/modules/lucene/LuceneIndex.java @ 1341:a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Tue, 19 Feb 2019 08:14:40 -0700 |
parents | 8b61c8c4e07a |
children | 60599adc27b8 |
comparison
equal
deleted
inserted
replaced
1340:b3c4fcf29a53 | 1341:a015a0b5c388 |
---|---|
18 import java.util.concurrent.locks.Lock; | 18 import java.util.concurrent.locks.Lock; |
19 import java.util.concurrent.locks.ReentrantLock; | 19 import java.util.concurrent.locks.ReentrantLock; |
20 import java.util.zip.ZipOutputStream; | 20 import java.util.zip.ZipOutputStream; |
21 import java.util.zip.ZipEntry; | 21 import java.util.zip.ZipEntry; |
22 import org.apache.lucene.analysis.Analyzer; | 22 import org.apache.lucene.analysis.Analyzer; |
| | 23 import org.apache.lucene.analysis.TokenStream; |
23 import org.apache.lucene.analysis.core.KeywordAnalyzer; | 24 import org.apache.lucene.analysis.core.KeywordAnalyzer; |
24 import org.apache.lucene.document.Document; | 25 import org.apache.lucene.document.Document; |
25 import org.apache.lucene.document.Field; | 26 import org.apache.lucene.document.Field; |
26 import org.apache.lucene.document.StoredField; | 27 import org.apache.lucene.document.StoredField; |
27 import org.apache.lucene.document.StringField; | 28 import org.apache.lucene.document.StringField; |
164 BytesRef br = new BytesRef(); | 165 BytesRef br = new BytesRef(); |
165 NumericUtils.longToPrefixCoded(value,0,br); | 166 NumericUtils.longToPrefixCoded(value,0,br); |
166 return new Term(key,br); | 167 return new Term(key,br); |
167 } | 168 } |
168 | 169 |
169 public void delete(Luan luan,String queryStr) throws LuanException, IOException, ParseException { | 170 public void delete(Luan luan,String queryStr) |
| | 171 throws LuanException, IOException, ParseException |
| | 172 { |
170 Query query = SaneQueryParser.parseQuery(mfp,queryStr); | 173 Query query = SaneQueryParser.parseQuery(mfp,queryStr); |
171 | 174 |
172 boolean commit = !writeLock.isHeldByCurrentThread(); | 175 boolean commit = !writeLock.isHeldByCurrentThread(); |
173 writeLock.lock(); | 176 writeLock.lock(); |
174 try { | 177 try { |
185 indexedOnlyFields.putIfAbsent(type,new ConcurrentHashMap<String,LuanFunction>()); | 188 indexedOnlyFields.putIfAbsent(type,new ConcurrentHashMap<String,LuanFunction>()); |
186 Map<String,LuanFunction> map = indexedOnlyFields.get(type); | 189 Map<String,LuanFunction> map = indexedOnlyFields.get(type); |
187 map.put(field,fn); | 190 map.put(field,fn); |
188 } | 191 } |
189 | 192 |
190 public void save(Luan luan,LuanTable doc) throws LuanException, IOException { | 193 public void save(Luan luan,LuanTable doc,LuanTable boosts) |
| | 194 throws LuanException, IOException |
| | 195 { |
191 Set indexedOnlySet = new HashSet(); | 196 Set indexedOnlySet = new HashSet(); |
192 Object typeObj = doc.get("type"); | 197 Object typeObj = doc.get("type"); |
193 if( typeObj==null ) | 198 if( typeObj==null ) |
194 throw new LuanException("missing 'type' field"); | 199 throw new LuanException("missing 'type' field"); |
195 if( !(typeObj instanceof String) ) | 200 if( !(typeObj instanceof String) ) |
217 writeLock.lock(); | 222 writeLock.lock(); |
218 try { | 223 try { |
219 if( id == null ) { | 224 if( id == null ) { |
220 id = nextId(luan); | 225 id = nextId(luan); |
221 doc.put("id",id); | 226 doc.put("id",id); |
222 writer.addDocument(toLucene(doc,indexedOnlySet)); | 227 writer.addDocument(toLucene(doc,indexedOnlySet,boosts)); |
223 } else { | 228 } else { |
224 writer.updateDocument( term("id",id), toLucene(doc,indexedOnlySet) ); | 229 writer.updateDocument( term("id",id), toLucene(doc,indexedOnlySet,boosts) ); |
225 } | 230 } |
226 if(commit) writer.commit(); | 231 if(commit) writer.commit(); |
227 } finally { | 232 } finally { |
228 wrote(); | 233 wrote(); |
229 writeLock.unlock(); | 234 writeLock.unlock(); |
281 if( ++id > idLim ) { | 286 if( ++id > idLim ) { |
282 idLim += idBatch; | 287 idLim += idBatch; |
283 LuanTable doc = new LuanTable(luan); | 288 LuanTable doc = new LuanTable(luan); |
284 doc.rawPut( "type", "next_id" ); | 289 doc.rawPut( "type", "next_id" ); |
285 doc.rawPut( FLD_NEXT_ID, idLim ); | 290 doc.rawPut( FLD_NEXT_ID, idLim ); |
286 writer.updateDocument(new Term("type","next_id"),toLucene(doc,Collections.EMPTY_SET)); | 291 writer.updateDocument(new Term("type","next_id"),toLucene(doc,Collections.EMPTY_SET,null)); |
287 wrote(); | 292 wrote(); |
288 } | 293 } |
289 return id; | 294 return id; |
290 } | 295 } |
291 | 296 |
401 | 406 |
402 public void ensure_open() throws IOException { | 407 public void ensure_open() throws IOException { |
403 close(openSearcher()); | 408 close(openSearcher()); |
404 } | 409 } |
405 | 410 |
406 public int advanced_search( final Luan luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException { | 411 public int advanced_search( final Luan luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) |
| | 412 throws LuanException, IOException, ParseException |
| | 413 { |
407 Utils.checkNotNull(queryStr); | 414 Utils.checkNotNull(queryStr); |
408 Query query = SaneQueryParser.parseQuery(mfp,queryStr); | 415 Query query = SaneQueryParser.parseQuery(mfp,queryStr); |
409 IndexSearcher searcher = threadLocalSearcher.get(); | 416 IndexSearcher searcher = threadLocalSearcher.get(); |
410 boolean inTransaction = searcher != null; | 417 boolean inTransaction = searcher != null; |
411 if( !inTransaction ) | 418 if( !inTransaction ) |
477 } | 484 } |
478 mfp.fields.put( field, fp ); | 485 mfp.fields.put( field, fp ); |
479 } | 486 } |
480 | 487 |
481 | 488 |
482 private IndexableField newField(String name,Object value,Field.Store store,Set<String> indexed) | 489 private IndexableField newField(String name,Object value,Field.Store store,Set<String> indexed,Float boost) |
| | 490 throws LuanException |
| | 491 { |
| | 492 IndexableField fld = newField2(name,value,store,indexed); |
| | 493 if( boost != null ) |
| | 494 ((Field)fld).setBoost(boost); |
| | 495 return fld; |
| | 496 } |
| | 497 |
| | 498 private IndexableField newField2(String name,Object value,Field.Store store,Set<String> indexed) |
483 throws LuanException | 499 throws LuanException |
484 { | 500 { |
485 if( value instanceof String ) { | 501 if( value instanceof String ) { |
486 String s = (String)value; | 502 String s = (String)value; |
487 FieldParser fp = mfp.fields.get(name); | 503 FieldParser fp = mfp.fields.get(name); |
520 return new StoredField(name, b); | 536 return new StoredField(name, b); |
521 } else | 537 } else |
522 throw new LuanException("invalid value type "+value.getClass()+"' for '"+name+"'"); | 538 throw new LuanException("invalid value type "+value.getClass()+"' for '"+name+"'"); |
523 } | 539 } |
524 | 540 |
525 private Document toLucene(LuanTable table,Set indexOnly) throws LuanException { | 541 private Document toLucene(LuanTable table,Set indexOnly,LuanTable boosts) throws LuanException { |
526 Set<String> indexed = mfp.fields.keySet(); | 542 Set<String> indexed = mfp.fields.keySet(); |
527 Document doc = new Document(); | 543 Document doc = new Document(); |
528 for( Map.Entry<Object,Object> entry : table.iterable() ) { | 544 for( Map.Entry<Object,Object> entry : table.iterable() ) { |
529 Object key = entry.getKey(); | 545 Object key = entry.getKey(); |
530 if( !(key instanceof String) ) | 546 if( !(key instanceof String) ) |
531 throw new LuanException("key must be string"); | 547 throw new LuanException("key must be string"); |
532 String name = (String)key; | 548 String name = (String)key; |
533 Object value = entry.getValue(); | 549 Object value = entry.getValue(); |
534 Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES; | 550 Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES; |
| | 551 Float boost = null; |
| | 552 if( boosts != null ) { |
| | 553 Object obj = boosts.get(name); |
| | 554 if( obj != null ) { |
| | 555 if( !(obj instanceof Number) ) |
| | 556 throw new LuanException("boost '"+name+"' must be number"); |
| | 557 boost = ((Number)obj).floatValue(); |
| | 558 } |
| | 559 } |
535 if( !(value instanceof LuanTable) ) { | 560 if( !(value instanceof LuanTable) ) { |
536 doc.add(newField(name, value, store, indexed)); | 561 doc.add(newField( name, value, store, indexed, boost )); |
537 } else { // list | 562 } else { // list |
538 LuanTable list = (LuanTable)value; | 563 LuanTable list = (LuanTable)value; |
539 for( Object el : list.asList() ) { | 564 for( Object el : list.asList() ) { |
540 doc.add(newField(name, el, store, indexed)); | 565 doc.add(newField( name, el, store, indexed, boost )); |
541 } | 566 } |
542 } | 567 } |
543 } | 568 } |
544 return doc; | 569 return doc; |
545 } | 570 } |
640 throw new RuntimeException(e); | 665 throw new RuntimeException(e); |
641 } | 666 } |
642 } | 667 } |
643 }; | 668 }; |
644 } | 669 } |
| | 670 |
| | 671 public int count_tokens(String text) |
| | 672 throws IOException |
| | 673 { |
| | 674 int n = 0; |
| | 675 TokenStream ts = analyzer.tokenStream(null,text); |
| | 676 ts.reset(); |
| | 677 while( ts.incrementToken() ) { |
| | 678 n++; |
| | 679 } |
| | 680 ts.close(); |
| | 681 return n; |
| | 682 } |
| | 683 |
645 } | 684 } |