comparison lucene/src/luan/modules/lucene/LuceneIndex.java @ 624:8281a248c47e

add lucene highlighter
author Franklin Schmidt <fschmidt@gmail.com>
date Thu, 07 Jan 2016 18:46:07 -0700
parents 1a53333eb4d5
children cdc70de628b5
comparison
equal deleted inserted replaced
623:d592bf0947a9 624:8281a248c47e
48 import org.apache.lucene.search.TotalHitCountCollector; 48 import org.apache.lucene.search.TotalHitCountCollector;
49 import org.apache.lucene.search.ScoreDoc; 49 import org.apache.lucene.search.ScoreDoc;
50 import org.apache.lucene.search.Collector; 50 import org.apache.lucene.search.Collector;
51 import org.apache.lucene.search.Scorer; 51 import org.apache.lucene.search.Scorer;
52 import org.apache.lucene.search.BooleanClause; 52 import org.apache.lucene.search.BooleanClause;
53 import org.apache.lucene.search.highlight.Formatter;
54 import org.apache.lucene.search.highlight.Highlighter;
55 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
56 import org.apache.lucene.search.highlight.NullFragmenter;
57 import org.apache.lucene.search.highlight.QueryScorer;
58 import org.apache.lucene.search.highlight.TokenGroup;
53 import sane.lucene.queryparser.SaneQueryParser; 59 import sane.lucene.queryparser.SaneQueryParser;
54 import sane.lucene.queryparser.FieldParser; 60 import sane.lucene.queryparser.FieldParser;
55 import sane.lucene.queryparser.MultiFieldParser; 61 import sane.lucene.queryparser.MultiFieldParser;
56 import sane.lucene.queryparser.StringFieldParser; 62 import sane.lucene.queryparser.StringFieldParser;
57 import sane.lucene.queryparser.NumberFieldParser; 63 import sane.lucene.queryparser.NumberFieldParser;
83 private IndexSearcher searcher; 89 private IndexSearcher searcher;
84 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); 90 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>();
85 private boolean isClosed = false; 91 private boolean isClosed = false;
86 private final MultiFieldParser mfp; 92 private final MultiFieldParser mfp;
87 public final LuanTable indexed_only_fields = new LuanTable(); 93 public final LuanTable indexed_only_fields = new LuanTable();
94 private final Analyzer analyzer;
88 95
89 public LuceneIndex(LuanState luan,String indexDirStr,FieldParser defaultFieldParser,String[] defaultFields) throws LuanException, IOException { 96 public LuceneIndex(LuanState luan,String indexDirStr,FieldParser defaultFieldParser,String[] defaultFields) throws LuanException, IOException {
90 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields); 97 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields);
91 mfp.fields.put( "type", STRING_FIELD_PARSER ); 98 mfp.fields.put( "type", STRING_FIELD_PARSER );
92 mfp.fields.put( "id", NumberFieldParser.LONG ); 99 mfp.fields.put( "id", NumberFieldParser.LONG );
97 Analyzer analyzer = STRING_FIELD_PARSER.analyzer; 104 Analyzer analyzer = STRING_FIELD_PARSER.analyzer;
98 if( defaultFieldParser instanceof StringFieldParser ) { 105 if( defaultFieldParser instanceof StringFieldParser ) {
99 StringFieldParser sfp = (StringFieldParser)defaultFieldParser; 106 StringFieldParser sfp = (StringFieldParser)defaultFieldParser;
100 analyzer = sfp.analyzer; 107 analyzer = sfp.analyzer;
101 } 108 }
109 this.analyzer = analyzer;
102 IndexWriterConfig conf = new IndexWriterConfig(version,analyzer); 110 IndexWriterConfig conf = new IndexWriterConfig(version,analyzer);
103 snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy()); 111 snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy());
104 conf.setIndexDeletionPolicy(snapshotDeletionPolicy); 112 conf.setIndexDeletionPolicy(snapshotDeletionPolicy);
105 writer = new IndexWriter(dir,conf); 113 writer = new IndexWriter(dir,conf);
106 writer.commit(); // commit index creation 114 writer.commit(); // commit index creation
544 } 552 }
545 } 553 }
546 return table; 554 return table;
547 } 555 }
548 556
557
558 public LuanFunction highlighter(LuanState luan,String queryStr,LuanFunction formatter) throws ParseException {
559 Query query = SaneQueryParser.parseQuery(mfp,queryStr);
560 Formatter fmt = new Formatter() {
561 public String highlightTerm(String originalText,TokenGroup tokenGroup) {
562 if( tokenGroup.getTotalScore() <= 0 )
563 return originalText;
564 try {
565 return (String)Luan.first(formatter.call(luan,new Object[]{originalText}));
566 } catch(LuanException e) {
567 throw new LuanRuntimeException(e);
568 }
569 }
570 };
571 Highlighter hl = new Highlighter( fmt, new QueryScorer(query) );
572 hl.setTextFragmenter( new NullFragmenter() );
573 return new LuanFunction() {
574 @Override public String call(LuanState luan,Object[] args) throws LuanException {
575 String text = (String)args[0];
576 try {
577 String s = hl.getBestFragment(analyzer,null,text);
578 return s!=null ? s : text;
579 } catch(LuanRuntimeException e) {
580 throw (LuanException)e.getCause();
581 } catch(IOException e) {
582 throw new RuntimeException(e);
583 } catch(InvalidTokenOffsetsException e) {
584 throw new RuntimeException(e);
585 }
586 }
587 };
588 }
549 } 589 }