comparison src/luan/modules/lucene/LuceneIndex.java @ 1248:475905984870

improve lucene highlighter and allow bbcode_to_text quoter to be nil
author Franklin Schmidt <fschmidt@gmail.com>
date Wed, 18 Jul 2018 01:38:53 -0600
parents a12dba1f0787
children 9fa8b8389578
comparison
equal deleted inserted replaced
1247:728d8e75f476 1248:475905984870
54 import org.apache.lucene.search.Scorer; 54 import org.apache.lucene.search.Scorer;
55 import org.apache.lucene.search.BooleanClause; 55 import org.apache.lucene.search.BooleanClause;
56 import org.apache.lucene.search.highlight.Formatter; 56 import org.apache.lucene.search.highlight.Formatter;
57 import org.apache.lucene.search.highlight.Highlighter; 57 import org.apache.lucene.search.highlight.Highlighter;
58 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; 58 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
59 import org.apache.lucene.search.highlight.Fragmenter;
59 import org.apache.lucene.search.highlight.NullFragmenter; 60 import org.apache.lucene.search.highlight.NullFragmenter;
61 import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
60 import org.apache.lucene.search.highlight.QueryScorer; 62 import org.apache.lucene.search.highlight.QueryScorer;
61 import org.apache.lucene.search.highlight.TokenGroup; 63 import org.apache.lucene.search.highlight.TokenGroup;
62 import luan.modules.lucene.queryparser.SaneQueryParser; 64 import luan.modules.lucene.queryparser.SaneQueryParser;
63 import luan.modules.lucene.queryparser.FieldParser; 65 import luan.modules.lucene.queryparser.FieldParser;
64 import luan.modules.lucene.queryparser.MultiFieldParser; 66 import luan.modules.lucene.queryparser.MultiFieldParser;
575 } 577 }
576 return table; 578 return table;
577 } 579 }
578 580
579 581
580 public LuanFunction highlighter(LuanState luan,String queryStr,LuanFunction formatter) throws ParseException { 582 private static final Formatter nullFormatter = new Formatter() {
583 public String highlightTerm(String originalText,TokenGroup tokenGroup) {
584 return originalText;
585 }
586 };
587
588 public LuanFunction highlighter(LuanState luan,String queryStr,LuanFunction formatter,final Integer fragmentSize,String dotdotdot) throws ParseException {
581 Query query = SaneQueryParser.parseQuery(mfp,queryStr); 589 Query query = SaneQueryParser.parseQuery(mfp,queryStr);
582 Formatter fmt = new Formatter() { 590 Formatter fmt = new Formatter() {
583 public String highlightTerm(String originalText,TokenGroup tokenGroup) { 591 public String highlightTerm(String originalText,TokenGroup tokenGroup) {
584 if( tokenGroup.getTotalScore() <= 0 ) 592 if( tokenGroup.getTotalScore() <= 0 )
585 return originalText; 593 return originalText;
586 try { 594 try {
587 return (String)Luan.first(formatter.call(luan,new Object[]{originalText})); 595 return (String)Luan.first(formatter.call(luan,new Object[]{originalText}));
588 } catch(LuanException e) { 596 } catch(LuanException e) {
589 throw new LuanRuntimeException(e); 597 throw new LuanRuntimeException(e);
590 } 598 }
591 } 599 }
592 }; 600 };
593 Highlighter hl = new Highlighter( fmt, new QueryScorer(query) ); 601 QueryScorer queryScorer = new QueryScorer(query);
602 final Highlighter chooser = fragmentSize==null ? null : new Highlighter(nullFormatter,queryScorer);
603 if( chooser != null )
604 chooser.setTextFragmenter( new SimpleSpanFragmenter(queryScorer,fragmentSize) );
605 final Highlighter hl = new Highlighter(fmt,queryScorer);
594 hl.setTextFragmenter( new NullFragmenter() ); 606 hl.setTextFragmenter( new NullFragmenter() );
595 return new LuanFunction() { 607 return new LuanFunction() {
596 @Override public String call(LuanState luan,Object[] args) throws LuanException { 608 @Override public String call(LuanState luan,Object[] args) throws LuanException {
597 String text = (String)args[0]; 609 String text = (String)args[0];
598 try { 610 try {
611 if( chooser != null ) {
612 String s = chooser.getBestFragment(analyzer,null,text);
613 if( s != null ) {
614 if( dotdotdot != null ) {
615 boolean atStart = text.startsWith(s);
616 boolean atEnd = text.endsWith(s);
617 if( !atStart )
618 s = dotdotdot + s;
619 if( !atEnd )
620 s = s + dotdotdot;
621 }
622 text = s;
623 } else if( text.length() > fragmentSize ) {
624 text = text.substring(0,fragmentSize);
625 if( dotdotdot != null )
626 text += "...";
627 }
628 }
599 String s = hl.getBestFragment(analyzer,null,text); 629 String s = hl.getBestFragment(analyzer,null,text);
600 return s!=null ? s : text; 630 return s!=null ? s : text;
601 } catch(LuanRuntimeException e) { 631 } catch(LuanRuntimeException e) {
602 throw (LuanException)e.getCause(); 632 throw (LuanException)e.getCause();
603 } catch(IOException e) { 633 } catch(IOException e) {