Mercurial Hosting > luan
changeset 624:8281a248c47e
add lucene highlighter
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 07 Jan 2016 18:46:07 -0700 |
parents | d592bf0947a9 |
children | a3c1e11fb6aa |
files | lucene/ext/lucene-highlighter-4.9.0.jar lucene/src/luan/modules/lucene/Lucene.luan lucene/src/luan/modules/lucene/LuceneIndex.java |
diffstat | 3 files changed, 41 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/lucene/src/luan/modules/lucene/Lucene.luan	Thu Jan 07 03:58:51 2016 -0700
+++ b/lucene/src/luan/modules/lucene/Lucene.luan	Thu Jan 07 18:46:07 2016 -0700
@@ -42,6 +42,7 @@
 	index.close = java_index.close
 	index.ensure_open = java_index.ensure_open
 	index.next_id = java_index.nextId
+	index.highlighter = java_index.highlighter
 
 	function index.search(query, from, to, sort)
 		local results = {}
--- a/lucene/src/luan/modules/lucene/LuceneIndex.java	Thu Jan 07 03:58:51 2016 -0700
+++ b/lucene/src/luan/modules/lucene/LuceneIndex.java	Thu Jan 07 18:46:07 2016 -0700
@@ -50,6 +50,12 @@
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.highlight.Formatter;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.NullFragmenter;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.TokenGroup;
 import sane.lucene.queryparser.SaneQueryParser;
 import sane.lucene.queryparser.FieldParser;
 import sane.lucene.queryparser.MultiFieldParser;
@@ -85,6 +91,7 @@
 	private boolean isClosed = false;
 	private final MultiFieldParser mfp;
 	public final LuanTable indexed_only_fields = new LuanTable();
+	private final Analyzer analyzer;
 
 	public LuceneIndex(LuanState luan,String indexDirStr,FieldParser defaultFieldParser,String[] defaultFields) throws LuanException, IOException {
 		mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields);
@@ -99,6 +106,7 @@
 			StringFieldParser sfp = (StringFieldParser)defaultFieldParser;
 			analyzer = sfp.analyzer;
 		}
+		this.analyzer = analyzer;
 		IndexWriterConfig conf = new IndexWriterConfig(version,analyzer);
 		snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy());
 		conf.setIndexDeletionPolicy(snapshotDeletionPolicy);
@@ -546,4 +554,36 @@
 		return table;
 	}
+
+	public LuanFunction highlighter(LuanState luan,String queryStr,LuanFunction formatter) throws ParseException {
+		Query query = SaneQueryParser.parseQuery(mfp,queryStr);
+		Formatter fmt = new Formatter() {
+			public String highlightTerm(String originalText,TokenGroup tokenGroup) {
+				if( tokenGroup.getTotalScore() <= 0 )
+					return originalText;
+				try {
+					return (String)Luan.first(formatter.call(luan,new Object[]{originalText}));
+				} catch(LuanException e) {
+					throw new LuanRuntimeException(e);
+				}
+			}
+		};
+		Highlighter hl = new Highlighter( fmt, new QueryScorer(query) );
+		hl.setTextFragmenter( new NullFragmenter() );
+		return new LuanFunction() {
+			@Override public String call(LuanState luan,Object[] args) throws LuanException {
+				String text = (String)args[0];
+				try {
+					String s = hl.getBestFragment(analyzer,null,text);
+					return s!=null ? s : text;
+				} catch(LuanRuntimeException e) {
+					throw (LuanException)e.getCause();
+				} catch(IOException e) {
+					throw new RuntimeException(e);
+				} catch(InvalidTokenOffsetsException e) {
+					throw new RuntimeException(e);
+				}
+			}
+		};
+	}
 
 }