changeset 624:8281a248c47e

add lucene highlighter
author Franklin Schmidt <fschmidt@gmail.com>
date Thu, 07 Jan 2016 18:46:07 -0700
parents d592bf0947a9
children a3c1e11fb6aa
files lucene/ext/lucene-highlighter-4.9.0.jar lucene/src/luan/modules/lucene/Lucene.luan lucene/src/luan/modules/lucene/LuceneIndex.java
diffstat 3 files changed, 41 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
diff -r d592bf0947a9 -r 8281a248c47e lucene/ext/lucene-highlighter-4.9.0.jar
Binary file lucene/ext/lucene-highlighter-4.9.0.jar has changed
diff -r d592bf0947a9 -r 8281a248c47e lucene/src/luan/modules/lucene/Lucene.luan
--- a/lucene/src/luan/modules/lucene/Lucene.luan	Thu Jan 07 03:58:51 2016 -0700
+++ b/lucene/src/luan/modules/lucene/Lucene.luan	Thu Jan 07 18:46:07 2016 -0700
@@ -42,6 +42,7 @@
 	index.close = java_index.close
 	index.ensure_open = java_index.ensure_open
 	index.next_id = java_index.nextId
+	index.highlighter = java_index.highlighter
 
 	function index.search(query, from, to, sort)
 		local results = {}
diff -r d592bf0947a9 -r 8281a248c47e lucene/src/luan/modules/lucene/LuceneIndex.java
--- a/lucene/src/luan/modules/lucene/LuceneIndex.java	Thu Jan 07 03:58:51 2016 -0700
+++ b/lucene/src/luan/modules/lucene/LuceneIndex.java	Thu Jan 07 18:46:07 2016 -0700
@@ -50,6 +50,12 @@
 import org.apache.lucene.search.Collector;
 import org.apache.lucene.search.Scorer;
 import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.highlight.Formatter;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.NullFragmenter;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.TokenGroup;
 import sane.lucene.queryparser.SaneQueryParser;
 import sane.lucene.queryparser.FieldParser;
 import sane.lucene.queryparser.MultiFieldParser;
@@ -85,6 +91,7 @@
 	private boolean isClosed = false;
 	private final MultiFieldParser mfp;
 	public final LuanTable indexed_only_fields = new LuanTable();
+	private final Analyzer analyzer;
 
 	public LuceneIndex(LuanState luan,String indexDirStr,FieldParser defaultFieldParser,String[] defaultFields) throws LuanException, IOException {
 		mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields);
@@ -99,6 +106,7 @@
 			StringFieldParser sfp = (StringFieldParser)defaultFieldParser;
 			analyzer = sfp.analyzer;
 		}
+		this.analyzer = analyzer;
 		IndexWriterConfig conf = new IndexWriterConfig(version,analyzer);
 		snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy());
 		conf.setIndexDeletionPolicy(snapshotDeletionPolicy);
@@ -546,4 +554,36 @@
 		return table;
 	}
 
+
+	public LuanFunction highlighter(LuanState luan,String queryStr,LuanFunction formatter) throws ParseException { // builds a reusable Luan function that highlights the terms of queryStr in a given text
+		Query query = SaneQueryParser.parseQuery(mfp,queryStr); // parsed once, with this index's MultiFieldParser
+		Formatter fmt = new Formatter() { // adapts the Luan formatter function to Lucene's Formatter interface
+			public String highlightTerm(String originalText,TokenGroup tokenGroup) {
+				if( tokenGroup.getTotalScore() <= 0 ) // token group did not score against the query
+					return originalText; // leave non-matching text untouched
+				try {
+					return (String)Luan.first(formatter.call(luan,new Object[]{originalText})); // first return value of formatter(originalText); NOTE(review): unchecked cast, a non-String result would throw ClassCastException
+				} catch(LuanException e) {
+					throw new LuanRuntimeException(e); // tunnel the checked exception out as unchecked; unwrapped again in call() below
+				}
+			}
+		};
+		Highlighter hl = new Highlighter( fmt, new QueryScorer(query) ); // one Highlighter shared by every invocation of the returned function
+		hl.setTextFragmenter( new NullFragmenter() ); // NullFragmenter does not split the text, so the whole input is treated as a single fragment
+		return new LuanFunction() {
+			@Override public String call(LuanState luan,Object[] args) throws LuanException { // this luan parameter is not used here; the formatter above runs with the LuanState passed to highlighter()
+				String text = (String)args[0]; // NOTE(review): no length or type check on args; verify callers always pass one string
+				try {
+					String s = hl.getBestFragment(analyzer,null,text); // analyzer is the one stored by the constructor; field name is passed as null
+					return s!=null ? s : text; // a null result falls back to the unmodified input text
+				} catch(LuanRuntimeException e) {
+					throw (LuanException)e.getCause(); // restore the LuanException wrapped in highlightTerm; NOTE(review): assumes the cause is always a LuanException
+				} catch(IOException e) {
+					throw new RuntimeException(e); // rethrown unchecked with the cause preserved
+				} catch(InvalidTokenOffsetsException e) {
+					throw new RuntimeException(e); // rethrown unchecked with the cause preserved
+				}
+			}
+		};
+	}
 }