changeset 1248:475905984870

improve lucene highlighter and allow bbcode_to_text quoter to be nil
author Franklin Schmidt <fschmidt@gmail.com>
date Wed, 18 Jul 2018 01:38:53 -0600
parents 728d8e75f476
children 0064f0d4facc
files src/luan/modules/lucene/LuceneIndex.java src/luan/modules/parsers/BBCode.java
diffstat 2 files changed, 39 insertions(+), 5 deletions(-) [+]
line wrap: on
line diff
diff -r 728d8e75f476 -r 475905984870 src/luan/modules/lucene/LuceneIndex.java
--- a/src/luan/modules/lucene/LuceneIndex.java	Tue Jul 17 22:17:01 2018 -0600
+++ b/src/luan/modules/lucene/LuceneIndex.java	Wed Jul 18 01:38:53 2018 -0600
@@ -56,7 +56,9 @@
 import org.apache.lucene.search.highlight.Formatter;
 import org.apache.lucene.search.highlight.Highlighter;
 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.search.highlight.Fragmenter;
 import org.apache.lucene.search.highlight.NullFragmenter;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
 import org.apache.lucene.search.highlight.QueryScorer;
 import org.apache.lucene.search.highlight.TokenGroup;
 import luan.modules.lucene.queryparser.SaneQueryParser;
@@ -577,11 +579,17 @@
 	}
 
 
-	public LuanFunction highlighter(LuanState luan,String queryStr,LuanFunction formatter) throws ParseException {
+	private static final Formatter nullFormatter = new Formatter() {  // pass-through Formatter: adds no markup to the text
+		public String highlightTerm(String originalText,TokenGroup tokenGroup) {
+			return originalText;  // tokenGroup is ignored; the text is returned unchanged
+		}
+	};
+
+	public LuanFunction highlighter(LuanState luan,String queryStr,LuanFunction formatter,final Integer fragmentSize,String dotdotdot) throws ParseException {
 		Query query = SaneQueryParser.parseQuery(mfp,queryStr);
 		Formatter fmt = new Formatter() {
 			public String highlightTerm(String originalText,TokenGroup tokenGroup) {
-				if( tokenGroup.getTotalScore() <= 0 )
+			if( tokenGroup.getTotalScore() <= 0 )
 					return originalText;
 				try {
 					return (String)Luan.first(formatter.call(luan,new Object[]{originalText}));
@@ -590,12 +598,34 @@
 				}
 			}
 		};
-		Highlighter hl = new Highlighter( fmt, new QueryScorer(query) );
+		QueryScorer queryScorer = new QueryScorer(query);
+		final Highlighter chooser = fragmentSize==null ? null : new Highlighter(nullFormatter,queryScorer);
+		if( chooser != null )
+			chooser.setTextFragmenter( new SimpleSpanFragmenter(queryScorer,fragmentSize) );
+		final Highlighter hl = new Highlighter(fmt,queryScorer);
 		hl.setTextFragmenter( new NullFragmenter() );
 		return new LuanFunction() {
 			@Override public String call(LuanState luan,Object[] args) throws LuanException {
 				String text = (String)args[0];
 				try {
+					if( chooser != null ) {  // a fragmentSize was given: shorten text to a fragment before highlighting it
+						String s = chooser.getBestFragment(analyzer,null,text);
+						if( s != null ) {
+							if( dotdotdot != null ) {
+								boolean atStart = text.startsWith(s);
+								boolean atEnd = text.endsWith(s);
+								if( !atStart )
+									s = dotdotdot + s;
+								if( !atEnd )
+									s = s + dotdotdot;
+							}
+							text = s;
+						} else if( text.length() > fragmentSize ) {  // chooser returned null: fall back to the leading fragmentSize chars
+							text = text.substring(0,fragmentSize);
+							if( dotdotdot != null )
+								text += dotdotdot;  // use the caller-supplied marker, consistent with the branch above (was hard-coded "...")
+						}
+					}
 					String s = hl.getBestFragment(analyzer,null,text);
 					return s!=null ? s : text;
 				} catch(LuanRuntimeException e) {
diff -r 728d8e75f476 -r 475905984870 src/luan/modules/parsers/BBCode.java
--- a/src/luan/modules/parsers/BBCode.java	Tue Jul 17 22:17:01 2018 -0600
+++ b/src/luan/modules/parsers/BBCode.java	Wed Jul 18 01:38:53 2018 -0600
@@ -280,8 +280,12 @@
 	}
 
 	private String quote(Object... args) throws LuanException {
-		if( quoter==null )
-			throw new LuanException("BBCode quoter function not defined");
+		if( quoter==null ) {
+			if( toHtml )
+				throw new LuanException("BBCode quoter function not defined");
+			else
+				return "";
+		}
 		Object obj = quoter.call(luan,args);
 		if( !(obj instanceof String) )
 			throw new LuanException("BBCode quoter function returned "+Luan.type(obj)+" but string required");