Mercurial Hosting > luan
changeset 1344:dc2af9d5463b
move queryparser to lib
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Mon, 25 Feb 2019 07:00:55 -0700 |
parents | 7d9a1f8894b0 |
children | 6f8988830098 |
files | src/luan/lib/queryparser/FieldParser.java src/luan/lib/queryparser/MultiFieldParser.java src/luan/lib/queryparser/NumberFieldParser.java src/luan/lib/queryparser/SaneQueryParser.java src/luan/lib/queryparser/StringFieldParser.java src/luan/lib/queryparser/SynonymParser.java src/luan/modules/lucene/Lucene.luan src/luan/modules/lucene/LuceneIndex.java src/luan/modules/lucene/queryparser/FieldParser.java src/luan/modules/lucene/queryparser/MultiFieldParser.java src/luan/modules/lucene/queryparser/NumberFieldParser.java src/luan/modules/lucene/queryparser/SaneQueryParser.java src/luan/modules/lucene/queryparser/StringFieldParser.java src/luan/modules/lucene/queryparser/SynonymParser.java |
diffstat | 14 files changed, 606 insertions(+), 606 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/luan/lib/queryparser/FieldParser.java Mon Feb 25 07:00:55 2019 -0700 @@ -0,0 +1,12 @@ +package luan.lib.queryparser; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; +import luan.lib.parser.ParseException; + + +public interface FieldParser { + public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException; + public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException; + public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException; +}
// src/luan/lib/queryparser/MultiFieldParser.java
package luan.lib.queryparser;

import java.util.Map;
import java.util.HashMap;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.SortField;
import luan.lib.parser.ParseException;


/**
 * A {@link FieldParser} that dispatches to a per-field parser looked up in
 * {@link #fields}.
 *
 * <p>Terms without a field prefix are sent to the default parser once for each
 * default field and the results are OR-ed together. Fields that are not in the
 * map are rejected unless {@link #allowUnspecifiedFields} is set and a default
 * parser exists.
 */
public class MultiFieldParser implements FieldParser {

    /**
     * maps field name to FieldParser
     */
    public final Map<String,FieldParser> fields = new HashMap<String,FieldParser>();

    /** When true, fields missing from {@link #fields} are handled by the default parser. */
    public boolean allowUnspecifiedFields = false;

    private final FieldParser defaultFieldParser;
    private final String[] defaultFields;

    /** Creates a parser with no defaults: every term must name a field. */
    public MultiFieldParser() {
        this.defaultFieldParser = null;
        this.defaultFields = null;
    }

    /**
     * Creates a parser with defaults.
     *
     * @param defaultFieldParser parser used for field-less terms and, if allowed, for unmapped fields
     * @param defaultFields      fields searched by field-less terms; each is also
     *                           registered in {@link #fields} with the default parser
     */
    public MultiFieldParser(FieldParser defaultFieldParser,String... defaultFields) {
        this.defaultFieldParser = defaultFieldParser;
        this.defaultFields = defaultFields;
        for( String field : defaultFields ) {
            fields.put(field,defaultFieldParser);
        }
    }

    /**
     * {@inheritDoc}
     *
     * <p>With a {@code null} field the term is parsed against every default field
     * (a single default field yields that field's query directly, several yield a
     * {@link BooleanQuery} of SHOULD clauses).
     */
    @Override public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException {
        if( field == null ) {
            requireDefaults(qp);
            if( defaultFields.length == 1 )
                return defaultFieldParser.getQuery(qp,defaultFields[0],query);
            BooleanQuery bq = new BooleanQuery();
            for( String f : defaultFields ) {
                bq.add( defaultFieldParser.getQuery(qp,f,query), BooleanClause.Occur.SHOULD );
            }
            return bq;
        }
        return parserFor(qp,field,"unrecognized field '"+field+"'").getQuery(qp,field,query);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Field-less ranges are expanded over the default fields in the same way
     * as {@link #getQuery}.
     */
    @Override public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException {
        if( field == null ) {
            requireDefaults(qp);
            if( defaultFields.length == 1 )
                return defaultFieldParser.getRangeQuery(qp,defaultFields[0],minQuery,maxQuery,includeMin,includeMax);
            BooleanQuery bq = new BooleanQuery();
            for( String f : defaultFields ) {
                bq.add( defaultFieldParser.getRangeQuery(qp,f,minQuery,maxQuery,includeMin,includeMax), BooleanClause.Occur.SHOULD );
            }
            return bq;
        }
        return parserFor(qp,field,"field '"+field+"' not specified")
            .getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax);
    }

    /** {@inheritDoc} */
    @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException {
        return parserFor(qp,field,"field '"+field+"' not specified").getSortField(qp,field,reverse);
    }

    /** Fails the parse when a field-less term is seen but no default parser was configured. */
    private void requireDefaults(SaneQueryParser qp) throws ParseException {
        if( defaultFieldParser == null )
            throw qp.exception("no defaults were specified, so a field is required");
    }

    /**
     * Picks the parser for a named field: the mapped one, else the default parser
     * when unspecified fields are allowed.
     *
     * <p>The extra null check matters for instances built with the no-arg
     * constructor: without it, enabling {@link #allowUnspecifiedFields} made an
     * unmapped field dereference the null default parser (NullPointerException)
     * instead of reporting a parse error.
     *
     * @param unknownFieldMsg message of the ParseException thrown when no parser applies
     */
    private FieldParser parserFor(SaneQueryParser qp,String field,String unknownFieldMsg) throws ParseException {
        FieldParser fp = fields.get(field);
        if( fp != null )
            return fp;
        if( allowUnspecifiedFields && defaultFieldParser != null )
            return defaultFieldParser;
        throw qp.exception(unknownFieldMsg);
    }

}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/luan/lib/queryparser/NumberFieldParser.java Mon Feb 25 07:00:55 2019 -0700 @@ -0,0 +1,84 @@ +package luan.lib.queryparser; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.NumericRangeQuery; +import org.apache.lucene.search.SortField; +import luan.lib.parser.ParseException; + + +public abstract class NumberFieldParser implements FieldParser { + + @Override public final Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { + return getRangeQuery(qp,field,query,query,true,true); + } + + @Override public final Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { + try { + return getRangeQuery(field,minQuery,maxQuery,includeMin,includeMax); + } catch(NumberFormatException e) { + throw qp.exception(e); + } + } + + abstract protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax); + + @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) { + return new SortField( field, sortType(), reverse ); + } + + abstract protected SortField.Type sortType(); + + + public static final FieldParser INT = new NumberFieldParser() { + + @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { + int min = Integer.parseInt(minQuery); + int max = Integer.parseInt(maxQuery); + return NumericRangeQuery.newIntRange(field,min,max,includeMin,includeMax); + } + + @Override protected SortField.Type sortType() { + return SortField.Type.INT; + } + }; + + public static final FieldParser LONG = new NumberFieldParser() { + + @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { + long min = Long.parseLong(minQuery); + long max = Long.parseLong(maxQuery); + return 
NumericRangeQuery.newLongRange(field,min,max,includeMin,includeMax); + } + + @Override protected SortField.Type sortType() { + return SortField.Type.LONG; + } + }; + + public static final FieldParser FLOAT = new NumberFieldParser() { + + @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { + float min = Float.parseFloat(minQuery); + float max = Float.parseFloat(maxQuery); + return NumericRangeQuery.newFloatRange(field,min,max,includeMin,includeMax); + } + + @Override protected SortField.Type sortType() { + return SortField.Type.FLOAT; + } + }; + + public static final FieldParser DOUBLE = new NumberFieldParser() { + + @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { + double min = Double.parseDouble(minQuery); + double max = Double.parseDouble(maxQuery); + return NumericRangeQuery.newDoubleRange(field,min,max,includeMin,includeMax); + } + + @Override protected SortField.Type sortType() { + return SortField.Type.DOUBLE; + } + }; + +}
// src/luan/lib/queryparser/SaneQueryParser.java
package luan.lib.queryparser;

import java.util.List;
import java.util.ArrayList;
import java.util.regex.Pattern;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import luan.lib.parser.Parser;
import luan.lib.parser.ParseException;


/**
 * Hand-written recursive-descent parser for query strings and sort
 * specifications. Syntax is recognised here; the meaning of each term, range
 * and sort key is delegated to a {@link FieldParser}.
 *
 * <p>Instances are single-use and created only by the static entry points.
 *
 * <p>NOTE(review): {@code parser.begin()}, {@code success()} and
 * {@code failure()} are used as paired calls throughout; {@link Parser} is
 * defined outside this file, so the pairing is presumed to open/commit/roll
 * back a parse position. Keep every begin matched by exactly one
 * success/failure (or an {@code exception(...)}, which calls failure itself).
 */
public class SaneQueryParser {

    /**
     * Parses a query string.
     *
     * @param fieldParser interprets terms and ranges
     * @param query       the query text
     * @return a {@link MatchAllDocsQuery} for blank input, the single clause's
     *         query when there is exactly one non-prohibited clause, otherwise a
     *         {@link BooleanQuery} of all clauses
     * @throws ParseException on malformed input
     */
    public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException {
        return new SaneQueryParser(fieldParser,query).parseQuery();
    }

    // Characters that literal() prefixes with a backslash. Compiled once; never reassigned.
    private static final Pattern SPECIAL_CHAR = Pattern.compile("[ \\t\\r\\n\":\\[\\]{}^+\\-(),?*\\\\]");

    /**
     * Backslash-escapes every character of {@code s} matched by the
     * special-character pattern, so the result can be embedded in a query
     * string as literal text.
     */
    public static String literal(String s) {
        return SPECIAL_CHAR.matcher(s).replaceAll("\\\\$0");
    }

    /**
     * Parses a comma-separated sort specification such as {@code "name, date desc"}.
     *
     * @param fieldParser supplies the {@link SortField} for each field
     * @param sort        the sort text
     * @return the sort, or {@code null} when the input is blank
     * @throws ParseException on malformed input
     */
    public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException {
        return new SaneQueryParser(fieldParser,sort).parseSort();
    }


    // Characters that end an unquoted term unless escaped (see checkEscape).
    private static final String NOT_IN_TERM = " \t\r\n\":[]{}^+-()";
    // Field names additionally stop at ',' so sort lists can be split.
    private static final String NOT_IN_FIELD = NOT_IN_TERM + ",";
    private final FieldParser fieldParser;
    private final Parser parser;

    private SaneQueryParser(FieldParser fieldParser,String query) {
        this.fieldParser = fieldParser;
        this.parser = new Parser(query);
        parser.begin();
    }

    /** Creates a parse error with the given message; calls {@code parser.failure()} first. */
    ParseException exception(String msg) {
        parser.failure();
        return new ParseException(parser,msg);
    }

    /** Creates a parse error wrapping {@code cause}; calls {@code parser.failure()} first. */
    ParseException exception(Exception cause) {
        parser.failure();
        return new ParseException(parser,cause);
    }

    /** Top-level rule: optional whitespace followed by terms until end of input. */
    private Query parseQuery() throws ParseException {
        Spaces();
        BooleanQuery bq = new BooleanQuery();
        while( !parser.endOfInput() ) {
            bq.add( Term(null) );
        }
        BooleanClause[] clauses = bq.getClauses();
        if( clauses.length == 0 )
            return new MatchAllDocsQuery();
        // A lone clause is unwrapped, except a prohibited one, which must stay in its BooleanQuery.
        if( clauses.length == 1 && clauses[0].getOccur() != BooleanClause.Occur.MUST_NOT )
            return clauses[0].getQuery();
        return bq;
    }

    /**
     * One clause: optional {@code +}/{@code -}, optional {@code field:}, then a
     * parenthesised group, a range, or a simple term, optionally followed by
     * {@code ^boost}.
     *
     * @param defaultField field inherited from an enclosing {@code field:( ... )}, or null
     */
    private BooleanClause Term(String defaultField) throws ParseException {
        BooleanClause.Occur occur;
        if( parser.match('+') ) {
            occur = BooleanClause.Occur.MUST;
            Spaces();
        } else if( parser.match('-') ) {
            occur = BooleanClause.Occur.MUST_NOT;
            Spaces();
        } else {
            occur = BooleanClause.Occur.SHOULD;
        }
        String field = QueryField();
        if( field == null )
            field = defaultField;
        Query query = NestedTerm(field);
        if( query == null )
            query = RangeTerm(field);
        if( query == null ) {
            parser.begin();
            String match = SimpleTerm();
            query = fieldParser.getQuery(this,field,match);
            parser.success();
        }
        if( parser.match('^') ) {
            Spaces();
            int start = parser.begin();
            try {
                // consume the digits and dots of the boost factor
                while( parser.anyOf("0123456789.") );
                String match = parser.textFrom(start);
                float boost = Float.parseFloat(match);
                query.setBoost(boost);
            } catch(NumberFormatException e) {
                throw exception(e);
            }
            parser.success();
            Spaces();
        }
        return new BooleanClause(query,occur);
    }

    /**
     * Parenthesised group of terms, all inheriting {@code field}.
     *
     * @return the group's query, or null when the input does not start with '('
     */
    private Query NestedTerm(String field) throws ParseException {
        parser.begin();
        if( !parser.match('(') )
            return parser.failure(null);
        // Skip whitespace after '(' as every other opening delimiter does
        // ('+', '-', ':', '[' and '{'); without this "( a )" is presumed to be
        // rejected as "invalid input" because the first term starts at a blank.
        Spaces();
        BooleanQuery bq = new BooleanQuery();
        while( !parser.match(')') ) {
            if( parser.endOfInput() )
                throw exception("unclosed parentheses");
            bq.add( Term(field) );
        }
        Spaces();
        BooleanClause[] clauses = bq.getClauses();
        if( clauses.length == 0 )
            throw exception("empty parentheses");
        // Same unwrapping rule as at top level.
        if( clauses.length == 1 && clauses[0].getOccur() != BooleanClause.Occur.MUST_NOT )
            return parser.success(clauses[0].getQuery());
        return parser.success(bq);
    }

    /**
     * Range of the form {@code [min TO max]}; '[' and ']' are inclusive,
     * '{' and '}' exclusive, and the two ends may be mixed.
     *
     * @return the range query, or null when the input does not start with '[' or '{'
     */
    private Query RangeTerm(String field) throws ParseException {
        parser.begin();
        if( !parser.anyOf("[{") )
            return parser.failure(null);
        boolean includeMin = parser.lastChar() == '[';
        Spaces();
        String minQuery = SimpleTerm();
        TO();
        String maxQuery = SimpleTerm();
        if( !parser.anyOf("]}") )
            throw exception("unclosed range");
        boolean includeMax = parser.lastChar() == ']';
        Spaces();
        Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax);
        return parser.success(query);
    }

    /** The keyword {@code TO} followed by at least one whitespace character. */
    private void TO() throws ParseException {
        parser.begin();
        if( !(parser.match("TO") && Space()) )
            throw exception("'TO' expected");
        Spaces();
        parser.success();
    }

    /**
     * A quoted phrase or an unquoted word, returned exactly as written: the
     * surrounding quotes and any backslash escapes are left in the text.
     */
    private String SimpleTerm() throws ParseException {
        parser.begin();
        String match;
        if( parser.match('"') ) {
            // start one character back so the opening quote is part of the match
            int start = parser.currentIndex() - 1;
            while( !parser.match('"') ) {
                if( parser.endOfInput() )
                    throw exception("unclosed quotes");
                parser.anyChar();
                checkEscape();
            }
            match = parser.textFrom(start);
            Spaces();
        } else {
            match = Unquoted(NOT_IN_TERM);
        }
        if( match.length() == 0 )
            throw exception("invalid input");
        return parser.success(match);
    }

    /** A field name immediately followed by ':'; returns null when that is not what follows. */
    private String QueryField() throws ParseException {
        parser.begin();
        String match = Field();
        if( match==null || !parser.match(':') )
            return parser.failure((String)null);
        Spaces();
        return parser.success(match);
    }

    /** An unquoted field name with backslash escapes removed, or null when empty. */
    private String Field() throws ParseException {
        parser.begin();
        String match = Unquoted(NOT_IN_FIELD);
        if( match.length()==0 )
            return parser.failure((String)null);
        match = StringFieldParser.escape(this,match);
        return parser.success(match);
    }

    /**
     * Consumes characters up to the first unescaped character in {@code exclude}
     * (or end of input) and then any trailing whitespace.
     *
     * @return the consumed text without the trailing whitespace; may be empty
     */
    private String Unquoted(String exclude) throws ParseException {
        int start = parser.begin();
        while( parser.noneOf(exclude) ) {
            checkEscape();
        }
        String match = parser.textFrom(start);
        Spaces();
        return parser.success(match);
    }

    /** If the character just consumed was a backslash, also consumes the character it escapes. */
    private void checkEscape() {
        if( parser.lastChar() == '\\' )
            parser.anyChar();
    }

    /** Consumes any run of whitespace. */
    private void Spaces() {
        while( Space() );
    }

    /** Consumes a single whitespace character if one is next. */
    private boolean Space() {
        return parser.anyOf(" \t\r\n");
    }


    // sort

    /** Sort rule: one sort field followed by any number of {@code , field} items. */
    private Sort parseSort() throws ParseException {
        Spaces();
        if( parser.endOfInput() )
            return null;
        List<SortField> list = new ArrayList<SortField>();
        list.add( SortField() );
        while( !parser.endOfInput() ) {
            parser.begin();
            if( !parser.match(',') )
                throw exception("',' expected");
            Spaces();
            parser.success();
            list.add( SortField() );
        }
        return new Sort(list.toArray(new SortField[0]));
    }

    /** A field name optionally followed by {@code asc} or {@code desc} (case-insensitive). */
    private SortField SortField() throws ParseException {
        parser.begin();
        String field = Field();
        if( field==null )
            throw exception("invalid input");
        // "asc" is accepted and ignored; only "desc" reverses the order
        boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc");
        Spaces();
        SortField sf = fieldParser.getSortField(this,field,reverse);
        return parser.success(sf);
    }

}
// src/luan/lib/queryparser/StringFieldParser.java
package luan.lib.queryparser;

import java.io.StringReader;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.SortField;
import org.apache.lucene.index.Term;
import luan.lib.parser.ParseException;


/**
 * {@link FieldParser} for text fields. Depending on the term it produces a
 * wildcard query, a prefix query, or an analyzed term/phrase query.
 */
public class StringFieldParser implements FieldParser {

    /** Slop applied to phrase queries built by {@link #getQuery}; 0 requires an exact phrase. */
    public int slop = 0;

    /** Analyzer used to tokenize term text that is not a wildcard or prefix pattern. */
    public final Analyzer analyzer;

    public StringFieldParser(Analyzer analyzer) {
        this.analyzer = analyzer;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Resolution order:
     * <ol>
     * <li>unquoted text with an unescaped {@code ?}, or an unescaped {@code *}
     *     that is not the last character, becomes a {@link WildcardQuery};</li>
     * <li>text ending in an unescaped {@code *} becomes a {@link PrefixQuery};</li>
     * <li>anything else is unescaped, analyzed, and becomes a {@link TermQuery}
     *     (exactly one token at position 0) or a {@link PhraseQuery}.</li>
     * </ol>
     *
     * @throws RuntimeException wrapping an {@link IOException} raised by the analyzer
     */
    @Override public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException {
        String wildcard = wildcard(qp,query);
        if( wildcard != null )
            return new WildcardQuery(new Term(field,wildcard));
        if( query.endsWith("*") && !query.endsWith("\\*") ) {
            // The prefix is matched verbatim against indexed terms, so backslash
            // escapes in the input (e.g. from SaneQueryParser.literal) must be removed.
            char[] prefix = query.substring(0,query.length()-1).toCharArray();
            return new PrefixQuery(new Term(field,unescape(qp,prefix,0,prefix.length)));
        }
        query = escape(qp,query);
        PhraseQuery pq = new PhraseQuery();
        pq.setSlop(slop);  // the public slop setting was previously never applied
        try {
            TokenStream ts = analyzer.tokenStream(field,new StringReader(query));
            try {
                CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
                PositionIncrementAttribute posAttr = ts.addAttribute(PositionIncrementAttribute.class);
                ts.reset();
                // starts at -1 so that a first increment of 1 lands on position 0
                int pos = -1;
                while( ts.incrementToken() ) {
                    pos += posAttr.getPositionIncrement();
                    pq.add( new Term(field,termAttr.toString()), pos );
                }
                ts.end();
            } finally {
                // close even when analysis fails
                ts.close();
            }
        } catch(IOException e) {
            throw new RuntimeException(e);
        }
        Term[] terms = pq.getTerms();
        if( terms.length==1 && pq.getPositions()[0]==0 )
            return new TermQuery(terms[0]);
        return pq;
    }

    /** {@inheritDoc} Both bounds are unescaped and compared as plain strings, without analysis. */
    @Override public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException {
        minQuery = escape(qp,minQuery);
        maxQuery = escape(qp,maxQuery);
        return TermRangeQuery.newStringRange(field,minQuery,maxQuery,includeMin,includeMax);
    }

    /**
     * Strips one pair of surrounding double quotes, if present, and resolves
     * backslash escapes (a backslash makes the following character literal).
     * Despite the name, this removes escaping rather than adding it.
     *
     * @param qp used to build the ParseException
     * @param s  raw term text; may be empty
     * @return the literal text
     * @throws ParseException   if the text ends with a dangling backslash
     * @throws RuntimeException if the text starts with a quote but does not end with one
     */
    static String escape(SaneQueryParser qp,String s) throws ParseException {
        final char[] a = s.toCharArray();
        // length check: an empty string has no a[0]
        if( a.length > 0 && a[0] == '"' ) {
            if( a[a.length-1] != '"' )
                throw new RuntimeException("quoted term is missing its closing quote: "+s);
            return unescape(qp,a,1,a.length-1);
        }
        return unescape(qp,a,0,a.length);
    }

    /**
     * Resolves backslash escapes in {@code a[from..to)}.
     * The bound check uses {@code to}, not {@code a.length}, so that in quoted
     * input a backslash cannot swallow the closing quote.
     */
    private static String unescape(SaneQueryParser qp,char[] a,int from,int to) throws ParseException {
        StringBuilder sb = new StringBuilder();
        for( int i=from; i<to; i++ ) {
            char c = a[i];
            if( c == '\\' ) {
                if( ++i >= to )
                    throw qp.exception("ends with '\\'");
                c = a[i];
            }
            sb.append(c);
        }
        return sb.toString();
    }

    /**
     * Converts term text to a Lucene wildcard pattern.
     *
     * @return the pattern, or null when the text is quoted, empty, or contains no
     *         unescaped {@code ?} and no unescaped {@code *} before the last character
     * @throws ParseException if the text ends with a dangling backslash
     */
    private static String wildcard(SaneQueryParser qp,String s) throws ParseException {
        final char[] a = s.toCharArray();
        if( a.length == 0 || a[0] == '"' )
            return null;
        boolean hasWildcard = false;
        StringBuilder sb = new StringBuilder();
        for( int i=0; i<a.length; i++ ) {
            char c = a[i];
            // a trailing '*' is left to the prefix-query branch of getQuery
            if( c=='?' || c=='*' && i<a.length-1 )
                hasWildcard = true;
            if( c == '\\' ) {
                if( ++i == a.length )
                    throw qp.exception("ends with '\\'");
                c = a[i];
                // keep the escape only where the wildcard syntax itself needs it
                if( c=='?' || c=='*' )
                    sb.append('\\');
            }
            sb.append(c);
        }
        return hasWildcard ? sb.toString() : null;
    }

    /** {@inheritDoc} Sorts by string value. */
    @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) {
        return new SortField( field, SortField.Type.STRING, reverse );
    }

}
// src/luan/lib/queryparser/SynonymParser.java
package luan.lib.queryparser;

import java.util.Map;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.SortField;
import luan.lib.parser.ParseException;


/**
 * Decorator around another {@link FieldParser} that widens single-term queries
 * with synonyms. Range and sort requests pass straight through to the wrapped
 * parser.
 */
public class SynonymParser implements FieldParser {
    private final FieldParser fp;
    private final Map<String,String[]> synonymMap;

    /**
     * @param fp         the parser that builds the individual queries
     * @param synonymMap term text to its synonyms; looked up with the term text
     *                   exactly as passed to {@link #getQuery}
     */
    public SynonymParser(FieldParser fp,Map<String,String[]> synonymMap) {
        this.fp = fp;
        this.synonymMap = synonymMap;
    }

    /**
     * Looks up the synonyms of a term. Subclasses may override to supply
     * synonyms from another source.
     *
     * @return the synonyms, or {@code null} when the term has none
     */
    protected String[] getSynonyms(String query) {
        return synonymMap.get(query);
    }

    /**
     * {@inheritDoc}
     *
     * <p>Without synonyms this is the wrapped parser's query. Otherwise it is a
     * {@link BooleanQuery} whose SHOULD clauses are the query for the term itself
     * followed by one query per synonym.
     */
    @Override public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException {
        String[] alternatives = getSynonyms(query);
        Query original = fp.getQuery(qp,field,query);
        if( alternatives == null )
            return original;
        BooleanQuery anyOf = new BooleanQuery();
        anyOf.add( original, BooleanClause.Occur.SHOULD );
        for( int i=0; i<alternatives.length; i++ ) {
            Query alternative = fp.getQuery(qp,field,alternatives[i]);
            anyOf.add( alternative, BooleanClause.Occur.SHOULD );
        }
        return anyOf;
    }

    /** {@inheritDoc} Delegates unchanged to the wrapped parser. */
    @Override public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException {
        return fp.getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax);
    }

    /** {@inheritDoc} Delegates unchanged to the wrapped parser. */
    @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException {
        return fp.getSortField(qp,field,reverse);
    }
}
--- a/src/luan/modules/lucene/Lucene.luan Fri Feb 22 10:12:05 2019 -0700 +++ b/src/luan/modules/lucene/Lucene.luan Mon Feb 25 07:00:55 2019 -0700 @@ -11,9 +11,9 @@ local matches = String.matches or error() local Rpc = require "luan:Rpc.luan" local LuceneIndex = require "java:luan.modules.lucene.LuceneIndex" -local NumberFieldParser = require "java:luan.modules.lucene.queryparser.NumberFieldParser" -local StringFieldParser = require "java:luan.modules.lucene.queryparser.StringFieldParser" -local SaneQueryParser = require "java:luan.modules.lucene.queryparser.SaneQueryParser" +local NumberFieldParser = require "java:luan.lib.queryparser.NumberFieldParser" +local StringFieldParser = require "java:luan.lib.queryparser.StringFieldParser" +local SaneQueryParser = require "java:luan.lib.queryparser.SaneQueryParser" local Version = require "java:org.apache.lucene.util.Version" local EnglishAnalyzer = require "java:org.apache.lucene.analysis.en.EnglishAnalyzer"
--- a/src/luan/modules/lucene/LuceneIndex.java Fri Feb 22 10:12:05 2019 -0700 +++ b/src/luan/modules/lucene/LuceneIndex.java Mon Feb 25 07:00:55 2019 -0700 @@ -62,11 +62,11 @@ import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.TokenGroup; -import luan.modules.lucene.queryparser.SaneQueryParser; -import luan.modules.lucene.queryparser.FieldParser; -import luan.modules.lucene.queryparser.MultiFieldParser; -import luan.modules.lucene.queryparser.StringFieldParser; -import luan.modules.lucene.queryparser.NumberFieldParser; +import luan.lib.queryparser.SaneQueryParser; +import luan.lib.queryparser.FieldParser; +import luan.lib.queryparser.MultiFieldParser; +import luan.lib.queryparser.StringFieldParser; +import luan.lib.queryparser.NumberFieldParser; import luan.lib.parser.ParseException; import luan.modules.Utils; import luan.Luan;
--- a/src/luan/modules/lucene/queryparser/FieldParser.java Fri Feb 22 10:12:05 2019 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -package luan.modules.lucene.queryparser; - -import org.apache.lucene.search.Query; -import org.apache.lucene.search.SortField; -import luan.lib.parser.ParseException; - - -public interface FieldParser { - public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException; - public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException; - public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException; -}
--- a/src/luan/modules/lucene/queryparser/MultiFieldParser.java Fri Feb 22 10:12:05 2019 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,86 +0,0 @@ -package luan.modules.lucene.queryparser; - -import java.util.Map; -import java.util.HashMap; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.SortField; -import luan.lib.parser.ParseException; - - -public class MultiFieldParser implements FieldParser { - - /** - * maps field name to FieldParser - */ - public final Map<String,FieldParser> fields = new HashMap<String,FieldParser>(); - public boolean allowUnspecifiedFields = false; - private final FieldParser defaultFieldParser; - private final String[] defaultFields; - - public MultiFieldParser() { - this.defaultFieldParser = null; - this.defaultFields = null; - } - - public MultiFieldParser(FieldParser defaultFieldParser,String... defaultFields) { - this.defaultFieldParser = defaultFieldParser; - this.defaultFields = defaultFields; - for( String field : defaultFields ) { - fields.put(field,defaultFieldParser); - } - } - - @Override public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { - if( field == null ) { - if( defaultFieldParser == null ) - throw qp.exception("no defaults were specified, so a field is required"); - if( defaultFields.length == 1 ) - return defaultFieldParser.getQuery(qp,defaultFields[0],query); - BooleanQuery bq = new BooleanQuery(); - for( String f : defaultFields ) { - bq.add( defaultFieldParser.getQuery(qp,f,query), BooleanClause.Occur.SHOULD ); - } - return bq; - } else { - FieldParser fp = fields.get(field); - if( fp != null ) - return fp.getQuery(qp,field,query); - if( allowUnspecifiedFields ) - return defaultFieldParser.getQuery(qp,field,query); - throw qp.exception("unrecognized field '"+field+"'"); - } - } - - @Override public Query getRangeQuery(SaneQueryParser qp,String 
field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { - if( field == null ) { - if( defaultFieldParser == null ) - throw qp.exception("no defaults were specified, so a field is required"); - if( defaultFields.length == 1 ) - return defaultFieldParser.getRangeQuery(qp,defaultFields[0],minQuery,maxQuery,includeMin,includeMax); - BooleanQuery bq = new BooleanQuery(); - for( String f : defaultFields ) { - bq.add( defaultFieldParser.getRangeQuery(qp,f,minQuery,maxQuery,includeMin,includeMax), BooleanClause.Occur.SHOULD ); - } - return bq; - } else { - FieldParser fp = fields.get(field); - if( fp != null ) - return fp.getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax); - if( allowUnspecifiedFields ) - return defaultFieldParser.getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax); - throw qp.exception("field '"+field+"' not specified"); - } - } - - @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException { - FieldParser fp = fields.get(field); - if( fp != null ) - return fp.getSortField(qp,field,reverse); - if( allowUnspecifiedFields ) - return defaultFieldParser.getSortField(qp,field,reverse); - throw qp.exception("field '"+field+"' not specified"); - } - -}
--- a/src/luan/modules/lucene/queryparser/NumberFieldParser.java Fri Feb 22 10:12:05 2019 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,84 +0,0 @@ -package luan.modules.lucene.queryparser; - -import org.apache.lucene.search.Query; -import org.apache.lucene.search.NumericRangeQuery; -import org.apache.lucene.search.SortField; -import luan.lib.parser.ParseException; - - -public abstract class NumberFieldParser implements FieldParser { - - @Override public final Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { - return getRangeQuery(qp,field,query,query,true,true); - } - - @Override public final Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { - try { - return getRangeQuery(field,minQuery,maxQuery,includeMin,includeMax); - } catch(NumberFormatException e) { - throw qp.exception(e); - } - } - - abstract protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax); - - @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) { - return new SortField( field, sortType(), reverse ); - } - - abstract protected SortField.Type sortType(); - - - public static final FieldParser INT = new NumberFieldParser() { - - @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { - int min = Integer.parseInt(minQuery); - int max = Integer.parseInt(maxQuery); - return NumericRangeQuery.newIntRange(field,min,max,includeMin,includeMax); - } - - @Override protected SortField.Type sortType() { - return SortField.Type.INT; - } - }; - - public static final FieldParser LONG = new NumberFieldParser() { - - @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { - long min = Long.parseLong(minQuery); - long max = 
Long.parseLong(maxQuery); - return NumericRangeQuery.newLongRange(field,min,max,includeMin,includeMax); - } - - @Override protected SortField.Type sortType() { - return SortField.Type.LONG; - } - }; - - public static final FieldParser FLOAT = new NumberFieldParser() { - - @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { - float min = Float.parseFloat(minQuery); - float max = Float.parseFloat(maxQuery); - return NumericRangeQuery.newFloatRange(field,min,max,includeMin,includeMax); - } - - @Override protected SortField.Type sortType() { - return SortField.Type.FLOAT; - } - }; - - public static final FieldParser DOUBLE = new NumberFieldParser() { - - @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { - double min = Double.parseDouble(minQuery); - double max = Double.parseDouble(maxQuery); - return NumericRangeQuery.newDoubleRange(field,min,max,includeMin,includeMax); - } - - @Override protected SortField.Type sortType() { - return SortField.Type.DOUBLE; - } - }; - -}
--- a/src/luan/modules/lucene/queryparser/SaneQueryParser.java Fri Feb 22 10:12:05 2019 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,260 +0,0 @@ -package luan.modules.lucene.queryparser; - -import java.util.List; -import java.util.ArrayList; -import java.util.regex.Pattern; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import luan.lib.parser.Parser; -import luan.lib.parser.ParseException; - - -public class SaneQueryParser { - - public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException { - return new SaneQueryParser(fieldParser,query).parseQuery(); - } - - private static Pattern specialChar = Pattern.compile("[ \\t\\r\\n\":\\[\\]{}^+\\-(),?*\\\\]"); - - public static String literal(String s) { - return specialChar.matcher(s).replaceAll("\\\\$0"); - } - - public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException { - return new SaneQueryParser(fieldParser,sort).parseSort(); - } - - - private static final String NOT_IN_TERM = " \t\r\n\":[]{}^+-()"; - private static final String NOT_IN_FIELD = NOT_IN_TERM + ","; - private final FieldParser fieldParser; - private final Parser parser; - - private SaneQueryParser(FieldParser fieldParser,String query) { - this.fieldParser = fieldParser; - this.parser = new Parser(query); - parser.begin(); - } - - ParseException exception(String msg) { - parser.failure(); - return new ParseException(parser,msg); - } - - ParseException exception(Exception cause) { - parser.failure(); - return new ParseException(parser,cause); - } - - private Query parseQuery() throws ParseException { - Spaces(); - BooleanQuery bq = new BooleanQuery(); - while( !parser.endOfInput() ) { - bq.add( Term(null) ); - } - BooleanClause[] clauses = bq.getClauses(); - 
switch( clauses.length ) { - case 0: - return new MatchAllDocsQuery(); - case 1: - { - BooleanClause bc = clauses[0]; - if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) - return bc.getQuery(); - } - default: - return bq; - } - } - - private BooleanClause Term(String defaultField) throws ParseException { - BooleanClause.Occur occur; - if( parser.match('+') ) { - occur = BooleanClause.Occur.MUST; - Spaces(); - } else if( parser.match('-') ) { - occur = BooleanClause.Occur.MUST_NOT; - Spaces(); - } else { - occur = BooleanClause.Occur.SHOULD; - } - String field = QueryField(); - if( field == null ) - field = defaultField; - Query query = NestedTerm(field); - if( query == null ) - query = RangeTerm(field); - if( query == null ) { - parser.begin(); - String match = SimpleTerm(); - query = fieldParser.getQuery(this,field,match); - parser.success(); - } - if( parser.match('^') ) { - Spaces(); - int start = parser.begin(); - try { - while( parser.anyOf("0123456789.") ); - String match = parser.textFrom(start); - float boost = Float.parseFloat(match); - query.setBoost(boost); - } catch(NumberFormatException e) { - throw exception(e); - } - parser.success(); - Spaces(); - } - BooleanClause bc = new BooleanClause(query,occur); - return bc; - } - - private Query NestedTerm(String field) throws ParseException { - parser.begin(); - if( !parser.match('(') ) - return parser.failure(null); - BooleanQuery bq = new BooleanQuery(); - while( !parser.match(')') ) { - if( parser.endOfInput() ) - throw exception("unclosed parentheses"); - bq.add( Term(field) ); - } - Spaces(); - BooleanClause[] clauses = bq.getClauses(); - switch( clauses.length ) { - case 0: - throw exception("empty parentheses"); - case 1: - { - BooleanClause bc = clauses[0]; - if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) - return parser.success(bc.getQuery()); - } - default: - return parser.success(bq); - } - } - - private Query RangeTerm(String field) throws ParseException { - parser.begin(); - if( 
!parser.anyOf("[{") ) - return parser.failure(null); - boolean includeMin = parser.lastChar() == '['; - Spaces(); - String minQuery = SimpleTerm(); - TO(); - String maxQuery = SimpleTerm(); - if( !parser.anyOf("]}") ) - throw exception("unclosed range"); - boolean includeMax = parser.lastChar() == ']'; - Spaces(); - Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax); - return parser.success(query); - } - - private void TO() throws ParseException { - parser.begin(); - if( !(parser.match("TO") && Space()) ) - throw exception("'TO' expected"); - Spaces(); - parser.success(); - } - - private String SimpleTerm() throws ParseException { - parser.begin(); - String match; - if( parser.match('"') ) { - int start = parser.currentIndex() - 1; - while( !parser.match('"') ) { - if( parser.endOfInput() ) - throw exception("unclosed quotes"); - parser.anyChar(); - checkEscape(); - } - match = parser.textFrom(start); - Spaces(); - } else { - match = Unquoted(NOT_IN_TERM); - } - if( match.length() == 0 ) - throw exception("invalid input"); - return parser.success(match); - } - - private String QueryField() throws ParseException { - parser.begin(); - String match = Field(); - if( match==null || !parser.match(':') ) - return parser.failure((String)null); - Spaces(); - return parser.success(match); - } - - private String Field() throws ParseException { - parser.begin(); - String match = Unquoted(NOT_IN_FIELD); - if( match.length()==0 ) - return parser.failure((String)null); - match = StringFieldParser.escape(this,match); - return parser.success(match); - } - - private String Unquoted(String exclude) throws ParseException { - int start = parser.begin(); - while( parser.noneOf(exclude) ) { - checkEscape(); - } - String match = parser.textFrom(start); - Spaces(); - return parser.success(match); - } - - private void checkEscape() { - if( parser.lastChar() == '\\' ) - parser.anyChar(); - } - - private void Spaces() { - while( Space() ); - } - - 
private boolean Space() { - return parser.anyOf(" \t\r\n"); - } - - - // sort - - private Sort parseSort() throws ParseException { - Spaces(); - if( parser.endOfInput() ) - return null; - List<SortField> list = new ArrayList<SortField>(); - list.add( SortField() ); - while( !parser.endOfInput() ) { - parser.begin(); - if( !parser.match(',') ) - throw exception("',' expected"); - Spaces(); - parser.success(); - list.add( SortField() ); - } - return new Sort(list.toArray(new SortField[0])); - } - - private SortField SortField() throws ParseException { - parser.begin(); - String field = Field(); - if( field==null ) - throw exception("invalid input"); - boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc"); - Spaces(); - SortField sf = fieldParser.getSortField(this,field,reverse); - return parser.success(sf); - } - -}
// Source path: src/luan/modules/lucene/queryparser/StringFieldParser.java
// (deleted by this changeset; the queryparser classes move to luan.lib.queryparser).
package luan.modules.lucene.queryparser;

import java.io.StringReader;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.SortField;
import org.apache.lucene.index.Term;
import luan.lib.parser.ParseException;


/**
 * FieldParser for string fields whose text is run through an Analyzer.
 *
 * A query string becomes, in order of precedence: a WildcardQuery when it
 * contains an unescaped '?' or a non-trailing '*', a PrefixQuery when it ends
 * with an unescaped '*', and otherwise a TermQuery or PhraseQuery built from
 * the analyzer's tokens.
 */
public class StringFieldParser implements FieldParser {
	/** Slop handed to PhraseQuery.setSlop for phrase queries built by getQuery. */
	public int slop = 0;
	public final Analyzer analyzer;

	public StringFieldParser(Analyzer analyzer) {
		this.analyzer = analyzer;
	}

	/**
	 * Builds the query for one term of the given field.
	 *
	 * @param qp     parser used to create ParseExceptions
	 * @param field  name of the field the terms are created for
	 * @param query  raw term text, optionally wrapped in double quotes and
	 *               optionally containing backslash escapes
	 * @return a WildcardQuery, PrefixQuery, TermQuery or PhraseQuery
	 * @throws ParseException if the text ends with a dangling backslash
	 */
	@Override public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException {
		String wildcard = wildcard(qp,query);
		if( wildcard != null )
			return new WildcardQuery(new Term(field,wildcard));
		// NOTE(review): the prefix text is used as-is, so backslash escapes inside it are not removed - confirm intended
		if( query.endsWith("*") && !query.endsWith("\\*") )
			return new PrefixQuery(new Term(field,query.substring(0,query.length()-1)));
		query = escape(qp,query);
		PhraseQuery pq = new PhraseQuery();
		// the public slop setting was previously never applied to the query
		pq.setSlop(slop);
		try {
			TokenStream ts = analyzer.tokenStream(field,new StringReader(query));
			try {
				CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
				PositionIncrementAttribute posAttr = ts.addAttribute(PositionIncrementAttribute.class);
				ts.reset();
				// start at -1 so that a first increment of 1 lands on position 0
				int pos = -1;
				while( ts.incrementToken() ) {
					pos += posAttr.getPositionIncrement();
					pq.add( new Term(field,termAttr.toString()), pos );
				}
				ts.end();
			} finally {
				// always close, even when tokenizing fails, so the stream is not leaked
				ts.close();
			}
		} catch(IOException e) {
			throw new RuntimeException(e);
		}
		Term[] terms = pq.getTerms();
		// a single token at position 0 needs no phrase matching
		if( terms.length==1 && pq.getPositions()[0]==0 )
			return new TermQuery(terms[0]);
		return pq;
	}

	/**
	 * Builds a string range query after removing quotes and escapes from both bounds.
	 *
	 * @throws ParseException if a bound ends with a dangling backslash
	 */
	@Override public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException {
		minQuery = escape(qp,minQuery);
		maxQuery = escape(qp,maxQuery);
		return TermRangeQuery.newStringRange(field,minQuery,maxQuery,includeMin,includeMax);
	}

	/**
	 * Strips surrounding double quotes (when the text starts with one) and
	 * resolves backslash escapes by keeping the character after each backslash.
	 *
	 * @param qp  parser used to create ParseExceptions
	 * @param s   raw text; an empty string yields an empty string
	 * @return the text without quotes and escape characters
	 * @throws ParseException   if the text ends with a backslash
	 * @throws RuntimeException if the text starts with a quote but does not end with one
	 */
	static String escape(SaneQueryParser qp,String s) throws ParseException {
		final char[] a = s.toCharArray();
		// guard: a[0] below would fail on empty input
		if( a.length == 0 )
			return "";
		int i, n;
		if( a[0] == '"' ) {
			if( a[a.length-1] != '"' )
				throw new RuntimeException("quoted text is missing its closing quote: "+s);
			i = 1;
			n = a.length - 1;
		} else {
			i = 0;
			n = a.length;
		}
		StringBuilder sb = new StringBuilder();
		for( ; i<n; i++ ) {
			char c = a[i];
			if( c == '\\' ) {
				if( ++i == a.length )
					throw qp.exception("ends with '\\'");
				c = a[i];
			}
			sb.append(c);
		}
		return sb.toString();
	}

	/**
	 * Returns the wildcard pattern for the text, or null when the text is
	 * empty, quoted, or has no unescaped '?' and no unescaped non-trailing '*'.
	 * Escaped '?' and '*' keep their backslash in the returned pattern; every
	 * other escape is resolved to the bare character.
	 *
	 * @throws ParseException if the text ends with a backslash
	 */
	private static String wildcard(SaneQueryParser qp,String s) throws ParseException {
		final char[] a = s.toCharArray();
		// guard: a[0] below would fail on empty input
		if( a.length == 0 || a[0] == '"' )
			return null;
		boolean hasWildcard = false;
		StringBuilder sb = new StringBuilder();
		for( int i=0; i<a.length; i++ ) {
			char c = a[i];
			// a trailing '*' alone is left for the prefix-query path in getQuery
			if( c=='?' || (c=='*' && i<a.length-1) )
				hasWildcard = true;
			if( c == '\\' ) {
				if( ++i == a.length )
					throw qp.exception("ends with '\\'");
				c = a[i];
				if( c=='?' || c=='*' )
					sb.append('\\');
			}
			sb.append(c);
		}
		return hasWildcard ? sb.toString() : null;
	}

	/** Sorts the field as a string, reversed when requested. */
	@Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) {
		return new SortField( field, SortField.Type.STRING, reverse );
	}

}
// Source path: src/luan/modules/lucene/queryparser/SynonymParser.java
// (deleted by this changeset; the queryparser classes move to luan.lib.queryparser).
package luan.modules.lucene.queryparser;

import java.util.Map;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.SortField;
import luan.lib.parser.ParseException;


/**
 * FieldParser wrapper that expands a term with its synonyms.
 *
 * When the synonym lookup returns an array for the query text, the result is
 * a BooleanQuery holding the query for the original text followed by one
 * query per synonym, all as SHOULD clauses. Range queries and sort fields are
 * passed straight to the wrapped parser.
 */
public class SynonymParser implements FieldParser {
	private final FieldParser delegate;
	private final Map<String,String[]> synonymMap;

	/**
	 * @param fp          parser that builds the actual queries
	 * @param synonymMap  maps query text to its synonyms
	 */
	public SynonymParser(FieldParser fp,Map<String,String[]> synonymMap) {
		this.delegate = fp;
		this.synonymMap = synonymMap;
	}

	/** Looks up the synonyms of the query text; returns what the map holds for it. */
	protected String[] getSynonyms(String query) {
		return synonymMap.get(query);
	}

	/** Builds the query for the text, OR-ed with the queries of its synonyms when there are any. */
	public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException {
		final String[] alternatives = getSynonyms(query);
		final Query original = delegate.getQuery(qp,field,query);
		if( alternatives == null )
			return original;

		final BooleanQuery anyOf = new BooleanQuery();
		anyOf.add( original, BooleanClause.Occur.SHOULD );
		for( int k=0; k<alternatives.length; k++ ) {
			Query alternative = delegate.getQuery(qp,field,alternatives[k]);
			anyOf.add( alternative, BooleanClause.Occur.SHOULD );
		}
		return anyOf;
	}

	/** Delegates unchanged; synonyms are not applied to range bounds. */
	public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException {
		Query range = delegate.getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax);
		return range;
	}

	/** Delegates unchanged to the wrapped parser. */
	public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException {
		SortField sortField = delegate.getSortField(qp,field,reverse);
		return sortField;
	}
}