Mercurial Hosting > luan
changeset 1458:6b6c11c9164e
goodjava.lucene
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 20 Mar 2020 10:58:53 -0600 |
parents | a84ce37f3892 |
children | b04b8fc5f4f4 |
files | src/goodjava/lucene/queryparser/FieldParser.java src/goodjava/lucene/queryparser/MultiFieldParser.java src/goodjava/lucene/queryparser/NumberFieldParser.java src/goodjava/lucene/queryparser/SaneQueryParser.java src/goodjava/lucene/queryparser/StringFieldParser.java src/goodjava/lucene/queryparser/SynonymParser.java src/goodjava/queryparser/FieldParser.java src/goodjava/queryparser/MultiFieldParser.java src/goodjava/queryparser/NumberFieldParser.java src/goodjava/queryparser/SaneQueryParser.java src/goodjava/queryparser/StringFieldParser.java src/goodjava/queryparser/SynonymParser.java src/luan/modules/lucene/Lucene.luan src/luan/modules/lucene/LuceneIndex.java |
diffstat | 14 files changed, 665 insertions(+), 665 deletions(-) [+] |
line wrap: on
line diff
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/lucene/queryparser/FieldParser.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/queryparser/FieldParser.java Fri Mar 20 10:58:53 2020 -0600 @@ -0,0 +1,12 @@ +package goodjava.lucene.queryparser; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.SortField; +import goodjava.parser.ParseException; + + +public interface FieldParser { + public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException; + public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException; + public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException; +}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/lucene/queryparser/MultiFieldParser.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/queryparser/MultiFieldParser.java Fri Mar 20 10:58:53 2020 -0600 @@ -0,0 +1,86 @@ +package goodjava.lucene.queryparser; + +import java.util.Map; +import java.util.HashMap; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.SortField; +import goodjava.parser.ParseException; + + +public class MultiFieldParser implements FieldParser { + + /** + * maps field name to FieldParser + */ + public final Map<String,FieldParser> fields = new HashMap<String,FieldParser>(); + public boolean allowUnspecifiedFields = false; + private final FieldParser defaultFieldParser; + private final String[] defaultFields; + + public MultiFieldParser() { + this.defaultFieldParser = null; + this.defaultFields = null; + } + + public MultiFieldParser(FieldParser defaultFieldParser,String... defaultFields) { + this.defaultFieldParser = defaultFieldParser; + this.defaultFields = defaultFields; + for( String field : defaultFields ) { + fields.put(field,defaultFieldParser); + } + } + + @Override public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { + if( field == null ) { + if( defaultFieldParser == null ) + throw qp.exception("no defaults were specified, so a field is required"); + if( defaultFields.length == 1 ) + return defaultFieldParser.getQuery(qp,defaultFields[0],query); + BooleanQuery bq = new BooleanQuery(); + for( String f : defaultFields ) { + bq.add( defaultFieldParser.getQuery(qp,f,query), BooleanClause.Occur.SHOULD ); + } + return bq; + } else { + FieldParser fp = fields.get(field); + if( fp != null ) + return fp.getQuery(qp,field,query); + if( allowUnspecifiedFields ) + return defaultFieldParser.getQuery(qp,field,query); + throw qp.exception("unrecognized field '"+field+"'"); + } + } + + @Override public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { + if( field == null ) { + if( defaultFieldParser == null ) + throw qp.exception("no defaults were specified, so a field is required"); + if( defaultFields.length == 1 ) + return defaultFieldParser.getRangeQuery(qp,defaultFields[0],minQuery,maxQuery,includeMin,includeMax); + BooleanQuery bq = new BooleanQuery(); + for( String f : defaultFields ) { + bq.add( defaultFieldParser.getRangeQuery(qp,f,minQuery,maxQuery,includeMin,includeMax), BooleanClause.Occur.SHOULD ); + } + return bq; + } else { + FieldParser fp = fields.get(field); + if( fp != null ) + return fp.getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax); + if( allowUnspecifiedFields ) + return defaultFieldParser.getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax); + throw qp.exception("field '"+field+"' not specified"); + } + } + + @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException { + FieldParser fp = fields.get(field); + if( fp != null ) + return fp.getSortField(qp,field,reverse); + if( allowUnspecifiedFields ) + return defaultFieldParser.getSortField(qp,field,reverse); + throw qp.exception("field '"+field+"' not specified"); + } + +}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/lucene/queryparser/NumberFieldParser.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/queryparser/NumberFieldParser.java Fri Mar 20 10:58:53 2020 -0600 @@ -0,0 +1,88 @@ +package goodjava.lucene.queryparser; + +import org.apache.lucene.search.Query; +import org.apache.lucene.search.NumericRangeQuery; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.index.Term; +import goodjava.parser.ParseException; + + +public abstract class NumberFieldParser implements FieldParser { + + @Override public final Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { + if( query.equals("*") ) + return new PrefixQuery(new Term(field,"")); + return getRangeQuery(qp,field,query,query,true,true); + } + + @Override public final Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { + try { + return getRangeQuery(field,minQuery,maxQuery,includeMin,includeMax); + } catch(NumberFormatException e) { + throw qp.exception(e); + } + } + + abstract protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax); + + @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) { + return new SortField( field, sortType(), reverse ); + } + + abstract protected SortField.Type sortType(); + + + public static final FieldParser INT = new NumberFieldParser() { + + @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { + Integer min = minQuery.equals("*") ? null : Integer.valueOf(minQuery); + Integer max = maxQuery.equals("*") ? null : Integer.valueOf(maxQuery); + return NumericRangeQuery.newIntRange(field,min,max,includeMin,includeMax); + } + + @Override protected SortField.Type sortType() { + return SortField.Type.INT; + } + }; + + public static final FieldParser LONG = new NumberFieldParser() { + + @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { + Long min = minQuery.equals("*") ? null : Long.valueOf(minQuery); + Long max = maxQuery.equals("*") ? null : Long.valueOf(maxQuery); + return NumericRangeQuery.newLongRange(field,min,max,includeMin,includeMax); + } + + @Override protected SortField.Type sortType() { + return SortField.Type.LONG; + } + }; + + public static final FieldParser FLOAT = new NumberFieldParser() { + + @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { + Float min = minQuery.equals("*") ? null : Float.valueOf(minQuery); + Float max = maxQuery.equals("*") ? null : Float.valueOf(maxQuery); + return NumericRangeQuery.newFloatRange(field,min,max,includeMin,includeMax); + } + + @Override protected SortField.Type sortType() { + return SortField.Type.FLOAT; + } + }; + + public static final FieldParser DOUBLE = new NumberFieldParser() { + + @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { + Double min = minQuery.equals("*") ? null : Double.valueOf(minQuery); + Double max = maxQuery.equals("*") ? null : Double.valueOf(maxQuery); + return NumericRangeQuery.newDoubleRange(field,min,max,includeMin,includeMax); + } + + @Override protected SortField.Type sortType() { + return SortField.Type.DOUBLE; + } + }; + +}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/lucene/queryparser/SaneQueryParser.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/queryparser/SaneQueryParser.java Fri Mar 20 10:58:53 2020 -0600 @@ -0,0 +1,316 @@ +package goodjava.lucene.queryparser; + +import java.util.List; +import java.util.ArrayList; +import java.util.regex.Pattern; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import goodjava.parser.Parser; +import goodjava.parser.ParseException; + + +public class SaneQueryParser { + + public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException { + return new SaneQueryParser(fieldParser,query).parseQuery(); + } + + public static String quote(String s) { + s = s.replace("\\","\\\\"); + s = s.replace("\b","\\b"); + s = s.replace("\f","\\f"); + s = s.replace("\n","\\n"); + s = s.replace("\r","\\r"); + s = s.replace("\t","\\t"); + s = s.replace("\"","\\\""); + return "\""+s+"\""; + } + + public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException { + return new SaneQueryParser(fieldParser,sort).parseSort(); + } + + + private static final String NOT_IN_RANGE = " \t\r\n\":[]{}^+()"; + private static final String NOT_IN_TERM = NOT_IN_RANGE + "-"; + private static final String NOT_IN_FIELD = NOT_IN_TERM + ","; + private final FieldParser fieldParser; + private final Parser parser; + + private SaneQueryParser(FieldParser fieldParser,String query) { + this.fieldParser = fieldParser; + this.parser = new Parser(query); + parser.begin(); + } + + ParseException exception(String msg) { + parser.failure(); + return new ParseException(parser,msg); + } + + ParseException exception(Exception cause) { + parser.failure(); + return new ParseException(parser,cause); + } + + private Query parseQuery() throws ParseException { + Spaces(); + BooleanQuery bq = new BooleanQuery(); + while( !parser.endOfInput() ) { + bq.add( Term(null) ); + } + BooleanClause[] clauses = bq.getClauses(); + switch( clauses.length ) { + case 0: + return new MatchAllDocsQuery(); + case 1: + { + BooleanClause bc = clauses[0]; + if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) + return bc.getQuery(); + } + default: + return bq; + } + } + + private BooleanClause Term(String defaultField) throws ParseException { + BooleanClause.Occur occur; + if( parser.match('+') ) { + occur = BooleanClause.Occur.MUST; + Spaces(); + } else if( parser.match('-') ) { + occur = BooleanClause.Occur.MUST_NOT; + Spaces(); + } else { + occur = BooleanClause.Occur.SHOULD; + } + String field = QueryField(); + if( field == null ) + field = defaultField; + Query query = NestedTerm(field); + if( query == null ) + query = RangeTerm(field); + if( query == null ) { + parser.begin(); + String match = SimpleTerm(NOT_IN_TERM); + query = fieldParser.getQuery(this,field,match); + parser.success(); + } + if( parser.match('^') ) { + Spaces(); + int start = parser.begin(); + try { + while( parser.anyOf("0123456789.") ); + String match = parser.textFrom(start); + float boost = Float.parseFloat(match); + query.setBoost(boost); + } catch(NumberFormatException e) { + throw exception(e); + } + parser.success(); + Spaces(); + } + BooleanClause bc = new BooleanClause(query,occur); + return bc; + } + + private Query NestedTerm(String field) throws ParseException { + parser.begin(); + if( !parser.match('(') ) + return parser.failure(null); + BooleanQuery bq = new BooleanQuery(); + while( !parser.match(')') ) { + if( parser.endOfInput() ) + throw exception("unclosed parentheses"); + bq.add( Term(field) ); + } + Spaces(); + BooleanClause[] clauses = bq.getClauses(); + switch( clauses.length ) { + case 0: + throw exception("empty parentheses"); + case 1: + { + BooleanClause bc = clauses[0]; + if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) + return parser.success(bc.getQuery()); + } + default: + return parser.success(bq); + } + } + + private Query RangeTerm(String field) throws ParseException { + parser.begin(); + if( !parser.anyOf("[{") ) + return parser.failure(null); + boolean includeMin = parser.lastChar() == '['; + Spaces(); + String minQuery = SimpleTerm(NOT_IN_RANGE); + TO(); + String maxQuery = SimpleTerm(NOT_IN_RANGE); + if( !parser.anyOf("]}") ) + throw exception("unclosed range"); + boolean includeMax = parser.lastChar() == ']'; + Spaces(); + Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax); + return parser.success(query); + } + + private void TO() throws ParseException { + parser.begin(); + if( !(parser.match("TO") && Space()) ) + throw exception("'TO' expected"); + Spaces(); + parser.success(); + } + + private String SimpleTerm(String exclude) throws ParseException { + parser.begin(); + String match = Quoted(); + if( match==null ) + match = Unquoted(exclude); + if( match.length() == 0 ) + throw exception("invalid input"); + return parser.success(match); + } + + private String QueryField() throws ParseException { + parser.begin(); + String match = Field(); + if( match==null || !parser.match(':') ) + return parser.failure((String)null); + Spaces(); + return parser.success(match); + } + + private String Field() throws ParseException { + parser.begin(); + String match = Unquoted(NOT_IN_FIELD); + if( match.length()==0 ) + return parser.failure((String)null); + match = StringFieldParser.escape(this,match); + return parser.success(match); + } + + private String Quoted() throws ParseException { + parser.begin(); + if( !parser.match('"') ) + return parser.failure(null); + StringBuilder sb = new StringBuilder(); + while( parser.anyChar() ) { + char c = parser.lastChar(); + switch(c) { + case '"': + return parser.success(sb.toString()); + case '\\': + if( parser.anyChar() ) { + c = parser.lastChar(); + switch(c) { + case '"': + case '\\': + sb.append(c); + continue; + case 'b': + sb.append('\b'); + continue; + case 'f': + sb.append('\f'); + continue; + case 'n': + sb.append('\n'); + continue; + case 'r': + sb.append('\r'); + continue; + case 't': + sb.append('\t'); + continue; + case 'u': + int n = 0; + for( int i=0; i<4; i++ ) { + int d; + if( parser.inCharRange('0','9') ) { + d = parser.lastChar() - '0'; + } else if( parser.inCharRange('a','f') ) { + d = parser.lastChar() - 'a' + 10; + } else if( parser.inCharRange('A','F') ) { + d = parser.lastChar() - 'A' + 10; + } else { + throw exception("invalid hex digit"); + } + n = 16*n + d; + } + sb.append((char)n); + continue; + } + } + throw exception("invalid escape char"); + default: + sb.append(c); + } + } + parser.failure(); + throw exception("unclosed string"); + } + + private String Unquoted(String exclude) throws ParseException { + int start = parser.begin(); + while( parser.noneOf(exclude) ) { + checkEscape(); + } + String match = parser.textFrom(start); + Spaces(); + return parser.success(match); + } + + private void checkEscape() { + if( parser.lastChar() == '\\' ) + parser.anyChar(); + } + + private void Spaces() { + while( Space() ); + } + + private boolean Space() { + return parser.anyOf(" \t\r\n"); + } + + + // sort + + private Sort parseSort() throws ParseException { + Spaces(); + if( parser.endOfInput() ) + return null; + List<SortField> list = new ArrayList<SortField>(); + list.add( SortField() ); + while( !parser.endOfInput() ) { + parser.begin(); + if( !parser.match(',') ) + throw exception("',' expected"); + Spaces(); + parser.success(); + list.add( SortField() ); + } + return new Sort(list.toArray(new SortField[0])); + } + + private SortField SortField() throws ParseException { + parser.begin(); + String field = Field(); + if( field==null ) + throw exception("invalid input"); + boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc"); + Spaces(); + SortField sf = fieldParser.getSortField(this,field,reverse); + return parser.success(sf); + } + +}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/lucene/queryparser/StringFieldParser.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/queryparser/StringFieldParser.java Fri Mar 20 10:58:53 2020 -0600 @@ -0,0 +1,113 @@ +package goodjava.lucene.queryparser; + +import java.io.StringReader; +import java.io.IOException; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TermRangeQuery; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.WildcardQuery; +import org.apache.lucene.search.PrefixQuery; +import org.apache.lucene.search.SortField; +import org.apache.lucene.index.Term; +import goodjava.parser.ParseException; + + +public class StringFieldParser implements FieldParser { + public int slop = 0; + public final Analyzer analyzer; + + public StringFieldParser(Analyzer analyzer) { + this.analyzer = analyzer; + } + + @Override public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { + String wildcard = wildcard(qp,query); + if( wildcard != null ) + return new WildcardQuery(new Term(field,wildcard)); + if( query.endsWith("*") && !query.endsWith("\\*") ) + return new PrefixQuery(new Term(field,query.substring(0,query.length()-1))); + query = escape(qp,query); + PhraseQuery pq = new PhraseQuery(); + try { + TokenStream ts = analyzer.tokenStream(field,new StringReader(query)); + CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class); + PositionIncrementAttribute posAttr = ts.addAttribute(PositionIncrementAttribute.class); + ts.reset(); + int pos = -1; + while( ts.incrementToken() ) { + pos += posAttr.getPositionIncrement(); + pq.add( new Term(field,termAttr.toString()), pos ); + } + ts.end(); + ts.close(); + } catch(IOException e) { + throw new RuntimeException(e); + } + Term[] terms = pq.getTerms(); + if( terms.length==1 && pq.getPositions()[0]==0 ) + return new TermQuery(terms[0]); + return pq; + } + + @Override public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { + minQuery = minQuery.equals("*") ? null : escape(qp,minQuery); + maxQuery = maxQuery.equals("*") ? null : escape(qp,maxQuery); + return TermRangeQuery.newStringRange(field,minQuery,maxQuery,includeMin,includeMax); + } + + static String escape(SaneQueryParser qp,String s) throws ParseException { + final char[] a = s.toCharArray(); + int i, n; + if( a[0] == '"' ) { + if( a[a.length-1] != '"' ) throw new RuntimeException(); + i = 1; + n = a.length - 1; + } else { + i = 0; + n = a.length; + } + StringBuilder sb = new StringBuilder(); + for( ; i<n; i++ ) { + char c = a[i]; + if( c == '\\' ) { + if( ++i == a.length ) + throw qp.exception("ends with '\\'"); + c = a[i]; + } + sb.append(c); + } + return sb.toString(); + } + + private static String wildcard(SaneQueryParser qp,String s) throws ParseException { + final char[] a = s.toCharArray(); + if( a[0] == '"' ) + return null; + boolean hasWildcard = false; + StringBuilder sb = new StringBuilder(); + for( int i=0; i<a.length; i++ ) { + char c = a[i]; + if( c=='?' || c=='*' && i<a.length-1 ) + hasWildcard = true; + if( c == '\\' ) { + if( ++i == a.length ) + throw qp.exception("ends with '\\'"); + c = a[i]; + if( c=='?' || c=='*' ) + sb.append('\\'); + } + sb.append(c); + } + return hasWildcard ? sb.toString() : null; + } + + @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) { + return new SortField( field, SortField.Type.STRING, reverse ); + } + +}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/lucene/queryparser/SynonymParser.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/queryparser/SynonymParser.java Fri Mar 20 10:58:53 2020 -0600 @@ -0,0 +1,43 @@ +package goodjava.lucene.queryparser; + +import java.util.Map; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.SortField; +import goodjava.parser.ParseException; + + +public class SynonymParser implements FieldParser { + private final FieldParser fp; + private final Map<String,String[]> synonymMap; + + public SynonymParser(FieldParser fp,Map<String,String[]> synonymMap) { + this.fp = fp; + this.synonymMap = synonymMap; + } + + protected String[] getSynonyms(String query) { + return synonymMap.get(query); + } + + public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { + String[] synonyms = getSynonyms(query); + if( synonyms == null ) + return fp.getQuery(qp,field,query); + BooleanQuery bq = new BooleanQuery(); + bq.add( fp.getQuery(qp,field,query), BooleanClause.Occur.SHOULD ); + for( String s : synonyms ) { + bq.add( fp.getQuery(qp,field,s), BooleanClause.Occur.SHOULD ); + } + return bq; + } + + public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { + return fp.getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax); + } + + public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException { + return fp.getSortField(qp,field,reverse); + } +}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/queryparser/FieldParser.java --- a/src/goodjava/queryparser/FieldParser.java Mon Mar 09 18:27:19 2020 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,12 +0,0 @@ -package goodjava.queryparser; - -import org.apache.lucene.search.Query; -import org.apache.lucene.search.SortField; -import goodjava.parser.ParseException; - - -public interface FieldParser { - public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException; - public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException; - public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException; -}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/queryparser/MultiFieldParser.java --- a/src/goodjava/queryparser/MultiFieldParser.java Mon Mar 09 18:27:19 2020 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,86 +0,0 @@ -package goodjava.queryparser; - -import java.util.Map; -import java.util.HashMap; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.SortField; -import goodjava.parser.ParseException; - - -public class MultiFieldParser implements FieldParser { - - /** - * maps field name to FieldParser - */ - public final Map<String,FieldParser> fields = new HashMap<String,FieldParser>(); - public boolean allowUnspecifiedFields = false; - private final FieldParser defaultFieldParser; - private final String[] defaultFields; - - public MultiFieldParser() { - this.defaultFieldParser = null; - this.defaultFields = null; - } - - public MultiFieldParser(FieldParser defaultFieldParser,String... defaultFields) { - this.defaultFieldParser = defaultFieldParser; - this.defaultFields = defaultFields; - for( String field : defaultFields ) { - fields.put(field,defaultFieldParser); - } - } - - @Override public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { - if( field == null ) { - if( defaultFieldParser == null ) - throw qp.exception("no defaults were specified, so a field is required"); - if( defaultFields.length == 1 ) - return defaultFieldParser.getQuery(qp,defaultFields[0],query); - BooleanQuery bq = new BooleanQuery(); - for( String f : defaultFields ) { - bq.add( defaultFieldParser.getQuery(qp,f,query), BooleanClause.Occur.SHOULD ); - } - return bq; - } else { - FieldParser fp = fields.get(field); - if( fp != null ) - return fp.getQuery(qp,field,query); - if( allowUnspecifiedFields ) - return defaultFieldParser.getQuery(qp,field,query); - throw qp.exception("unrecognized field '"+field+"'"); - } - } - - @Override public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { - if( field == null ) { - if( defaultFieldParser == null ) - throw qp.exception("no defaults were specified, so a field is required"); - if( defaultFields.length == 1 ) - return defaultFieldParser.getRangeQuery(qp,defaultFields[0],minQuery,maxQuery,includeMin,includeMax); - BooleanQuery bq = new BooleanQuery(); - for( String f : defaultFields ) { - bq.add( defaultFieldParser.getRangeQuery(qp,f,minQuery,maxQuery,includeMin,includeMax), BooleanClause.Occur.SHOULD ); - } - return bq; - } else { - FieldParser fp = fields.get(field); - if( fp != null ) - return fp.getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax); - if( allowUnspecifiedFields ) - return defaultFieldParser.getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax); - throw qp.exception("field '"+field+"' not specified"); - } - } - - @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException { - FieldParser fp = fields.get(field); - if( fp != null ) - return fp.getSortField(qp,field,reverse); - if( allowUnspecifiedFields ) - return defaultFieldParser.getSortField(qp,field,reverse); - throw qp.exception("field '"+field+"' not specified"); - } - -}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/queryparser/NumberFieldParser.java --- a/src/goodjava/queryparser/NumberFieldParser.java Mon Mar 09 18:27:19 2020 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,88 +0,0 @@ -package goodjava.queryparser; - -import org.apache.lucene.search.Query; -import org.apache.lucene.search.NumericRangeQuery; -import org.apache.lucene.search.SortField; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.index.Term; -import goodjava.parser.ParseException; - - -public abstract class NumberFieldParser implements FieldParser { - - @Override public final Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { - if( query.equals("*") ) - return new PrefixQuery(new Term(field,"")); - return getRangeQuery(qp,field,query,query,true,true); - } - - @Override public final Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { - try { - return getRangeQuery(field,minQuery,maxQuery,includeMin,includeMax); - } catch(NumberFormatException e) { - throw qp.exception(e); - } - } - - abstract protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax); - - @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) { - return new SortField( field, sortType(), reverse ); - } - - abstract protected SortField.Type sortType(); - - - public static final FieldParser INT = new NumberFieldParser() { - - @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { - Integer min = minQuery.equals("*") ? null : Integer.valueOf(minQuery); - Integer max = maxQuery.equals("*") ? null : Integer.valueOf(maxQuery); - return NumericRangeQuery.newIntRange(field,min,max,includeMin,includeMax); - } - - @Override protected SortField.Type sortType() { - return SortField.Type.INT; - } - }; - - public static final FieldParser LONG = new NumberFieldParser() { - - @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { - Long min = minQuery.equals("*") ? null : Long.valueOf(minQuery); - Long max = maxQuery.equals("*") ? null : Long.valueOf(maxQuery); - return NumericRangeQuery.newLongRange(field,min,max,includeMin,includeMax); - } - - @Override protected SortField.Type sortType() { - return SortField.Type.LONG; - } - }; - - public static final FieldParser FLOAT = new NumberFieldParser() { - - @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { - Float min = minQuery.equals("*") ? null : Float.valueOf(minQuery); - Float max = maxQuery.equals("*") ? null : Float.valueOf(maxQuery); - return NumericRangeQuery.newFloatRange(field,min,max,includeMin,includeMax); - } - - @Override protected SortField.Type sortType() { - return SortField.Type.FLOAT; - } - }; - - public static final FieldParser DOUBLE = new NumberFieldParser() { - - @Override protected Query getRangeQuery(String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) { - Double min = minQuery.equals("*") ? null : Double.valueOf(minQuery); - Double max = maxQuery.equals("*") ? null : Double.valueOf(maxQuery); - return NumericRangeQuery.newDoubleRange(field,min,max,includeMin,includeMax); - } - - @Override protected SortField.Type sortType() { - return SortField.Type.DOUBLE; - } - }; - -}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/queryparser/SaneQueryParser.java --- a/src/goodjava/queryparser/SaneQueryParser.java Mon Mar 09 18:27:19 2020 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,316 +0,0 @@ -package goodjava.queryparser; - -import java.util.List; -import java.util.ArrayList; -import java.util.regex.Pattern; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.MatchAllDocsQuery; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.Sort; -import org.apache.lucene.search.SortField; -import goodjava.parser.Parser; -import goodjava.parser.ParseException; - - -public class SaneQueryParser { - - public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException { - return new SaneQueryParser(fieldParser,query).parseQuery(); - } - - public static String quote(String s) { - s = s.replace("\\","\\\\"); - s = s.replace("\b","\\b"); - s = s.replace("\f","\\f"); - s = s.replace("\n","\\n"); - s = s.replace("\r","\\r"); - s = s.replace("\t","\\t"); - s = s.replace("\"","\\\""); - return "\""+s+"\""; - } - - public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException { - return new SaneQueryParser(fieldParser,sort).parseSort(); - } - - - private static final String NOT_IN_RANGE = " \t\r\n\":[]{}^+()"; - private static final String NOT_IN_TERM = NOT_IN_RANGE + "-"; - private static final String NOT_IN_FIELD = NOT_IN_TERM + ","; - private final FieldParser fieldParser; - private final Parser parser; - - private SaneQueryParser(FieldParser fieldParser,String query) { - this.fieldParser = fieldParser; - this.parser = new Parser(query); - parser.begin(); - } - - ParseException exception(String msg) { - parser.failure(); - return new ParseException(parser,msg); - } - - ParseException exception(Exception cause) { - parser.failure(); - return new ParseException(parser,cause); - } - - private Query parseQuery() throws ParseException { - Spaces(); - BooleanQuery bq = new BooleanQuery(); - while( !parser.endOfInput() ) { - bq.add( Term(null) ); - } - BooleanClause[] clauses = bq.getClauses(); - switch( clauses.length ) { - case 0: - return new MatchAllDocsQuery(); - case 1: - { - BooleanClause bc = clauses[0]; - if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) - return bc.getQuery(); - } - default: - return bq; - } - } - - private BooleanClause Term(String defaultField) throws ParseException { - BooleanClause.Occur occur; - if( parser.match('+') ) { - occur = BooleanClause.Occur.MUST; - Spaces(); - } else if( parser.match('-') ) { - occur = BooleanClause.Occur.MUST_NOT; - Spaces(); - } else { - occur = BooleanClause.Occur.SHOULD; - } - String field = QueryField(); - if( field == null ) - field = defaultField; - Query query = NestedTerm(field); - if( query == null ) - query = RangeTerm(field); - if( query == null ) { - parser.begin(); - String match = SimpleTerm(NOT_IN_TERM); - query = fieldParser.getQuery(this,field,match); - parser.success(); - } - if( parser.match('^') ) { - Spaces(); - int start = parser.begin(); - try { - while( parser.anyOf("0123456789.") ); - String match = parser.textFrom(start); - float boost = Float.parseFloat(match); - query.setBoost(boost); - } catch(NumberFormatException e) { - throw exception(e); - } - parser.success(); - Spaces(); - } - BooleanClause bc = new BooleanClause(query,occur); - return bc; - } - - private Query NestedTerm(String field) throws ParseException { - parser.begin(); - if( !parser.match('(') ) - return parser.failure(null); - BooleanQuery bq = new BooleanQuery(); - while( !parser.match(')') ) { - if( parser.endOfInput() ) - throw exception("unclosed parentheses"); - bq.add( Term(field) ); - } - Spaces(); - BooleanClause[] clauses = bq.getClauses(); - switch( clauses.length ) { - case 0: - throw exception("empty parentheses"); - case 1: - { - BooleanClause bc = clauses[0]; - if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) - return parser.success(bc.getQuery()); - } - default: - return parser.success(bq); - } - } - - private Query RangeTerm(String field) throws ParseException { - parser.begin(); - if( !parser.anyOf("[{") ) - return parser.failure(null); - boolean includeMin = parser.lastChar() == '['; - Spaces(); - String minQuery = SimpleTerm(NOT_IN_RANGE); - TO(); - String maxQuery = SimpleTerm(NOT_IN_RANGE); - if( !parser.anyOf("]}") ) - throw exception("unclosed range"); - boolean includeMax = parser.lastChar() == ']'; - Spaces(); - Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax); - return parser.success(query); - } - - private void TO() throws ParseException { - parser.begin(); - if( !(parser.match("TO") && Space()) ) - throw exception("'TO' expected"); - Spaces(); - parser.success(); - } - - private String SimpleTerm(String exclude) throws ParseException { - parser.begin(); - String match = Quoted(); - if( match==null ) - match = Unquoted(exclude); - if( match.length() == 0 ) - throw exception("invalid input"); - return parser.success(match); - } - - private String QueryField() throws ParseException { - parser.begin(); - String match = Field(); - if( match==null || !parser.match(':') ) - return parser.failure((String)null); - Spaces(); - return parser.success(match); - } - - private String Field() throws ParseException { - parser.begin(); - String match = Unquoted(NOT_IN_FIELD); - if( match.length()==0 ) - return parser.failure((String)null); - match = StringFieldParser.escape(this,match); - return parser.success(match); - } - - private String Quoted() throws ParseException { - parser.begin(); - if( !parser.match('"') ) - return parser.failure(null); - StringBuilder sb = new StringBuilder(); - while( parser.anyChar() ) { - char c = parser.lastChar(); - switch(c) { - case '"': - return parser.success(sb.toString()); - case '\\': - if( parser.anyChar() ) { - c = parser.lastChar(); - switch(c) { - case '"': - case '\\': - sb.append(c); - continue; - case 'b': - sb.append('\b'); - continue; - case 'f': - sb.append('\f'); - continue; - case 'n': - sb.append('\n'); - continue; - case 'r': - sb.append('\r'); - continue; - case 't': - sb.append('\t'); - continue; - case 'u': - int n = 0; - for( int i=0; i<4; i++ ) { - int d; - if( parser.inCharRange('0','9') ) { - d = parser.lastChar() - '0'; - } else if( parser.inCharRange('a','f') ) { - d = parser.lastChar() - 'a' + 10; - } else if( parser.inCharRange('A','F') ) { - d = parser.lastChar() - 'A' + 10; - } else { - throw exception("invalid hex digit"); - } - n = 16*n + d; - } - sb.append((char)n); - continue; - } - } - throw exception("invalid escape char"); - default: - sb.append(c); - } - } - parser.failure(); - throw exception("unclosed string"); - } - - private String Unquoted(String exclude) throws ParseException { - int start = parser.begin(); - while( parser.noneOf(exclude) ) { - checkEscape(); - } - String match = parser.textFrom(start); - Spaces(); - return parser.success(match); - } - - private void checkEscape() { - if( parser.lastChar() == '\\' ) - parser.anyChar(); - } - - private void Spaces() { - while( Space() ); - } - - private boolean Space() { - return parser.anyOf(" \t\r\n"); - } - - - // sort - - private Sort parseSort() throws ParseException { - Spaces(); - if( parser.endOfInput() ) - return null; - List<SortField> list = new ArrayList<SortField>(); - list.add( SortField() ); - while( !parser.endOfInput() ) { - parser.begin(); - if( !parser.match(',') ) - throw exception("',' expected"); - Spaces(); - parser.success(); - list.add( SortField() ); - } - return new Sort(list.toArray(new SortField[0])); - } - - private SortField SortField() throws ParseException { - parser.begin(); - String field = Field(); - if( field==null ) - throw exception("invalid input"); - boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc"); - Spaces(); - SortField sf = fieldParser.getSortField(this,field,reverse); - return parser.success(sf); - } - -}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/queryparser/StringFieldParser.java --- a/src/goodjava/queryparser/StringFieldParser.java Mon Mar 09 18:27:19 2020 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,113 +0,0 @@ -package goodjava.queryparser; - -import java.io.StringReader; -import java.io.IOException; -import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.analysis.TokenStream; -import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; -import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.TermRangeQuery; -import org.apache.lucene.search.PhraseQuery; -import org.apache.lucene.search.WildcardQuery; -import org.apache.lucene.search.PrefixQuery; -import org.apache.lucene.search.SortField; -import org.apache.lucene.index.Term; -import goodjava.parser.ParseException; - - -public class StringFieldParser implements FieldParser { - public int slop = 0; - public final Analyzer analyzer; - - public StringFieldParser(Analyzer analyzer) { - this.analyzer = analyzer; - } - - @Override public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { - String wildcard = wildcard(qp,query); - if( wildcard != null ) - return new WildcardQuery(new Term(field,wildcard)); - if( query.endsWith("*") && !query.endsWith("\\*") ) - return new PrefixQuery(new Term(field,query.substring(0,query.length()-1))); - query = escape(qp,query); - PhraseQuery pq = new PhraseQuery(); - try { - TokenStream ts = analyzer.tokenStream(field,new StringReader(query)); - CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class); - PositionIncrementAttribute posAttr = ts.addAttribute(PositionIncrementAttribute.class); - ts.reset(); - int pos = -1; - while( ts.incrementToken() ) { - pos += posAttr.getPositionIncrement(); - pq.add( new Term(field,termAttr.toString()), pos ); - } - ts.end(); - ts.close(); - } catch(IOException e) { - throw new RuntimeException(e); - } - Term[] terms = pq.getTerms(); - if( terms.length==1 && pq.getPositions()[0]==0 ) - return new TermQuery(terms[0]); - return pq; - } - - @Override public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { - minQuery = minQuery.equals("*") ? null : escape(qp,minQuery); - maxQuery = maxQuery.equals("*") ? null : escape(qp,maxQuery); - return TermRangeQuery.newStringRange(field,minQuery,maxQuery,includeMin,includeMax); - } - - static String escape(SaneQueryParser qp,String s) throws ParseException { - final char[] a = s.toCharArray(); - int i, n; - if( a[0] == '"' ) { - if( a[a.length-1] != '"' ) throw new RuntimeException(); - i = 1; - n = a.length - 1; - } else { - i = 0; - n = a.length; - } - StringBuilder sb = new StringBuilder(); - for( ; i<n; i++ ) { - char c = a[i]; - if( c == '\\' ) { - if( ++i == a.length ) - throw qp.exception("ends with '\\'"); - c = a[i]; - } - sb.append(c); - } - return sb.toString(); - } - - private static String wildcard(SaneQueryParser qp,String s) throws ParseException { - final char[] a = s.toCharArray(); - if( a[0] == '"' ) - return null; - boolean hasWildcard = false; - StringBuilder sb = new StringBuilder(); - for( int i=0; i<a.length; i++ ) { - char c = a[i]; - if( c=='?' || c=='*' && i<a.length-1 ) - hasWildcard = true; - if( c == '\\' ) { - if( ++i == a.length ) - throw qp.exception("ends with '\\'"); - c = a[i]; - if( c=='?' || c=='*' ) - sb.append('\\'); - } - sb.append(c); - } - return hasWildcard ? sb.toString() : null; - } - - @Override public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) { - return new SortField( field, SortField.Type.STRING, reverse ); - } - -}
diff -r a84ce37f3892 -r 6b6c11c9164e src/goodjava/queryparser/SynonymParser.java --- a/src/goodjava/queryparser/SynonymParser.java Mon Mar 09 18:27:19 2020 -0600 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,43 +0,0 @@ -package goodjava.queryparser; - -import java.util.Map; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.BooleanQuery; -import org.apache.lucene.search.BooleanClause; -import org.apache.lucene.search.SortField; -import goodjava.parser.ParseException; - - -public class SynonymParser implements FieldParser { - private final FieldParser fp; - private final Map<String,String[]> synonymMap; - - public SynonymParser(FieldParser fp,Map<String,String[]> synonymMap) { - this.fp = fp; - this.synonymMap = synonymMap; - } - - protected String[] getSynonyms(String query) { - return synonymMap.get(query); - } - - public Query getQuery(SaneQueryParser qp,String field,String query) throws ParseException { - String[] synonyms = getSynonyms(query); - if( synonyms == null ) - return fp.getQuery(qp,field,query); - BooleanQuery bq = new BooleanQuery(); - bq.add( fp.getQuery(qp,field,query), BooleanClause.Occur.SHOULD ); - for( String s : synonyms ) { - bq.add( fp.getQuery(qp,field,s), BooleanClause.Occur.SHOULD ); - } - return bq; - } - - public Query getRangeQuery(SaneQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { - return fp.getRangeQuery(qp,field,minQuery,maxQuery,includeMin,includeMax); - } - - public SortField getSortField(SaneQueryParser qp,String field,boolean reverse) throws ParseException { - return fp.getSortField(qp,field,reverse); - } -}
diff -r a84ce37f3892 -r 6b6c11c9164e src/luan/modules/lucene/Lucene.luan --- a/src/luan/modules/lucene/Lucene.luan Mon Mar 09 18:27:19 2020 -0600 +++ b/src/luan/modules/lucene/Lucene.luan Fri Mar 20 10:58:53 2020 -0600 @@ -15,8 +15,8 @@ local matches = String.matches or error() local Rpc = require "luan:Rpc.luan" local LuceneIndex = require "java:luan.modules.lucene.LuceneIndex" -local NumberFieldParser = require "java:goodjava.queryparser.NumberFieldParser" -local SaneQueryParser = require "java:goodjava.queryparser.SaneQueryParser" +local NumberFieldParser = require "java:goodjava.lucene.queryparser.NumberFieldParser" +local SaneQueryParser = require "java:goodjava.lucene.queryparser.SaneQueryParser" local Logging = require "luan:logging/Logging.luan" local logger = Logging.logger "Lucene"
diff -r a84ce37f3892 -r 6b6c11c9164e src/luan/modules/lucene/LuceneIndex.java --- a/src/luan/modules/lucene/LuceneIndex.java Mon Mar 09 18:27:19 2020 -0600 +++ b/src/luan/modules/lucene/LuceneIndex.java Fri Mar 20 10:58:53 2020 -0600 @@ -68,11 +68,11 @@ import org.apache.lucene.search.highlight.SimpleSpanFragmenter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.TokenGroup; -import goodjava.queryparser.SaneQueryParser; -import goodjava.queryparser.FieldParser; -import goodjava.queryparser.MultiFieldParser; -import goodjava.queryparser.StringFieldParser; -import goodjava.queryparser.NumberFieldParser; +import goodjava.lucene.queryparser.SaneQueryParser; +import goodjava.lucene.queryparser.FieldParser; +import goodjava.lucene.queryparser.MultiFieldParser; +import goodjava.lucene.queryparser.StringFieldParser; +import goodjava.lucene.queryparser.NumberFieldParser; import goodjava.parser.ParseException; import luan.modules.Utils; import luan.Luan;