Mercurial Hosting > luan
diff src/goodjava/lucene/queryparser/GoodQueryParser.java @ 1459:b04b8fc5f4f4
GoodQueryParser
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 20 Mar 2020 11:06:53 -0600 |
parents | src/goodjava/lucene/queryparser/SaneQueryParser.java@6b6c11c9164e |
children | b1195cfe8712 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/queryparser/GoodQueryParser.java Fri Mar 20 11:06:53 2020 -0600 @@ -0,0 +1,316 @@ +package goodjava.lucene.queryparser; + +import java.util.List; +import java.util.ArrayList; +import java.util.regex.Pattern; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import goodjava.parser.Parser; +import goodjava.parser.ParseException; + + +public class GoodQueryParser { + + public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException { + return new GoodQueryParser(fieldParser,query).parseQuery(); + } + + public static String quote(String s) { + s = s.replace("\\","\\\\"); + s = s.replace("\b","\\b"); + s = s.replace("\f","\\f"); + s = s.replace("\n","\\n"); + s = s.replace("\r","\\r"); + s = s.replace("\t","\\t"); + s = s.replace("\"","\\\""); + return "\""+s+"\""; + } + + public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException { + return new GoodQueryParser(fieldParser,sort).parseSort(); + } + + + private static final String NOT_IN_RANGE = " \t\r\n\":[]{}^+()"; + private static final String NOT_IN_TERM = NOT_IN_RANGE + "-"; + private static final String NOT_IN_FIELD = NOT_IN_TERM + ","; + private final FieldParser fieldParser; + private final Parser parser; + + private GoodQueryParser(FieldParser fieldParser,String query) { + this.fieldParser = fieldParser; + this.parser = new Parser(query); + parser.begin(); + } + + ParseException exception(String msg) { + parser.failure(); + return new ParseException(parser,msg); + } + + ParseException exception(Exception cause) { + parser.failure(); + return new ParseException(parser,cause); + } + + private Query parseQuery() throws ParseException { + Spaces(); + BooleanQuery bq = new BooleanQuery(); + while( !parser.endOfInput() ) { + bq.add( Term(null) ); + } + BooleanClause[] clauses = bq.getClauses(); + switch( clauses.length ) { + case 0: + return new MatchAllDocsQuery(); + case 1: + { + BooleanClause bc = clauses[0]; + if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) + return bc.getQuery(); + } + default: + return bq; + } + } + + private BooleanClause Term(String defaultField) throws ParseException { + BooleanClause.Occur occur; + if( parser.match('+') ) { + occur = BooleanClause.Occur.MUST; + Spaces(); + } else if( parser.match('-') ) { + occur = BooleanClause.Occur.MUST_NOT; + Spaces(); + } else { + occur = BooleanClause.Occur.SHOULD; + } + String field = QueryField(); + if( field == null ) + field = defaultField; + Query query = NestedTerm(field); + if( query == null ) + query = RangeTerm(field); + if( query == null ) { + parser.begin(); + String match = SimpleTerm(NOT_IN_TERM); + query = fieldParser.getQuery(this,field,match); + parser.success(); + } + if( parser.match('^') ) { + Spaces(); + int start = parser.begin(); + try { + while( parser.anyOf("0123456789.") ); + String match = parser.textFrom(start); + float boost = Float.parseFloat(match); + query.setBoost(boost); + } catch(NumberFormatException e) { + throw exception(e); + } + parser.success(); + Spaces(); + } + BooleanClause bc = new BooleanClause(query,occur); + return bc; + } + + private Query NestedTerm(String field) throws ParseException { + parser.begin(); + if( !parser.match('(') ) + return parser.failure(null); + BooleanQuery bq = new BooleanQuery(); + while( !parser.match(')') ) { + if( parser.endOfInput() ) + throw exception("unclosed parentheses"); + bq.add( Term(field) ); + } + Spaces(); + BooleanClause[] clauses = bq.getClauses(); + switch( clauses.length ) { + case 0: + throw exception("empty parentheses"); + case 1: + { + BooleanClause bc = clauses[0]; + if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) + return parser.success(bc.getQuery()); + } + default: + return parser.success(bq); + } + } + + private Query RangeTerm(String field) throws ParseException { + parser.begin(); + if( !parser.anyOf("[{") ) + return parser.failure(null); + boolean includeMin = parser.lastChar() == '['; + Spaces(); + String minQuery = SimpleTerm(NOT_IN_RANGE); + TO(); + String maxQuery = SimpleTerm(NOT_IN_RANGE); + if( !parser.anyOf("]}") ) + throw exception("unclosed range"); + boolean includeMax = parser.lastChar() == ']'; + Spaces(); + Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax); + return parser.success(query); + } + + private void TO() throws ParseException { + parser.begin(); + if( !(parser.match("TO") && Space()) ) + throw exception("'TO' expected"); + Spaces(); + parser.success(); + } + + private String SimpleTerm(String exclude) throws ParseException { + parser.begin(); + String match = Quoted(); + if( match==null ) + match = Unquoted(exclude); + if( match.length() == 0 ) + throw exception("invalid input"); + return parser.success(match); + } + + private String QueryField() throws ParseException { + parser.begin(); + String match = Field(); + if( match==null || !parser.match(':') ) + return parser.failure((String)null); + Spaces(); + return parser.success(match); + } + + private String Field() throws ParseException { + parser.begin(); + String match = Unquoted(NOT_IN_FIELD); + if( match.length()==0 ) + return parser.failure((String)null); + match = StringFieldParser.escape(this,match); + return parser.success(match); + } + + private String Quoted() throws ParseException { + parser.begin(); + if( !parser.match('"') ) + return parser.failure(null); + StringBuilder sb = new StringBuilder(); + while( parser.anyChar() ) { + char c = parser.lastChar(); + switch(c) { + case '"': + return parser.success(sb.toString()); + case '\\': + if( parser.anyChar() ) { + c = parser.lastChar(); + switch(c) { + case '"': + case '\\': + sb.append(c); + continue; + case 'b': + sb.append('\b'); + continue; + case 'f': + sb.append('\f'); + continue; + case 'n': + sb.append('\n'); + continue; + case 'r': + sb.append('\r'); + continue; + case 't': + sb.append('\t'); + continue; + case 'u': + int n = 0; + for( int i=0; i<4; i++ ) { + int d; + if( parser.inCharRange('0','9') ) { + d = parser.lastChar() - '0'; + } else if( parser.inCharRange('a','f') ) { + d = parser.lastChar() - 'a' + 10; + } else if( parser.inCharRange('A','F') ) { + d = parser.lastChar() - 'A' + 10; + } else { + throw exception("invalid hex digit"); + } + n = 16*n + d; + } + sb.append((char)n); + continue; + } + } + throw exception("invalid escape char"); + default: + sb.append(c); + } + } + parser.failure(); + throw exception("unclosed string"); + } + + private String Unquoted(String exclude) throws ParseException { + int start = parser.begin(); + while( parser.noneOf(exclude) ) { + checkEscape(); + } + String match = parser.textFrom(start); + Spaces(); + return parser.success(match); + } + + private void checkEscape() { + if( parser.lastChar() == '\\' ) + parser.anyChar(); + } + + private void Spaces() { + while( Space() ); + } + + private boolean Space() { + return parser.anyOf(" \t\r\n"); + } + + + // sort + + private Sort parseSort() throws ParseException { + Spaces(); + if( parser.endOfInput() ) + return null; + List<SortField> list = new ArrayList<SortField>(); + list.add( SortField() ); + while( !parser.endOfInput() ) { + parser.begin(); + if( !parser.match(',') ) + throw exception("',' expected"); + Spaces(); + parser.success(); + list.add( SortField() ); + } + return new Sort(list.toArray(new SortField[0])); + } + + private SortField SortField() throws ParseException { + parser.begin(); + String field = Field(); + if( field==null ) + throw exception("invalid input"); + boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc"); + Spaces(); + SortField sf = fieldParser.getSortField(this,field,reverse); + return parser.success(sf); + } + +}