Mercurial Hosting > luan
diff src/goodjava/queryparser/SaneQueryParser.java @ 1402:27efb1fcbcb5
move luan.lib to goodjava
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Tue, 17 Sep 2019 01:35:01 -0400 |
parents | src/luan/lib/queryparser/SaneQueryParser.java@77f2d091f17f |
children | e48290f3d9fb |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/queryparser/SaneQueryParser.java Tue Sep 17 01:35:01 2019 -0400 @@ -0,0 +1,261 @@ +package goodjava.queryparser; + +import java.util.List; +import java.util.ArrayList; +import java.util.regex.Pattern; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.MatchAllDocsQuery; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import goodjava.parser.Parser; +import goodjava.parser.ParseException; + + +public class SaneQueryParser { + + public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException { + return new SaneQueryParser(fieldParser,query).parseQuery(); + } + + private static Pattern specialChar = Pattern.compile("[ \\t\\r\\n\":\\[\\]{}^+\\-(),?*\\\\]"); + + public static String literal(String s) { + return specialChar.matcher(s).replaceAll("\\\\$0"); + } + + public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException { + return new SaneQueryParser(fieldParser,sort).parseSort(); + } + + + private static final String NOT_IN_RANGE = " \t\r\n\":[]{}^+()"; + private static final String NOT_IN_TERM = NOT_IN_RANGE + "-"; + private static final String NOT_IN_FIELD = NOT_IN_TERM + ","; + private final FieldParser fieldParser; + private final Parser parser; + + private SaneQueryParser(FieldParser fieldParser,String query) { + this.fieldParser = fieldParser; + this.parser = new Parser(query); + parser.begin(); + } + + ParseException exception(String msg) { + parser.failure(); + return new ParseException(parser,msg); + } + + ParseException exception(Exception cause) { + parser.failure(); + return new ParseException(parser,cause); + } + + private Query parseQuery() throws ParseException { + Spaces(); + BooleanQuery bq = new BooleanQuery(); + while( !parser.endOfInput() ) { + bq.add( Term(null) ); + } + BooleanClause[] clauses = bq.getClauses(); + switch( clauses.length ) { + case 0: + return new MatchAllDocsQuery(); + case 1: + { + BooleanClause bc = clauses[0]; + if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) + return bc.getQuery(); + } + default: + return bq; + } + } + + private BooleanClause Term(String defaultField) throws ParseException { + BooleanClause.Occur occur; + if( parser.match('+') ) { + occur = BooleanClause.Occur.MUST; + Spaces(); + } else if( parser.match('-') ) { + occur = BooleanClause.Occur.MUST_NOT; + Spaces(); + } else { + occur = BooleanClause.Occur.SHOULD; + } + String field = QueryField(); + if( field == null ) + field = defaultField; + Query query = NestedTerm(field); + if( query == null ) + query = RangeTerm(field); + if( query == null ) { + parser.begin(); + String match = SimpleTerm(NOT_IN_TERM); + query = fieldParser.getQuery(this,field,match); + parser.success(); + } + if( parser.match('^') ) { + Spaces(); + int start = parser.begin(); + try { + while( parser.anyOf("0123456789.") ); + String match = parser.textFrom(start); + float boost = Float.parseFloat(match); + query.setBoost(boost); + } catch(NumberFormatException e) { + throw exception(e); + } + parser.success(); + Spaces(); + } + BooleanClause bc = new BooleanClause(query,occur); + return bc; + } + + private Query NestedTerm(String field) throws ParseException { + parser.begin(); + if( !parser.match('(') ) + return parser.failure(null); + BooleanQuery bq = new BooleanQuery(); + while( !parser.match(')') ) { + if( parser.endOfInput() ) + throw exception("unclosed parentheses"); + bq.add( Term(field) ); + } + Spaces(); + BooleanClause[] clauses = bq.getClauses(); + switch( clauses.length ) { + case 0: + throw exception("empty parentheses"); + case 1: + { + BooleanClause bc = clauses[0]; + if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) + return parser.success(bc.getQuery()); + } + default: + return parser.success(bq); + } + } + + private Query RangeTerm(String field) throws ParseException { + parser.begin(); + if( !parser.anyOf("[{") ) + return parser.failure(null); + boolean includeMin = parser.lastChar() == '['; + Spaces(); + String minQuery = SimpleTerm(NOT_IN_RANGE); + TO(); + String maxQuery = SimpleTerm(NOT_IN_RANGE); + if( !parser.anyOf("]}") ) + throw exception("unclosed range"); + boolean includeMax = parser.lastChar() == ']'; + Spaces(); + Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax); + return parser.success(query); + } + + private void TO() throws ParseException { + parser.begin(); + if( !(parser.match("TO") && Space()) ) + throw exception("'TO' expected"); + Spaces(); + parser.success(); + } + + private String SimpleTerm(String exclude) throws ParseException { + parser.begin(); + String match; + if( parser.match('"') ) { + int start = parser.currentIndex() - 1; + while( !parser.match('"') ) { + if( parser.endOfInput() ) + throw exception("unclosed quotes"); + parser.anyChar(); + checkEscape(); + } + match = parser.textFrom(start); + Spaces(); + } else { + match = Unquoted(exclude); + } + if( match.length() == 0 ) + throw exception("invalid input"); + return parser.success(match); + } + + private String QueryField() throws ParseException { + parser.begin(); + String match = Field(); + if( match==null || !parser.match(':') ) + return parser.failure((String)null); + Spaces(); + return parser.success(match); + } + + private String Field() throws ParseException { + parser.begin(); + String match = Unquoted(NOT_IN_FIELD); + if( match.length()==0 ) + return parser.failure((String)null); + match = StringFieldParser.escape(this,match); + return parser.success(match); + } + + private String Unquoted(String exclude) throws ParseException { + int start = parser.begin(); + while( parser.noneOf(exclude) ) { + checkEscape(); + } + String match = parser.textFrom(start); + Spaces(); + return parser.success(match); + } + + private void checkEscape() { + if( parser.lastChar() == '\\' ) + parser.anyChar(); + } + + private void Spaces() { + while( Space() ); + } + + private boolean Space() { + return parser.anyOf(" \t\r\n"); + } + + + // sort + + private Sort parseSort() throws ParseException { + Spaces(); + if( parser.endOfInput() ) + return null; + List<SortField> list = new ArrayList<SortField>(); + list.add( SortField() ); + while( !parser.endOfInput() ) { + parser.begin(); + if( !parser.match(',') ) + throw exception("',' expected"); + Spaces(); + parser.success(); + list.add( SortField() ); + } + return new Sort(list.toArray(new SortField[0])); + } + + private SortField SortField() throws ParseException { + parser.begin(); + String field = Field(); + if( field==null ) + throw exception("invalid input"); + boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc"); + Spaces(); + SortField sf = fieldParser.getSortField(this,field,reverse); + return parser.success(sf); + } + +}