Mercurial Hosting > luan
comparison src/goodjava/queryparser/SaneQueryParser.java @ 1402:27efb1fcbcb5
move luan.lib to goodjava
| author | Franklin Schmidt <fschmidt@gmail.com> | 
|---|---|
| date | Tue, 17 Sep 2019 01:35:01 -0400 | 
| parents | src/luan/lib/queryparser/SaneQueryParser.java@77f2d091f17f | 
| children | e48290f3d9fb | 
   comparison
  equal
  deleted
  inserted
  replaced
| 1401:ef1620aa99cb | 1402:27efb1fcbcb5 | 
|---|---|
| 1 package goodjava.queryparser; | |
| 2 | |
| 3 import java.util.List; | |
| 4 import java.util.ArrayList; | |
| 5 import java.util.regex.Pattern; | |
| 6 import org.apache.lucene.search.Query; | |
| 7 import org.apache.lucene.search.MatchAllDocsQuery; | |
| 8 import org.apache.lucene.search.BooleanClause; | |
| 9 import org.apache.lucene.search.BooleanQuery; | |
| 10 import org.apache.lucene.search.Sort; | |
| 11 import org.apache.lucene.search.SortField; | |
| 12 import goodjava.parser.Parser; | |
| 13 import goodjava.parser.ParseException; | |
| 14 | |
| 15 | |
| 16 public class SaneQueryParser { | |
| 17 | |
| 18 public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException { | |
| 19 return new SaneQueryParser(fieldParser,query).parseQuery(); | |
| 20 } | |
| 21 | |
| 22 private static Pattern specialChar = Pattern.compile("[ \\t\\r\\n\":\\[\\]{}^+\\-(),?*\\\\]"); | |
| 23 | |
| 24 public static String literal(String s) { | |
| 25 return specialChar.matcher(s).replaceAll("\\\\$0"); | |
| 26 } | |
| 27 | |
| 28 public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException { | |
| 29 return new SaneQueryParser(fieldParser,sort).parseSort(); | |
| 30 } | |
| 31 | |
| 32 | |
| 33 private static final String NOT_IN_RANGE = " \t\r\n\":[]{}^+()"; | |
| 34 private static final String NOT_IN_TERM = NOT_IN_RANGE + "-"; | |
| 35 private static final String NOT_IN_FIELD = NOT_IN_TERM + ","; | |
| 36 private final FieldParser fieldParser; | |
| 37 private final Parser parser; | |
| 38 | |
| 39 private SaneQueryParser(FieldParser fieldParser,String query) { | |
| 40 this.fieldParser = fieldParser; | |
| 41 this.parser = new Parser(query); | |
| 42 parser.begin(); | |
| 43 } | |
| 44 | |
| 45 ParseException exception(String msg) { | |
| 46 parser.failure(); | |
| 47 return new ParseException(parser,msg); | |
| 48 } | |
| 49 | |
| 50 ParseException exception(Exception cause) { | |
| 51 parser.failure(); | |
| 52 return new ParseException(parser,cause); | |
| 53 } | |
| 54 | |
| 55 private Query parseQuery() throws ParseException { | |
| 56 Spaces(); | |
| 57 BooleanQuery bq = new BooleanQuery(); | |
| 58 while( !parser.endOfInput() ) { | |
| 59 bq.add( Term(null) ); | |
| 60 } | |
| 61 BooleanClause[] clauses = bq.getClauses(); | |
| 62 switch( clauses.length ) { | |
| 63 case 0: | |
| 64 return new MatchAllDocsQuery(); | |
| 65 case 1: | |
| 66 { | |
| 67 BooleanClause bc = clauses[0]; | |
| 68 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) | |
| 69 return bc.getQuery(); | |
| 70 } | |
| 71 default: | |
| 72 return bq; | |
| 73 } | |
| 74 } | |
| 75 | |
| 76 private BooleanClause Term(String defaultField) throws ParseException { | |
| 77 BooleanClause.Occur occur; | |
| 78 if( parser.match('+') ) { | |
| 79 occur = BooleanClause.Occur.MUST; | |
| 80 Spaces(); | |
| 81 } else if( parser.match('-') ) { | |
| 82 occur = BooleanClause.Occur.MUST_NOT; | |
| 83 Spaces(); | |
| 84 } else { | |
| 85 occur = BooleanClause.Occur.SHOULD; | |
| 86 } | |
| 87 String field = QueryField(); | |
| 88 if( field == null ) | |
| 89 field = defaultField; | |
| 90 Query query = NestedTerm(field); | |
| 91 if( query == null ) | |
| 92 query = RangeTerm(field); | |
| 93 if( query == null ) { | |
| 94 parser.begin(); | |
| 95 String match = SimpleTerm(NOT_IN_TERM); | |
| 96 query = fieldParser.getQuery(this,field,match); | |
| 97 parser.success(); | |
| 98 } | |
| 99 if( parser.match('^') ) { | |
| 100 Spaces(); | |
| 101 int start = parser.begin(); | |
| 102 try { | |
| 103 while( parser.anyOf("0123456789.") ); | |
| 104 String match = parser.textFrom(start); | |
| 105 float boost = Float.parseFloat(match); | |
| 106 query.setBoost(boost); | |
| 107 } catch(NumberFormatException e) { | |
| 108 throw exception(e); | |
| 109 } | |
| 110 parser.success(); | |
| 111 Spaces(); | |
| 112 } | |
| 113 BooleanClause bc = new BooleanClause(query,occur); | |
| 114 return bc; | |
| 115 } | |
| 116 | |
| 117 private Query NestedTerm(String field) throws ParseException { | |
| 118 parser.begin(); | |
| 119 if( !parser.match('(') ) | |
| 120 return parser.failure(null); | |
| 121 BooleanQuery bq = new BooleanQuery(); | |
| 122 while( !parser.match(')') ) { | |
| 123 if( parser.endOfInput() ) | |
| 124 throw exception("unclosed parentheses"); | |
| 125 bq.add( Term(field) ); | |
| 126 } | |
| 127 Spaces(); | |
| 128 BooleanClause[] clauses = bq.getClauses(); | |
| 129 switch( clauses.length ) { | |
| 130 case 0: | |
| 131 throw exception("empty parentheses"); | |
| 132 case 1: | |
| 133 { | |
| 134 BooleanClause bc = clauses[0]; | |
| 135 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) | |
| 136 return parser.success(bc.getQuery()); | |
| 137 } | |
| 138 default: | |
| 139 return parser.success(bq); | |
| 140 } | |
| 141 } | |
| 142 | |
| 143 private Query RangeTerm(String field) throws ParseException { | |
| 144 parser.begin(); | |
| 145 if( !parser.anyOf("[{") ) | |
| 146 return parser.failure(null); | |
| 147 boolean includeMin = parser.lastChar() == '['; | |
| 148 Spaces(); | |
| 149 String minQuery = SimpleTerm(NOT_IN_RANGE); | |
| 150 TO(); | |
| 151 String maxQuery = SimpleTerm(NOT_IN_RANGE); | |
| 152 if( !parser.anyOf("]}") ) | |
| 153 throw exception("unclosed range"); | |
| 154 boolean includeMax = parser.lastChar() == ']'; | |
| 155 Spaces(); | |
| 156 Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax); | |
| 157 return parser.success(query); | |
| 158 } | |
| 159 | |
| 160 private void TO() throws ParseException { | |
| 161 parser.begin(); | |
| 162 if( !(parser.match("TO") && Space()) ) | |
| 163 throw exception("'TO' expected"); | |
| 164 Spaces(); | |
| 165 parser.success(); | |
| 166 } | |
| 167 | |
| 168 private String SimpleTerm(String exclude) throws ParseException { | |
| 169 parser.begin(); | |
| 170 String match; | |
| 171 if( parser.match('"') ) { | |
| 172 int start = parser.currentIndex() - 1; | |
| 173 while( !parser.match('"') ) { | |
| 174 if( parser.endOfInput() ) | |
| 175 throw exception("unclosed quotes"); | |
| 176 parser.anyChar(); | |
| 177 checkEscape(); | |
| 178 } | |
| 179 match = parser.textFrom(start); | |
| 180 Spaces(); | |
| 181 } else { | |
| 182 match = Unquoted(exclude); | |
| 183 } | |
| 184 if( match.length() == 0 ) | |
| 185 throw exception("invalid input"); | |
| 186 return parser.success(match); | |
| 187 } | |
| 188 | |
| 189 private String QueryField() throws ParseException { | |
| 190 parser.begin(); | |
| 191 String match = Field(); | |
| 192 if( match==null || !parser.match(':') ) | |
| 193 return parser.failure((String)null); | |
| 194 Spaces(); | |
| 195 return parser.success(match); | |
| 196 } | |
| 197 | |
| 198 private String Field() throws ParseException { | |
| 199 parser.begin(); | |
| 200 String match = Unquoted(NOT_IN_FIELD); | |
| 201 if( match.length()==0 ) | |
| 202 return parser.failure((String)null); | |
| 203 match = StringFieldParser.escape(this,match); | |
| 204 return parser.success(match); | |
| 205 } | |
| 206 | |
| 207 private String Unquoted(String exclude) throws ParseException { | |
| 208 int start = parser.begin(); | |
| 209 while( parser.noneOf(exclude) ) { | |
| 210 checkEscape(); | |
| 211 } | |
| 212 String match = parser.textFrom(start); | |
| 213 Spaces(); | |
| 214 return parser.success(match); | |
| 215 } | |
| 216 | |
| 217 private void checkEscape() { | |
| 218 if( parser.lastChar() == '\\' ) | |
| 219 parser.anyChar(); | |
| 220 } | |
| 221 | |
| 222 private void Spaces() { | |
| 223 while( Space() ); | |
| 224 } | |
| 225 | |
| 226 private boolean Space() { | |
| 227 return parser.anyOf(" \t\r\n"); | |
| 228 } | |
| 229 | |
| 230 | |
| 231 // sort | |
| 232 | |
| 233 private Sort parseSort() throws ParseException { | |
| 234 Spaces(); | |
| 235 if( parser.endOfInput() ) | |
| 236 return null; | |
| 237 List<SortField> list = new ArrayList<SortField>(); | |
| 238 list.add( SortField() ); | |
| 239 while( !parser.endOfInput() ) { | |
| 240 parser.begin(); | |
| 241 if( !parser.match(',') ) | |
| 242 throw exception("',' expected"); | |
| 243 Spaces(); | |
| 244 parser.success(); | |
| 245 list.add( SortField() ); | |
| 246 } | |
| 247 return new Sort(list.toArray(new SortField[0])); | |
| 248 } | |
| 249 | |
| 250 private SortField SortField() throws ParseException { | |
| 251 parser.begin(); | |
| 252 String field = Field(); | |
| 253 if( field==null ) | |
| 254 throw exception("invalid input"); | |
| 255 boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc"); | |
| 256 Spaces(); | |
| 257 SortField sf = fieldParser.getSortField(this,field,reverse); | |
| 258 return parser.success(sf); | |
| 259 } | |
| 260 | |
| 261 } | 
