Mercurial Hosting > luan
comparison src/goodjava/lucene/queryparser/GoodQueryParser.java @ 1459:b04b8fc5f4f4
GoodQueryParser
| author | Franklin Schmidt <fschmidt@gmail.com> |
|---|---|
| date | Fri, 20 Mar 2020 11:06:53 -0600 |
| parents | src/goodjava/lucene/queryparser/SaneQueryParser.java@6b6c11c9164e |
| children | b1195cfe8712 |
comparison
equal
deleted
inserted
replaced
| 1458:6b6c11c9164e | 1459:b04b8fc5f4f4 |
|---|---|
| 1 package goodjava.lucene.queryparser; | |
| 2 | |
| 3 import java.util.List; | |
| 4 import java.util.ArrayList; | |
| 5 import java.util.regex.Pattern; | |
| 6 import org.apache.lucene.search.Query; | |
| 7 import org.apache.lucene.search.MatchAllDocsQuery; | |
| 8 import org.apache.lucene.search.BooleanClause; | |
| 9 import org.apache.lucene.search.BooleanQuery; | |
| 10 import org.apache.lucene.search.Sort; | |
| 11 import org.apache.lucene.search.SortField; | |
| 12 import goodjava.parser.Parser; | |
| 13 import goodjava.parser.ParseException; | |
| 14 | |
| 15 | |
| 16 public class GoodQueryParser { | |
| 17 | |
| 18 public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException { | |
| 19 return new GoodQueryParser(fieldParser,query).parseQuery(); | |
| 20 } | |
| 21 | |
| 22 public static String quote(String s) { | |
| 23 s = s.replace("\\","\\\\"); | |
| 24 s = s.replace("\b","\\b"); | |
| 25 s = s.replace("\f","\\f"); | |
| 26 s = s.replace("\n","\\n"); | |
| 27 s = s.replace("\r","\\r"); | |
| 28 s = s.replace("\t","\\t"); | |
| 29 s = s.replace("\"","\\\""); | |
| 30 return "\""+s+"\""; | |
| 31 } | |
| 32 | |
| 33 public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException { | |
| 34 return new GoodQueryParser(fieldParser,sort).parseSort(); | |
| 35 } | |
| 36 | |
| 37 | |
| 38 private static final String NOT_IN_RANGE = " \t\r\n\":[]{}^+()"; | |
| 39 private static final String NOT_IN_TERM = NOT_IN_RANGE + "-"; | |
| 40 private static final String NOT_IN_FIELD = NOT_IN_TERM + ","; | |
| 41 private final FieldParser fieldParser; | |
| 42 private final Parser parser; | |
| 43 | |
/**
 * Creates a parser over the given text and opens the outermost parser frame.
 *
 * @param fieldParser callback used to build queries and sort fields
 * @param query the text to parse (a query or a sort specification)
 */
private GoodQueryParser(FieldParser fieldParser,String query) {
    Parser textParser = new Parser(query);
    textParser.begin();
    this.parser = textParser;
    this.fieldParser = fieldParser;
}
| 49 | |
| 50 ParseException exception(String msg) { | |
| 51 parser.failure(); | |
| 52 return new ParseException(parser,msg); | |
| 53 } | |
| 54 | |
| 55 ParseException exception(Exception cause) { | |
| 56 parser.failure(); | |
| 57 return new ParseException(parser,cause); | |
| 58 } | |
| 59 | |
| 60 private Query parseQuery() throws ParseException { | |
| 61 Spaces(); | |
| 62 BooleanQuery bq = new BooleanQuery(); | |
| 63 while( !parser.endOfInput() ) { | |
| 64 bq.add( Term(null) ); | |
| 65 } | |
| 66 BooleanClause[] clauses = bq.getClauses(); | |
| 67 switch( clauses.length ) { | |
| 68 case 0: | |
| 69 return new MatchAllDocsQuery(); | |
| 70 case 1: | |
| 71 { | |
| 72 BooleanClause bc = clauses[0]; | |
| 73 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) | |
| 74 return bc.getQuery(); | |
| 75 } | |
| 76 default: | |
| 77 return bq; | |
| 78 } | |
| 79 } | |
| 80 | |
| 81 private BooleanClause Term(String defaultField) throws ParseException { | |
| 82 BooleanClause.Occur occur; | |
| 83 if( parser.match('+') ) { | |
| 84 occur = BooleanClause.Occur.MUST; | |
| 85 Spaces(); | |
| 86 } else if( parser.match('-') ) { | |
| 87 occur = BooleanClause.Occur.MUST_NOT; | |
| 88 Spaces(); | |
| 89 } else { | |
| 90 occur = BooleanClause.Occur.SHOULD; | |
| 91 } | |
| 92 String field = QueryField(); | |
| 93 if( field == null ) | |
| 94 field = defaultField; | |
| 95 Query query = NestedTerm(field); | |
| 96 if( query == null ) | |
| 97 query = RangeTerm(field); | |
| 98 if( query == null ) { | |
| 99 parser.begin(); | |
| 100 String match = SimpleTerm(NOT_IN_TERM); | |
| 101 query = fieldParser.getQuery(this,field,match); | |
| 102 parser.success(); | |
| 103 } | |
| 104 if( parser.match('^') ) { | |
| 105 Spaces(); | |
| 106 int start = parser.begin(); | |
| 107 try { | |
| 108 while( parser.anyOf("0123456789.") ); | |
| 109 String match = parser.textFrom(start); | |
| 110 float boost = Float.parseFloat(match); | |
| 111 query.setBoost(boost); | |
| 112 } catch(NumberFormatException e) { | |
| 113 throw exception(e); | |
| 114 } | |
| 115 parser.success(); | |
| 116 Spaces(); | |
| 117 } | |
| 118 BooleanClause bc = new BooleanClause(query,occur); | |
| 119 return bc; | |
| 120 } | |
| 121 | |
| 122 private Query NestedTerm(String field) throws ParseException { | |
| 123 parser.begin(); | |
| 124 if( !parser.match('(') ) | |
| 125 return parser.failure(null); | |
| 126 BooleanQuery bq = new BooleanQuery(); | |
| 127 while( !parser.match(')') ) { | |
| 128 if( parser.endOfInput() ) | |
| 129 throw exception("unclosed parentheses"); | |
| 130 bq.add( Term(field) ); | |
| 131 } | |
| 132 Spaces(); | |
| 133 BooleanClause[] clauses = bq.getClauses(); | |
| 134 switch( clauses.length ) { | |
| 135 case 0: | |
| 136 throw exception("empty parentheses"); | |
| 137 case 1: | |
| 138 { | |
| 139 BooleanClause bc = clauses[0]; | |
| 140 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) | |
| 141 return parser.success(bc.getQuery()); | |
| 142 } | |
| 143 default: | |
| 144 return parser.success(bq); | |
| 145 } | |
| 146 } | |
| 147 | |
| 148 private Query RangeTerm(String field) throws ParseException { | |
| 149 parser.begin(); | |
| 150 if( !parser.anyOf("[{") ) | |
| 151 return parser.failure(null); | |
| 152 boolean includeMin = parser.lastChar() == '['; | |
| 153 Spaces(); | |
| 154 String minQuery = SimpleTerm(NOT_IN_RANGE); | |
| 155 TO(); | |
| 156 String maxQuery = SimpleTerm(NOT_IN_RANGE); | |
| 157 if( !parser.anyOf("]}") ) | |
| 158 throw exception("unclosed range"); | |
| 159 boolean includeMax = parser.lastChar() == ']'; | |
| 160 Spaces(); | |
| 161 Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax); | |
| 162 return parser.success(query); | |
| 163 } | |
| 164 | |
| 165 private void TO() throws ParseException { | |
| 166 parser.begin(); | |
| 167 if( !(parser.match("TO") && Space()) ) | |
| 168 throw exception("'TO' expected"); | |
| 169 Spaces(); | |
| 170 parser.success(); | |
| 171 } | |
| 172 | |
| 173 private String SimpleTerm(String exclude) throws ParseException { | |
| 174 parser.begin(); | |
| 175 String match = Quoted(); | |
| 176 if( match==null ) | |
| 177 match = Unquoted(exclude); | |
| 178 if( match.length() == 0 ) | |
| 179 throw exception("invalid input"); | |
| 180 return parser.success(match); | |
| 181 } | |
| 182 | |
| 183 private String QueryField() throws ParseException { | |
| 184 parser.begin(); | |
| 185 String match = Field(); | |
| 186 if( match==null || !parser.match(':') ) | |
| 187 return parser.failure((String)null); | |
| 188 Spaces(); | |
| 189 return parser.success(match); | |
| 190 } | |
| 191 | |
| 192 private String Field() throws ParseException { | |
| 193 parser.begin(); | |
| 194 String match = Unquoted(NOT_IN_FIELD); | |
| 195 if( match.length()==0 ) | |
| 196 return parser.failure((String)null); | |
| 197 match = StringFieldParser.escape(this,match); | |
| 198 return parser.success(match); | |
| 199 } | |
| 200 | |
| 201 private String Quoted() throws ParseException { | |
| 202 parser.begin(); | |
| 203 if( !parser.match('"') ) | |
| 204 return parser.failure(null); | |
| 205 StringBuilder sb = new StringBuilder(); | |
| 206 while( parser.anyChar() ) { | |
| 207 char c = parser.lastChar(); | |
| 208 switch(c) { | |
| 209 case '"': | |
| 210 return parser.success(sb.toString()); | |
| 211 case '\\': | |
| 212 if( parser.anyChar() ) { | |
| 213 c = parser.lastChar(); | |
| 214 switch(c) { | |
| 215 case '"': | |
| 216 case '\\': | |
| 217 sb.append(c); | |
| 218 continue; | |
| 219 case 'b': | |
| 220 sb.append('\b'); | |
| 221 continue; | |
| 222 case 'f': | |
| 223 sb.append('\f'); | |
| 224 continue; | |
| 225 case 'n': | |
| 226 sb.append('\n'); | |
| 227 continue; | |
| 228 case 'r': | |
| 229 sb.append('\r'); | |
| 230 continue; | |
| 231 case 't': | |
| 232 sb.append('\t'); | |
| 233 continue; | |
| 234 case 'u': | |
| 235 int n = 0; | |
| 236 for( int i=0; i<4; i++ ) { | |
| 237 int d; | |
| 238 if( parser.inCharRange('0','9') ) { | |
| 239 d = parser.lastChar() - '0'; | |
| 240 } else if( parser.inCharRange('a','f') ) { | |
| 241 d = parser.lastChar() - 'a' + 10; | |
| 242 } else if( parser.inCharRange('A','F') ) { | |
| 243 d = parser.lastChar() - 'A' + 10; | |
| 244 } else { | |
| 245 throw exception("invalid hex digit"); | |
| 246 } | |
| 247 n = 16*n + d; | |
| 248 } | |
| 249 sb.append((char)n); | |
| 250 continue; | |
| 251 } | |
| 252 } | |
| 253 throw exception("invalid escape char"); | |
| 254 default: | |
| 255 sb.append(c); | |
| 256 } | |
| 257 } | |
| 258 parser.failure(); | |
| 259 throw exception("unclosed string"); | |
| 260 } | |
| 261 | |
| 262 private String Unquoted(String exclude) throws ParseException { | |
| 263 int start = parser.begin(); | |
| 264 while( parser.noneOf(exclude) ) { | |
| 265 checkEscape(); | |
| 266 } | |
| 267 String match = parser.textFrom(start); | |
| 268 Spaces(); | |
| 269 return parser.success(match); | |
| 270 } | |
| 271 | |
| 272 private void checkEscape() { | |
| 273 if( parser.lastChar() == '\\' ) | |
| 274 parser.anyChar(); | |
| 275 } | |
| 276 | |
| 277 private void Spaces() { | |
| 278 while( Space() ); | |
| 279 } | |
| 280 | |
| 281 private boolean Space() { | |
| 282 return parser.anyOf(" \t\r\n"); | |
| 283 } | |
| 284 | |
| 285 | |
| 286 // sort | |
| 287 | |
| 288 private Sort parseSort() throws ParseException { | |
| 289 Spaces(); | |
| 290 if( parser.endOfInput() ) | |
| 291 return null; | |
| 292 List<SortField> list = new ArrayList<SortField>(); | |
| 293 list.add( SortField() ); | |
| 294 while( !parser.endOfInput() ) { | |
| 295 parser.begin(); | |
| 296 if( !parser.match(',') ) | |
| 297 throw exception("',' expected"); | |
| 298 Spaces(); | |
| 299 parser.success(); | |
| 300 list.add( SortField() ); | |
| 301 } | |
| 302 return new Sort(list.toArray(new SortField[0])); | |
| 303 } | |
| 304 | |
| 305 private SortField SortField() throws ParseException { | |
| 306 parser.begin(); | |
| 307 String field = Field(); | |
| 308 if( field==null ) | |
| 309 throw exception("invalid input"); | |
| 310 boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc"); | |
| 311 Spaces(); | |
| 312 SortField sf = fieldParser.getSortField(this,field,reverse); | |
| 313 return parser.success(sf); | |
| 314 } | |
| 315 | |
| 316 } |
