Mercurial Hosting > luan
comparison src/goodjava/lucene/queryparser/GoodQueryParser.java @ 1459:b04b8fc5f4f4
GoodQueryParser
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 20 Mar 2020 11:06:53 -0600 |
parents | src/goodjava/lucene/queryparser/SaneQueryParser.java@6b6c11c9164e |
children | b1195cfe8712 |
comparison
equal
deleted
inserted
replaced
1458:6b6c11c9164e | 1459:b04b8fc5f4f4 |
---|---|
1 package goodjava.lucene.queryparser; | |
2 | |
3 import java.util.List; | |
4 import java.util.ArrayList; | |
5 import java.util.regex.Pattern; | |
6 import org.apache.lucene.search.Query; | |
7 import org.apache.lucene.search.MatchAllDocsQuery; | |
8 import org.apache.lucene.search.BooleanClause; | |
9 import org.apache.lucene.search.BooleanQuery; | |
10 import org.apache.lucene.search.Sort; | |
11 import org.apache.lucene.search.SortField; | |
12 import goodjava.parser.Parser; | |
13 import goodjava.parser.ParseException; | |
14 | |
15 | |
16 public class GoodQueryParser { | |
17 | |
18 public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException { | |
19 return new GoodQueryParser(fieldParser,query).parseQuery(); | |
20 } | |
21 | |
22 public static String quote(String s) { | |
23 s = s.replace("\\","\\\\"); | |
24 s = s.replace("\b","\\b"); | |
25 s = s.replace("\f","\\f"); | |
26 s = s.replace("\n","\\n"); | |
27 s = s.replace("\r","\\r"); | |
28 s = s.replace("\t","\\t"); | |
29 s = s.replace("\"","\\\""); | |
30 return "\""+s+"\""; | |
31 } | |
32 | |
33 public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException { | |
34 return new GoodQueryParser(fieldParser,sort).parseSort(); | |
35 } | |
36 | |
37 | |
38 private static final String NOT_IN_RANGE = " \t\r\n\":[]{}^+()"; | |
39 private static final String NOT_IN_TERM = NOT_IN_RANGE + "-"; | |
40 private static final String NOT_IN_FIELD = NOT_IN_TERM + ","; | |
41 private final FieldParser fieldParser; | |
42 private final Parser parser; | |
43 | |
/**
 * Creates a parser over the given text and opens the outermost parser frame.
 *
 * @param fieldParser converts parsed fields and values into queries and sort fields
 * @param query the text to parse (a query or a sort specification)
 */
private GoodQueryParser(FieldParser fieldParser,String query) {
    Parser textParser = new Parser(query);
    textParser.begin();
    this.parser = textParser;
    this.fieldParser = fieldParser;
}
49 | |
50 ParseException exception(String msg) { | |
51 parser.failure(); | |
52 return new ParseException(parser,msg); | |
53 } | |
54 | |
55 ParseException exception(Exception cause) { | |
56 parser.failure(); | |
57 return new ParseException(parser,cause); | |
58 } | |
59 | |
60 private Query parseQuery() throws ParseException { | |
61 Spaces(); | |
62 BooleanQuery bq = new BooleanQuery(); | |
63 while( !parser.endOfInput() ) { | |
64 bq.add( Term(null) ); | |
65 } | |
66 BooleanClause[] clauses = bq.getClauses(); | |
67 switch( clauses.length ) { | |
68 case 0: | |
69 return new MatchAllDocsQuery(); | |
70 case 1: | |
71 { | |
72 BooleanClause bc = clauses[0]; | |
73 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) | |
74 return bc.getQuery(); | |
75 } | |
76 default: | |
77 return bq; | |
78 } | |
79 } | |
80 | |
81 private BooleanClause Term(String defaultField) throws ParseException { | |
82 BooleanClause.Occur occur; | |
83 if( parser.match('+') ) { | |
84 occur = BooleanClause.Occur.MUST; | |
85 Spaces(); | |
86 } else if( parser.match('-') ) { | |
87 occur = BooleanClause.Occur.MUST_NOT; | |
88 Spaces(); | |
89 } else { | |
90 occur = BooleanClause.Occur.SHOULD; | |
91 } | |
92 String field = QueryField(); | |
93 if( field == null ) | |
94 field = defaultField; | |
95 Query query = NestedTerm(field); | |
96 if( query == null ) | |
97 query = RangeTerm(field); | |
98 if( query == null ) { | |
99 parser.begin(); | |
100 String match = SimpleTerm(NOT_IN_TERM); | |
101 query = fieldParser.getQuery(this,field,match); | |
102 parser.success(); | |
103 } | |
104 if( parser.match('^') ) { | |
105 Spaces(); | |
106 int start = parser.begin(); | |
107 try { | |
108 while( parser.anyOf("0123456789.") ); | |
109 String match = parser.textFrom(start); | |
110 float boost = Float.parseFloat(match); | |
111 query.setBoost(boost); | |
112 } catch(NumberFormatException e) { | |
113 throw exception(e); | |
114 } | |
115 parser.success(); | |
116 Spaces(); | |
117 } | |
118 BooleanClause bc = new BooleanClause(query,occur); | |
119 return bc; | |
120 } | |
121 | |
122 private Query NestedTerm(String field) throws ParseException { | |
123 parser.begin(); | |
124 if( !parser.match('(') ) | |
125 return parser.failure(null); | |
126 BooleanQuery bq = new BooleanQuery(); | |
127 while( !parser.match(')') ) { | |
128 if( parser.endOfInput() ) | |
129 throw exception("unclosed parentheses"); | |
130 bq.add( Term(field) ); | |
131 } | |
132 Spaces(); | |
133 BooleanClause[] clauses = bq.getClauses(); | |
134 switch( clauses.length ) { | |
135 case 0: | |
136 throw exception("empty parentheses"); | |
137 case 1: | |
138 { | |
139 BooleanClause bc = clauses[0]; | |
140 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) | |
141 return parser.success(bc.getQuery()); | |
142 } | |
143 default: | |
144 return parser.success(bq); | |
145 } | |
146 } | |
147 | |
148 private Query RangeTerm(String field) throws ParseException { | |
149 parser.begin(); | |
150 if( !parser.anyOf("[{") ) | |
151 return parser.failure(null); | |
152 boolean includeMin = parser.lastChar() == '['; | |
153 Spaces(); | |
154 String minQuery = SimpleTerm(NOT_IN_RANGE); | |
155 TO(); | |
156 String maxQuery = SimpleTerm(NOT_IN_RANGE); | |
157 if( !parser.anyOf("]}") ) | |
158 throw exception("unclosed range"); | |
159 boolean includeMax = parser.lastChar() == ']'; | |
160 Spaces(); | |
161 Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax); | |
162 return parser.success(query); | |
163 } | |
164 | |
165 private void TO() throws ParseException { | |
166 parser.begin(); | |
167 if( !(parser.match("TO") && Space()) ) | |
168 throw exception("'TO' expected"); | |
169 Spaces(); | |
170 parser.success(); | |
171 } | |
172 | |
173 private String SimpleTerm(String exclude) throws ParseException { | |
174 parser.begin(); | |
175 String match = Quoted(); | |
176 if( match==null ) | |
177 match = Unquoted(exclude); | |
178 if( match.length() == 0 ) | |
179 throw exception("invalid input"); | |
180 return parser.success(match); | |
181 } | |
182 | |
183 private String QueryField() throws ParseException { | |
184 parser.begin(); | |
185 String match = Field(); | |
186 if( match==null || !parser.match(':') ) | |
187 return parser.failure((String)null); | |
188 Spaces(); | |
189 return parser.success(match); | |
190 } | |
191 | |
192 private String Field() throws ParseException { | |
193 parser.begin(); | |
194 String match = Unquoted(NOT_IN_FIELD); | |
195 if( match.length()==0 ) | |
196 return parser.failure((String)null); | |
197 match = StringFieldParser.escape(this,match); | |
198 return parser.success(match); | |
199 } | |
200 | |
201 private String Quoted() throws ParseException { | |
202 parser.begin(); | |
203 if( !parser.match('"') ) | |
204 return parser.failure(null); | |
205 StringBuilder sb = new StringBuilder(); | |
206 while( parser.anyChar() ) { | |
207 char c = parser.lastChar(); | |
208 switch(c) { | |
209 case '"': | |
210 return parser.success(sb.toString()); | |
211 case '\\': | |
212 if( parser.anyChar() ) { | |
213 c = parser.lastChar(); | |
214 switch(c) { | |
215 case '"': | |
216 case '\\': | |
217 sb.append(c); | |
218 continue; | |
219 case 'b': | |
220 sb.append('\b'); | |
221 continue; | |
222 case 'f': | |
223 sb.append('\f'); | |
224 continue; | |
225 case 'n': | |
226 sb.append('\n'); | |
227 continue; | |
228 case 'r': | |
229 sb.append('\r'); | |
230 continue; | |
231 case 't': | |
232 sb.append('\t'); | |
233 continue; | |
234 case 'u': | |
235 int n = 0; | |
236 for( int i=0; i<4; i++ ) { | |
237 int d; | |
238 if( parser.inCharRange('0','9') ) { | |
239 d = parser.lastChar() - '0'; | |
240 } else if( parser.inCharRange('a','f') ) { | |
241 d = parser.lastChar() - 'a' + 10; | |
242 } else if( parser.inCharRange('A','F') ) { | |
243 d = parser.lastChar() - 'A' + 10; | |
244 } else { | |
245 throw exception("invalid hex digit"); | |
246 } | |
247 n = 16*n + d; | |
248 } | |
249 sb.append((char)n); | |
250 continue; | |
251 } | |
252 } | |
253 throw exception("invalid escape char"); | |
254 default: | |
255 sb.append(c); | |
256 } | |
257 } | |
258 parser.failure(); | |
259 throw exception("unclosed string"); | |
260 } | |
261 | |
262 private String Unquoted(String exclude) throws ParseException { | |
263 int start = parser.begin(); | |
264 while( parser.noneOf(exclude) ) { | |
265 checkEscape(); | |
266 } | |
267 String match = parser.textFrom(start); | |
268 Spaces(); | |
269 return parser.success(match); | |
270 } | |
271 | |
272 private void checkEscape() { | |
273 if( parser.lastChar() == '\\' ) | |
274 parser.anyChar(); | |
275 } | |
276 | |
277 private void Spaces() { | |
278 while( Space() ); | |
279 } | |
280 | |
281 private boolean Space() { | |
282 return parser.anyOf(" \t\r\n"); | |
283 } | |
284 | |
285 | |
286 // sort | |
287 | |
288 private Sort parseSort() throws ParseException { | |
289 Spaces(); | |
290 if( parser.endOfInput() ) | |
291 return null; | |
292 List<SortField> list = new ArrayList<SortField>(); | |
293 list.add( SortField() ); | |
294 while( !parser.endOfInput() ) { | |
295 parser.begin(); | |
296 if( !parser.match(',') ) | |
297 throw exception("',' expected"); | |
298 Spaces(); | |
299 parser.success(); | |
300 list.add( SortField() ); | |
301 } | |
302 return new Sort(list.toArray(new SortField[0])); | |
303 } | |
304 | |
305 private SortField SortField() throws ParseException { | |
306 parser.begin(); | |
307 String field = Field(); | |
308 if( field==null ) | |
309 throw exception("invalid input"); | |
310 boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc"); | |
311 Spaces(); | |
312 SortField sf = fieldParser.getSortField(this,field,reverse); | |
313 return parser.success(sf); | |
314 } | |
315 | |
316 } |