comparison src/luan/lib/queryparser/SaneQueryParser.java @ 1344:dc2af9d5463b

move queryparser to lib
author Franklin Schmidt <fschmidt@gmail.com>
date Mon, 25 Feb 2019 07:00:55 -0700
parents src/luan/modules/lucene/queryparser/SaneQueryParser.java@88b5b81cad4a
children 77f2d091f17f
comparison
equal deleted inserted replaced
1343:7d9a1f8894b0 1344:dc2af9d5463b
1 package luan.lib.queryparser;
2
3 import java.util.List;
4 import java.util.ArrayList;
5 import java.util.regex.Pattern;
6 import org.apache.lucene.search.Query;
7 import org.apache.lucene.search.MatchAllDocsQuery;
8 import org.apache.lucene.search.BooleanClause;
9 import org.apache.lucene.search.BooleanQuery;
10 import org.apache.lucene.search.Sort;
11 import org.apache.lucene.search.SortField;
12 import luan.lib.parser.Parser;
13 import luan.lib.parser.ParseException;
14
15
16 public class SaneQueryParser {
17
18 public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException {
19 return new SaneQueryParser(fieldParser,query).parseQuery();
20 }
21
22 private static Pattern specialChar = Pattern.compile("[ \\t\\r\\n\":\\[\\]{}^+\\-(),?*\\\\]");
23
24 public static String literal(String s) {
25 return specialChar.matcher(s).replaceAll("\\\\$0");
26 }
27
28 public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException {
29 return new SaneQueryParser(fieldParser,sort).parseSort();
30 }
31
32
33 private static final String NOT_IN_TERM = " \t\r\n\":[]{}^+-()";
34 private static final String NOT_IN_FIELD = NOT_IN_TERM + ",";
35 private final FieldParser fieldParser;
36 private final Parser parser;
37
38 private SaneQueryParser(FieldParser fieldParser,String query) {
39 this.fieldParser = fieldParser;
40 this.parser = new Parser(query);
41 parser.begin();
42 }
43
44 ParseException exception(String msg) {
45 parser.failure();
46 return new ParseException(parser,msg);
47 }
48
49 ParseException exception(Exception cause) {
50 parser.failure();
51 return new ParseException(parser,cause);
52 }
53
54 private Query parseQuery() throws ParseException {
55 Spaces();
56 BooleanQuery bq = new BooleanQuery();
57 while( !parser.endOfInput() ) {
58 bq.add( Term(null) );
59 }
60 BooleanClause[] clauses = bq.getClauses();
61 switch( clauses.length ) {
62 case 0:
63 return new MatchAllDocsQuery();
64 case 1:
65 {
66 BooleanClause bc = clauses[0];
67 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT )
68 return bc.getQuery();
69 }
70 default:
71 return bq;
72 }
73 }
74
75 private BooleanClause Term(String defaultField) throws ParseException {
76 BooleanClause.Occur occur;
77 if( parser.match('+') ) {
78 occur = BooleanClause.Occur.MUST;
79 Spaces();
80 } else if( parser.match('-') ) {
81 occur = BooleanClause.Occur.MUST_NOT;
82 Spaces();
83 } else {
84 occur = BooleanClause.Occur.SHOULD;
85 }
86 String field = QueryField();
87 if( field == null )
88 field = defaultField;
89 Query query = NestedTerm(field);
90 if( query == null )
91 query = RangeTerm(field);
92 if( query == null ) {
93 parser.begin();
94 String match = SimpleTerm();
95 query = fieldParser.getQuery(this,field,match);
96 parser.success();
97 }
98 if( parser.match('^') ) {
99 Spaces();
100 int start = parser.begin();
101 try {
102 while( parser.anyOf("0123456789.") );
103 String match = parser.textFrom(start);
104 float boost = Float.parseFloat(match);
105 query.setBoost(boost);
106 } catch(NumberFormatException e) {
107 throw exception(e);
108 }
109 parser.success();
110 Spaces();
111 }
112 BooleanClause bc = new BooleanClause(query,occur);
113 return bc;
114 }
115
116 private Query NestedTerm(String field) throws ParseException {
117 parser.begin();
118 if( !parser.match('(') )
119 return parser.failure(null);
120 BooleanQuery bq = new BooleanQuery();
121 while( !parser.match(')') ) {
122 if( parser.endOfInput() )
123 throw exception("unclosed parentheses");
124 bq.add( Term(field) );
125 }
126 Spaces();
127 BooleanClause[] clauses = bq.getClauses();
128 switch( clauses.length ) {
129 case 0:
130 throw exception("empty parentheses");
131 case 1:
132 {
133 BooleanClause bc = clauses[0];
134 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT )
135 return parser.success(bc.getQuery());
136 }
137 default:
138 return parser.success(bq);
139 }
140 }
141
142 private Query RangeTerm(String field) throws ParseException {
143 parser.begin();
144 if( !parser.anyOf("[{") )
145 return parser.failure(null);
146 boolean includeMin = parser.lastChar() == '[';
147 Spaces();
148 String minQuery = SimpleTerm();
149 TO();
150 String maxQuery = SimpleTerm();
151 if( !parser.anyOf("]}") )
152 throw exception("unclosed range");
153 boolean includeMax = parser.lastChar() == ']';
154 Spaces();
155 Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax);
156 return parser.success(query);
157 }
158
159 private void TO() throws ParseException {
160 parser.begin();
161 if( !(parser.match("TO") && Space()) )
162 throw exception("'TO' expected");
163 Spaces();
164 parser.success();
165 }
166
167 private String SimpleTerm() throws ParseException {
168 parser.begin();
169 String match;
170 if( parser.match('"') ) {
171 int start = parser.currentIndex() - 1;
172 while( !parser.match('"') ) {
173 if( parser.endOfInput() )
174 throw exception("unclosed quotes");
175 parser.anyChar();
176 checkEscape();
177 }
178 match = parser.textFrom(start);
179 Spaces();
180 } else {
181 match = Unquoted(NOT_IN_TERM);
182 }
183 if( match.length() == 0 )
184 throw exception("invalid input");
185 return parser.success(match);
186 }
187
188 private String QueryField() throws ParseException {
189 parser.begin();
190 String match = Field();
191 if( match==null || !parser.match(':') )
192 return parser.failure((String)null);
193 Spaces();
194 return parser.success(match);
195 }
196
197 private String Field() throws ParseException {
198 parser.begin();
199 String match = Unquoted(NOT_IN_FIELD);
200 if( match.length()==0 )
201 return parser.failure((String)null);
202 match = StringFieldParser.escape(this,match);
203 return parser.success(match);
204 }
205
206 private String Unquoted(String exclude) throws ParseException {
207 int start = parser.begin();
208 while( parser.noneOf(exclude) ) {
209 checkEscape();
210 }
211 String match = parser.textFrom(start);
212 Spaces();
213 return parser.success(match);
214 }
215
216 private void checkEscape() {
217 if( parser.lastChar() == '\\' )
218 parser.anyChar();
219 }
220
221 private void Spaces() {
222 while( Space() );
223 }
224
225 private boolean Space() {
226 return parser.anyOf(" \t\r\n");
227 }
228
229
230 // sort
231
232 private Sort parseSort() throws ParseException {
233 Spaces();
234 if( parser.endOfInput() )
235 return null;
236 List<SortField> list = new ArrayList<SortField>();
237 list.add( SortField() );
238 while( !parser.endOfInput() ) {
239 parser.begin();
240 if( !parser.match(',') )
241 throw exception("',' expected");
242 Spaces();
243 parser.success();
244 list.add( SortField() );
245 }
246 return new Sort(list.toArray(new SortField[0]));
247 }
248
249 private SortField SortField() throws ParseException {
250 parser.begin();
251 String field = Field();
252 if( field==null )
253 throw exception("invalid input");
254 boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc");
255 Spaces();
256 SortField sf = fieldParser.getSortField(this,field,reverse);
257 return parser.success(sf);
258 }
259
260 }