comparison lucene/src/luan/modules/lucene/queryparser/SaneQueryParser.java @ 730:01e68da6983b

add sane-lucene-queryparser source to luan
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 10 Jun 2016 15:41:15 -0600
parents
children
comparison
equal deleted inserted replaced
729:4ce68aad92b7 730:01e68da6983b
1 package luan.modules.lucene.queryparser;
2
3 import java.util.List;
4 import java.util.ArrayList;
5 import java.util.regex.Pattern;
6 import org.apache.lucene.search.Query;
7 import org.apache.lucene.search.MatchAllDocsQuery;
8 import org.apache.lucene.search.BooleanClause;
9 import org.apache.lucene.search.BooleanQuery;
10 import org.apache.lucene.search.Sort;
11 import org.apache.lucene.search.SortField;
12
13
14 public class SaneQueryParser {
15
16 public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException {
17 return new SaneQueryParser(fieldParser,query).parseQuery();
18 }
19
20 private static Pattern specialChar = Pattern.compile("[ \\t\\r\\n\":\\[\\]{}^+\\-(),?*\\\\]");
21
22 public static String literal(String s) {
23 return specialChar.matcher(s).replaceAll("\\\\$0");
24 }
25
26 public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException {
27 return new SaneQueryParser(fieldParser,sort).parseSort();
28 }
29
30
31 private static final String NOT_IN_TERM = " \t\r\n\":[]{}^+-()";
32 private static final String NOT_IN_FIELD = NOT_IN_TERM + ",";
33 private final FieldParser fieldParser;
34 final Parser parser;
35
36 private SaneQueryParser(FieldParser fieldParser,String query) {
37 this.fieldParser = fieldParser;
38 this.parser = new Parser(query);
39 }
40
41 private Query parseQuery() throws ParseException {
42 Spaces();
43 BooleanQuery bq = new BooleanQuery();
44 while( !parser.endOfInput() ) {
45 bq.add( Term(null) );
46 }
47 BooleanClause[] clauses = bq.getClauses();
48 switch( clauses.length ) {
49 case 0:
50 return new MatchAllDocsQuery();
51 case 1:
52 {
53 BooleanClause bc = clauses[0];
54 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT )
55 return bc.getQuery();
56 }
57 default:
58 return bq;
59 }
60 }
61
62 private BooleanClause Term(String defaultField) throws ParseException {
63 BooleanClause.Occur occur;
64 if( parser.match('+') ) {
65 occur = BooleanClause.Occur.MUST;
66 Spaces();
67 } else if( parser.match('-') ) {
68 occur = BooleanClause.Occur.MUST_NOT;
69 Spaces();
70 } else {
71 occur = BooleanClause.Occur.SHOULD;
72 }
73 String field = QueryField();
74 if( field == null )
75 field = defaultField;
76 Query query = NestedTerm(field);
77 if( query == null )
78 query = RangeTerm(field);
79 if( query == null ) {
80 parser.begin();
81 String match = SimpleTerm();
82 query = fieldParser.getQuery(this,field,match);
83 parser.success();
84 }
85 if( parser.match('^') ) {
86 Spaces();
87 int start = parser.begin();
88 try {
89 while( parser.anyOf("0123456789.") );
90 String match = parser.textFrom(start);
91 float boost = Float.parseFloat(match);
92 query.setBoost(boost);
93 } catch(NumberFormatException e) {
94 throw new ParseException(this,e);
95 }
96 parser.success();
97 Spaces();
98 }
99 BooleanClause bc = new BooleanClause(query,occur);
100 return bc;
101 }
102
103 private Query NestedTerm(String field) throws ParseException {
104 parser.begin();
105 if( !parser.match('(') )
106 return parser.failure(null);
107 BooleanQuery bq = new BooleanQuery();
108 while( !parser.match(')') ) {
109 if( parser.endOfInput() )
110 throw new ParseException(this,"unclosed parentheses");
111 bq.add( Term(field) );
112 }
113 Spaces();
114 BooleanClause[] clauses = bq.getClauses();
115 switch( clauses.length ) {
116 case 0:
117 throw new ParseException(this,"empty parentheses");
118 case 1:
119 {
120 BooleanClause bc = clauses[0];
121 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT )
122 return parser.success(bc.getQuery());
123 }
124 default:
125 return parser.success(bq);
126 }
127 }
128
129 private Query RangeTerm(String field) throws ParseException {
130 parser.begin();
131 if( !parser.anyOf("[{") )
132 return parser.failure(null);
133 boolean includeMin = parser.lastChar() == '[';
134 Spaces();
135 String minQuery = SimpleTerm();
136 TO();
137 String maxQuery = SimpleTerm();
138 if( !parser.anyOf("]}") )
139 throw new ParseException(this,"unclosed range");
140 boolean includeMax = parser.lastChar() == ']';
141 Spaces();
142 Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax);
143 return parser.success(query);
144 }
145
146 private void TO() throws ParseException {
147 parser.begin();
148 if( !(parser.match("TO") && Space()) )
149 throw new ParseException(this,"'TO' expected");
150 Spaces();
151 parser.success();
152 }
153
154 private String SimpleTerm() throws ParseException {
155 parser.begin();
156 String match;
157 if( parser.match('"') ) {
158 int start = parser.currentIndex() - 1;
159 while( !parser.match('"') ) {
160 if( parser.endOfInput() )
161 throw new ParseException(this,"unclosed quotes");
162 parser.anyChar();
163 checkEscape();
164 }
165 match = parser.textFrom(start);
166 Spaces();
167 } else {
168 match = Unquoted(NOT_IN_TERM);
169 }
170 if( match.length() == 0 )
171 throw new ParseException(this);
172 return parser.success(match);
173 }
174
175 private String QueryField() throws ParseException {
176 parser.begin();
177 String match = Field();
178 if( match==null || !parser.match(':') )
179 return parser.failure((String)null);
180 Spaces();
181 return parser.success(match);
182 }
183
184 private String Field() throws ParseException {
185 parser.begin();
186 String match = Unquoted(NOT_IN_FIELD);
187 if( match.length()==0 )
188 return parser.failure((String)null);
189 match = StringFieldParser.escape(this,match);
190 return parser.success(match);
191 }
192
193 private String Unquoted(String exclude) throws ParseException {
194 int start = parser.begin();
195 while( parser.noneOf(exclude) ) {
196 checkEscape();
197 }
198 String match = parser.textFrom(start);
199 Spaces();
200 return parser.success(match);
201 }
202
203 private void checkEscape() {
204 if( parser.lastChar() == '\\' )
205 parser.anyChar();
206 }
207
208 private void Spaces() {
209 while( Space() );
210 }
211
212 private boolean Space() {
213 return parser.anyOf(" \t\r\n");
214 }
215
216
217 // sort
218
219 private Sort parseSort() throws ParseException {
220 Spaces();
221 if( parser.endOfInput() )
222 return null;
223 List<SortField> list = new ArrayList<SortField>();
224 list.add( SortField() );
225 while( !parser.endOfInput() ) {
226 parser.begin();
227 if( !parser.match(',') )
228 throw new ParseException(this,"',' expected");
229 Spaces();
230 parser.success();
231 list.add( SortField() );
232 }
233 return new Sort(list.toArray(new SortField[0]));
234 }
235
236 private SortField SortField() throws ParseException {
237 parser.begin();
238 String field = Field();
239 if( field==null )
240 throw new ParseException(this);
241 boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc");
242 Spaces();
243 SortField sf = fieldParser.getSortField(this,field,reverse);
244 return parser.success(sf);
245 }
246
247 }