Mercurial Hosting > luan
comparison lucene/src/luan/modules/lucene/queryparser/SaneQueryParser.java @ 730:01e68da6983b
add sane-lucene-queryparser source to luan
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 10 Jun 2016 15:41:15 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
729:4ce68aad92b7 | 730:01e68da6983b |
---|---|
1 package luan.modules.lucene.queryparser; | |
2 | |
3 import java.util.List; | |
4 import java.util.ArrayList; | |
5 import java.util.regex.Pattern; | |
6 import org.apache.lucene.search.Query; | |
7 import org.apache.lucene.search.MatchAllDocsQuery; | |
8 import org.apache.lucene.search.BooleanClause; | |
9 import org.apache.lucene.search.BooleanQuery; | |
10 import org.apache.lucene.search.Sort; | |
11 import org.apache.lucene.search.SortField; | |
12 | |
13 | |
14 public class SaneQueryParser { | |
15 | |
16 public static Query parseQuery(FieldParser fieldParser,String query) throws ParseException { | |
17 return new SaneQueryParser(fieldParser,query).parseQuery(); | |
18 } | |
19 | |
20 private static Pattern specialChar = Pattern.compile("[ \\t\\r\\n\":\\[\\]{}^+\\-(),?*\\\\]"); | |
21 | |
22 public static String literal(String s) { | |
23 return specialChar.matcher(s).replaceAll("\\\\$0"); | |
24 } | |
25 | |
26 public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException { | |
27 return new SaneQueryParser(fieldParser,sort).parseSort(); | |
28 } | |
29 | |
30 | |
/** Characters that terminate an unquoted term. */
private static final String NOT_IN_TERM = " \t\r\n\":[]{}^+-()";

/** Characters that terminate an unquoted field name: the term terminators plus comma. */
private static final String NOT_IN_FIELD = NOT_IN_TERM + ",";

/** Turns parsed fields and terms into Lucene queries and sort fields. */
private final FieldParser fieldParser;

/** Cursor over the text being parsed; package-private in the original declaration. */
final Parser parser;

/**
 * Creates a parser for one piece of text.
 *
 * @param fieldParser builds queries and sort fields from the parsed pieces
 * @param query the text to parse (a query or a sort specification)
 */
private SaneQueryParser(FieldParser fieldParser,String query) {
    this.parser = new Parser(query);
    this.fieldParser = fieldParser;
}
40 | |
41 private Query parseQuery() throws ParseException { | |
42 Spaces(); | |
43 BooleanQuery bq = new BooleanQuery(); | |
44 while( !parser.endOfInput() ) { | |
45 bq.add( Term(null) ); | |
46 } | |
47 BooleanClause[] clauses = bq.getClauses(); | |
48 switch( clauses.length ) { | |
49 case 0: | |
50 return new MatchAllDocsQuery(); | |
51 case 1: | |
52 { | |
53 BooleanClause bc = clauses[0]; | |
54 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) | |
55 return bc.getQuery(); | |
56 } | |
57 default: | |
58 return bq; | |
59 } | |
60 } | |
61 | |
62 private BooleanClause Term(String defaultField) throws ParseException { | |
63 BooleanClause.Occur occur; | |
64 if( parser.match('+') ) { | |
65 occur = BooleanClause.Occur.MUST; | |
66 Spaces(); | |
67 } else if( parser.match('-') ) { | |
68 occur = BooleanClause.Occur.MUST_NOT; | |
69 Spaces(); | |
70 } else { | |
71 occur = BooleanClause.Occur.SHOULD; | |
72 } | |
73 String field = QueryField(); | |
74 if( field == null ) | |
75 field = defaultField; | |
76 Query query = NestedTerm(field); | |
77 if( query == null ) | |
78 query = RangeTerm(field); | |
79 if( query == null ) { | |
80 parser.begin(); | |
81 String match = SimpleTerm(); | |
82 query = fieldParser.getQuery(this,field,match); | |
83 parser.success(); | |
84 } | |
85 if( parser.match('^') ) { | |
86 Spaces(); | |
87 int start = parser.begin(); | |
88 try { | |
89 while( parser.anyOf("0123456789.") ); | |
90 String match = parser.textFrom(start); | |
91 float boost = Float.parseFloat(match); | |
92 query.setBoost(boost); | |
93 } catch(NumberFormatException e) { | |
94 throw new ParseException(this,e); | |
95 } | |
96 parser.success(); | |
97 Spaces(); | |
98 } | |
99 BooleanClause bc = new BooleanClause(query,occur); | |
100 return bc; | |
101 } | |
102 | |
103 private Query NestedTerm(String field) throws ParseException { | |
104 parser.begin(); | |
105 if( !parser.match('(') ) | |
106 return parser.failure(null); | |
107 BooleanQuery bq = new BooleanQuery(); | |
108 while( !parser.match(')') ) { | |
109 if( parser.endOfInput() ) | |
110 throw new ParseException(this,"unclosed parentheses"); | |
111 bq.add( Term(field) ); | |
112 } | |
113 Spaces(); | |
114 BooleanClause[] clauses = bq.getClauses(); | |
115 switch( clauses.length ) { | |
116 case 0: | |
117 throw new ParseException(this,"empty parentheses"); | |
118 case 1: | |
119 { | |
120 BooleanClause bc = clauses[0]; | |
121 if( bc.getOccur() != BooleanClause.Occur.MUST_NOT ) | |
122 return parser.success(bc.getQuery()); | |
123 } | |
124 default: | |
125 return parser.success(bq); | |
126 } | |
127 } | |
128 | |
129 private Query RangeTerm(String field) throws ParseException { | |
130 parser.begin(); | |
131 if( !parser.anyOf("[{") ) | |
132 return parser.failure(null); | |
133 boolean includeMin = parser.lastChar() == '['; | |
134 Spaces(); | |
135 String minQuery = SimpleTerm(); | |
136 TO(); | |
137 String maxQuery = SimpleTerm(); | |
138 if( !parser.anyOf("]}") ) | |
139 throw new ParseException(this,"unclosed range"); | |
140 boolean includeMax = parser.lastChar() == ']'; | |
141 Spaces(); | |
142 Query query = fieldParser.getRangeQuery(this,field,minQuery,maxQuery,includeMin,includeMax); | |
143 return parser.success(query); | |
144 } | |
145 | |
146 private void TO() throws ParseException { | |
147 parser.begin(); | |
148 if( !(parser.match("TO") && Space()) ) | |
149 throw new ParseException(this,"'TO' expected"); | |
150 Spaces(); | |
151 parser.success(); | |
152 } | |
153 | |
154 private String SimpleTerm() throws ParseException { | |
155 parser.begin(); | |
156 String match; | |
157 if( parser.match('"') ) { | |
158 int start = parser.currentIndex() - 1; | |
159 while( !parser.match('"') ) { | |
160 if( parser.endOfInput() ) | |
161 throw new ParseException(this,"unclosed quotes"); | |
162 parser.anyChar(); | |
163 checkEscape(); | |
164 } | |
165 match = parser.textFrom(start); | |
166 Spaces(); | |
167 } else { | |
168 match = Unquoted(NOT_IN_TERM); | |
169 } | |
170 if( match.length() == 0 ) | |
171 throw new ParseException(this); | |
172 return parser.success(match); | |
173 } | |
174 | |
175 private String QueryField() throws ParseException { | |
176 parser.begin(); | |
177 String match = Field(); | |
178 if( match==null || !parser.match(':') ) | |
179 return parser.failure((String)null); | |
180 Spaces(); | |
181 return parser.success(match); | |
182 } | |
183 | |
184 private String Field() throws ParseException { | |
185 parser.begin(); | |
186 String match = Unquoted(NOT_IN_FIELD); | |
187 if( match.length()==0 ) | |
188 return parser.failure((String)null); | |
189 match = StringFieldParser.escape(this,match); | |
190 return parser.success(match); | |
191 } | |
192 | |
193 private String Unquoted(String exclude) throws ParseException { | |
194 int start = parser.begin(); | |
195 while( parser.noneOf(exclude) ) { | |
196 checkEscape(); | |
197 } | |
198 String match = parser.textFrom(start); | |
199 Spaces(); | |
200 return parser.success(match); | |
201 } | |
202 | |
203 private void checkEscape() { | |
204 if( parser.lastChar() == '\\' ) | |
205 parser.anyChar(); | |
206 } | |
207 | |
208 private void Spaces() { | |
209 while( Space() ); | |
210 } | |
211 | |
212 private boolean Space() { | |
213 return parser.anyOf(" \t\r\n"); | |
214 } | |
215 | |
216 | |
217 // sort | |
218 | |
219 private Sort parseSort() throws ParseException { | |
220 Spaces(); | |
221 if( parser.endOfInput() ) | |
222 return null; | |
223 List<SortField> list = new ArrayList<SortField>(); | |
224 list.add( SortField() ); | |
225 while( !parser.endOfInput() ) { | |
226 parser.begin(); | |
227 if( !parser.match(',') ) | |
228 throw new ParseException(this,"',' expected"); | |
229 Spaces(); | |
230 parser.success(); | |
231 list.add( SortField() ); | |
232 } | |
233 return new Sort(list.toArray(new SortField[0])); | |
234 } | |
235 | |
236 private SortField SortField() throws ParseException { | |
237 parser.begin(); | |
238 String field = Field(); | |
239 if( field==null ) | |
240 throw new ParseException(this); | |
241 boolean reverse = !parser.matchIgnoreCase("asc") && parser.matchIgnoreCase("desc"); | |
242 Spaces(); | |
243 SortField sf = fieldParser.getSortField(this,field,reverse); | |
244 return parser.success(sf); | |
245 } | |
246 | |
247 } |