Mercurial Hosting > nabble
comparison src/nabble/model/NodeSearcher.java @ 0:7ecd1a4ef557
add content
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 21 Mar 2019 19:15:52 -0600 |
parents | |
children | 72765b66e2c3 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7ecd1a4ef557 |
---|---|
1 package nabble.model; | |
2 | |
3 import org.apache.lucene.analysis.Analyzer; | |
4 import org.apache.lucene.analysis.Token; | |
5 import org.apache.lucene.analysis.TokenFilter; | |
6 import org.apache.lucene.analysis.TokenStream; | |
7 import org.apache.lucene.analysis.snowball.SnowballAnalyzer; | |
8 import org.apache.lucene.document.Document; | |
9 import org.apache.lucene.document.NumberTools; | |
10 import org.apache.lucene.index.Term; | |
11 import org.apache.lucene.queryParser.MultiFieldQueryParser; | |
12 import org.apache.lucene.queryParser.ParseException; | |
13 import org.apache.lucene.queryParser.QueryParser; | |
14 import org.apache.lucene.search.BooleanClause; | |
15 import org.apache.lucene.search.BooleanQuery; | |
16 import org.apache.lucene.search.CachingWrapperFilter; | |
17 import org.apache.lucene.search.ConstantScoreQuery; | |
18 import org.apache.lucene.search.Filter; | |
19 import nabble.model.lucene.HitCollector; | |
20 import nabble.model.lucene.LuceneSearcher; | |
21 import org.apache.lucene.search.PhraseQuery; | |
22 import org.apache.lucene.search.Query; | |
23 import org.apache.lucene.search.QueryWrapperFilter; | |
24 import org.apache.lucene.search.Sort; | |
25 import org.apache.lucene.search.SortField; | |
26 import org.apache.lucene.search.TermQuery; | |
27 import org.apache.lucene.search.TopDocs; | |
28 import org.apache.lucene.search.highlight.Formatter; | |
29 import org.apache.lucene.search.highlight.Highlighter; | |
30 import org.apache.lucene.search.highlight.NullFragmenter; | |
31 import org.apache.lucene.search.highlight.QueryScorer; | |
32 import org.apache.lucene.search.highlight.SimpleHTMLFormatter; | |
33 import org.apache.lucene.search.highlight.SimpleSpanFragmenter; | |
34 import org.apache.lucene.search.highlight.TokenGroup; | |
35 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; | |
36 import org.apache.lucene.util.Version; | |
37 import org.slf4j.Logger; | |
38 import org.slf4j.LoggerFactory; | |
39 | |
40 import java.io.IOException; | |
41 import java.io.StringReader; | |
42 import java.util.ArrayList; | |
43 import java.util.Collections; | |
44 import java.util.Date; | |
45 import java.util.HashSet; | |
46 import java.util.List; | |
47 import java.util.Set; | |
48 | |
49 | |
50 public final class NodeSearcher { | |
51 private static final Logger logger = LoggerFactory.getLogger(NodeSearcher.class); | |
52 | |
53 public static final Sort SORT_BY_DATE = new Sort(new SortField(Lucene.DATE_FLD, SortField.INT)); | |
54 | |
55 public static class Builder { | |
56 private static final String[] nodeSearchFields = new String[]{ | |
57 Lucene.SUBJECT_FLD, Lucene.MESSAGE_FLD, Lucene.AUTHOR_FLD, Lucene.MAILING_LIST_FLD | |
58 }; | |
59 | |
60 private final SiteImpl site; | |
61 private final BooleanQuery query = new BooleanQuery(); | |
62 private Query textQuery = null; | |
63 private boolean isAuthenticated = false; | |
64 private final long nodeId; | |
65 private User currentUser; | |
66 private String userSearchId = null; | |
67 private Sort sort = null; | |
68 private Filter filter = null; | |
69 private Date from = null; | |
70 private Date to = null; | |
71 | |
72 public Builder(Node node) { | |
73 this(node.getSite(),node.getId()); | |
74 } | |
75 | |
76 public Builder(Site site,long nodeId) { | |
77 if( nodeId == 0L ) | |
78 throw new RuntimeException(); | |
79 this.site = (SiteImpl)site; | |
80 this.nodeId = nodeId; | |
81 Query query2 = new TermQuery(new Term(Lucene.ANCESTORS_FLD,Long.toString(nodeId))); | |
82 query.add(query2,BooleanClause.Occur.MUST); | |
83 } | |
84 | |
85 public void setCurrentUser(User user) { | |
86 this.isAuthenticated = true; | |
87 this.currentUser = user; | |
88 } | |
89 | |
90 private BooleanQuery getQuery() { | |
91 if( !isAuthenticated ) | |
92 return query; | |
93 if( currentUser!=null && currentUser.getSearchId().equals(userSearchId) ) | |
94 return query; | |
95 BooleanQuery q = new BooleanQuery(); | |
96 q.add(query, BooleanClause.Occur.MUST); | |
97 if( currentUser != null ) { | |
98 NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId); | |
99 q.add(new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD, Lucene.formatPrivateNode(node))), BooleanClause.Occur.MUST); | |
100 return q; | |
101 } | |
102 q.add(publicQuery, BooleanClause.Occur.MUST); | |
103 return q; | |
104 } | |
105 | |
106 public void addQuery(Query query2) { | |
107 query.add(query2,BooleanClause.Occur.MUST); | |
108 } | |
109 | |
110 public void addLine(String line) throws ParseException { | |
111 if( textQuery != null ) | |
112 throw new RuntimeException(); | |
113 textQuery = parse(line,nodeSearchFields); | |
114 if( textQuery != null ) | |
115 query.add(textQuery,BooleanClause.Occur.MUST); | |
116 } | |
117 | |
118 public void addUser(Person user) { | |
119 if( user==null ) | |
120 return; | |
121 addUser(user.getSearchId()); | |
122 } | |
123 | |
124 public void addUser(String userSearchId) { | |
125 this.userSearchId = userSearchId; | |
126 Query query2 = new TermQuery(new Term(Lucene.USER_ID_FLD,userSearchId)); | |
127 query.add(query2,BooleanClause.Occur.MUST); | |
128 } | |
129 | |
130 public void addUsers(List<? extends Person> visitors) { | |
131 if (visitors != null && visitors.size() > 0) { | |
132 BooleanQuery usersClause = new BooleanQuery(); | |
133 for (Person v : visitors) { | |
134 Query q = new TermQuery(new Term(Lucene.USER_ID_FLD,v.getSearchId())); | |
135 usersClause.add(q, BooleanClause.Occur.SHOULD); | |
136 } | |
137 query.add(usersClause, BooleanClause.Occur.MUST); | |
138 } | |
139 } | |
140 | |
141 void addExcludeUser(String userSearchId) { | |
142 BooleanClause excludeUserClause = new BooleanClause( | |
143 new TermQuery(new Term(Lucene.USER_ID_FLD, userSearchId)), | |
144 BooleanClause.Occur.MUST_NOT); | |
145 query.add(excludeUserClause); | |
146 } | |
147 | |
148 public void setUserSearchId(String userSearchId) { | |
149 this.userSearchId = userSearchId; | |
150 } | |
151 | |
152 private final static Query appQuery = | |
153 new ConstantScoreQuery( | |
154 new CachingWrapperFilter( | |
155 new QueryWrapperFilter( | |
156 new TermQuery(new Term(Lucene.KIND_FLD,Node.Kind.APP.toString())) | |
157 ) | |
158 ) | |
159 ) | |
160 ; | |
161 | |
162 public void addNodeKind(Node.Kind kind) { | |
163 query.add(appQuery, | |
164 kind==Node.Kind.APP?BooleanClause.Occur.MUST:BooleanClause.Occur.MUST_NOT); | |
165 } | |
166 | |
167 private final static Query publicQuery = | |
168 new ConstantScoreQuery( | |
169 new CachingWrapperFilter( | |
170 new QueryWrapperFilter( | |
171 new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD,"none")) | |
172 ) | |
173 ) | |
174 ) | |
175 ; | |
176 | |
177 public void excludePrivate() { | |
178 query.add(publicQuery,BooleanClause.Occur.MUST); | |
179 } | |
180 | |
181 public void setSort(Sort sort) { | |
182 this.sort = sort; | |
183 } | |
184 | |
185 public void setFilter(Filter filter) { | |
186 this.filter = filter; | |
187 } | |
188 | |
189 public void setDateRange(Date from, Date to) { | |
190 if( sort != SORT_BY_DATE ) | |
191 throw new UnsupportedOperationException(); | |
192 this.from = from; | |
193 this.to = to; | |
194 } | |
195 | |
196 public NodeSearcher build() { | |
197 return new NodeSearcher(this); | |
198 } | |
199 } | |
200 | |
201 private final SiteImpl site; | |
202 private final BooleanQuery query; | |
203 private final Query textQuery; | |
204 private final Sort sort; | |
205 private final Filter filter; | |
206 private final Date from; | |
207 private final Date to; | |
208 private Set<String> searchTerms = null; | |
209 private int totalHits = -1; | |
210 private final QueryScorer scorer; | |
211 | |
212 private NodeSearcher(Builder builder) { | |
213 this.site = builder.site; | |
214 this.query = builder.getQuery(); | |
215 this.textQuery = builder.textQuery; | |
216 this.sort = builder.sort; | |
217 this.filter = builder.filter; | |
218 this.from = builder.from; | |
219 this.to = builder.to; | |
220 this.scorer = new QueryScorer(query); | |
221 } | |
222 | |
223 public BooleanQuery getQuery() { | |
224 return query; | |
225 } | |
226 | |
227 static Query parse(String line, String[] fields) throws ParseException { | |
228 if( line == null || line.length() == 0 ) | |
229 return null; | |
230 line = line.replace('[','|').replace(']','|'); // hack - treat [] as punctuation | |
231 MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT,fields, Lucene.analyzer); | |
232 parser.setDefaultOperator(QueryParser.AND_OPERATOR); | |
233 return parser.parse(line); | |
234 } | |
235 | |
236 public String toString() { | |
237 return query.toString(); | |
238 } | |
239 | |
240 public Set<String> getSearchTerms() { | |
241 if( searchTerms==null ) { | |
242 searchTerms = new HashSet<String>(); | |
243 if( textQuery != null ) | |
244 searchTerms(searchTerms,textQuery); | |
245 } | |
246 return searchTerms; | |
247 } | |
248 | |
249 private static void searchTerms(Set<String> searchTerms,Query query) { | |
250 if( query instanceof BooleanQuery ) { | |
251 BooleanQuery q = (BooleanQuery)query; | |
252 BooleanClause[] clauses = q.getClauses(); | |
253 for (BooleanClause clause : clauses) { | |
254 if (!clause.isProhibited()) | |
255 searchTerms(searchTerms, clause.getQuery()); | |
256 } | |
257 } else if( query instanceof TermQuery ) { | |
258 TermQuery q = (TermQuery)query; | |
259 searchTerms.add( q.getTerm().text() ); | |
260 } else if( query instanceof PhraseQuery ) { | |
261 PhraseQuery q = (PhraseQuery)query; | |
262 Term[] terms = q.getTerms(); | |
263 for (Term term : terms) { | |
264 searchTerms.add(term.text()); | |
265 } | |
266 } | |
267 } | |
268 | |
269 public String highlight(String text,String pre,String post) { | |
270 try { | |
271 Highlighter hl = new Highlighter( new SimpleHTMLFormatter(pre,post), scorer ); | |
272 hl.setTextFragmenter( new NullFragmenter() ); | |
273 String s = hl.getBestFragment(Lucene.analyzer,null,text); | |
274 return s != null ? s : text; | |
275 } catch(IOException e) { | |
276 throw new RuntimeException(e); | |
277 } catch(InvalidTokenOffsetsException e) { | |
278 throw new RuntimeException(e); | |
279 } | |
280 } | |
281 | |
282 public static String getStartingFragment(String text,int size,String dotdotdot) { | |
283 if (text.length() <= size) return text; | |
284 int end = text.lastIndexOf(' ', size); | |
285 if (end < 0) end = size; | |
286 String fragment = text.substring(0, end); | |
287 if (dotdotdot != null && fragment.length() < text.length()) | |
288 fragment = fragment + dotdotdot; | |
289 return fragment; | |
290 } | |
291 | |
292 private static final Formatter nullFormatter = new Formatter() { | |
293 public String highlightTerm(String originalText,TokenGroup tokenGroup) { | |
294 return originalText; | |
295 } | |
296 }; | |
297 | |
298 public String getFragment(String text,int size,String dotdotdot) { | |
299 try { | |
300 Highlighter hl = new Highlighter(nullFormatter,scorer); | |
301 hl.setTextFragmenter( new SimpleSpanFragmenter(scorer,size) ); | |
302 String s = hl.getBestFragment(Lucene.analyzer,null,text); | |
303 if( s == null ) | |
304 s = getStartingFragment(text,size,dotdotdot); | |
305 if( dotdotdot != null && s.length() < text.length() ) { | |
306 boolean atStart = text.startsWith(s); | |
307 boolean atEnd = text.endsWith(s); | |
308 if( !atStart ) | |
309 s = dotdotdot + s; | |
310 if( !atEnd ) | |
311 s = s + dotdotdot; | |
312 } | |
313 return s; | |
314 } catch(IOException e) { | |
315 throw new RuntimeException(e); | |
316 } catch(InvalidTokenOffsetsException e) { | |
317 throw new RuntimeException(e); | |
318 } | |
319 } | |
320 | |
321 private static class DoneException extends RuntimeException {} | |
322 | |
323 public boolean hasNodes() { | |
324 try { | |
325 LuceneSearcher searcher = Lucene.newSearcher(site); | |
326 try { | |
327 try { | |
328 searcher.search( query, new HitCollector() { | |
329 protected void process(Document doc) { | |
330 throw new DoneException(); | |
331 } | |
332 } ); | |
333 return false; | |
334 } catch(DoneException e) { | |
335 return true; | |
336 } | |
337 } finally { | |
338 searcher.close(); | |
339 } | |
340 } catch(IOException e) { | |
341 throw new RuntimeException(e); | |
342 } | |
343 } | |
344 | |
/** Callback that receives the id of each node matched by {@code forEach}. */
public interface Handler {
    /** Called once per matching document with that document's node id. */
    void handle(long nodeId);
}
348 | |
349 public void forEach(final Handler h) { | |
350 try { | |
351 final LuceneSearcher searcher = Lucene.newSearcher(site); | |
352 try { | |
353 searcher.search( query, new HitCollector() { | |
354 protected void process(Document doc) { | |
355 h.handle( Lucene.getNodeId(doc) ); | |
356 } | |
357 } ); | |
358 } finally { | |
359 searcher.close(); | |
360 } | |
361 } catch(IOException e) { | |
362 throw new RuntimeException(e); | |
363 } | |
364 } | |
365 | |
366 public int getTotalHits() { | |
367 if( totalHits == -1 ) { | |
368 try { | |
369 LuceneSearcher searcher = Lucene.newSearcher(site); | |
370 try { | |
371 TopDocs hits = searcher.search(query, filter, 0); | |
372 totalHits = hits.totalHits; | |
373 } finally { | |
374 searcher.close(); | |
375 } | |
376 } catch (BooleanQuery.TooManyClauses e) { | |
377 throw new RuntimeException("Your search will give too many matches."); | |
378 } catch(IOException e) { | |
379 throw new RuntimeException(e); | |
380 } | |
381 } | |
382 return totalHits; | |
383 } | |
384 | |
385 public List<Node> getNodes(int i, int n) throws TooManyClauses { | |
386 try { | |
387 LuceneSearcher searcher = Lucene.newSearcher(site); | |
388 try { | |
389 TopDocs hits = sort==null ? searcher.search(query,filter,i+n) : searcher.search(query,filter,i+n,sort); | |
390 totalHits = hits.totalHits; | |
391 int lim = hits.scoreDocs.length; | |
392 if( lim <= i ) | |
393 return Collections.emptyList(); | |
394 List<Node> a = new ArrayList<Node>(); | |
395 for (int j=i; j<lim; j++) { | |
396 try { | |
397 int docId = hits.scoreDocs[j].doc; | |
398 Node node = Lucene.getNode(site, searcher, docId); | |
399 if (node != null) { | |
400 a.add(node); | |
401 } | |
402 } catch(IOException e) { | |
403 logger.error(e.toString()); | |
404 } | |
405 } | |
406 return a; | |
407 } finally { | |
408 searcher.close(); | |
409 } | |
410 } catch (BooleanQuery.TooManyClauses e) { | |
411 throw new TooManyClauses(e); | |
412 } catch (IOException e) { | |
413 throw new RuntimeException(e); | |
414 } | |
415 } | |
416 | |
417 public static final class TooManyClauses extends RuntimeException { | |
418 TooManyClauses(BooleanQuery.TooManyClauses e) { | |
419 super(e); | |
420 } | |
421 } | |
422 | |
423 } |