comparison src/nabble/model/NodeSearcher.java @ 0:7ecd1a4ef557

add content
author Franklin Schmidt <fschmidt@gmail.com>
date Thu, 21 Mar 2019 19:15:52 -0600
parents
children 72765b66e2c3
comparison
equal deleted inserted replaced
-1:000000000000 0:7ecd1a4ef557
1 package nabble.model;
2
3 import org.apache.lucene.analysis.Analyzer;
4 import org.apache.lucene.analysis.Token;
5 import org.apache.lucene.analysis.TokenFilter;
6 import org.apache.lucene.analysis.TokenStream;
7 import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
8 import org.apache.lucene.document.Document;
9 import org.apache.lucene.document.NumberTools;
10 import org.apache.lucene.index.Term;
11 import org.apache.lucene.queryParser.MultiFieldQueryParser;
12 import org.apache.lucene.queryParser.ParseException;
13 import org.apache.lucene.queryParser.QueryParser;
14 import org.apache.lucene.search.BooleanClause;
15 import org.apache.lucene.search.BooleanQuery;
16 import org.apache.lucene.search.CachingWrapperFilter;
17 import org.apache.lucene.search.ConstantScoreQuery;
18 import org.apache.lucene.search.Filter;
19 import nabble.model.lucene.HitCollector;
20 import nabble.model.lucene.LuceneSearcher;
21 import org.apache.lucene.search.PhraseQuery;
22 import org.apache.lucene.search.Query;
23 import org.apache.lucene.search.QueryWrapperFilter;
24 import org.apache.lucene.search.Sort;
25 import org.apache.lucene.search.SortField;
26 import org.apache.lucene.search.TermQuery;
27 import org.apache.lucene.search.TopDocs;
28 import org.apache.lucene.search.highlight.Formatter;
29 import org.apache.lucene.search.highlight.Highlighter;
30 import org.apache.lucene.search.highlight.NullFragmenter;
31 import org.apache.lucene.search.highlight.QueryScorer;
32 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
33 import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
34 import org.apache.lucene.search.highlight.TokenGroup;
35 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
36 import org.apache.lucene.util.Version;
37 import org.slf4j.Logger;
38 import org.slf4j.LoggerFactory;
39
40 import java.io.IOException;
41 import java.io.StringReader;
42 import java.util.ArrayList;
43 import java.util.Collections;
44 import java.util.Date;
45 import java.util.HashSet;
46 import java.util.List;
47 import java.util.Set;
48
49
50 public final class NodeSearcher {
/** SLF4J logger for this class; used by getNodes to report documents that fail to load. */
51 private static final Logger logger = LoggerFactory.getLogger(NodeSearcher.class);
52
/**
 * Sort over {@code Lucene.DATE_FLD}, read as an int-typed sort field.
 * {@code Builder.setDateRange} compares the configured sort against this exact
 * instance (by reference), so callers must pass this constant to {@code setSort}.
 */
53 public static final Sort SORT_BY_DATE = new Sort(new SortField(Lucene.DATE_FLD, SortField.INT));
54
55 public static class Builder {
56 private static final String[] nodeSearchFields = new String[]{
57 Lucene.SUBJECT_FLD, Lucene.MESSAGE_FLD, Lucene.AUTHOR_FLD, Lucene.MAILING_LIST_FLD
58 };
59
60 private final SiteImpl site;
61 private final BooleanQuery query = new BooleanQuery();
62 private Query textQuery = null;
63 private boolean isAuthenticated = false;
64 private final long nodeId;
65 private User currentUser;
66 private String userSearchId = null;
67 private Sort sort = null;
68 private Filter filter = null;
69 private Date from = null;
70 private Date to = null;
71
72 public Builder(Node node) {
73 this(node.getSite(),node.getId());
74 }
75
76 public Builder(Site site,long nodeId) {
77 if( nodeId == 0L )
78 throw new RuntimeException();
79 this.site = (SiteImpl)site;
80 this.nodeId = nodeId;
81 Query query2 = new TermQuery(new Term(Lucene.ANCESTORS_FLD,Long.toString(nodeId)));
82 query.add(query2,BooleanClause.Occur.MUST);
83 }
84
85 public void setCurrentUser(User user) {
86 this.isAuthenticated = true;
87 this.currentUser = user;
88 }
89
90 private BooleanQuery getQuery() {
91 if( !isAuthenticated )
92 return query;
93 if( currentUser!=null && currentUser.getSearchId().equals(userSearchId) )
94 return query;
95 BooleanQuery q = new BooleanQuery();
96 q.add(query, BooleanClause.Occur.MUST);
97 if( currentUser != null ) {
98 NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId);
99 q.add(new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD, Lucene.formatPrivateNode(node))), BooleanClause.Occur.MUST);
100 return q;
101 }
102 q.add(publicQuery, BooleanClause.Occur.MUST);
103 return q;
104 }
105
106 public void addQuery(Query query2) {
107 query.add(query2,BooleanClause.Occur.MUST);
108 }
109
110 public void addLine(String line) throws ParseException {
111 if( textQuery != null )
112 throw new RuntimeException();
113 textQuery = parse(line,nodeSearchFields);
114 if( textQuery != null )
115 query.add(textQuery,BooleanClause.Occur.MUST);
116 }
117
118 public void addUser(Person user) {
119 if( user==null )
120 return;
121 addUser(user.getSearchId());
122 }
123
124 public void addUser(String userSearchId) {
125 this.userSearchId = userSearchId;
126 Query query2 = new TermQuery(new Term(Lucene.USER_ID_FLD,userSearchId));
127 query.add(query2,BooleanClause.Occur.MUST);
128 }
129
130 public void addUsers(List<? extends Person> visitors) {
131 if (visitors != null && visitors.size() > 0) {
132 BooleanQuery usersClause = new BooleanQuery();
133 for (Person v : visitors) {
134 Query q = new TermQuery(new Term(Lucene.USER_ID_FLD,v.getSearchId()));
135 usersClause.add(q, BooleanClause.Occur.SHOULD);
136 }
137 query.add(usersClause, BooleanClause.Occur.MUST);
138 }
139 }
140
141 void addExcludeUser(String userSearchId) {
142 BooleanClause excludeUserClause = new BooleanClause(
143 new TermQuery(new Term(Lucene.USER_ID_FLD, userSearchId)),
144 BooleanClause.Occur.MUST_NOT);
145 query.add(excludeUserClause);
146 }
147
148 public void setUserSearchId(String userSearchId) {
149 this.userSearchId = userSearchId;
150 }
151
152 private final static Query appQuery =
153 new ConstantScoreQuery(
154 new CachingWrapperFilter(
155 new QueryWrapperFilter(
156 new TermQuery(new Term(Lucene.KIND_FLD,Node.Kind.APP.toString()))
157 )
158 )
159 )
160 ;
161
162 public void addNodeKind(Node.Kind kind) {
163 query.add(appQuery,
164 kind==Node.Kind.APP?BooleanClause.Occur.MUST:BooleanClause.Occur.MUST_NOT);
165 }
166
167 private final static Query publicQuery =
168 new ConstantScoreQuery(
169 new CachingWrapperFilter(
170 new QueryWrapperFilter(
171 new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD,"none"))
172 )
173 )
174 )
175 ;
176
177 public void excludePrivate() {
178 query.add(publicQuery,BooleanClause.Occur.MUST);
179 }
180
181 public void setSort(Sort sort) {
182 this.sort = sort;
183 }
184
185 public void setFilter(Filter filter) {
186 this.filter = filter;
187 }
188
189 public void setDateRange(Date from, Date to) {
190 if( sort != SORT_BY_DATE )
191 throw new UnsupportedOperationException();
192 this.from = from;
193 this.to = to;
194 }
195
196 public NodeSearcher build() {
197 return new NodeSearcher(this);
198 }
199 }
200
// --- state copied from the Builder ---
private final SiteImpl site;
private final BooleanQuery query;
private final Query textQuery;
private final Sort sort;
private final Filter filter;
// from/to are copied from the builder; no method visible in this file reads them.
private final Date from;
private final Date to;

// --- derived / lazily computed state ---
/** Highlighter scorer built over the final query. */
private final QueryScorer scorer;
/** Terms of the text query; computed on first call to getSearchTerms. */
private Set<String> searchTerms = null;
/** Total hit count; -1 until getTotalHits or getNodes has run a search. */
private int totalHits = -1;

/**
 * Snapshots the builder's configuration. The query is obtained from
 * {@code builder.getQuery()}, and the scorer is created over that same query.
 */
private NodeSearcher(Builder builder) {
    final BooleanQuery assembled = builder.getQuery();
    this.site = builder.site;
    this.query = assembled;
    this.textQuery = builder.textQuery;
    this.sort = builder.sort;
    this.filter = builder.filter;
    this.from = builder.from;
    this.to = builder.to;
    this.scorer = new QueryScorer(assembled);
}
222
223 public BooleanQuery getQuery() {
224 return query;
225 }
226
227 static Query parse(String line, String[] fields) throws ParseException {
228 if( line == null || line.length() == 0 )
229 return null;
230 line = line.replace('[','|').replace(']','|'); // hack - treat [] as punctuation
231 MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT,fields, Lucene.analyzer);
232 parser.setDefaultOperator(QueryParser.AND_OPERATOR);
233 return parser.parse(line);
234 }
235
236 public String toString() {
237 return query.toString();
238 }
239
240 public Set<String> getSearchTerms() {
241 if( searchTerms==null ) {
242 searchTerms = new HashSet<String>();
243 if( textQuery != null )
244 searchTerms(searchTerms,textQuery);
245 }
246 return searchTerms;
247 }
248
249 private static void searchTerms(Set<String> searchTerms,Query query) {
250 if( query instanceof BooleanQuery ) {
251 BooleanQuery q = (BooleanQuery)query;
252 BooleanClause[] clauses = q.getClauses();
253 for (BooleanClause clause : clauses) {
254 if (!clause.isProhibited())
255 searchTerms(searchTerms, clause.getQuery());
256 }
257 } else if( query instanceof TermQuery ) {
258 TermQuery q = (TermQuery)query;
259 searchTerms.add( q.getTerm().text() );
260 } else if( query instanceof PhraseQuery ) {
261 PhraseQuery q = (PhraseQuery)query;
262 Term[] terms = q.getTerms();
263 for (Term term : terms) {
264 searchTerms.add(term.text());
265 }
266 }
267 }
268
269 public String highlight(String text,String pre,String post) {
270 try {
271 Highlighter hl = new Highlighter( new SimpleHTMLFormatter(pre,post), scorer );
272 hl.setTextFragmenter( new NullFragmenter() );
273 String s = hl.getBestFragment(Lucene.analyzer,null,text);
274 return s != null ? s : text;
275 } catch(IOException e) {
276 throw new RuntimeException(e);
277 } catch(InvalidTokenOffsetsException e) {
278 throw new RuntimeException(e);
279 }
280 }
281
282 public static String getStartingFragment(String text,int size,String dotdotdot) {
283 if (text.length() <= size) return text;
284 int end = text.lastIndexOf(' ', size);
285 if (end < 0) end = size;
286 String fragment = text.substring(0, end);
287 if (dotdotdot != null && fragment.length() < text.length())
288 fragment = fragment + dotdotdot;
289 return fragment;
290 }
291
292 private static final Formatter nullFormatter = new Formatter() {
293 public String highlightTerm(String originalText,TokenGroup tokenGroup) {
294 return originalText;
295 }
296 };
297
298 public String getFragment(String text,int size,String dotdotdot) {
299 try {
300 Highlighter hl = new Highlighter(nullFormatter,scorer);
301 hl.setTextFragmenter( new SimpleSpanFragmenter(scorer,size) );
302 String s = hl.getBestFragment(Lucene.analyzer,null,text);
303 if( s == null )
304 s = getStartingFragment(text,size,dotdotdot);
305 if( dotdotdot != null && s.length() < text.length() ) {
306 boolean atStart = text.startsWith(s);
307 boolean atEnd = text.endsWith(s);
308 if( !atStart )
309 s = dotdotdot + s;
310 if( !atEnd )
311 s = s + dotdotdot;
312 }
313 return s;
314 } catch(IOException e) {
315 throw new RuntimeException(e);
316 } catch(InvalidTokenOffsetsException e) {
317 throw new RuntimeException(e);
318 }
319 }
320
321 private static class DoneException extends RuntimeException {}
322
323 public boolean hasNodes() {
324 try {
325 LuceneSearcher searcher = Lucene.newSearcher(site);
326 try {
327 try {
328 searcher.search( query, new HitCollector() {
329 protected void process(Document doc) {
330 throw new DoneException();
331 }
332 } );
333 return false;
334 } catch(DoneException e) {
335 return true;
336 }
337 } finally {
338 searcher.close();
339 }
340 } catch(IOException e) {
341 throw new RuntimeException(e);
342 }
343 }
344
/** Callback that receives the id of each node matched by forEach. */
public interface Handler {
    /** Called once per matching document with the node id read from it. */
    void handle(long nodeId);
}
348
349 public void forEach(final Handler h) {
350 try {
351 final LuceneSearcher searcher = Lucene.newSearcher(site);
352 try {
353 searcher.search( query, new HitCollector() {
354 protected void process(Document doc) {
355 h.handle( Lucene.getNodeId(doc) );
356 }
357 } );
358 } finally {
359 searcher.close();
360 }
361 } catch(IOException e) {
362 throw new RuntimeException(e);
363 }
364 }
365
366 public int getTotalHits() {
367 if( totalHits == -1 ) {
368 try {
369 LuceneSearcher searcher = Lucene.newSearcher(site);
370 try {
371 TopDocs hits = searcher.search(query, filter, 0);
372 totalHits = hits.totalHits;
373 } finally {
374 searcher.close();
375 }
376 } catch (BooleanQuery.TooManyClauses e) {
377 throw new RuntimeException("Your search will give too many matches.");
378 } catch(IOException e) {
379 throw new RuntimeException(e);
380 }
381 }
382 return totalHits;
383 }
384
385 public List<Node> getNodes(int i, int n) throws TooManyClauses {
386 try {
387 LuceneSearcher searcher = Lucene.newSearcher(site);
388 try {
389 TopDocs hits = sort==null ? searcher.search(query,filter,i+n) : searcher.search(query,filter,i+n,sort);
390 totalHits = hits.totalHits;
391 int lim = hits.scoreDocs.length;
392 if( lim <= i )
393 return Collections.emptyList();
394 List<Node> a = new ArrayList<Node>();
395 for (int j=i; j<lim; j++) {
396 try {
397 int docId = hits.scoreDocs[j].doc;
398 Node node = Lucene.getNode(site, searcher, docId);
399 if (node != null) {
400 a.add(node);
401 }
402 } catch(IOException e) {
403 logger.error(e.toString());
404 }
405 }
406 return a;
407 } finally {
408 searcher.close();
409 }
410 } catch (BooleanQuery.TooManyClauses e) {
411 throw new TooManyClauses(e);
412 } catch (IOException e) {
413 throw new RuntimeException(e);
414 }
415 }
416
417 public static final class TooManyClauses extends RuntimeException {
418 TooManyClauses(BooleanQuery.TooManyClauses e) {
419 super(e);
420 }
421 }
422
423 }