0
|
1 package nabble.model;
|
|
2
|
|
3 import org.apache.lucene.analysis.Analyzer;
|
|
4 import org.apache.lucene.analysis.Token;
|
|
5 import org.apache.lucene.analysis.TokenFilter;
|
|
6 import org.apache.lucene.analysis.TokenStream;
|
|
7 import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
|
|
8 import org.apache.lucene.document.Document;
|
|
9 import org.apache.lucene.document.NumberTools;
|
|
10 import org.apache.lucene.index.Term;
|
|
11 import org.apache.lucene.queryParser.MultiFieldQueryParser;
|
|
12 import org.apache.lucene.queryParser.ParseException;
|
|
13 import org.apache.lucene.queryParser.QueryParser;
|
|
14 import org.apache.lucene.search.BooleanClause;
|
|
15 import org.apache.lucene.search.BooleanQuery;
|
|
16 import org.apache.lucene.search.CachingWrapperFilter;
|
|
17 import org.apache.lucene.search.ConstantScoreQuery;
|
|
18 import org.apache.lucene.search.Filter;
|
|
19 import nabble.model.lucene.HitCollector;
|
|
20 import nabble.model.lucene.LuceneSearcher;
|
|
21 import org.apache.lucene.search.PhraseQuery;
|
|
22 import org.apache.lucene.search.Query;
|
|
23 import org.apache.lucene.search.QueryWrapperFilter;
|
|
24 import org.apache.lucene.search.Sort;
|
|
25 import org.apache.lucene.search.SortField;
|
|
26 import org.apache.lucene.search.TermQuery;
|
|
27 import org.apache.lucene.search.TopDocs;
|
|
28 import org.apache.lucene.search.highlight.Formatter;
|
|
29 import org.apache.lucene.search.highlight.Highlighter;
|
|
30 import org.apache.lucene.search.highlight.NullFragmenter;
|
|
31 import org.apache.lucene.search.highlight.QueryScorer;
|
|
32 import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
|
|
33 import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
|
|
34 import org.apache.lucene.search.highlight.TokenGroup;
|
|
35 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
|
|
36 import org.apache.lucene.util.Version;
|
|
37 import org.slf4j.Logger;
|
|
38 import org.slf4j.LoggerFactory;
|
|
39
|
|
40 import java.io.IOException;
|
|
41 import java.io.StringReader;
|
|
42 import java.util.ArrayList;
|
|
43 import java.util.Collections;
|
|
44 import java.util.Date;
|
|
45 import java.util.HashSet;
|
|
46 import java.util.List;
|
|
47 import java.util.Set;
|
|
48
|
|
49
|
|
50 public final class NodeSearcher {
|
|
51 private static final Logger logger = LoggerFactory.getLogger(NodeSearcher.class);
|
|
52
|
|
53 public static final Sort SORT_BY_DATE = new Sort(new SortField(Lucene.DATE_FLD, SortField.INT));
|
|
54
|
|
55 public static class Builder {
|
|
56 private static final String[] nodeSearchFields = new String[]{
|
|
57 Lucene.SUBJECT_FLD, Lucene.MESSAGE_FLD, Lucene.AUTHOR_FLD, Lucene.MAILING_LIST_FLD
|
|
58 };
|
|
59
|
|
60 private final SiteImpl site;
|
|
61 private final BooleanQuery query = new BooleanQuery();
|
|
62 private Query textQuery = null;
|
|
63 private boolean isAuthenticated = false;
|
|
64 private final long nodeId;
|
|
65 private User currentUser;
|
|
66 private String userSearchId = null;
|
|
67 private Sort sort = null;
|
|
68 private Filter filter = null;
|
|
69 private Date from = null;
|
|
70 private Date to = null;
|
|
71
|
|
72 public Builder(Node node) {
|
|
73 this(node.getSite(),node.getId());
|
|
74 }
|
|
75
|
|
76 public Builder(Site site,long nodeId) {
|
|
77 if( nodeId == 0L )
|
|
78 throw new RuntimeException();
|
|
79 this.site = (SiteImpl)site;
|
|
80 this.nodeId = nodeId;
|
|
81 Query query2 = new TermQuery(new Term(Lucene.ANCESTORS_FLD,Long.toString(nodeId)));
|
|
82 query.add(query2,BooleanClause.Occur.MUST);
|
|
83 }
|
|
84
|
|
85 public void setCurrentUser(User user) {
|
|
86 this.isAuthenticated = true;
|
|
87 this.currentUser = user;
|
|
88 }
|
|
89
|
|
90 private BooleanQuery getQuery() {
|
|
91 if( !isAuthenticated )
|
|
92 return query;
|
|
93 if( currentUser!=null && currentUser.getSearchId().equals(userSearchId) )
|
|
94 return query;
|
|
95 BooleanQuery q = new BooleanQuery();
|
|
96 q.add(query, BooleanClause.Occur.MUST);
|
|
97 if( currentUser != null ) {
|
|
98 NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId);
|
|
99 q.add(new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD, Lucene.formatPrivateNode(node))), BooleanClause.Occur.MUST);
|
|
100 return q;
|
|
101 }
|
|
102 q.add(publicQuery, BooleanClause.Occur.MUST);
|
|
103 return q;
|
|
104 }
|
|
105
|
|
106 public void addQuery(Query query2) {
|
|
107 query.add(query2,BooleanClause.Occur.MUST);
|
|
108 }
|
|
109
|
|
110 public void addLine(String line) throws ParseException {
|
|
111 if( textQuery != null )
|
|
112 throw new RuntimeException();
|
|
113 textQuery = parse(line,nodeSearchFields);
|
|
114 if( textQuery != null )
|
|
115 query.add(textQuery,BooleanClause.Occur.MUST);
|
|
116 }
|
|
117
|
|
118 public void addUser(Person user) {
|
|
119 if( user==null )
|
|
120 return;
|
|
121 addUser(user.getSearchId());
|
|
122 }
|
|
123
|
|
124 public void addUser(String userSearchId) {
|
|
125 this.userSearchId = userSearchId;
|
|
126 Query query2 = new TermQuery(new Term(Lucene.USER_ID_FLD,userSearchId));
|
|
127 query.add(query2,BooleanClause.Occur.MUST);
|
|
128 }
|
|
129
|
|
130 public void addUsers(List<? extends Person> visitors) {
|
|
131 if (visitors != null && visitors.size() > 0) {
|
|
132 BooleanQuery usersClause = new BooleanQuery();
|
|
133 for (Person v : visitors) {
|
|
134 Query q = new TermQuery(new Term(Lucene.USER_ID_FLD,v.getSearchId()));
|
|
135 usersClause.add(q, BooleanClause.Occur.SHOULD);
|
|
136 }
|
|
137 query.add(usersClause, BooleanClause.Occur.MUST);
|
|
138 }
|
|
139 }
|
|
140
|
|
141 void addExcludeUser(String userSearchId) {
|
|
142 BooleanClause excludeUserClause = new BooleanClause(
|
|
143 new TermQuery(new Term(Lucene.USER_ID_FLD, userSearchId)),
|
|
144 BooleanClause.Occur.MUST_NOT);
|
|
145 query.add(excludeUserClause);
|
|
146 }
|
|
147
|
|
148 public void setUserSearchId(String userSearchId) {
|
|
149 this.userSearchId = userSearchId;
|
|
150 }
|
|
151
|
|
152 private final static Query appQuery =
|
|
153 new ConstantScoreQuery(
|
|
154 new CachingWrapperFilter(
|
|
155 new QueryWrapperFilter(
|
|
156 new TermQuery(new Term(Lucene.KIND_FLD,Node.Kind.APP.toString()))
|
|
157 )
|
|
158 )
|
|
159 )
|
|
160 ;
|
|
161
|
|
162 public void addNodeKind(Node.Kind kind) {
|
|
163 query.add(appQuery,
|
|
164 kind==Node.Kind.APP?BooleanClause.Occur.MUST:BooleanClause.Occur.MUST_NOT);
|
|
165 }
|
|
166
|
|
167 private final static Query publicQuery =
|
|
168 new ConstantScoreQuery(
|
|
169 new CachingWrapperFilter(
|
|
170 new QueryWrapperFilter(
|
|
171 new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD,"none"))
|
|
172 )
|
|
173 )
|
|
174 )
|
|
175 ;
|
|
176
|
|
177 public void excludePrivate() {
|
|
178 query.add(publicQuery,BooleanClause.Occur.MUST);
|
|
179 }
|
|
180
|
|
181 public void setSort(Sort sort) {
|
|
182 this.sort = sort;
|
|
183 }
|
|
184
|
|
185 public void setFilter(Filter filter) {
|
|
186 this.filter = filter;
|
|
187 }
|
|
188
|
|
189 public void setDateRange(Date from, Date to) {
|
|
190 if( sort != SORT_BY_DATE )
|
|
191 throw new UnsupportedOperationException();
|
|
192 this.from = from;
|
|
193 this.to = to;
|
|
194 }
|
|
195
|
|
196 public NodeSearcher build() {
|
|
197 return new NodeSearcher(this);
|
|
198 }
|
|
199 }
|
|
200
|
|
201 private final SiteImpl site;
|
|
202 private final BooleanQuery query;
|
|
203 private final Query textQuery;
|
|
204 private final Sort sort;
|
|
205 private final Filter filter;
|
|
206 private final Date from;
|
|
207 private final Date to;
|
|
208 private Set<String> searchTerms = null;
|
|
209 private int totalHits = -1;
|
|
210 private final QueryScorer scorer;
|
|
211
|
|
212 private NodeSearcher(Builder builder) {
|
|
213 this.site = builder.site;
|
|
214 this.query = builder.getQuery();
|
|
215 this.textQuery = builder.textQuery;
|
|
216 this.sort = builder.sort;
|
|
217 this.filter = builder.filter;
|
|
218 this.from = builder.from;
|
|
219 this.to = builder.to;
|
|
220 this.scorer = new QueryScorer(query);
|
|
221 }
|
|
222
|
|
223 public BooleanQuery getQuery() {
|
|
224 return query;
|
|
225 }
|
|
226
|
|
227 static Query parse(String line, String[] fields) throws ParseException {
|
|
228 if( line == null || line.length() == 0 )
|
|
229 return null;
|
|
230 line = line.replace('[','|').replace(']','|'); // hack - treat [] as punctuation
|
|
231 MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT,fields, Lucene.analyzer);
|
|
232 parser.setDefaultOperator(QueryParser.AND_OPERATOR);
|
|
233 return parser.parse(line);
|
|
234 }
|
|
235
|
|
236 public String toString() {
|
|
237 return query.toString();
|
|
238 }
|
|
239
|
|
240 public Set<String> getSearchTerms() {
|
|
241 if( searchTerms==null ) {
|
|
242 searchTerms = new HashSet<String>();
|
|
243 if( textQuery != null )
|
|
244 searchTerms(searchTerms,textQuery);
|
|
245 }
|
|
246 return searchTerms;
|
|
247 }
|
|
248
|
|
249 private static void searchTerms(Set<String> searchTerms,Query query) {
|
|
250 if( query instanceof BooleanQuery ) {
|
|
251 BooleanQuery q = (BooleanQuery)query;
|
|
252 BooleanClause[] clauses = q.getClauses();
|
|
253 for (BooleanClause clause : clauses) {
|
|
254 if (!clause.isProhibited())
|
|
255 searchTerms(searchTerms, clause.getQuery());
|
|
256 }
|
|
257 } else if( query instanceof TermQuery ) {
|
|
258 TermQuery q = (TermQuery)query;
|
|
259 searchTerms.add( q.getTerm().text() );
|
|
260 } else if( query instanceof PhraseQuery ) {
|
|
261 PhraseQuery q = (PhraseQuery)query;
|
|
262 Term[] terms = q.getTerms();
|
|
263 for (Term term : terms) {
|
|
264 searchTerms.add(term.text());
|
|
265 }
|
|
266 }
|
|
267 }
|
|
268
|
|
269 public String highlight(String text,String pre,String post) {
|
|
270 try {
|
|
271 Highlighter hl = new Highlighter( new SimpleHTMLFormatter(pre,post), scorer );
|
|
272 hl.setTextFragmenter( new NullFragmenter() );
|
|
273 String s = hl.getBestFragment(Lucene.analyzer,null,text);
|
|
274 return s != null ? s : text;
|
|
275 } catch(IOException e) {
|
|
276 throw new RuntimeException(e);
|
|
277 } catch(InvalidTokenOffsetsException e) {
|
|
278 throw new RuntimeException(e);
|
|
279 }
|
|
280 }
|
|
281
|
|
282 public static String getStartingFragment(String text,int size,String dotdotdot) {
|
|
283 if (text.length() <= size) return text;
|
|
284 int end = text.lastIndexOf(' ', size);
|
|
285 if (end < 0) end = size;
|
|
286 String fragment = text.substring(0, end);
|
|
287 if (dotdotdot != null && fragment.length() < text.length())
|
|
288 fragment = fragment + dotdotdot;
|
|
289 return fragment;
|
|
290 }
|
|
291
|
|
292 private static final Formatter nullFormatter = new Formatter() {
|
|
293 public String highlightTerm(String originalText,TokenGroup tokenGroup) {
|
|
294 return originalText;
|
|
295 }
|
|
296 };
|
|
297
|
|
298 public String getFragment(String text,int size,String dotdotdot) {
|
|
299 try {
|
|
300 Highlighter hl = new Highlighter(nullFormatter,scorer);
|
|
301 hl.setTextFragmenter( new SimpleSpanFragmenter(scorer,size) );
|
|
302 String s = hl.getBestFragment(Lucene.analyzer,null,text);
|
|
303 if( s == null )
|
|
304 s = getStartingFragment(text,size,dotdotdot);
|
|
305 if( dotdotdot != null && s.length() < text.length() ) {
|
|
306 boolean atStart = text.startsWith(s);
|
|
307 boolean atEnd = text.endsWith(s);
|
|
308 if( !atStart )
|
|
309 s = dotdotdot + s;
|
|
310 if( !atEnd )
|
|
311 s = s + dotdotdot;
|
|
312 }
|
|
313 return s;
|
|
314 } catch(IOException e) {
|
|
315 throw new RuntimeException(e);
|
|
316 } catch(InvalidTokenOffsetsException e) {
|
|
317 throw new RuntimeException(e);
|
|
318 }
|
|
319 }
|
|
320
|
|
321 private static class DoneException extends RuntimeException {}
|
|
322
|
|
323 public boolean hasNodes() {
|
|
324 try {
|
|
325 LuceneSearcher searcher = Lucene.newSearcher(site);
|
|
326 try {
|
|
327 try {
|
|
328 searcher.search( query, new HitCollector() {
|
|
329 protected void process(Document doc) {
|
|
330 throw new DoneException();
|
|
331 }
|
|
332 } );
|
|
333 return false;
|
|
334 } catch(DoneException e) {
|
|
335 return true;
|
|
336 }
|
|
337 } finally {
|
|
338 searcher.close();
|
|
339 }
|
|
340 } catch(IOException e) {
|
|
341 throw new RuntimeException(e);
|
|
342 }
|
|
343 }
|
|
344
|
|
345 public interface Handler {
|
|
346 public void handle(long nodeId);
|
|
347 }
|
|
348
|
|
349 public void forEach(final Handler h) {
|
|
350 try {
|
|
351 final LuceneSearcher searcher = Lucene.newSearcher(site);
|
|
352 try {
|
|
353 searcher.search( query, new HitCollector() {
|
|
354 protected void process(Document doc) {
|
|
355 h.handle( Lucene.getNodeId(doc) );
|
|
356 }
|
|
357 } );
|
|
358 } finally {
|
|
359 searcher.close();
|
|
360 }
|
|
361 } catch(IOException e) {
|
|
362 throw new RuntimeException(e);
|
|
363 }
|
|
364 }
|
|
365
|
|
366 public int getTotalHits() {
|
|
367 if( totalHits == -1 ) {
|
|
368 try {
|
|
369 LuceneSearcher searcher = Lucene.newSearcher(site);
|
|
370 try {
|
|
371 TopDocs hits = searcher.search(query, filter, 0);
|
|
372 totalHits = hits.totalHits;
|
|
373 } finally {
|
|
374 searcher.close();
|
|
375 }
|
|
376 } catch (BooleanQuery.TooManyClauses e) {
|
|
377 throw new RuntimeException("Your search will give too many matches.");
|
|
378 } catch(IOException e) {
|
|
379 throw new RuntimeException(e);
|
|
380 }
|
|
381 }
|
|
382 return totalHits;
|
|
383 }
|
|
384
|
|
385 public List<Node> getNodes(int i, int n) throws TooManyClauses {
|
|
386 try {
|
|
387 LuceneSearcher searcher = Lucene.newSearcher(site);
|
|
388 try {
|
|
389 TopDocs hits = sort==null ? searcher.search(query,filter,i+n) : searcher.search(query,filter,i+n,sort);
|
|
390 totalHits = hits.totalHits;
|
|
391 int lim = hits.scoreDocs.length;
|
|
392 if( lim <= i )
|
|
393 return Collections.emptyList();
|
|
394 List<Node> a = new ArrayList<Node>();
|
|
395 for (int j=i; j<lim; j++) {
|
|
396 try {
|
|
397 int docId = hits.scoreDocs[j].doc;
|
|
398 Node node = Lucene.getNode(site, searcher, docId);
|
|
399 if (node != null) {
|
|
400 a.add(node);
|
|
401 }
|
|
402 } catch(IOException e) {
|
|
403 logger.error(e.toString());
|
|
404 }
|
|
405 }
|
|
406 return a;
|
|
407 } finally {
|
|
408 searcher.close();
|
|
409 }
|
|
410 } catch (BooleanQuery.TooManyClauses e) {
|
|
411 throw new TooManyClauses(e);
|
|
412 } catch (IOException e) {
|
|
413 throw new RuntimeException(e);
|
|
414 }
|
|
415 }
|
|
416
|
|
417 public static final class TooManyClauses extends RuntimeException {
|
|
418 TooManyClauses(BooleanQuery.TooManyClauses e) {
|
|
419 super(e);
|
|
420 }
|
|
421 }
|
|
422
|
|
423 }
|