Mercurial Hosting > nabble
comparison src/nabble/model/NodeSearcher.java @ 0:7ecd1a4ef557
add content
| author | Franklin Schmidt <fschmidt@gmail.com> |
|---|---|
| date | Thu, 21 Mar 2019 19:15:52 -0600 |
| parents | |
| children | 72765b66e2c3 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:7ecd1a4ef557 |
|---|---|
| 1 package nabble.model; | |
| 2 | |
| 3 import org.apache.lucene.analysis.Analyzer; | |
| 4 import org.apache.lucene.analysis.Token; | |
| 5 import org.apache.lucene.analysis.TokenFilter; | |
| 6 import org.apache.lucene.analysis.TokenStream; | |
| 7 import org.apache.lucene.analysis.snowball.SnowballAnalyzer; | |
| 8 import org.apache.lucene.document.Document; | |
| 9 import org.apache.lucene.document.NumberTools; | |
| 10 import org.apache.lucene.index.Term; | |
| 11 import org.apache.lucene.queryParser.MultiFieldQueryParser; | |
| 12 import org.apache.lucene.queryParser.ParseException; | |
| 13 import org.apache.lucene.queryParser.QueryParser; | |
| 14 import org.apache.lucene.search.BooleanClause; | |
| 15 import org.apache.lucene.search.BooleanQuery; | |
| 16 import org.apache.lucene.search.CachingWrapperFilter; | |
| 17 import org.apache.lucene.search.ConstantScoreQuery; | |
| 18 import org.apache.lucene.search.Filter; | |
| 19 import nabble.model.lucene.HitCollector; | |
| 20 import nabble.model.lucene.LuceneSearcher; | |
| 21 import org.apache.lucene.search.PhraseQuery; | |
| 22 import org.apache.lucene.search.Query; | |
| 23 import org.apache.lucene.search.QueryWrapperFilter; | |
| 24 import org.apache.lucene.search.Sort; | |
| 25 import org.apache.lucene.search.SortField; | |
| 26 import org.apache.lucene.search.TermQuery; | |
| 27 import org.apache.lucene.search.TopDocs; | |
| 28 import org.apache.lucene.search.highlight.Formatter; | |
| 29 import org.apache.lucene.search.highlight.Highlighter; | |
| 30 import org.apache.lucene.search.highlight.NullFragmenter; | |
| 31 import org.apache.lucene.search.highlight.QueryScorer; | |
| 32 import org.apache.lucene.search.highlight.SimpleHTMLFormatter; | |
| 33 import org.apache.lucene.search.highlight.SimpleSpanFragmenter; | |
| 34 import org.apache.lucene.search.highlight.TokenGroup; | |
| 35 import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; | |
| 36 import org.apache.lucene.util.Version; | |
| 37 import org.slf4j.Logger; | |
| 38 import org.slf4j.LoggerFactory; | |
| 39 | |
| 40 import java.io.IOException; | |
| 41 import java.io.StringReader; | |
| 42 import java.util.ArrayList; | |
| 43 import java.util.Collections; | |
| 44 import java.util.Date; | |
| 45 import java.util.HashSet; | |
| 46 import java.util.List; | |
| 47 import java.util.Set; | |
| 48 | |
| 49 | |
| 50 public final class NodeSearcher { | |
| 51 private static final Logger logger = LoggerFactory.getLogger(NodeSearcher.class); | |
| 52 | |
| 53 public static final Sort SORT_BY_DATE = new Sort(new SortField(Lucene.DATE_FLD, SortField.INT)); | |
| 54 | |
| 55 public static class Builder { | |
| 56 private static final String[] nodeSearchFields = new String[]{ | |
| 57 Lucene.SUBJECT_FLD, Lucene.MESSAGE_FLD, Lucene.AUTHOR_FLD, Lucene.MAILING_LIST_FLD | |
| 58 }; | |
| 59 | |
| 60 private final SiteImpl site; | |
| 61 private final BooleanQuery query = new BooleanQuery(); | |
| 62 private Query textQuery = null; | |
| 63 private boolean isAuthenticated = false; | |
| 64 private final long nodeId; | |
| 65 private User currentUser; | |
| 66 private String userSearchId = null; | |
| 67 private Sort sort = null; | |
| 68 private Filter filter = null; | |
| 69 private Date from = null; | |
| 70 private Date to = null; | |
| 71 | |
/**
 * Creates a builder scoped to the given node, using the node's own site and id.
 *
 * @param node node whose site and id define the search scope
 */
public Builder(Node node) {
	this( node.getSite(), node.getId() );
}
| 75 | |
| 76 public Builder(Site site,long nodeId) { | |
| 77 if( nodeId == 0L ) | |
| 78 throw new RuntimeException(); | |
| 79 this.site = (SiteImpl)site; | |
| 80 this.nodeId = nodeId; | |
| 81 Query query2 = new TermQuery(new Term(Lucene.ANCESTORS_FLD,Long.toString(nodeId))); | |
| 82 query.add(query2,BooleanClause.Occur.MUST); | |
| 83 } | |
| 84 | |
| 85 public void setCurrentUser(User user) { | |
| 86 this.isAuthenticated = true; | |
| 87 this.currentUser = user; | |
| 88 } | |
| 89 | |
| 90 private BooleanQuery getQuery() { | |
| 91 if( !isAuthenticated ) | |
| 92 return query; | |
| 93 if( currentUser!=null && currentUser.getSearchId().equals(userSearchId) ) | |
| 94 return query; | |
| 95 BooleanQuery q = new BooleanQuery(); | |
| 96 q.add(query, BooleanClause.Occur.MUST); | |
| 97 if( currentUser != null ) { | |
| 98 NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId); | |
| 99 q.add(new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD, Lucene.formatPrivateNode(node))), BooleanClause.Occur.MUST); | |
| 100 return q; | |
| 101 } | |
| 102 q.add(publicQuery, BooleanClause.Occur.MUST); | |
| 103 return q; | |
| 104 } | |
| 105 | |
| 106 public void addQuery(Query query2) { | |
| 107 query.add(query2,BooleanClause.Occur.MUST); | |
| 108 } | |
| 109 | |
| 110 public void addLine(String line) throws ParseException { | |
| 111 if( textQuery != null ) | |
| 112 throw new RuntimeException(); | |
| 113 textQuery = parse(line,nodeSearchFields); | |
| 114 if( textQuery != null ) | |
| 115 query.add(textQuery,BooleanClause.Occur.MUST); | |
| 116 } | |
| 117 | |
| 118 public void addUser(Person user) { | |
| 119 if( user==null ) | |
| 120 return; | |
| 121 addUser(user.getSearchId()); | |
| 122 } | |
| 123 | |
| 124 public void addUser(String userSearchId) { | |
| 125 this.userSearchId = userSearchId; | |
| 126 Query query2 = new TermQuery(new Term(Lucene.USER_ID_FLD,userSearchId)); | |
| 127 query.add(query2,BooleanClause.Occur.MUST); | |
| 128 } | |
| 129 | |
| 130 public void addUsers(List<? extends Person> visitors) { | |
| 131 if (visitors != null && visitors.size() > 0) { | |
| 132 BooleanQuery usersClause = new BooleanQuery(); | |
| 133 for (Person v : visitors) { | |
| 134 Query q = new TermQuery(new Term(Lucene.USER_ID_FLD,v.getSearchId())); | |
| 135 usersClause.add(q, BooleanClause.Occur.SHOULD); | |
| 136 } | |
| 137 query.add(usersClause, BooleanClause.Occur.MUST); | |
| 138 } | |
| 139 } | |
| 140 | |
| 141 void addExcludeUser(String userSearchId) { | |
| 142 BooleanClause excludeUserClause = new BooleanClause( | |
| 143 new TermQuery(new Term(Lucene.USER_ID_FLD, userSearchId)), | |
| 144 BooleanClause.Occur.MUST_NOT); | |
| 145 query.add(excludeUserClause); | |
| 146 } | |
| 147 | |
| 148 public void setUserSearchId(String userSearchId) { | |
| 149 this.userSearchId = userSearchId; | |
| 150 } | |
| 151 | |
| 152 private final static Query appQuery = | |
| 153 new ConstantScoreQuery( | |
| 154 new CachingWrapperFilter( | |
| 155 new QueryWrapperFilter( | |
| 156 new TermQuery(new Term(Lucene.KIND_FLD,Node.Kind.APP.toString())) | |
| 157 ) | |
| 158 ) | |
| 159 ) | |
| 160 ; | |
| 161 | |
| 162 public void addNodeKind(Node.Kind kind) { | |
| 163 query.add(appQuery, | |
| 164 kind==Node.Kind.APP?BooleanClause.Occur.MUST:BooleanClause.Occur.MUST_NOT); | |
| 165 } | |
| 166 | |
| 167 private final static Query publicQuery = | |
| 168 new ConstantScoreQuery( | |
| 169 new CachingWrapperFilter( | |
| 170 new QueryWrapperFilter( | |
| 171 new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD,"none")) | |
| 172 ) | |
| 173 ) | |
| 174 ) | |
| 175 ; | |
| 176 | |
| 177 public void excludePrivate() { | |
| 178 query.add(publicQuery,BooleanClause.Occur.MUST); | |
| 179 } | |
| 180 | |
| 181 public void setSort(Sort sort) { | |
| 182 this.sort = sort; | |
| 183 } | |
| 184 | |
| 185 public void setFilter(Filter filter) { | |
| 186 this.filter = filter; | |
| 187 } | |
| 188 | |
| 189 public void setDateRange(Date from, Date to) { | |
| 190 if( sort != SORT_BY_DATE ) | |
| 191 throw new UnsupportedOperationException(); | |
| 192 this.from = from; | |
| 193 this.to = to; | |
| 194 } | |
| 195 | |
| 196 public NodeSearcher build() { | |
| 197 return new NodeSearcher(this); | |
| 198 } | |
| 199 } | |
| 200 | |
| 201 private final SiteImpl site; | |
| 202 private final BooleanQuery query; | |
| 203 private final Query textQuery; | |
| 204 private final Sort sort; | |
| 205 private final Filter filter; | |
| 206 private final Date from; | |
| 207 private final Date to; | |
| 208 private Set<String> searchTerms = null; | |
| 209 private int totalHits = -1; | |
| 210 private final QueryScorer scorer; | |
| 211 | |
/**
 * Copies the builder's state and prepares a highlight scorer over the
 * final query obtained from the builder.
 *
 * @param builder source of site, query, sort, filter and date range
 */
private NodeSearcher(Builder builder) {
	site = builder.site;
	sort = builder.sort;
	filter = builder.filter;
	from = builder.from;
	to = builder.to;
	textQuery = builder.textQuery;
	query = builder.getQuery();
	scorer = new QueryScorer(this.query);
}
| 222 | |
| 223 public BooleanQuery getQuery() { | |
| 224 return query; | |
| 225 } | |
| 226 | |
| 227 static Query parse(String line, String[] fields) throws ParseException { | |
| 228 if( line == null || line.length() == 0 ) | |
| 229 return null; | |
| 230 line = line.replace('[','|').replace(']','|'); // hack - treat [] as punctuation | |
| 231 MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT,fields, Lucene.analyzer); | |
| 232 parser.setDefaultOperator(QueryParser.AND_OPERATOR); | |
| 233 return parser.parse(line); | |
| 234 } | |
| 235 | |
| 236 public String toString() { | |
| 237 return query.toString(); | |
| 238 } | |
| 239 | |
| 240 public Set<String> getSearchTerms() { | |
| 241 if( searchTerms==null ) { | |
| 242 searchTerms = new HashSet<String>(); | |
| 243 if( textQuery != null ) | |
| 244 searchTerms(searchTerms,textQuery); | |
| 245 } | |
| 246 return searchTerms; | |
| 247 } | |
| 248 | |
| 249 private static void searchTerms(Set<String> searchTerms,Query query) { | |
| 250 if( query instanceof BooleanQuery ) { | |
| 251 BooleanQuery q = (BooleanQuery)query; | |
| 252 BooleanClause[] clauses = q.getClauses(); | |
| 253 for (BooleanClause clause : clauses) { | |
| 254 if (!clause.isProhibited()) | |
| 255 searchTerms(searchTerms, clause.getQuery()); | |
| 256 } | |
| 257 } else if( query instanceof TermQuery ) { | |
| 258 TermQuery q = (TermQuery)query; | |
| 259 searchTerms.add( q.getTerm().text() ); | |
| 260 } else if( query instanceof PhraseQuery ) { | |
| 261 PhraseQuery q = (PhraseQuery)query; | |
| 262 Term[] terms = q.getTerms(); | |
| 263 for (Term term : terms) { | |
| 264 searchTerms.add(term.text()); | |
| 265 } | |
| 266 } | |
| 267 } | |
| 268 | |
| 269 public String highlight(String text,String pre,String post) { | |
| 270 try { | |
| 271 Highlighter hl = new Highlighter( new SimpleHTMLFormatter(pre,post), scorer ); | |
| 272 hl.setTextFragmenter( new NullFragmenter() ); | |
| 273 String s = hl.getBestFragment(Lucene.analyzer,null,text); | |
| 274 return s != null ? s : text; | |
| 275 } catch(IOException e) { | |
| 276 throw new RuntimeException(e); | |
| 277 } catch(InvalidTokenOffsetsException e) { | |
| 278 throw new RuntimeException(e); | |
| 279 } | |
| 280 } | |
| 281 | |
| 282 public static String getStartingFragment(String text,int size,String dotdotdot) { | |
| 283 if (text.length() <= size) return text; | |
| 284 int end = text.lastIndexOf(' ', size); | |
| 285 if (end < 0) end = size; | |
| 286 String fragment = text.substring(0, end); | |
| 287 if (dotdotdot != null && fragment.length() < text.length()) | |
| 288 fragment = fragment + dotdotdot; | |
| 289 return fragment; | |
| 290 } | |
| 291 | |
| 292 private static final Formatter nullFormatter = new Formatter() { | |
| 293 public String highlightTerm(String originalText,TokenGroup tokenGroup) { | |
| 294 return originalText; | |
| 295 } | |
| 296 }; | |
| 297 | |
| 298 public String getFragment(String text,int size,String dotdotdot) { | |
| 299 try { | |
| 300 Highlighter hl = new Highlighter(nullFormatter,scorer); | |
| 301 hl.setTextFragmenter( new SimpleSpanFragmenter(scorer,size) ); | |
| 302 String s = hl.getBestFragment(Lucene.analyzer,null,text); | |
| 303 if( s == null ) | |
| 304 s = getStartingFragment(text,size,dotdotdot); | |
| 305 if( dotdotdot != null && s.length() < text.length() ) { | |
| 306 boolean atStart = text.startsWith(s); | |
| 307 boolean atEnd = text.endsWith(s); | |
| 308 if( !atStart ) | |
| 309 s = dotdotdot + s; | |
| 310 if( !atEnd ) | |
| 311 s = s + dotdotdot; | |
| 312 } | |
| 313 return s; | |
| 314 } catch(IOException e) { | |
| 315 throw new RuntimeException(e); | |
| 316 } catch(InvalidTokenOffsetsException e) { | |
| 317 throw new RuntimeException(e); | |
| 318 } | |
| 319 } | |
| 320 | |
| 321 private static class DoneException extends RuntimeException {} | |
| 322 | |
| 323 public boolean hasNodes() { | |
| 324 try { | |
| 325 LuceneSearcher searcher = Lucene.newSearcher(site); | |
| 326 try { | |
| 327 try { | |
| 328 searcher.search( query, new HitCollector() { | |
| 329 protected void process(Document doc) { | |
| 330 throw new DoneException(); | |
| 331 } | |
| 332 } ); | |
| 333 return false; | |
| 334 } catch(DoneException e) { | |
| 335 return true; | |
| 336 } | |
| 337 } finally { | |
| 338 searcher.close(); | |
| 339 } | |
| 340 } catch(IOException e) { | |
| 341 throw new RuntimeException(e); | |
| 342 } | |
| 343 } | |
| 344 | |
/** Callback receiving the node id of each hit visited by forEach. */
public interface Handler {
	/**
	 * Called once per matching document.
	 *
	 * @param nodeId node id read from the document
	 */
	void handle(long nodeId);
}
| 348 | |
| 349 public void forEach(final Handler h) { | |
| 350 try { | |
| 351 final LuceneSearcher searcher = Lucene.newSearcher(site); | |
| 352 try { | |
| 353 searcher.search( query, new HitCollector() { | |
| 354 protected void process(Document doc) { | |
| 355 h.handle( Lucene.getNodeId(doc) ); | |
| 356 } | |
| 357 } ); | |
| 358 } finally { | |
| 359 searcher.close(); | |
| 360 } | |
| 361 } catch(IOException e) { | |
| 362 throw new RuntimeException(e); | |
| 363 } | |
| 364 } | |
| 365 | |
| 366 public int getTotalHits() { | |
| 367 if( totalHits == -1 ) { | |
| 368 try { | |
| 369 LuceneSearcher searcher = Lucene.newSearcher(site); | |
| 370 try { | |
| 371 TopDocs hits = searcher.search(query, filter, 0); | |
| 372 totalHits = hits.totalHits; | |
| 373 } finally { | |
| 374 searcher.close(); | |
| 375 } | |
| 376 } catch (BooleanQuery.TooManyClauses e) { | |
| 377 throw new RuntimeException("Your search will give too many matches."); | |
| 378 } catch(IOException e) { | |
| 379 throw new RuntimeException(e); | |
| 380 } | |
| 381 } | |
| 382 return totalHits; | |
| 383 } | |
| 384 | |
| 385 public List<Node> getNodes(int i, int n) throws TooManyClauses { | |
| 386 try { | |
| 387 LuceneSearcher searcher = Lucene.newSearcher(site); | |
| 388 try { | |
| 389 TopDocs hits = sort==null ? searcher.search(query,filter,i+n) : searcher.search(query,filter,i+n,sort); | |
| 390 totalHits = hits.totalHits; | |
| 391 int lim = hits.scoreDocs.length; | |
| 392 if( lim <= i ) | |
| 393 return Collections.emptyList(); | |
| 394 List<Node> a = new ArrayList<Node>(); | |
| 395 for (int j=i; j<lim; j++) { | |
| 396 try { | |
| 397 int docId = hits.scoreDocs[j].doc; | |
| 398 Node node = Lucene.getNode(site, searcher, docId); | |
| 399 if (node != null) { | |
| 400 a.add(node); | |
| 401 } | |
| 402 } catch(IOException e) { | |
| 403 logger.error(e.toString()); | |
| 404 } | |
| 405 } | |
| 406 return a; | |
| 407 } finally { | |
| 408 searcher.close(); | |
| 409 } | |
| 410 } catch (BooleanQuery.TooManyClauses e) { | |
| 411 throw new TooManyClauses(e); | |
| 412 } catch (IOException e) { | |
| 413 throw new RuntimeException(e); | |
| 414 } | |
| 415 } | |
| 416 | |
| 417 public static final class TooManyClauses extends RuntimeException { | |
| 418 TooManyClauses(BooleanQuery.TooManyClauses e) { | |
| 419 super(e); | |
| 420 } | |
| 421 } | |
| 422 | |
| 423 } |
