Mercurial Hosting > nabble
diff src/nabble/model/NodeSearcher.java @ 0:7ecd1a4ef557
add content
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 21 Mar 2019 19:15:52 -0600 |
parents | |
children | 72765b66e2c3 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/nabble/model/NodeSearcher.java Thu Mar 21 19:15:52 2019 -0600 @@ -0,0 +1,423 @@ +package nabble.model; + +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.Token; +import org.apache.lucene.analysis.TokenFilter; +import org.apache.lucene.analysis.TokenStream; +import org.apache.lucene.analysis.snowball.SnowballAnalyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.NumberTools; +import org.apache.lucene.index.Term; +import org.apache.lucene.queryParser.MultiFieldQueryParser; +import org.apache.lucene.queryParser.ParseException; +import org.apache.lucene.queryParser.QueryParser; +import org.apache.lucene.search.BooleanClause; +import org.apache.lucene.search.BooleanQuery; +import org.apache.lucene.search.CachingWrapperFilter; +import org.apache.lucene.search.ConstantScoreQuery; +import org.apache.lucene.search.Filter; +import nabble.model.lucene.HitCollector; +import nabble.model.lucene.LuceneSearcher; +import org.apache.lucene.search.PhraseQuery; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.QueryWrapperFilter; +import org.apache.lucene.search.Sort; +import org.apache.lucene.search.SortField; +import org.apache.lucene.search.TermQuery; +import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.highlight.Formatter; +import org.apache.lucene.search.highlight.Highlighter; +import org.apache.lucene.search.highlight.NullFragmenter; +import org.apache.lucene.search.highlight.QueryScorer; +import org.apache.lucene.search.highlight.SimpleHTMLFormatter; +import org.apache.lucene.search.highlight.SimpleSpanFragmenter; +import org.apache.lucene.search.highlight.TokenGroup; +import org.apache.lucene.search.highlight.InvalidTokenOffsetsException; +import org.apache.lucene.util.Version; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.StringReader; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + + +public final class NodeSearcher { + private static final Logger logger = LoggerFactory.getLogger(NodeSearcher.class); + + public static final Sort SORT_BY_DATE = new Sort(new SortField(Lucene.DATE_FLD, SortField.INT)); + + public static class Builder { + private static final String[] nodeSearchFields = new String[]{ + Lucene.SUBJECT_FLD, Lucene.MESSAGE_FLD, Lucene.AUTHOR_FLD, Lucene.MAILING_LIST_FLD + }; + + private final SiteImpl site; + private final BooleanQuery query = new BooleanQuery(); + private Query textQuery = null; + private boolean isAuthenticated = false; + private final long nodeId; + private User currentUser; + private String userSearchId = null; + private Sort sort = null; + private Filter filter = null; + private Date from = null; + private Date to = null; + + public Builder(Node node) { + this(node.getSite(),node.getId()); + } + + public Builder(Site site,long nodeId) { + if( nodeId == 0L ) + throw new RuntimeException(); + this.site = (SiteImpl)site; + this.nodeId = nodeId; + Query query2 = new TermQuery(new Term(Lucene.ANCESTORS_FLD,Long.toString(nodeId))); + query.add(query2,BooleanClause.Occur.MUST); + } + + public void setCurrentUser(User user) { + this.isAuthenticated = true; + this.currentUser = user; + } + + private BooleanQuery getQuery() { + if( !isAuthenticated ) + return query; + if( currentUser!=null && currentUser.getSearchId().equals(userSearchId) ) + return query; + BooleanQuery q = new BooleanQuery(); + q.add(query, BooleanClause.Occur.MUST); + if( currentUser != null ) { + NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId); + q.add(new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD, Lucene.formatPrivateNode(node))), BooleanClause.Occur.MUST); + return q; + } + q.add(publicQuery, BooleanClause.Occur.MUST); + return q; + } + + public void addQuery(Query query2) { + query.add(query2,BooleanClause.Occur.MUST); + } + + public void addLine(String line) throws ParseException { + if( textQuery != null ) + throw new RuntimeException(); + textQuery = parse(line,nodeSearchFields); + if( textQuery != null ) + query.add(textQuery,BooleanClause.Occur.MUST); + } + + public void addUser(Person user) { + if( user==null ) + return; + addUser(user.getSearchId()); + } + + public void addUser(String userSearchId) { + this.userSearchId = userSearchId; + Query query2 = new TermQuery(new Term(Lucene.USER_ID_FLD,userSearchId)); + query.add(query2,BooleanClause.Occur.MUST); + } + + public void addUsers(List<? extends Person> visitors) { + if (visitors != null && visitors.size() > 0) { + BooleanQuery usersClause = new BooleanQuery(); + for (Person v : visitors) { + Query q = new TermQuery(new Term(Lucene.USER_ID_FLD,v.getSearchId())); + usersClause.add(q, BooleanClause.Occur.SHOULD); + } + query.add(usersClause, BooleanClause.Occur.MUST); + } + } + + void addExcludeUser(String userSearchId) { + BooleanClause excludeUserClause = new BooleanClause( + new TermQuery(new Term(Lucene.USER_ID_FLD, userSearchId)), + BooleanClause.Occur.MUST_NOT); + query.add(excludeUserClause); + } + + public void setUserSearchId(String userSearchId) { + this.userSearchId = userSearchId; + } + + private final static Query appQuery = + new ConstantScoreQuery( + new CachingWrapperFilter( + new QueryWrapperFilter( + new TermQuery(new Term(Lucene.KIND_FLD,Node.Kind.APP.toString())) + ) + ) + ) + ; + + public void addNodeKind(Node.Kind kind) { + query.add(appQuery, + kind==Node.Kind.APP?BooleanClause.Occur.MUST:BooleanClause.Occur.MUST_NOT); + } + + private final static Query publicQuery = + new ConstantScoreQuery( + new CachingWrapperFilter( + new QueryWrapperFilter( + new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD,"none")) + ) + ) + ) + ; + + public void excludePrivate() { + query.add(publicQuery,BooleanClause.Occur.MUST); + } + + public void setSort(Sort sort) { + this.sort = sort; + } + + public void setFilter(Filter filter) { + this.filter = filter; + } + + public void setDateRange(Date from, Date to) { + if( sort != SORT_BY_DATE ) + throw new UnsupportedOperationException(); + this.from = from; + this.to = to; + } + + public NodeSearcher build() { + return new NodeSearcher(this); + } + } + + private final SiteImpl site; + private final BooleanQuery query; + private final Query textQuery; + private final Sort sort; + private final Filter filter; + private final Date from; + private final Date to; + private Set<String> searchTerms = null; + private int totalHits = -1; + private final QueryScorer scorer; + + private NodeSearcher(Builder builder) { + this.site = builder.site; + this.query = builder.getQuery(); + this.textQuery = builder.textQuery; + this.sort = builder.sort; + this.filter = builder.filter; + this.from = builder.from; + this.to = builder.to; + this.scorer = new QueryScorer(query); + } + + public BooleanQuery getQuery() { + return query; + } + + static Query parse(String line, String[] fields) throws ParseException { + if( line == null || line.length() == 0 ) + return null; + line = line.replace('[','|').replace(']','|'); // hack - treat [] as punctuation + MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT,fields, Lucene.analyzer); + parser.setDefaultOperator(QueryParser.AND_OPERATOR); + return parser.parse(line); + } + + public String toString() { + return query.toString(); + } + + public Set<String> getSearchTerms() { + if( searchTerms==null ) { + searchTerms = new HashSet<String>(); + if( textQuery != null ) + searchTerms(searchTerms,textQuery); + } + return searchTerms; + } + + private static void searchTerms(Set<String> searchTerms,Query query) { + if( query instanceof BooleanQuery ) { + BooleanQuery q = (BooleanQuery)query; + BooleanClause[] clauses = q.getClauses(); + for (BooleanClause clause : clauses) { + if (!clause.isProhibited()) + searchTerms(searchTerms, clause.getQuery()); + } + } else if( query instanceof TermQuery ) { + TermQuery q = (TermQuery)query; + searchTerms.add( q.getTerm().text() ); + } else if( query instanceof PhraseQuery ) { + PhraseQuery q = (PhraseQuery)query; + Term[] terms = q.getTerms(); + for (Term term : terms) { + searchTerms.add(term.text()); + } + } + } + + public String highlight(String text,String pre,String post) { + try { + Highlighter hl = new Highlighter( new SimpleHTMLFormatter(pre,post), scorer ); + hl.setTextFragmenter( new NullFragmenter() ); + String s = hl.getBestFragment(Lucene.analyzer,null,text); + return s != null ? s : text; + } catch(IOException e) { + throw new RuntimeException(e); + } catch(InvalidTokenOffsetsException e) { + throw new RuntimeException(e); + } + } + + public static String getStartingFragment(String text,int size,String dotdotdot) { + if (text.length() <= size) return text; + int end = text.lastIndexOf(' ', size); + if (end < 0) end = size; + String fragment = text.substring(0, end); + if (dotdotdot != null && fragment.length() < text.length()) + fragment = fragment + dotdotdot; + return fragment; + } + + private static final Formatter nullFormatter = new Formatter() { + public String highlightTerm(String originalText,TokenGroup tokenGroup) { + return originalText; + } + }; + + public String getFragment(String text,int size,String dotdotdot) { + try { + Highlighter hl = new Highlighter(nullFormatter,scorer); + hl.setTextFragmenter( new SimpleSpanFragmenter(scorer,size) ); + String s = hl.getBestFragment(Lucene.analyzer,null,text); + if( s == null ) + s = getStartingFragment(text,size,dotdotdot); + if( dotdotdot != null && s.length() < text.length() ) { + boolean atStart = text.startsWith(s); + boolean atEnd = text.endsWith(s); + if( !atStart ) + s = dotdotdot + s; + if( !atEnd ) + s = s + dotdotdot; + } + return s; + } catch(IOException e) { + throw new RuntimeException(e); + } catch(InvalidTokenOffsetsException e) { + throw new RuntimeException(e); + } + } + + private static class DoneException extends RuntimeException {} + + public boolean hasNodes() { + try { + LuceneSearcher searcher = Lucene.newSearcher(site); + try { + try { + searcher.search( query, new HitCollector() { + protected void process(Document doc) { + throw new DoneException(); + } + } ); + return false; + } catch(DoneException e) { + return true; + } + } finally { + searcher.close(); + } + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + public interface Handler { + public void handle(long nodeId); + } + + public void forEach(final Handler h) { + try { + final LuceneSearcher searcher = Lucene.newSearcher(site); + try { + searcher.search( query, new HitCollector() { + protected void process(Document doc) { + h.handle( Lucene.getNodeId(doc) ); + } + } ); + } finally { + searcher.close(); + } + } catch(IOException e) { + throw new RuntimeException(e); + } + } + + public int getTotalHits() { + if( totalHits == -1 ) { + try { + LuceneSearcher searcher = Lucene.newSearcher(site); + try { + TopDocs hits = searcher.search(query, filter, 0); + totalHits = hits.totalHits; + } finally { + searcher.close(); + } + } catch (BooleanQuery.TooManyClauses e) { + throw new RuntimeException("Your search will give too many matches."); + } catch(IOException e) { + throw new RuntimeException(e); + } + } + return totalHits; + } + + public List<Node> getNodes(int i, int n) throws TooManyClauses { + try { + LuceneSearcher searcher = Lucene.newSearcher(site); + try { + TopDocs hits = sort==null ? searcher.search(query,filter,i+n) : searcher.search(query,filter,i+n,sort); + totalHits = hits.totalHits; + int lim = hits.scoreDocs.length; + if( lim <= i ) + return Collections.emptyList(); + List<Node> a = new ArrayList<Node>(); + for (int j=i; j<lim; j++) { + try { + int docId = hits.scoreDocs[j].doc; + Node node = Lucene.getNode(site, searcher, docId); + if (node != null) { + a.add(node); + } + } catch(IOException e) { + logger.error(e.toString()); + } + } + return a; + } finally { + searcher.close(); + } + } catch (BooleanQuery.TooManyClauses e) { + throw new TooManyClauses(e); + } catch (IOException e) { + throw new RuntimeException(e); + } + } + + public static final class TooManyClauses extends RuntimeException { + TooManyClauses(BooleanQuery.TooManyClauses e) { + super(e); + } + } + +}