Mercurial Hosting > nabble
view src/nabble/model/NodeSearcher.java @ 62:4674ed7d56df default tip
remove n2
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sat, 30 Sep 2023 20:25:29 -0600 |
parents | 72765b66e2c3 |
children |
line wrap: on
line source
package nabble.model;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.NumberTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.ConstantScoreQuery;
import org.apache.lucene.search.Filter;
import nabble.model.lucene.HitCollector;
import nabble.model.lucene.LuceneSearcher;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.NullFragmenter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.TokenGroup;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Set;


/**
 * Executes a Lucene query against the index of one site and exposes the
 * matching nodes, the hit count, and text highlighting helpers.
 *
 * Instances are created through {@link Builder}; the query is fixed once
 * {@link Builder#build()} has been called.
 */
public final class NodeSearcher {
	private static final Logger logger = LoggerFactory.getLogger(NodeSearcher.class);

	/** Sort on {@code Lucene.DATE_FLD}, interpreted as an int field. */
	public static final Sort SORT_BY_DATE = new Sort(new SortField(Lucene.DATE_FLD, SortField.INT));

	/**
	 * Accumulates the clauses and options of a search, then produces a
	 * {@link NodeSearcher}.  Every clause added through this builder is
	 * combined into one {@link BooleanQuery}.
	 */
	public static class Builder {
		// Fields consulted when parsing a free-text line in addLine().
		private static final String[] nodeSearchFields = new String[]{
			Lucene.SUBJECT_FLD, Lucene.MESSAGE_FLD, Lucene.AUTHOR_FLD
		};

		private final SiteImpl site;
		private final BooleanQuery query = new BooleanQuery();
		private Query textQuery = null;
		// Becomes true once setCurrentUser() has been called, even with a null user.
		private boolean isAuthenticated = false;
		private final long nodeId;
		private User currentUser;
		private String userSearchId = null;
		private Sort sort = null;
		private Filter filter = null;
		private Date from = null;
		private Date to = null;

		/**
		 * Starts a search restricted to the given node, using the node's own
		 * site and id.
		 */
		public Builder(Node node) {
			this(node.getSite(),node.getId());
		}

		/**
		 * Starts a search restricted to documents whose
		 * {@code Lucene.ANCESTORS_FLD} contains {@code nodeId}.
		 *
		 * @param site   the site to search; cast to {@code SiteImpl}
		 * @param nodeId id of the root node of the search; must not be 0
		 * @throws IllegalArgumentException if {@code nodeId} is 0
		 */
		public Builder(Site site,long nodeId) {
			if( nodeId == 0L )
				throw new IllegalArgumentException("nodeId must not be 0");
			this.site = (SiteImpl)site;
			this.nodeId = nodeId;
			Query query2 = new TermQuery(new Term(Lucene.ANCESTORS_FLD,Long.toString(nodeId)));
			query.add(query2,BooleanClause.Occur.MUST);
		}

		/**
		 * Records the user on whose behalf the search runs and turns on the
		 * private-node restriction applied when the query is built.
		 *
		 * @param user the current user; may be null, in which case the final
		 *             query is restricted to the public-node clause
		 */
		public void setCurrentUser(User user) {
			this.isAuthenticated = true;
			this.currentUser = user;
		}

		/**
		 * Produces the query handed to the {@link NodeSearcher}.
		 *
		 * If {@link #setCurrentUser} was never called, or the current user's
		 * search id equals the user search id set on this builder, the
		 * accumulated query is returned as is.  Otherwise it is wrapped in a
		 * new boolean query with one extra required clause: for a non-null
		 * user, a term on {@code Lucene.PRIVATE_NODE_FLD} formatted from the
		 * root node; for a null user, the public-node clause.
		 */
		private BooleanQuery getQuery() {
			if( !isAuthenticated )
				return query;
			if( currentUser!=null && currentUser.getSearchId().equals(userSearchId) )
				return query;
			BooleanQuery q = new BooleanQuery();
			q.add(query, BooleanClause.Occur.MUST);
			if( currentUser != null ) {
				NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId);
				q.add(new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD, Lucene.formatPrivateNode(node))), BooleanClause.Occur.MUST);
				return q;
			}
			q.add(publicQuery, BooleanClause.Occur.MUST);
			return q;
		}

		/** Adds {@code query2} as a required clause. */
		public void addQuery(Query query2) {
			query.add(query2,BooleanClause.Occur.MUST);
		}

		/**
		 * Parses a free-text search line against the subject, message and
		 * author fields and adds it as a required clause.  A null or empty
		 * line adds nothing.
		 *
		 * @throws ParseException        if the line cannot be parsed
		 * @throws IllegalStateException if a text query has already been set
		 */
		public void addLine(String line) throws ParseException {
			if( textQuery != null )
				throw new IllegalStateException("text query already set");
			textQuery = parse(line,nodeSearchFields);
			if( textQuery != null )
				query.add(textQuery,BooleanClause.Occur.MUST);
		}

		/** Restricts the search to the given user; a null user is ignored. */
		public void addUser(Person user) {
			if( user==null )
				return;
			addUser(user.getSearchId());
		}

		/**
		 * Restricts the search to documents whose {@code Lucene.USER_ID_FLD}
		 * equals {@code userSearchId}, and remembers that id for the
		 * comparison against the current user when the query is built.
		 */
		public void addUser(String userSearchId) {
			this.userSearchId = userSearchId;
			Query query2 = new TermQuery(new Term(Lucene.USER_ID_FLD,userSearchId));
			query.add(query2,BooleanClause.Occur.MUST);
		}

		/**
		 * Restricts the search to documents written by any of the given
		 * users.  A null or empty list adds nothing.
		 */
		public void addUsers(List<? extends Person> visitors) {
			if (visitors != null && visitors.size() > 0) {
				BooleanQuery usersClause = new BooleanQuery();
				for (Person v : visitors) {
					Query q = new TermQuery(new Term(Lucene.USER_ID_FLD,v.getSearchId()));
					usersClause.add(q, BooleanClause.Occur.SHOULD);
				}
				query.add(usersClause, BooleanClause.Occur.MUST);
			}
		}

		/** Excludes documents whose {@code Lucene.USER_ID_FLD} equals {@code userSearchId}. */
		void addExcludeUser(String userSearchId) {
			BooleanClause excludeUserClause = new BooleanClause(
				new TermQuery(new Term(Lucene.USER_ID_FLD, userSearchId)),
				BooleanClause.Occur.MUST_NOT);
			query.add(excludeUserClause);
		}

		/**
		 * Sets the user search id compared against the current user when the
		 * query is built, without adding any clause.
		 */
		public void setUserSearchId(String userSearchId) {
			this.userSearchId = userSearchId;
		}

		// Matches documents whose kind field is APP; wrapped in a caching filter.
		private final static Query appQuery =
			new ConstantScoreQuery(
				new CachingWrapperFilter(
					new QueryWrapperFilter(
						new TermQuery(new Term(Lucene.KIND_FLD,Node.Kind.APP.toString()))
					)
				)
			)
		;

		/**
		 * Requires APP nodes when {@code kind} is {@code Node.Kind.APP};
		 * for any other kind, excludes APP nodes.
		 */
		public void addNodeKind(Node.Kind kind) {
			query.add(appQuery, kind==Node.Kind.APP?BooleanClause.Occur.MUST:BooleanClause.Occur.MUST_NOT);
		}

		// Matches documents whose private-node field is "none"; wrapped in a caching filter.
		private final static Query publicQuery =
			new ConstantScoreQuery(
				new CachingWrapperFilter(
					new QueryWrapperFilter(
						new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD,"none"))
					)
				)
			)
		;

		/** Restricts the search to documents matching the public-node clause. */
		public void excludePrivate() {
			query.add(publicQuery,BooleanClause.Occur.MUST);
		}

		/** Sets the sort used by {@link NodeSearcher#getNodes}; null means the searcher's default ordering. */
		public void setSort(Sort sort) {
			this.sort = sort;
		}

		/** Sets the filter passed to the searcher by {@link NodeSearcher#getNodes} and {@link NodeSearcher#getTotalHits}. */
		public void setFilter(Filter filter) {
			this.filter = filter;
		}

		/**
		 * Stores a date range.  The sort must already have been set to
		 * {@link NodeSearcher#SORT_BY_DATE} (compared by identity).
		 *
		 * NOTE(review): the stored range is copied into the searcher but no
		 * method in this file reads it - confirm whether it is still needed.
		 *
		 * @throws UnsupportedOperationException if the sort is not {@code SORT_BY_DATE}
		 */
		public void setDateRange(Date from, Date to) {
			if( sort != SORT_BY_DATE )
				throw new UnsupportedOperationException();
			this.from = from;
			this.to = to;
		}

		/** Creates the searcher from the current state of this builder. */
		public NodeSearcher build() {
			return new NodeSearcher(this);
		}
	}

	private final SiteImpl site;
	private final BooleanQuery query;
	private final Query textQuery;
	private final Sort sort;
	private final Filter filter;
	private final Date from;
	private final Date to;
	// Lazily computed by getSearchTerms().
	private Set<String> searchTerms = null;
	// -1 until computed by getTotalHits() or getNodes().
	private int totalHits = -1;
	private final QueryScorer scorer;

	private NodeSearcher(Builder builder) {
		this.site = builder.site;
		this.query = builder.getQuery();
		this.textQuery = builder.textQuery;
		this.sort = builder.sort;
		this.filter = builder.filter;
		this.from = builder.from;
		this.to = builder.to;
		this.scorer = new QueryScorer(query);
	}

	/** Returns the full query this searcher runs. */
	public BooleanQuery getQuery() {
		return query;
	}

	/**
	 * Parses a search line into a query over the given fields, with AND as
	 * the default operator.
	 *
	 * @return the parsed query, or null if {@code line} is null or empty
	 * @throws ParseException if the query parser rejects the line
	 */
	static Query parse(String line, String[] fields) throws ParseException {
		if( line == null || line.length() == 0 )
			return null;
		line = line.replace('[','|').replace(']','|');  // hack - treat [] as punctuation
		MultiFieldQueryParser parser = new MultiFieldQueryParser(Version.LUCENE_CURRENT,fields, Lucene.analyzer);
		parser.setDefaultOperator(QueryParser.AND_OPERATOR);
		return parser.parse(line);
	}

	/** Returns the string form of the underlying query. */
	@Override
	public String toString() {
		return query.toString();
	}

	/**
	 * Returns the term texts found in the free-text part of the query.
	 * The set is computed on first use and is empty when no text query
	 * was given.
	 */
	public Set<String> getSearchTerms() {
		if( searchTerms==null ) {
			searchTerms = new HashSet<String>();
			if( textQuery != null )
				searchTerms(searchTerms,textQuery);
		}
		return searchTerms;
	}

	/**
	 * Collects term texts from boolean, term and phrase queries into
	 * {@code searchTerms}.  Prohibited boolean clauses are skipped and
	 * other query types are ignored.
	 */
	private static void searchTerms(Set<String> searchTerms,Query query) {
		if( query instanceof BooleanQuery ) {
			BooleanQuery q = (BooleanQuery)query;
			BooleanClause[] clauses = q.getClauses();
			for (BooleanClause clause : clauses) {
				if (!clause.isProhibited())
					searchTerms(searchTerms, clause.getQuery());
			}
		} else if( query instanceof TermQuery ) {
			TermQuery q = (TermQuery)query;
			searchTerms.add( q.getTerm().text() );
		} else if( query instanceof PhraseQuery ) {
			PhraseQuery q = (PhraseQuery)query;
			Term[] terms = q.getTerms();
			for (Term term : terms) {
				searchTerms.add(term.text());
			}
		}
	}

	/**
	 * Highlights query matches in the whole of {@code text}, surrounding
	 * each highlighted term with {@code pre} and {@code post}.
	 *
	 * @return the highlighted text, or {@code text} unchanged when the
	 *         highlighter returns nothing
	 */
	public String highlight(String text,String pre,String post) {
		try {
			Highlighter hl = new Highlighter( new SimpleHTMLFormatter(pre,post), scorer );
			// NullFragmenter keeps the text as a single fragment.
			hl.setTextFragmenter( new NullFragmenter() );
			String s = hl.getBestFragment(Lucene.analyzer,null,text);
			return s != null ? s : text;
		} catch(IOException e) {
			throw new RuntimeException(e);
		} catch(InvalidTokenOffsetsException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Returns the beginning of {@code text}, cut at the last space at or
	 * before index {@code size} (or at {@code size} when there is no such
	 * space).  Text of at most {@code size} characters is returned whole.
	 *
	 * @param dotdotdot suffix appended to a truncated result; may be null
	 */
	public static String getStartingFragment(String text,int size,String dotdotdot) {
		if (text.length() <= size)
			return text;
		int end = text.lastIndexOf(' ', size);
		if (end < 0)
			end = size;
		String fragment = text.substring(0, end);
		if (dotdotdot != null && fragment.length() < text.length())
			fragment = fragment + dotdotdot;
		return fragment;
	}

	// Formatter that leaves matched terms untouched; used to extract a fragment without markup.
	private static final Formatter nullFormatter = new Formatter() {
		public String highlightTerm(String originalText,TokenGroup tokenGroup) {
			return originalText;
		}
	};

	/**
	 * Returns the fragment of {@code text} that best matches the query,
	 * falling back to the start of the text when the highlighter finds
	 * nothing.  {@code size} is passed to the fragmenter as the fragment
	 * size.
	 *
	 * @param dotdotdot marker added before the fragment when it does not
	 *                  start the text and after it when it does not end
	 *                  the text; may be null for no marker
	 */
	public String getFragment(String text,int size,String dotdotdot) {
		try {
			Highlighter hl = new Highlighter(nullFormatter,scorer);
			hl.setTextFragmenter( new SimpleSpanFragmenter(scorer,size) );
			String s = hl.getBestFragment(Lucene.analyzer,null,text);
			// Fall back without a marker: the marker is added once, below.
			// Passing dotdotdot here made the startsWith/endsWith checks fail
			// and produced a leading marker plus a doubled trailing one.
			if( s == null )
				s = getStartingFragment(text,size,null);
			if( dotdotdot != null && s.length() < text.length() ) {
				boolean atStart = text.startsWith(s);
				boolean atEnd = text.endsWith(s);
				if( !atStart )
					s = dotdotdot + s;
				if( !atEnd )
					s = s + dotdotdot;
			}
			return s;
		} catch(IOException e) {
			throw new RuntimeException(e);
		} catch(InvalidTokenOffsetsException e) {
			throw new RuntimeException(e);
		}
	}

	// Thrown from the collector in hasNodes() to abort the search at the first hit.
	private static class DoneException extends RuntimeException {}

	/**
	 * Reports whether the query matches at least one document.  The search
	 * is aborted as soon as the first hit is processed.  The filter set on
	 * the builder is not applied by this method.
	 */
	public boolean hasNodes() {
		try {
			LuceneSearcher searcher = Lucene.newSearcher(site);
			try {
				try {
					searcher.search( query, new HitCollector() {
						protected void process(Document doc) {
							throw new DoneException();
						}
					} );
					return false;
				} catch(DoneException e) {
					return true;
				}
			} finally {
				searcher.close();
			}
		} catch(IOException e) {
			throw new RuntimeException(e);
		}
	}

	/** Callback receiving the node id of each hit in {@link #forEach}. */
	public interface Handler {
		public void handle(long nodeId);
	}

	/**
	 * Calls {@code h} with the node id of every document matching the
	 * query.  The filter and sort set on the builder are not applied by
	 * this method.
	 */
	public void forEach(final Handler h) {
		try {
			final LuceneSearcher searcher = Lucene.newSearcher(site);
			try {
				searcher.search( query, new HitCollector() {
					protected void process(Document doc) {
						h.handle( Lucene.getNodeId(doc) );
					}
				} );
			} finally {
				searcher.close();
			}
		} catch(IOException e) {
			throw new RuntimeException(e);
		}
	}

	/**
	 * Returns the number of hits for the query and filter.  The value is
	 * computed on first use unless {@link #getNodes} has already set it.
	 *
	 * @throws RuntimeException if the query expands to too many clauses
	 *                          or the index cannot be read
	 */
	public int getTotalHits() {
		if( totalHits == -1 ) {
			try {
				LuceneSearcher searcher = Lucene.newSearcher(site);
				try {
					TopDocs hits = searcher.search(query, filter, 0);
					totalHits = hits.totalHits;
				} finally {
					searcher.close();
				}
			} catch (BooleanQuery.TooManyClauses e) {
				// Keep the original exception as the cause for diagnosis.
				throw new RuntimeException("Your search will give too many matches.", e);
			} catch(IOException e) {
				throw new RuntimeException(e);
			}
		}
		return totalHits;
	}

	/**
	 * Returns up to {@code n} matching nodes starting at hit index
	 * {@code i}, and records the total hit count as a side effect.
	 * Hits whose node cannot be loaded are logged and skipped; hits that
	 * resolve to a null node are skipped silently.
	 *
	 * @param i zero-based index of the first hit to return
	 * @param n maximum number of nodes to return
	 * @return the nodes found; empty when there are no hits beyond {@code i}
	 * @throws TooManyClauses if the query expands to too many clauses
	 */
	public List<Node> getNodes(int i, int n) throws TooManyClauses {
		try {
			LuceneSearcher searcher = Lucene.newSearcher(site);
			try {
				TopDocs hits = sort==null ? searcher.search(query,filter,i+n) : searcher.search(query,filter,i+n,sort);
				totalHits = hits.totalHits;
				int lim = hits.scoreDocs.length;
				if( lim <= i )
					return Collections.emptyList();
				List<Node> a = new ArrayList<Node>();
				for (int j=i; j<lim; j++) {
					try {
						int docId = hits.scoreDocs[j].doc;
						Node node = Lucene.getNode(site, searcher, docId);
						if (node != null) {
							a.add(node);
						}
					} catch(IOException e) {
						// Best effort: skip this hit, but keep the stack trace in the log.
						logger.error(e.toString(), e);
					}
				}
				return a;
			} finally {
				searcher.close();
			}
		} catch (BooleanQuery.TooManyClauses e) {
			throw new TooManyClauses(e);
		} catch (IOException e) {
			throw new RuntimeException(e);
		}
	}

	/** Unchecked wrapper for Lucene's {@code BooleanQuery.TooManyClauses}, thrown by {@link #getNodes}. */
	public static final class TooManyClauses extends RuntimeException {
		TooManyClauses(BooleanQuery.TooManyClauses e) {
			super(e);
		}
	}

}