diff src/nabble/model/NodeSearcher.java @ 0:7ecd1a4ef557

add content
author Franklin Schmidt <fschmidt@gmail.com>
date Thu, 21 Mar 2019 19:15:52 -0600
parents
children 72765b66e2c3
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/nabble/model/NodeSearcher.java	Thu Mar 21 19:15:52 2019 -0600
@@ -0,0 +1,423 @@
+package nabble.model;
+
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.snowball.SnowballAnalyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.NumberTools;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.queryParser.MultiFieldQueryParser;
+import org.apache.lucene.queryParser.ParseException;
+import org.apache.lucene.queryParser.QueryParser;
+import org.apache.lucene.search.BooleanClause;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.CachingWrapperFilter;
+import org.apache.lucene.search.ConstantScoreQuery;
+import org.apache.lucene.search.Filter;
+import nabble.model.lucene.HitCollector;
+import nabble.model.lucene.LuceneSearcher;
+import org.apache.lucene.search.PhraseQuery;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.QueryWrapperFilter;
+import org.apache.lucene.search.Sort;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.TermQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.highlight.Formatter;
+import org.apache.lucene.search.highlight.Highlighter;
+import org.apache.lucene.search.highlight.NullFragmenter;
+import org.apache.lucene.search.highlight.QueryScorer;
+import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
+import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
+import org.apache.lucene.search.highlight.TokenGroup;
+import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
+import org.apache.lucene.util.Version;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.Date;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+
+public final class NodeSearcher {
+	private static final Logger logger = LoggerFactory.getLogger(NodeSearcher.class);
+
+	public static final Sort SORT_BY_DATE = new Sort(new SortField(Lucene.DATE_FLD, SortField.INT));
+
+	/**
+	 * Mutable builder that collects the Lucene clauses and options for a
+	 * {@link NodeSearcher}. The constructor always adds a required term on
+	 * {@link Lucene#ANCESTORS_FLD} for the root node id; every other method
+	 * adds a further clause or records an option.
+	 */
+	public static class Builder {
+		/** Fields that the text passed to {@link #addLine} is parsed against. */
+		private static final String[] nodeSearchFields = new String[]{
+			Lucene.SUBJECT_FLD, Lucene.MESSAGE_FLD, Lucene.AUTHOR_FLD, Lucene.MAILING_LIST_FLD
+		};
+
+		/** Cached constant-score query matching documents whose kind field is APP. */
+		private static final Query appQuery =
+			new ConstantScoreQuery(
+				new CachingWrapperFilter(
+					new QueryWrapperFilter(
+						new TermQuery(new Term(Lucene.KIND_FLD,Node.Kind.APP.toString()))
+					)
+				)
+			)
+		;
+
+		/** Cached constant-score query matching documents whose private-node field is "none". */
+		private static final Query publicQuery =
+			new ConstantScoreQuery(
+				new CachingWrapperFilter(
+					new QueryWrapperFilter(
+						new TermQuery(new Term(Lucene.PRIVATE_NODE_FLD,"none"))
+					)
+				)
+			)
+		;
+
+		private final SiteImpl site;
+		/** Clauses accumulated so far; all of them are combined into one boolean query. */
+		private final BooleanQuery query = new BooleanQuery();
+		/** Parsed free-text query; set at most once by {@link #addLine}. */
+		private Query textQuery = null;
+		/** True once {@link #setCurrentUser} has been called. */
+		private boolean isAuthenticated = false;
+		private final long nodeId;
+		private User currentUser;
+		private String userSearchId = null;
+		private Sort sort = null;
+		private Filter filter = null;
+		private Date from = null;
+		private Date to = null;
+
+		/**
+		 * Creates a builder rooted at the given node.
+		 *
+		 * @param node root node; its site and id are used
+		 */
+		public Builder(Node node) {
+			this(node.getSite(),node.getId());
+		}
+
+		/**
+		 * Creates a builder rooted at the node with the given id.
+		 *
+		 * @param site   site to search; cast to {@link SiteImpl}
+		 * @param nodeId id of the root node, must not be 0
+		 * @throws IllegalArgumentException if {@code nodeId} is 0
+		 */
+		public Builder(Site site,long nodeId) {
+			if( nodeId == 0L )
+				throw new IllegalArgumentException("nodeId must not be 0");
+			this.site = (SiteImpl)site;
+			this.nodeId = nodeId;
+			Query ancestorQuery = new TermQuery(new Term(Lucene.ANCESTORS_FLD,Long.toString(nodeId)));
+			query.add(ancestorQuery,BooleanClause.Occur.MUST);
+		}
+
+		/**
+		 * Records the user on whose behalf the search runs and switches on the
+		 * visibility handling in {@link #getQuery()}.
+		 *
+		 * @param user current user; null is accepted and handled by a separate
+		 *             branch of {@link #getQuery()}
+		 */
+		public void setCurrentUser(User user) {
+			this.isAuthenticated = true;
+			this.currentUser = user;
+		}
+
+		/**
+		 * Returns the query to execute. Without a prior {@link #setCurrentUser}
+		 * call, or when the current user's search id equals the user search id
+		 * set on this builder, the accumulated query is returned as is.
+		 * Otherwise it is wrapped: a non-null user adds a required term on the
+		 * private-node field computed from the root node, and a null user adds
+		 * the required "public" query.
+		 */
+		private BooleanQuery getQuery() {
+			if( !isAuthenticated )
+				return query;
+			if( currentUser!=null && currentUser.getSearchId().equals(userSearchId) )
+				return query;
+			BooleanQuery restricted = new BooleanQuery();
+			restricted.add(query, BooleanClause.Occur.MUST);
+			if( currentUser == null ) {
+				restricted.add(publicQuery, BooleanClause.Occur.MUST);
+				return restricted;
+			}
+			NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId);
+			Term privateTerm = new Term(Lucene.PRIVATE_NODE_FLD, Lucene.formatPrivateNode(node));
+			restricted.add(new TermQuery(privateTerm), BooleanClause.Occur.MUST);
+			return restricted;
+		}
+
+		/** Adds an arbitrary query as a required clause. */
+		public void addQuery(Query query2) {
+			query.add(query2,BooleanClause.Occur.MUST);
+		}
+
+		/**
+		 * Parses a free-text search line and adds it as a required clause.
+		 * A null or empty line parses to nothing and adds no clause.
+		 *
+		 * @param line user-entered search text
+		 * @throws ParseException        if the line cannot be parsed
+		 * @throws IllegalStateException if a text query was already set
+		 */
+		public void addLine(String line) throws ParseException {
+			if( textQuery != null )
+				throw new IllegalStateException("a text query has already been added");
+			textQuery = parse(line,nodeSearchFields);
+			if( textQuery != null )
+				query.add(textQuery,BooleanClause.Occur.MUST);
+		}
+
+		/** Restricts the search to the given person; a null person is ignored. */
+		public void addUser(Person user) {
+			if( user==null )
+				return;
+			addUser(user.getSearchId());
+		}
+
+		/**
+		 * Restricts the search to the given user search id and remembers it
+		 * for the comparison made in {@link #getQuery()}.
+		 */
+		public void addUser(String userSearchId) {
+			this.userSearchId = userSearchId;
+			Query userQuery = new TermQuery(new Term(Lucene.USER_ID_FLD,userSearchId));
+			query.add(userQuery,BooleanClause.Occur.MUST);
+		}
+
+		/**
+		 * Restricts the search to documents by any of the given persons.
+		 * A null or empty list adds no clause.
+		 */
+		public void addUsers(List<? extends Person> visitors) {
+			if( visitors == null || visitors.isEmpty() )
+				return;
+			BooleanQuery usersClause = new BooleanQuery();
+			for (Person v : visitors) {
+				Query q = new TermQuery(new Term(Lucene.USER_ID_FLD,v.getSearchId()));
+				usersClause.add(q, BooleanClause.Occur.SHOULD);
+			}
+			query.add(usersClause, BooleanClause.Occur.MUST);
+		}
+
+		/** Excludes documents whose user id field equals the given search id. */
+		void addExcludeUser(String userSearchId) {
+			BooleanClause excludeUserClause = new BooleanClause(
+					new TermQuery(new Term(Lucene.USER_ID_FLD, userSearchId)),
+					BooleanClause.Occur.MUST_NOT);
+			query.add(excludeUserClause);
+		}
+
+		/** Sets the user search id compared in {@link #getQuery()} without adding a clause. */
+		public void setUserSearchId(String userSearchId) {
+			this.userSearchId = userSearchId;
+		}
+
+		/**
+		 * Requires APP documents when {@code kind} is {@code Node.Kind.APP};
+		 * for any other kind, APP documents are excluded instead.
+		 */
+		public void addNodeKind(Node.Kind kind) {
+			BooleanClause.Occur occur =
+				kind==Node.Kind.APP ? BooleanClause.Occur.MUST : BooleanClause.Occur.MUST_NOT;
+			query.add(appQuery,occur);
+		}
+
+		/** Requires the "public" query, i.e. a private-node field equal to "none". */
+		public void excludePrivate() {
+			query.add(publicQuery,BooleanClause.Occur.MUST);
+		}
+
+		public void setSort(Sort sort) {
+			this.sort = sort;
+		}
+
+		public void setFilter(Filter filter) {
+			this.filter = filter;
+		}
+
+		/**
+		 * Records a date range. The sort must already have been set to
+		 * {@link NodeSearcher#SORT_BY_DATE} (identity comparison).
+		 *
+		 * @throws UnsupportedOperationException if the current sort is not SORT_BY_DATE
+		 */
+		public void setDateRange(Date from, Date to) {
+			if( sort != SORT_BY_DATE )
+				throw new UnsupportedOperationException("setDateRange requires setSort(SORT_BY_DATE) to be called first");
+			this.from = from;
+			this.to = to;
+		}
+
+		/** Creates the searcher from the current state of this builder. */
+		public NodeSearcher build() {
+			return new NodeSearcher(this);
+		}
+	}
+
+	/** Site whose index is searched. */
+	private final SiteImpl site;
+	/** Complete query that is executed, as returned by the builder. */
+	private final BooleanQuery query;
+	/** Free-text part of the query, or null when no text line was added. */
+	private final Query textQuery;
+	/** Optional sort; null selects the searcher's unsorted search call. */
+	private final Sort sort;
+	/** Optional filter handed to the filtered search calls. */
+	private final Filter filter;
+	/** Date range copied from the builder. */
+	private final Date from;
+	private final Date to;
+	/** Lazily computed by getSearchTerms(); null until first requested. */
+	private Set<String> searchTerms = null;
+	/** Cached hit count; -1 means not yet known. */
+	private int totalHits = -1;
+	/** Scorer over the complete query, used for highlighting and fragments. */
+	private final QueryScorer scorer;
+
+	/**
+	 * Copies the builder's settings; the effective query is taken from the
+	 * builder's getQuery() and also drives the highlight scorer.
+	 */
+	private NodeSearcher(Builder builder) {
+		this.site = builder.site;
+		BooleanQuery effectiveQuery = builder.getQuery();
+		this.query = effectiveQuery;
+		this.textQuery = builder.textQuery;
+		this.sort = builder.sort;
+		this.filter = builder.filter;
+		this.from = builder.from;
+		this.to = builder.to;
+		this.scorer = new QueryScorer(effectiveQuery);
+	}
+
+	/** @return the complete boolean query this searcher executes (not a copy) */
+	public BooleanQuery getQuery() {
+		return this.query;
+	}
+	
+	/**
+	 * Parses a search line against several fields with AND as the default operator.
+	 *
+	 * @param line   search text; null or empty yields null
+	 * @param fields field names to search
+	 * @return the parsed query, or null when there is nothing to parse
+	 * @throws ParseException if the query parser rejects the line
+	 */
+	static Query parse(String line, String[] fields) throws ParseException {
+		boolean nothingToParse = line == null || line.length() == 0;
+		if( nothingToParse )
+			return null;
+		// hack - treat [] as punctuation
+		String cleaned = line.replace('[','|').replace(']','|');
+		MultiFieldQueryParser queryParser =
+			new MultiFieldQueryParser(Version.LUCENE_CURRENT,fields, Lucene.analyzer);
+		queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
+		return queryParser.parse(cleaned);
+	}
+	
+	/** @return the string form of the complete query */
+	@Override
+	public String toString() {
+		return this.query.toString();
+	}
+
+	/**
+	 * Returns the term texts of the free-text query, computed on first call
+	 * and cached afterwards. The set is empty when no text query exists.
+	 */
+	public Set<String> getSearchTerms() {
+		if( searchTerms != null )
+			return searchTerms;
+		searchTerms = new HashSet<String>();
+		if( textQuery == null )
+			return searchTerms;
+		searchTerms(searchTerms,textQuery);
+		return searchTerms;
+	}
+
+	/**
+	 * Recursively collects term texts from a query into the given set.
+	 * Term and phrase queries contribute their terms; boolean queries are
+	 * descended into, skipping prohibited clauses; other query types are ignored.
+	 */
+	private static void searchTerms(Set<String> searchTerms,Query query) {
+		if( query instanceof TermQuery ) {
+			Term single = ((TermQuery)query).getTerm();
+			searchTerms.add( single.text() );
+			return;
+		}
+		if( query instanceof PhraseQuery ) {
+			for (Term part : ((PhraseQuery)query).getTerms()) {
+				searchTerms.add(part.text());
+			}
+			return;
+		}
+		if( query instanceof BooleanQuery ) {
+			for (BooleanClause clause : ((BooleanQuery)query).getClauses()) {
+				if (clause.isProhibited())
+					continue;
+				searchTerms(searchTerms, clause.getQuery());
+			}
+		}
+	}
+
+	/**
+	 * Wraps the query matches in the whole text with the given markers.
+	 *
+	 * @param text text to highlight
+	 * @param pre  marker inserted before each match
+	 * @param post marker inserted after each match
+	 * @return the highlighted text, or the original text when the highlighter yields null
+	 */
+	public String highlight(String text,String pre,String post) {
+		Highlighter highlighter = new Highlighter( new SimpleHTMLFormatter(pre,post), scorer );
+		// NullFragmenter: keep the text as a single fragment
+		highlighter.setTextFragmenter( new NullFragmenter() );
+		String marked;
+		try {
+			marked = highlighter.getBestFragment(Lucene.analyzer,null,text);
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		} catch(InvalidTokenOffsetsException e) {
+			throw new RuntimeException(e);
+		}
+		if( marked == null )
+			return text;
+		return marked;
+	}
+
+	/**
+	 * Returns the beginning of a text, cut to at most {@code size} characters.
+	 * Text that already fits is returned unchanged. Otherwise the cut is made
+	 * at the last space at or before index {@code size}; if there is no such
+	 * space, or the only one is at index 0 (which would leave nothing but the
+	 * marker), the text is cut hard at {@code size}.
+	 *
+	 * @param text      text to shorten, must not be null
+	 * @param size      maximum fragment length; a negative value makes
+	 *                  {@code substring} throw for any text
+	 * @param dotdotdot marker appended to a shortened fragment, or null for none
+	 * @return the whole text, or its shortened start followed by the marker
+	 */
+	public static String getStartingFragment(String text,int size,String dotdotdot) {
+		if (text.length() <= size)
+			return text;
+		int end = text.lastIndexOf(' ', size);
+		// end == 0 means the text starts with a space and has no later one
+		// in range; cutting there would produce an empty fragment
+		if (end <= 0)
+			end = size;
+		String fragment = text.substring(0, end);
+		// here end <= size < text.length(), so the fragment is always shorter than the text
+		return dotdotdot == null ? fragment : fragment + dotdotdot;
+	}
+
+	/** Formatter that leaves matched terms unmarked by returning the text as is. */
+	private static final Formatter nullFormatter = new Formatter() {
+		public String highlightTerm(String unchangedText,TokenGroup group) {
+			return unchangedText;
+		}
+	};
+
+	/**
+	 * Returns the fragment of the text that best matches the query, without
+	 * highlight markup. When the highlighter finds no fragment, the start of
+	 * the text is returned instead.
+	 *
+	 * @param text      text to take the fragment from
+	 * @param size      fragment size passed to the span fragmenter and to the fallback
+	 * @param dotdotdot marker added where the fragment does not reach the
+	 *                  start or end of the text, or null for none
+	 * @return the fragment, with markers on the truncated sides
+	 */
+	public String getFragment(String text,int size,String dotdotdot) {
+		try {
+			Highlighter hl = new Highlighter(nullFormatter,scorer);
+			hl.setTextFragmenter( new SimpleSpanFragmenter(scorer,size) );
+			String s = hl.getBestFragment(Lucene.analyzer,null,text);
+			if( s == null ) {
+				// The fallback already appends the trailing marker itself.
+				// Running it through the start/end check below would add a
+				// bogus leading marker and a second trailing one.
+				return getStartingFragment(text,size,dotdotdot);
+			}
+			if( dotdotdot != null && s.length() < text.length() ) {
+				// evaluate both checks before s is modified
+				boolean atStart = text.startsWith(s);
+				boolean atEnd = text.endsWith(s);
+				if( !atStart )
+					s = dotdotdot + s;
+				if( !atEnd )
+					s = s + dotdotdot;
+			}
+			return s;
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		} catch(InvalidTokenOffsetsException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	private static class DoneException extends RuntimeException {}
+
+	public boolean hasNodes() {
+		try {
+			LuceneSearcher searcher = Lucene.newSearcher(site);
+			try {
+				try {
+					searcher.search( query, new HitCollector() {
+						protected void process(Document doc) {
+							throw new DoneException();
+						}
+					} );
+					return false;
+				} catch(DoneException e) {
+					return true;
+				}
+			} finally {
+				searcher.close();
+			}
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/** Callback receiving the node id of each document visited by forEach. */
+	public interface Handler {
+		/** @param nodeId node id extracted from the matched document */
+		void handle(long nodeId);
+	}
+
+	public void forEach(final Handler h) {
+		try {
+			final LuceneSearcher searcher = Lucene.newSearcher(site);
+			try {
+				searcher.search( query, new HitCollector() {
+					protected void process(Document doc) {
+						h.handle( Lucene.getNodeId(doc) );
+					}
+				} );
+			} finally {
+				searcher.close();
+			}
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/**
+	 * Returns the total number of hits for the query and filter. The value
+	 * is cached: it is computed here on first use, or taken from an earlier
+	 * getNodes call, which also stores it.
+	 *
+	 * @return total hit count
+	 * @throws RuntimeException if the query expands to too many clauses
+	 *         (the Lucene exception is kept as the cause) or on an I/O error
+	 */
+	public int getTotalHits() {
+		if( totalHits != -1 )
+			return totalHits;
+		try {
+			LuceneSearcher searcher = Lucene.newSearcher(site);
+			try {
+				TopDocs hits = searcher.search(query, filter, 0);
+				totalHits = hits.totalHits;
+			} finally {
+				searcher.close();
+			}
+		} catch (BooleanQuery.TooManyClauses e) {
+			// keep the original exception as cause so the failure stays diagnosable
+			throw new RuntimeException("Your search will give too many matches.", e);
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+		}
+		return totalHits;
+	}
+
+	/**
+	 * Returns one page of results and stores the total hit count as a side effect.
+	 *
+	 * @param i index of the first hit to return
+	 * @param n maximum number of hits to return
+	 * @return the nodes for hits {@code i} up to {@code i+n}; empty when fewer
+	 *         than {@code i+1} hits exist. Hits whose node is null or fails to
+	 *         load with an I/O error are skipped, so the list may be shorter than {@code n}.
+	 * @throws TooManyClauses   if the query expands to too many clauses
+	 * @throws RuntimeException on an I/O error while searching
+	 */
+	public List<Node> getNodes(int i, int n) throws TooManyClauses {
+		try {
+			LuceneSearcher searcher = Lucene.newSearcher(site);
+			try {
+				TopDocs hits = sort==null ? searcher.search(query,filter,i+n) : searcher.search(query,filter,i+n,sort);
+				totalHits = hits.totalHits;
+				int lim = hits.scoreDocs.length;
+				if( lim <= i )
+					return Collections.emptyList();
+				List<Node> a = new ArrayList<Node>(lim - i);
+				for (int j=i; j<lim; j++) {
+					int docId = hits.scoreDocs[j].doc;
+					try {
+						Node node = Lucene.getNode(site, searcher, docId);
+						if (node != null) {
+							a.add(node);
+						}
+					} catch(IOException e) {
+						// best effort: skip the unreadable hit, but keep the
+						// stack trace and the doc id in the log
+						logger.error("failed to load node for search hit, doc " + docId, e);
+					}
+				}
+				return a;
+			} finally {
+				searcher.close();
+			}
+		} catch (BooleanQuery.TooManyClauses e) {
+			throw new TooManyClauses(e);
+		} catch (IOException e) {
+			throw new RuntimeException(e);
+		}
+	}
+
+	/**
+	 * Unchecked wrapper thrown by getNodes when Lucene reports that the
+	 * query has too many clauses; the Lucene exception is the cause.
+	 */
+	public static final class TooManyClauses extends RuntimeException {
+		TooManyClauses(BooleanQuery.TooManyClauses cause) {
+			super(cause);
+		}
+	}
+
+}