Mercurial Hosting > nabble
view src/nabble/model/Lucene.java @ 47:72765b66e2c3
remove mailing list code
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 18 Jun 2021 17:44:24 -0600 |
parents | abe0694e9849 |
children |
line wrap: on
line source
/* Copyright (C) 2004 Franklin Schmidt <frank@gustos.com> */ package nabble.model; import fschmidt.db.Listener; import fschmidt.util.java.CollectionUtils; import fschmidt.util.mail.MailEncodingException; import nabble.model.lucene.HitCollector; import nabble.model.lucene.IndexCache; import nabble.model.lucene.LuceneSearcher; import nabble.view.lib.Permissions; import nabble.view.lib.help.Help; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.snowball.SnowballAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.NumericField; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanFilter; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.CachingWrapperFilter; import org.apache.lucene.search.Filter; import org.apache.lucene.search.FilterClause; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.NumericRangeFilter; import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; import org.apache.lucene.util.Version; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.IOException; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.ResultSet; import java.sql.SQLException; import java.sql.Statement; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Collection; import java.util.Date; import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; public final class Lucene { private static final Logger logger = LoggerFactory.getLogger(Lucene.class); public static interface DocumentListener { public void event(Node node,Document doc); } private static final int nodeIndexVersion = 3; private static final String NODE_ID_FLD = "nodeId"; static final String KIND_FLD = "kind"; static final String SUBJECT_FLD = "subject"; static final String MESSAGE_FLD = "message"; static final String ANCESTORS_FLD = "ancestors"; static final String PARENT_ID_FLD = "parentId"; static final String DATE_FLD = "date"; private static final String RANGE_SEARCH_DATE_FLD = "rangeSearchDate"; private static final String DAY_FLD = "day"; static final String USER_ID_FLD = "userId"; static final String AUTHOR_FLD = "author"; static final String PRIVATE_NODE_FLD = "privateNode"; static final Analyzer analyzer = new SnowballAnalyzer(Version.LUCENE_CURRENT,"English"); private static final List<DocumentListener> documentListeners = new ArrayList<DocumentListener>(); private Lucene() {} // never static LuceneSearcher newSearcher(Site site) throws IOException { return nodeIndex.openSearcher(site.getId()); } static long getNodeId(Document doc) { return Long.parseLong(doc.get(NODE_ID_FLD)); } static NodeImpl getNode(SiteImpl site, LuceneSearcher searcher, int docId) throws IOException { return getNode( site, searcher.doc(docId) ); } static NodeImpl getNode(SiteImpl site,Document doc) { long nodeId = getNodeId(doc); NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId); if( node==null ) { logger.error("missing node "+nodeId+", removing from lucene"); removeNode(site,nodeId); } return node; } private static void add(final Node node) { Document doc = document(node); try { IndexWriter indexWriter = nodeIndex.openIndexWriter(node.getSite().getId()); try { indexWriter.addDocument(doc); } finally { indexWriter.close(); } } catch(IOException e) { throw new RuntimeException(e); } } /* private static void removeSite(long siteId) { try { nodeIndex.delete(siteId); } catch(IOException e) { throw new RuntimeException(e); } } */ private static void removeNode(Site site,long nodeId) { Term term = new Term(NODE_ID_FLD,Long.toString(nodeId)); try { IndexWriter indexWriter = nodeIndex.openIndexWriter(site.getId()); try { indexWriter.deleteDocuments(term); } finally { indexWriter.close(); } } catch(IOException e) { throw new RuntimeException(e); } } public static void update(final Node node) { try { Document doc = document(node); if( doc==null ) { removeNode(node.getSite(),node.getId()); } else { IndexWriter indexWriter = nodeIndex.openIndexWriter(node.getSite().getId()); try { indexWriter.updateDocument( new Term(NODE_ID_FLD,doc.get(NODE_ID_FLD)), doc ); } finally { indexWriter.close(); } } } catch (IOException e) { throw new RuntimeException(e); } } static void updateNode(SiteImpl site,long nodeId) { Node node = NodeImpl.getNode(site.siteKey,nodeId); if( node == null ) { removeNode(site,nodeId); } else { update(node); } } static { /* SiteImpl.table.getPostDeleteListeners().add(new Listener<SiteImpl>(){ public void event(SiteImpl site) { removeSite(site.getId()); } }); */ NodeImpl.postDeleteListeners.add(new Listener<NodeImpl>(){ public void event(NodeImpl node) { // remove descendants Term term = new Term(ANCESTORS_FLD,Long.toString(node.getId())); try { IndexWriter indexWriter = nodeIndex.openIndexWriter(node.siteKey.getId()); try { indexWriter.deleteDocuments(term); } finally { indexWriter.close(); } } catch(IOException e) { throw new RuntimeException(e); } } }); NodeImpl.postInsertListeners.add(new Listener<NodeImpl>(){ public void event(final NodeImpl node) { node.siteKey.getDb().runAfterCommit(new Runnable(){public void run(){ try { add(node); } catch(MailEncodingException e) { logger.warn(node.toString(),e); } }}); } }); NodeImpl.preUpdateListeners.add(new Listener<NodeImpl>(){ public void event(NodeImpl node) { Set fields = node.getDbRecord().fields().keySet(); if( CollectionUtils.intersects(fields,nodeDbFields) ) { final long nodeId = node.getId(); final SiteKey siteKey = node.siteKey; siteKey.getDb().runAfterCommit(new Runnable() { public void run() { NodeImpl node = NodeImpl.getNode(siteKey,nodeId); if (node != null) update(node); } }); } } }); } static void staleNode(NodeImpl node) throws IOException { if( node==null ) return; logger.debug("staleNode update"); updateNodes( node.getSiteImpl(), descendants(node) ); logger.debug("staleNode done"); } static void nop() {} public static void addDocumentListener(DocumentListener documentListener) { documentListeners.add(documentListener); } static Document document(Node node) { Document doc = new Document(); doc.add( new Field(NODE_ID_FLD, Long.toString(node.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS) ); doc.add( new Field(KIND_FLD, node.getKind().toString(), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) ); String subject = node.getSubject(); Field subjectFld = new Field(SUBJECT_FLD, subject, Field.Store.NO, Field.Index.ANALYZED); subjectFld.setBoost(2.0f); doc.add(subjectFld); try { String message = MessageUtils.htmlToSearchText(node.getMessage().parse()); doc.add( new Field(MESSAGE_FLD, message, Field.Store.NO, Field.Index.ANALYZED) ); } catch(RuntimeException e) { logger.error("nodeId="+node.getId(),e); } for( Node f : node.getAncestors() ) { doc.add( new Field(ANCESTORS_FLD, Long.toString(f.getId()), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) ); } Node parent = node.getParent(); if (parent != null) doc.add(new Field(PARENT_ID_FLD, Long.toString(parent.getId()), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); int date = (int)(-node.getWhenCreated().getTime()/1000); doc.add( new NumericField(DATE_FLD).setIntValue(date) ); int rangeSearchDate = formatRangeSearchDate(node.getWhenCreated()); doc.add( new NumericField(RANGE_SEARCH_DATE_FLD).setIntValue(rangeSearchDate) ); String day = formatDay(node.getWhenCreated()); doc.add( new Field(DAY_FLD, day, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) ); Person owner = node.getOwner(); String userId = owner.getSearchId(); doc.add( new Field(USER_ID_FLD, userId, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS) ); String author = owner.getName(); doc.add( new Field(AUTHOR_FLD, author, Field.Store.NO, Field.Index.ANALYZED) ); doc.add( new Field(PRIVATE_NODE_FLD, formatPrivateNode(node), Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); for( DocumentListener documentListener : documentListeners ) { documentListener.event(node,doc); } return doc; } private static final String[] nodeDbFields = {"subject", "when_created", "msg_fmt", "parent_id", "is_app", "owner_id", "cookie", "anonymous_name"}; public static void updateRecursively(Node node) { update(node); for (Node n : node.getChildren()) { updateRecursively(n); } } // from SearchServer static NodeImpl node(SiteImpl site,Document doc) { long nodeId = getNodeId(doc); NodeImpl node = NodeImpl.getNode(site.siteKey,nodeId); if (node==null) logger.error("invalid node_id in lucene index: "+nodeId); return node; } private static final IndexCache.Builder<Long> builder = new IndexCache.Builder<Long>() { public void build(Long siteId) throws SQLException, IOException { SiteKey siteKey = SiteKey.getInstance(siteId); Connection con = siteKey.getDb().getConnection(); long[] nodeIds; { Statement stmt = con.createStatement(); ResultSet rs = stmt.executeQuery( "select count(*) as n from node" ); rs.next(); nodeIds = new long[rs.getInt("n")]; rs.close(); stmt.close(); } { PreparedStatement stmt = con.prepareStatement( "select node_id from node order by node_id limit ?" ); stmt.setInt(1,nodeIds.length); ResultSet rs = stmt.executeQuery(); for( int i=0; rs.next(); i++ ) { nodeIds[i] = rs.getLong("node_id"); } rs.close(); stmt.close(); } logger.error("Lucene started - site_id = " + siteId + " / " + nodeIds.length + " nodes"); IndexWriter indexWriter = nodeIndex.openIndexWriter(siteId); int count = 0; int lastPercent = 0; try { for( long nodeId : nodeIds ) { Node node = NodeImpl.getNode(siteKey,nodeId); if( node != null ) { Document doc = document(node); indexWriter.updateDocument( new Term(NODE_ID_FLD,doc.get(NODE_ID_FLD)), doc ); } count++; int percent = Math.round(100f * count / (float) nodeIds.length); if (percent > lastPercent) { logger.error("Lucene build " + percent + "% completed"); lastPercent = percent; } } } finally { indexWriter.close(); } con.close(); } public boolean exists(String keyString) { long id; try { id = Long.parseLong(keyString); } catch(NumberFormatException e) { return false; } return SiteKey.getInstance(id).siteGlobal() != null; } }; private static final IndexCache<Long> nodeIndex; static { logger.info("Starting search server"); Init.luceneStarted = true; String homeDir = (String)Init.get("home_dir"); String luceneDir = homeDir + "local/lucene/"; File dirFile = new File(luceneDir); nodeIndex = new IndexCache<Long>(dirFile,analyzer,nodeIndexVersion,builder); } private static void updateNodes(final SiteImpl site,Query query) { try { final LuceneSearcher searcher = newSearcher(site); try { searcher.search(query,new HitCollector() { protected void process(Document doc) { Node node = getNode(site,doc); if( node != null ) update(node); } }); } finally { searcher.close(); } } catch(IOException e) { throw new RuntimeException(e); } } public static boolean isReady(Site site) { return nodeIndex.isReady(site.getId()); } public static void rebuild(Site site) throws IOException { nodeIndex.rebuild(site.getId()); } static synchronized void shutdown() { nodeIndex.shutdown(); } private static final long tenMinutes = 1000L*60*10; static int formatRangeSearchDate(Date date) { return (int)(date.getTime()/tenMinutes); } private static final DateFormat dayFormat = new SimpleDateFormat("yyyyMMdd"); static String formatDay(Date date) { synchronized(dayFormat) { return dayFormat.format(date); } } static String formatPrivateNode(Node node) { Node privateNode = Permissions.getPrivateNodeForSearch(node); return privateNode==null ? "none" : Long.toString(privateNode.getId()); } public static Filter and(Filter f1,Filter f2) { BooleanFilter f = new BooleanFilter(); f.add(new FilterClause(f1,BooleanClause.Occur.MUST)); f.add(new FilterClause(f2,BooleanClause.Occur.MUST)); return f; } public static Filter getRangeFilter(Date from, Date to) { Integer lowerDateTerm = (from==null)?null:formatRangeSearchDate(from); Integer upperDateTerm = (to==null)?null:formatRangeSearchDate(to); return NumericRangeFilter.newIntRange(RANGE_SEARCH_DATE_FLD, lowerDateTerm, upperDateTerm, true,true); } private static final int maxCachedFilters = Init.get("maxCachedFilters", 20); private static Map<Filter,CachingWrapperFilter> filterCache = new LinkedHashMap<Filter,CachingWrapperFilter>() { protected boolean removeEldestEntry(Map.Entry eldest) { return size() > maxCachedFilters; } }; public static synchronized CachingWrapperFilter getCachedFilter(Filter filter) { CachingWrapperFilter f = filterCache.get(filter); if( f == null ) { f = new CachingWrapperFilter(filter); filterCache.put(filter,f); } return f; } static Query descendants(Node node) { return descendants(node.getId()); } private static Query descendants(long nodeId) { return new TermQuery(new Term(ANCESTORS_FLD,Long.toString(nodeId))); } static Query children(Node node) { return new TermQuery(new Term(PARENT_ID_FLD,Long.toString(node.getId()))); } static Query node(Node node) { return node(node.getId()); } static Query node(long nodeId) { return new TermQuery(new Term(NODE_ID_FLD,Long.toString(nodeId))); } static Query day(Date date) { return new TermQuery(new Term(DAY_FLD,formatDay(date))); } private static final Directory helpDir = new RAMDirectory(); private static IndexReader helpIndexReader; private static final String[] helpSearchFields = new String[] { "answer", "question" }; public static Help[] searchHelp(String line) throws ParseException { try { Query query = NodeSearcher.parse(line,helpSearchFields); Searcher searcher = new IndexSearcher(helpIndexReader); try { TopDocs hits = searcher.search(query,helpIndexReader.numDocs()); Help[] helps = new Help[hits.scoreDocs.length]; for( int i=0; i<helps.length; i++ ) { helps[i] = Help.getHelp(Integer.parseInt(searcher.doc(hits.scoreDocs[i].doc).get("id"))); } return helps; } catch (BooleanQuery.TooManyClauses e) { throw new RuntimeException("Your search will give too many matches."); } finally { searcher.close(); } } catch (IOException e) { throw new RuntimeException(e); } } public static void addHelp(final Collection<Help> helps) { try { IndexWriter writer = new IndexWriter(helpDir,analyzer,true,IndexWriter.MaxFieldLength.LIMITED); for( Help help : helps ) { writer.addDocument(document(help)); } writer.close(); helpIndexReader = IndexReader.open(helpDir,true); } catch (IOException e) { throw new RuntimeException(e); } } private static Document document(Help help) { Document doc = new Document(); String id = Integer.toString(help.id); doc.add( new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS)); Field answer = new Field("answer", help.answer(), Field.Store.NO, Field.Index.ANALYZED); doc.add(answer); Field question = new Field("question", help.question, Field.Store.NO, Field.Index.ANALYZED); doc.add(question); return doc; } }