diff src/goodjava/lucene/logging/LoggingIndexWriter.java @ 1476:7d145095cc0b
lucene.logging check
author    Franklin Schmidt <fschmidt@gmail.com>
date      Sun, 19 Apr 2020 20:42:26 -0600
parents   c7b86342857f
children  1fa6e8ec2d53
--- a/src/goodjava/lucene/logging/LoggingIndexWriter.java	Sat Apr 18 11:02:18 2020 -0600
+++ b/src/goodjava/lucene/logging/LoggingIndexWriter.java	Sun Apr 19 20:42:26 2020 -0600
@@ -16,10 +16,14 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.Sort;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import goodjava.io.IoUtils;
@@ -63,31 +67,34 @@
 						logs.add( new LogFile(file,"rwd") );
 					}
 					deleteUnusedFiles();
-					log().gotoEnd();
 					return;
 				}
 			} finally {
 				dis.close();
 			}
 		}
+		newLogs();
+	}
+
+	public synchronized boolean isMerging() {
+		return isMerging;
+	}
+
+	private synchronized void isNotMerging() {
+		isMerging = false;
+	}
+
+	public synchronized void newLogs() throws IOException {
+		if( isMerging )
+			throw new RuntimeException("merging");
+		logger.info("building new logs");
+		logs.clear();
 		for( int i=0; i<2; i++ ) {
 			logs.add( newLogFile() );
 		}
-		isMerging = true;
-		new Thread(new Runnable(){public void run(){
-			try {
-				logLucene( System.currentTimeMillis(), logs.get(0), indexWriter );
-				synchronized(LoggingIndexWriter.this) {
-					writeIndex();
-				}
-			} catch(IOException e) {
-				throw new RuntimeException(e);
-			} finally {
-				synchronized(LoggingIndexWriter.this) {
-					isMerging = false;
-				}
-			}
-		}}).start();
+		logLucene( System.currentTimeMillis(), logs.get(0), indexWriter );
+		writeIndex();
+		logger.info("done building new logs");
 	}
 
 	private static void logLucene(long time,LogFile log,LuceneIndexWriter indexWriter) throws IOException {
@@ -149,8 +156,7 @@
 		logger.info("merge");
 		LogFile first = logs.get(0);
 		LogFile second = logs.get(1);
-		second.gotoEnd();
-		long lastTime = second.readLong();
+		long lastTime = second.file.lastModified();
 		File dirFile = new File(logDir,"merge");
 		if( dirFile.exists() )
 			throw new RuntimeException();
@@ -163,53 +169,144 @@
 		logLucene( lastTime, merge, mergeWriter );
 		mergeWriter.close();
 		synchronized(this) {
-			check();
+			//check();
 			logs.remove(0);
 			logs.set(0,merge);
 			writeIndex();
-			check();
+			//check(null);
 		}
 	}
 
 	private final Runnable mergeLogs = new Runnable() { public void run() {
 		try {
 			mergeLogs();
-/*
 		} catch(IOException e) {
 			throw new RuntimeException(e);
-*/
-		} catch(Exception e) {
-			e.printStackTrace();
-			System.exit(-1);
 		} finally {
-			synchronized(LoggingIndexWriter.this) {
-				isMerging = false;
-			}
+			isNotMerging();
 		}
 	} };
 
-	private void check() throws IOException {
-		File dirFile = new File(logDir,"check");
-		if( dirFile.exists() )
-			throw new RuntimeException();
-		Directory dir = FSDirectory.open(dirFile);
-		LuceneIndexWriter checkWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig );
-		playLog(checkWriter);
-		int nCheck = numDocs(checkWriter);
-		int nOrig = numDocs(indexWriter);
-		if( nCheck != nOrig ) {
-			logger.error("nCheck = "+nCheck);
-			logger.error("nOrig = "+nOrig);
-			//new Exception().printStackTrace();
-			Thread.dumpStack();
-			System.out.println();
-			System.out.println("indexWriter");
-			dump(indexWriter);
-			System.out.println("checkWriter");
-			dump(checkWriter);
-			System.exit(-1);
+	private static class DocIter {
+		final IndexReader reader;
+		final TopDocs td;
+		final int n;
+		int i = 0;
+
+		DocIter(IndexReader reader,Query query,Sort sort) throws IOException {
+			this.reader = reader;
+			IndexSearcher searcher = new IndexSearcher(reader);
+			this.td = searcher.search(query,10000000,sort);
+			this.n = td.scoreDocs.length;
+			if( td.totalHits != n )
+				throw new RuntimeException();
+		}
+
+		Document next() throws IOException {
+			return i < n ? reader.document(td.scoreDocs[i++].doc) : null;
+		}
+	}
+
+	public void check(SortField sortField) throws IOException {
+		IndexReader indexReader;
+		List<LogFile> logs;
+		synchronized(this) {
+			if( isMerging ) {
+				logger.warn("is merging, check aborted");
+				return;
+			}
+			isMerging = true;
+			indexReader = indexWriter.openReader();
+			logs = new ArrayList<LogFile>(this.logs);
+			int i = logs.size() - 1;
+			LogFile last = logs.get(i);
+			logs.set(i,last.snapshot());
 		}
-		checkWriter.close();
-		IoUtils.deleteRecursively(dirFile);
+		try {
+			logger.info("check start");
+			indexWriter.check();
+			File dirFile = new File(logDir,"check");
+			IoUtils.deleteRecursively(dirFile);
+			Directory dir = FSDirectory.open(dirFile);
+			LuceneIndexWriter checkWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig );
+			playLogs(logs,checkWriter);
+			logger.info("check lucene");
+			IndexReader checkReader = checkWriter.openReader();
+			if( sortField == null ) {
+				int nCheck = checkReader.numDocs();
+				int nOrig = indexReader.numDocs();
+				if( nCheck != nOrig ) {
+					logger.error("numDocs mismatch: lucene="+nOrig+" logs="+nCheck);
+				}
+				logger.info("numDocs="+nOrig);
+				if( hash(indexReader) != hash(checkReader) ) {
+					logger.error("hash mismatch");
+				}
+			} else {
+				Sort sort = new Sort(sortField);
+				String sortFieldName = sortField.getField();
+				Query query = new PrefixQuery(new Term(sortFieldName));
+				DocIter origIter = new DocIter(indexReader,query,sort);
+				DocIter checkIter = new DocIter(checkReader,query,sort);
+				Map<String,Object> origFields = LuceneUtils.toMap(origIter.next());
+				Map<String,Object> checkFields = LuceneUtils.toMap(checkIter.next());
+				while( origFields!=null && checkFields!=null ) {
+					Comparable origFld = (Comparable)origFields.get(sortFieldName);
+					Comparable checkFld = (Comparable)checkFields.get(sortFieldName);
+					int cmp = origFld.compareTo(checkFld);
+					if( cmp==0 ) {
+						if( !origFields.equals(checkFields) ) {
+							logger.error(sortFieldName+" "+origFld+" not equal");
+							logger.error("lucene = "+origFields);
+							logger.error("logs = "+checkFields);
+						}
+						origFields = LuceneUtils.toMap(origIter.next());
+						checkFields = LuceneUtils.toMap(checkIter.next());
+					} else if( cmp < 0 ) {
+						logger.error(sortFieldName+" "+origFld+" found in lucene but not logs");
+						origFields = LuceneUtils.toMap(origIter.next());
+					} else { // >
+						logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene");
+						checkFields = LuceneUtils.toMap(checkIter.next());
+					}
+				}
+				while( origFields!=null ) {
+					Comparable origFld = (Comparable)origFields.get(sortFieldName);
+					logger.error(sortFieldName+" "+origFld+" found in lucene but not logs");
+					origFields = LuceneUtils.toMap(origIter.next());
+				}
+				while( checkFields!=null ) {
+					Comparable checkFld = (Comparable)checkFields.get(sortFieldName);
+					logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene");
+					checkFields = LuceneUtils.toMap(checkIter.next());
+				}
+				//logger.info("check done");
+			}
+			checkReader.close();
+			checkWriter.close();
+			IoUtils.deleteRecursively(dirFile);
+			logger.info("check done");
+		} finally {
+			indexReader.close();
+			isNotMerging();
+		}
+	}
+
+	private static abstract class HashCollector extends GoodCollector {
+		int total = 0;
+	}
+
+	private static int hash(IndexReader reader) throws IOException {
+		final IndexSearcher searcher = new IndexSearcher(reader);
+		Query query = new MatchAllDocsQuery();
+		HashCollector col = new HashCollector() {
+			public void collectDoc(int iDoc) throws IOException {
+				Document doc = searcher.doc(iDoc);
+				Map<String,Object> storedFields = LuceneUtils.toMap(doc);
+				total += storedFields.hashCode();
+			}
+		};
+		searcher.search(query,col);
+		return col.total;
 	}
 
 	private LogFile log() {
@@ -228,22 +325,21 @@
 		log.commit();
 		if( isMerging )
 			return;
-		if( log.length() > logs.get(0).length() ) {
-			log.writeLong( System.currentTimeMillis() );
+		if( log.end() > logs.get(0).end() ) {
 			logs.add( newLogFile() );
 			writeIndex();
 		}
 		if( logs.size() > 3 ) {
 			isMerging = true;
-//			new Thread(mergeLogs).start();
-			mergeLogs.run();
+			new Thread(mergeLogs).start();
+//			mergeLogs.run();
 		}
 	}
 
 	public synchronized void rollback() throws IOException {
 		indexWriter.rollback();
 		LogFile log = log();
-		log.gotoEnd();
+		log.rollback();
 	}
 
 	public synchronized void deleteAll() throws IOException {
@@ -283,11 +379,11 @@
 		log.writeByte(op);
 	}
 
-	public synchronized void playLog() throws IOException {
-		playLog(indexWriter);
+	public synchronized void playLogs() throws IOException {
+		playLogs(logs,indexWriter);
 	}
 
-	private void playLog(LuceneIndexWriter indexWriter) throws IOException {
+	private static void playLogs(List<LogFile> logs,LuceneIndexWriter indexWriter) throws IOException {
 		if( numDocs(indexWriter) != 0 )
 			throw new RuntimeException ("not empty");
 		for( LogFile log : logs ) {
@@ -304,32 +400,32 @@
 	}
 
 	private static void playLog(LogFile log,LuceneIndexWriter indexWriter) throws IOException {
-		log.gotoStart();
-		while( log.hasMore() ) {
-			playOp(log,indexWriter);
+		LogInputStream in = log.input();
+		while( in.available() > 0 ) {
+			playOp(in,indexWriter);
 		}
 	}
 
-	private static void playOp(LogFile log,LuceneIndexWriter indexWriter) throws IOException {
-		log.readLong();  // time
-		int op = log.readByte();
+	private static void playOp(LogInputStream in,LuceneIndexWriter indexWriter) throws IOException {
+		in.readLong();  // time
+		int op = in.readByte();
 		switch(op) {
 		case OP_DELETE_ALL:
 			indexWriter.deleteAll();
 			return;
 		case OP_DELETE_DOCUMENTS:
-			indexWriter.deleteDocuments( log.readQuery() );
+			indexWriter.deleteDocuments( in.readQuery() );
 			return;
 		case OP_ADD_DOCUMENT:
 			{
-				Map storedFields = log.readMap();
+				Map storedFields = in.readMap();
 				indexWriter.addDocument(storedFields);
 				return;
 			}
 		case OP_UPDATE_DOCUMENT:
 			{
-				String keyFieldName = log.readUTF();
-				Map storedFields = log.readMap();
+				String keyFieldName = in.readUTF();
+				Map storedFields = in.readMap();
 				indexWriter.updateDocument(keyFieldName,storedFields);
 				return;
 			}
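A note on how the new check(SortField) verifies the logs against the live index: it rebuilds a scratch index by replaying the logs (playLogs into checkWriter), then walks the live index and the rebuilt index in the same sort order with two DocIter cursors, advancing whichever side holds the smaller key. Equal keys are compared field by field; an unequal key means the document exists on one side only. The sketch below shows just that two-pointer comparison on plain sorted lists of field maps; the class and method names are illustrative, not part of the patch.

import java.util.*;

public class SortedCompareSketch {
	// Both lists must be sorted by 'key', mirroring the Sort that
	// check(SortField) applies to both DocIter queries.
	static void compare(List<Map<String,Object>> lucene, List<Map<String,Object>> logs, String key) {
		int i = 0, j = 0;
		while( i < lucene.size() && j < logs.size() ) {
			Map<String,Object> o = lucene.get(i);
			Map<String,Object> c = logs.get(j);
			Comparable oKey = (Comparable)o.get(key);
			Comparable cKey = (Comparable)c.get(key);
			int cmp = oKey.compareTo(cKey);
			if( cmp == 0 ) {
				// same key on both sides: the documents must match exactly
				if( !o.equals(c) )
					System.out.println(key+" "+oKey+" not equal: "+o+" vs "+c);
				i++;
				j++;
			} else if( cmp < 0 ) {
				System.out.println(key+" "+oKey+" found in lucene but not logs");
				i++;
			} else {
				System.out.println(key+" "+cKey+" found in logs but not lucene");
				j++;
			}
		}
		// whatever remains on either side is missing from the other
		for( ; i < lucene.size(); i++ )
			System.out.println(key+" "+lucene.get(i).get(key)+" found in lucene but not logs");
		for( ; j < logs.size(); j++ )
			System.out.println(key+" "+logs.get(j).get(key)+" found in logs but not lucene");
	}

	public static void main(String[] args) {
		List<Map<String,Object>> a = List.of(
			Map.<String,Object>of("id","1","body","x"),
			Map.<String,Object>of("id","2","body","y") );
		List<Map<String,Object>> b = List.of(
			Map.<String,Object>of("id","2","body","Y"),
			Map.<String,Object>of("id","3","body","z") );
		compare(a,b,"id");
	}
}

This is the merge step of mergesort used as a diff: each mismatch is reported exactly once, in a single pass, which is why the sorted path of check() requires a SortField and issues the same PrefixQuery and Sort against both readers.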
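When check() is called with a null SortField, it falls back to two cheaper comparisons: document counts, and an order-independent checksum. hash(IndexReader) visits every document (through the project's GoodCollector) and sums the hashCode of its stored-field map, so the total is identical no matter what order the documents are stored or visited in. A minimal sketch of the same idea over plain collections, with illustrative names:

import java.util.*;

public class OrderIndependentHashSketch {
	// Sum per-document hash codes; int overflow just wraps, and addition
	// is commutative, so visit order cannot change the result.
	static int hash(Collection<Map<String,Object>> docs) {
		int total = 0;
		for( Map<String,Object> doc : docs )
			total += doc.hashCode();
		return total;
	}

	public static void main(String[] args) {
		Map<String,Object> d1 = Map.of("id","1","body","x");
		Map<String,Object> d2 = Map.of("id","2","body","y");
		// Same documents, different order: identical checksum.
		System.out.println( hash(List.of(d1,d2)) == hash(List.of(d2,d1)) );  // true
	}
}

The trade-off is that distinct document sets can collide on the same sum, so a matching hash is strong evidence rather than proof that the indexes agree; the sorted walk above remains the precise check.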
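The replay path (playLogs, playLog, playOp) treats each log file as a flat stream of records: an 8-byte timestamp, a 1-byte opcode, then opcode-specific payload, consumed until available() reports the stream is empty. The sketch below models that dispatch loop with java.io streams; the opcode values and the List<String> standing in for the index are invented for illustration, while the real code reads queries and maps from the project's LogInputStream and applies them to a LuceneIndexWriter.

import java.io.*;
import java.util.*;

public class LogReplaySketch {
	// Opcode values are invented for this sketch; the real OP_* constants
	// live in LoggingIndexWriter.
	static final int OP_DELETE_ALL = 1;
	static final int OP_ADD_DOCUMENT = 3;

	// Consume records until the stream is exhausted, applying each one,
	// in the spirit of playLog()/playOp().
	static void playOps(DataInputStream in, List<String> index) throws IOException {
		while( in.available() > 0 ) {
			in.readLong();  // time; recorded in the log but not needed for replay
			int op = in.readByte();
			switch(op) {
			case OP_DELETE_ALL:
				index.clear();
				break;
			case OP_ADD_DOCUMENT:
				index.add( in.readUTF() );
				break;
			default:
				throw new IOException("unknown op "+op);
			}
		}
	}

	public static void main(String[] args) throws IOException {
		// Write a tiny log of two adds, then replay it into an empty "index".
		ByteArrayOutputStream buf = new ByteArrayOutputStream();
		DataOutputStream out = new DataOutputStream(buf);
		out.writeLong(System.currentTimeMillis());
		out.writeByte(OP_ADD_DOCUMENT);
		out.writeUTF("doc1");
		out.writeLong(System.currentTimeMillis());
		out.writeByte(OP_ADD_DOCUMENT);
		out.writeUTF("doc2");
		List<String> index = new ArrayList<String>();
		playOps( new DataInputStream(new ByteArrayInputStream(buf.toByteArray())), index );
		System.out.println(index);  // [doc1, doc2]
	}
}

The same loop serves both startup and verification: playLogs(logs,indexWriter) rebuilds the live index, while check() points it at a scratch checkWriter instead.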