diff src/goodjava/lucene/logging/LoggingIndexWriter.java @ 1476:7d145095cc0b

lucene.logging check
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 19 Apr 2020 20:42:26 -0600
parents c7b86342857f
children 1fa6e8ec2d53
--- a/src/goodjava/lucene/logging/LoggingIndexWriter.java	Sat Apr 18 11:02:18 2020 -0600
+++ b/src/goodjava/lucene/logging/LoggingIndexWriter.java	Sun Apr 19 20:42:26 2020 -0600
@@ -16,10 +16,14 @@
 import org.apache.lucene.document.Document;
 import org.apache.lucene.index.DirectoryReader;
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.Term;
 import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
 import org.apache.lucene.search.MatchAllDocsQuery;
 import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.search.PrefixQuery;
+import org.apache.lucene.search.SortField;
+import org.apache.lucene.search.Sort;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import goodjava.io.IoUtils;
@@ -63,31 +67,34 @@
 						logs.add( new LogFile(file,"rwd") );
 					}
 					deleteUnusedFiles();
-					log().gotoEnd();
 					return;
 				}
 			} finally {
 				dis.close();
 			}
 		}
+		newLogs();
+	}
+
+	public synchronized boolean isMerging() {
+		return isMerging;
+	}
+
+	private synchronized void isNotMerging() {
+		isMerging = false;
+	}
+
+	public synchronized void newLogs() throws IOException {
+		if( isMerging )
+			throw new RuntimeException("merging");
+		logger.info("building new logs");
+		logs.clear();
 		for( int i=0; i<2; i++ ) {
 			logs.add( newLogFile() );
 		}
-		isMerging = true;
-		new Thread(new Runnable(){public void run(){
-			try {
-				logLucene( System.currentTimeMillis(), logs.get(0), indexWriter );
-				synchronized(LoggingIndexWriter.this) {
-					writeIndex();
-				}
-			} catch(IOException e) {
-				throw new RuntimeException(e);
-			} finally {
-				synchronized(LoggingIndexWriter.this) {
-					isMerging = false;
-				}
-			}
-		}}).start();
+		logLucene( System.currentTimeMillis(), logs.get(0), indexWriter );
+		writeIndex();
+		logger.info("done building new logs");
 	}
 
 	private static void logLucene(long time,LogFile log,LuceneIndexWriter indexWriter) throws IOException {
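This hunk makes the initial log build synchronous: instead of spawning a thread in the constructor, newLogs() runs inline and refuses to start while a merge holds the isMerging flag. A minimal caller sketch, assuming a LoggingIndexWriter variable named writer (hypothetical):

	// rebuild the logs from the live index, but only when no merge is running;
	// newLogs() re-checks under its own lock and throws if a merge sneaks in
	if( !writer.isMerging() ) {
		writer.newLogs();
	}
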
@@ -149,8 +156,7 @@
 		logger.info("merge");
 		LogFile first = logs.get(0);
 		LogFile second = logs.get(1);
-		second.gotoEnd();
-		long lastTime = second.readLong();
+		long lastTime = second.file.lastModified();
 		File dirFile = new File(logDir,"merge");
 		if( dirFile.exists() )
 			throw new RuntimeException();
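The merge cutoff time now comes from the filesystem rather than from a timestamp record appended to the log. A comment-only sketch of the change, assuming second.file is a plain java.io.File:

	// before: commit() wrote the wall-clock time into the log, and merge
	// seeked to the end and read it back:
	//     second.gotoEnd();
	//     long lastTime = second.readLong();
	// after: take the OS modification time of the log file instead
	// (epoch milliseconds; 0L if the file does not exist)
	long lastTime = second.file.lastModified();
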
@@ -163,53 +169,144 @@
 		logLucene( lastTime, merge, mergeWriter );
 		mergeWriter.close();
 		synchronized(this) {
-			check();
+			//check();
 			logs.remove(0);
 			logs.set(0,merge);
 			writeIndex();
-			check();
+			//check(null);
 		}
 	}
 	private final Runnable mergeLogs = new Runnable() { public void run() {
 		try {
 			mergeLogs();
-/*
 		} catch(IOException e) {
 			throw new RuntimeException(e);
-*/
-		} catch(Exception e) {
-			e.printStackTrace();
-			System.exit(-1);
 		} finally {
-			synchronized(LoggingIndexWriter.this) {
-				isMerging = false;
-			}
+			isNotMerging();
 		}
 	} };
 
-	private void check() throws IOException {
-		File dirFile = new File(logDir,"check");
-		if( dirFile.exists() )
-			throw new RuntimeException();
-		Directory dir = FSDirectory.open(dirFile);
-		LuceneIndexWriter checkWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig );
-		playLog(checkWriter);
-		int nCheck = numDocs(checkWriter);
-		int nOrig = numDocs(indexWriter);
-		if( nCheck != nOrig ) {
-			logger.error("nCheck = "+nCheck);
-			logger.error("nOrig = "+nOrig);
-			//new Exception().printStackTrace();
-			Thread.dumpStack();
-			System.out.println();
-			System.out.println("indexWriter");
-			dump(indexWriter);
-			System.out.println("checkWriter");
-			dump(checkWriter);
-			System.exit(-1);
+	private static class DocIter {
+		final IndexReader reader;
+		final TopDocs td;
+		final int n;
+		int i = 0;
+
+		DocIter(IndexReader reader,Query query,Sort sort) throws IOException {
+			this.reader = reader;
+			IndexSearcher searcher = new IndexSearcher(reader);
+			this.td = searcher.search(query,10000000,sort);
+			this.n = td.scoreDocs.length;
+			if( td.totalHits != n )
+				throw new RuntimeException();
+		}
+
+		Document next() throws IOException {
+			return i < n ? reader.document(td.scoreDocs[i++].doc) : null;
+		}
+	}
+
+	public void check(SortField sortField) throws IOException {
+		IndexReader indexReader;
+		List<LogFile> logs;
+		synchronized(this) {
+			if( isMerging ) {
+				logger.warn("is merging, check aborted");
+				return;
+			}
+			isMerging = true;
+			indexReader = indexWriter.openReader();
+			logs = new ArrayList<LogFile>(this.logs);
+			int i = logs.size() - 1;
+			LogFile last = logs.get(i);
+			logs.set(i,last.snapshot());
 		}
-		checkWriter.close();
-		IoUtils.deleteRecursively(dirFile);
+		try {
+			logger.info("check start");
+			indexWriter.check();
+			File dirFile = new File(logDir,"check");
+			IoUtils.deleteRecursively(dirFile);
+			Directory dir = FSDirectory.open(dirFile);
+			LuceneIndexWriter checkWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig );
+			playLogs(logs,checkWriter);
+			logger.info("check lucene");
+			IndexReader checkReader = checkWriter.openReader();
+			if( sortField == null ) {
+				int nCheck = checkReader.numDocs();
+				int nOrig = indexReader.numDocs();
+				if( nCheck != nOrig ) {
+					logger.error("numDocs mismatch: lucene="+nOrig+" logs="+nCheck);
+				}
+				logger.info("numDocs="+nOrig);
+				if( hash(indexReader) != hash(checkReader) ) {
+					logger.error("hash mismatch");
+				}
+			} else {
+				Sort sort = new Sort(sortField);
+				String sortFieldName = sortField.getField();
+				Query query = new PrefixQuery(new Term(sortFieldName));
+				DocIter origIter = new DocIter(indexReader,query,sort);
+				DocIter checkIter = new DocIter(checkReader,query,sort);
+				Map<String,Object> origFields = LuceneUtils.toMap(origIter.next());
+				Map<String,Object> checkFields = LuceneUtils.toMap(checkIter.next());
+				while( origFields!=null && checkFields!=null ) {
+					Comparable origFld = (Comparable)origFields.get(sortFieldName);
+					Comparable checkFld = (Comparable)checkFields.get(sortFieldName);
+					int cmp = origFld.compareTo(checkFld);
+					if( cmp==0 ) {
+						if( !origFields.equals(checkFields) ) {
+							logger.error(sortFieldName+" "+origFld+" not equal");
+							logger.error("lucene = "+origFields);
+							logger.error("logs = "+checkFields);
+						}
+						origFields = LuceneUtils.toMap(origIter.next());
+						checkFields = LuceneUtils.toMap(checkIter.next());
+					} else if( cmp < 0 ) {
+						logger.error(sortFieldName+" "+origFld+" found in lucene but not logs");
+						origFields = LuceneUtils.toMap(origIter.next());
+					} else {  // >
+						logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene");
+						checkFields = LuceneUtils.toMap(checkIter.next());
+					}
+				}
+				while( origFields!=null ) {
+					Comparable origFld = (Comparable)origFields.get(sortFieldName);
+					logger.error(sortFieldName+" "+origFld+" found in lucene but not logs");
+					origFields = LuceneUtils.toMap(origIter.next());
+				}
+				while( checkFields!=null ) {
+					Comparable checkFld = (Comparable)checkFields.get(sortFieldName);
+					logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene");
+					checkFields = LuceneUtils.toMap(checkIter.next());
+				}
+				//logger.info("check done");
+			}
+			checkReader.close();
+			checkWriter.close();
+			IoUtils.deleteRecursively(dirFile);
+			logger.info("check done");
+		} finally {
+			indexReader.close();
+			isNotMerging();
+		}
+	}
+
+	private static abstract class HashCollector extends GoodCollector {
+		int total = 0;
+	}
+
+	private static int hash(IndexReader reader) throws IOException {
+		final IndexSearcher searcher = new IndexSearcher(reader);
+		Query query = new MatchAllDocsQuery();
+		HashCollector col = new HashCollector() {
+			public void collectDoc(int iDoc) throws IOException {
+				Document doc = searcher.doc(iDoc);
+				Map<String,Object> storedFields = LuceneUtils.toMap(doc);
+				total += storedFields.hashCode();
+			}
+		};
+		searcher.search(query,col);
+		return col.total;
 	}
 
 	private LogFile log() {
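The rewritten check(SortField) walks the live index and a copy replayed from the logs in the same sort order and reports documents present on only one side, a sorted merge-join; with a null argument it falls back to comparing numDocs plus an order-independent hash of each document's stored fields. A hedged usage sketch, assuming documents carry an indexed string field named "id" (hypothetical; the SortField constructor form also varies by Lucene version):

	// field-by-field comparison of lucene vs logs, aligned on "id"
	writer.check( new SortField("id",SortField.Type.STRING) );

	// cheaper consistency check: document count and stored-field hash only
	writer.check(null);
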
@@ -228,22 +325,21 @@
 		log.commit();
 		if( isMerging )
 			return;
-		if( log.length() > logs.get(0).length() ) {
-			log.writeLong( System.currentTimeMillis() );
+		if( log.end() > logs.get(0).end() ) {
 			logs.add( newLogFile() );
 			writeIndex();
 		}
 		if( logs.size() > 3 ) {
 			isMerging = true;
-//			new Thread(mergeLogs).start();
-			mergeLogs.run();
+			new Thread(mergeLogs).start();
+//			mergeLogs.run();
 		}
 	}
 
 	public synchronized void rollback() throws IOException {
 		indexWriter.rollback();
 		LogFile log = log();
-		log.gotoEnd();
+		log.rollback();
 	}
 
 	public synchronized void deleteAll() throws IOException {
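commit() (shown above) now measures log growth with end() instead of length(), no longer appends a timestamp before rotating, and moves the merge of the two oldest logs back onto a background thread. An annotated restatement of the rotation policy, assuming the surrounding synchronized commit() context:

	if( log.end() > logs.get(0).end() ) {  // active log outgrew the merged base log
		logs.add( newLogFile() );          // rotate: later ops go to the fresh log
		writeIndex();                      // persist the new log list
	}
	if( logs.size() > 3 ) {                // too many log generations accumulated
		isMerging = true;                  // set under the lock, before the thread runs,
		new Thread(mergeLogs).start();     // so later commits skip rotation meanwhile
	}
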
@@ -283,11 +379,11 @@
 		log.writeByte(op);
 	}
 
-	public synchronized void playLog() throws IOException {
-		playLog(indexWriter);
+	public synchronized void playLogs() throws IOException {
+		playLogs(logs,indexWriter);
 	}
 
-	private void playLog(LuceneIndexWriter indexWriter) throws IOException {
+	private static void playLogs(List<LogFile> logs,LuceneIndexWriter indexWriter) throws IOException {
 		if( numDocs(indexWriter) != 0 )
 			throw new RuntimeException ("not empty");
 		for( LogFile log : logs ) {
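playLog/playOp become the plural playLogs, static over an explicit log list, which lets check() replay a frozen snapshot of the logs while the live writer keeps going. A minimal recovery sketch, assuming writer wraps an empty index (playLogs throws "not empty" otherwise):

	// cold start: rebuild the Lucene index purely from the operation logs,
	// replaying each LogFile oldest-first
	writer.playLogs();
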
@@ -304,32 +400,32 @@
 	}
 
 	private static void playLog(LogFile log,LuceneIndexWriter indexWriter) throws IOException {
-		log.gotoStart();
-		while( log.hasMore() ) {
-			playOp(log,indexWriter);
+		LogInputStream in = log.input();
+		while( in.available() > 0 ) {
+			playOp(in,indexWriter);
 		}
 	}
 
-	private static void playOp(LogFile log,LuceneIndexWriter indexWriter) throws IOException {
-		log.readLong();  // time
-		int op = log.readByte();
+	private static void playOp(LogInputStream in,LuceneIndexWriter indexWriter) throws IOException {
+		in.readLong();  // time
+		int op = in.readByte();
 		switch(op) {
 		case OP_DELETE_ALL:
 			indexWriter.deleteAll();
 			return;
 		case OP_DELETE_DOCUMENTS:
-			indexWriter.deleteDocuments( log.readQuery() );
+			indexWriter.deleteDocuments( in.readQuery() );
 			return;
 		case OP_ADD_DOCUMENT:
 			{
-				Map storedFields = log.readMap();
+				Map storedFields = in.readMap();
 				indexWriter.addDocument(storedFields);
 				return;
 			}
 		case OP_UPDATE_DOCUMENT:
 			{
-				String keyFieldName = log.readUTF();
-				Map storedFields = log.readMap();
+				String keyFieldName = in.readUTF();
+				Map storedFields = in.readMap();
 				indexWriter.updateDocument(keyFieldName,storedFields);
 				return;
 			}
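
For reference, the record layout that playOp consumes, per the switch above: a long timestamp (ignored on replay), one opcode byte, then an op-specific payload.

	// record layout replayed by playOp:
	//   long  time                 -- ignored on replay
	//   byte  op
	//   payload by opcode:
	//     OP_DELETE_ALL            -- none
	//     OP_DELETE_DOCUMENTS      -- a serialized Query
	//     OP_ADD_DOCUMENT          -- a Map of stored fields
	//     OP_UPDATE_DOCUMENT       -- UTF key field name, then a Map of stored fields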