changeset 1465:5e3870618377

lucene.logging dir
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 12 Apr 2020 15:59:57 -0600
parents 465b4a0dae4a
children 670b7d089699
files src/goodjava/lucene/api/LuceneIndexWriter.java src/goodjava/lucene/logging/LogFile.java src/goodjava/lucene/logging/LoggingIndexWriter.java
diffstat 3 files changed, 336 insertions(+), 55 deletions(-)
--- a/src/goodjava/lucene/api/LuceneIndexWriter.java	Fri Apr 03 10:04:52 2020 -0600
+++ b/src/goodjava/lucene/api/LuceneIndexWriter.java	Sun Apr 12 15:59:57 2020 -0600
@@ -26,16 +26,19 @@
 
 
 public final class LuceneIndexWriter implements GoodIndexWriter {
-	final FieldAnalyzer fieldAnalyzer = new FieldAnalyzer();
+	private final FieldAnalyzer fieldAnalyzer = new FieldAnalyzer();
+	public final Version luceneVersion;
 	public final IndexWriterConfig luceneConfig;
+	public final IndexWriter luceneWriter;
 	public final GoodIndexWriterConfig goodConfig;
-	public final IndexWriter luceneWriter;
 	private final Map<String,Boolean> indexedMap = new HashMap<String,Boolean>();
 
-	public LuceneIndexWriter(Version matchVersion,Directory dir,GoodIndexWriterConfig goodConfig) throws IOException {
-		luceneConfig = new IndexWriterConfig(matchVersion,fieldAnalyzer);
-		luceneWriter = new IndexWriter(dir,luceneConfig);
+	public LuceneIndexWriter(Version luceneVersion,Directory dir,GoodIndexWriterConfig goodConfig) throws IOException {
+		this.luceneVersion = luceneVersion;
+		this.luceneConfig = new IndexWriterConfig(luceneVersion,fieldAnalyzer);
+		this.luceneWriter = new IndexWriter(dir,luceneConfig);
 		this.goodConfig = goodConfig;
+		luceneWriter.commit();  // commit index creation
 	}
 
 	public void close() throws IOException {
@@ -162,7 +165,7 @@
 
 
 	public void reindexDocuments(final String keyFieldName,Query query) throws IOException {
-		IndexReader reader = DirectoryReader.open(luceneWriter.getDirectory());
+		IndexReader reader = openReader();
 		final IndexSearcher searcher = new IndexSearcher(reader);
 		searcher.search( query, new GoodCollector(){
 			public void collectDoc(int iDoc) throws IOException {
@@ -173,4 +176,8 @@
 		});
 		reader.close();
 	}
+
+	public IndexReader openReader() throws IOException {
+		return DirectoryReader.open(luceneWriter.getDirectory());
+	}
 }
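
A minimal usage sketch for the revised constructor and the new openReader() helper. The Version constant and the GoodIndexWriterConfig instance are assumptions standing in for whatever the application already uses; they are not taken from this changeset.

	// sketch: create an index and read it back right away; the commit() added to the
	// constructor is what makes openReader() safe on a brand-new directory
	static int countDocs( File indexDir, GoodIndexWriterConfig goodConfig ) throws IOException {
		Directory dir = FSDirectory.open(indexDir);
		LuceneIndexWriter writer = new LuceneIndexWriter( Version.LUCENE_4_9, dir, goodConfig );
		IndexReader reader = writer.openReader();
		int n = reader.numDocs();
		reader.close();
		writer.close();
		return n;
	}
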
--- a/src/goodjava/lucene/logging/LogFile.java	Fri Apr 03 10:04:52 2020 -0600
+++ b/src/goodjava/lucene/logging/LogFile.java	Sun Apr 12 15:59:57 2020 -0600
@@ -19,18 +19,18 @@
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.search.BooleanClause;
 import org.apache.lucene.util.BytesRef;
+import goodjava.logging.Logger;
+import goodjava.logging.LoggerFactory;
 
 
 public class LogFile extends RandomAccessFile {
+	private static final Logger logger = LoggerFactory.getLogger(LogFile.class);
+	public final File file;
 	private long end;
 
 	public LogFile(File file,String mode) throws IOException {
 		super(file,mode);
-		init();
-	}
-
-	public LogFile(String file,String mode) throws IOException {
-		super(file,mode);
+		this.file = file;
 		init();
 	}
 
@@ -45,6 +45,10 @@
 		}
 	}
 
+	public String toString() {
+		return "LogFile<" + file.getName() + ">";
+	}
+
 	public void gotoStart() throws IOException {
 		seek(8L);
 	}
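
Each record that LoggingIndexWriter appends to a LogFile is a timestamp, an op code byte, and an op-specific payload, and gotoStart()/hasMore()/commit() bound the committed region of the file. The sketch below appends and replays one add-document record the way writeOp() and playOp() do; the file name and the field map are placeholders, and 3 is the OP_ADD_DOCUMENT constant defined in LoggingIndexWriter.

	// sketch: append one add-document record, commit it, then scan the log from the start
	static void logOneDocument( File logDir, Map<String,Object> storedFields ) throws IOException {
		LogFile log = new LogFile( new File(logDir,"_0.log"), "rwd" );
		log.gotoEnd();
		log.writeLong( System.currentTimeMillis() );  // operation time
		log.writeByte( 3 );                           // OP_ADD_DOCUMENT
		log.writeMap( storedFields );
		log.commit();                                 // make the record part of the committed log

		log.gotoStart();                              // skip the 8-byte log header
		while( log.hasMore() ) {
			long time = log.readLong();
			int op = log.readByte();
			Map fields = log.readMap();           // payload; every record here is an add
			System.out.println( time + " op=" + op + " " + fields );
		}
		log.close();
	}
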
--- a/src/goodjava/lucene/logging/LoggingIndexWriter.java	Fri Apr 03 10:04:52 2020 -0600
+++ b/src/goodjava/lucene/logging/LoggingIndexWriter.java	Sun Apr 12 15:59:57 2020 -0600
@@ -1,94 +1,364 @@
 package goodjava.lucene.logging;
 
+import java.io.File;
+import java.io.RandomAccessFile;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.DataInputStream;
+import java.io.FileInputStream;
 import java.io.IOException;
 import java.util.Map;
+import java.util.Set;
+import java.util.HashSet;
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Random;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.IndexSearcher;
 import org.apache.lucene.search.Query;
+import org.apache.lucene.search.MatchAllDocsQuery;
+import org.apache.lucene.search.TopDocs;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.store.FSDirectory;
 import goodjava.lucene.api.GoodIndexWriter;
+import goodjava.lucene.api.LuceneIndexWriter;
+import goodjava.lucene.api.GoodCollector;
+import goodjava.lucene.api.LuceneUtils;
+import goodjava.logging.Logger;
+import goodjava.logging.LoggerFactory;
 
 
-public class LoggingIndexWriter implements GoodIndexWriter {
+public final class LoggingIndexWriter implements GoodIndexWriter {
+	private static final Logger logger = LoggerFactory.getLogger(LoggingIndexWriter.class);
+	private static final int version = 1;
 	private static final int OP_DELETE_ALL = 1;
 	private static final int OP_DELETE_DOCUMENTS = 2;
 	private static final int OP_ADD_DOCUMENT = 3;
 	private static final int OP_UPDATE_DOCUMENT = 4;
+	private static final Random rnd = new Random();
 
-	public final GoodIndexWriter indexWriter;
-	private final LogFile logFile;
+	public final LuceneIndexWriter indexWriter;
+	private final File logDir;
+	private final List<LogFile> logs = new ArrayList<LogFile>();
+	private final File index;
+	private boolean isMerging = false;
 
-	public LoggingIndexWriter(GoodIndexWriter indexWriter) throws IOException {
+	public LoggingIndexWriter(LuceneIndexWriter indexWriter,File logDir) throws IOException {
 		this.indexWriter = indexWriter;
-		logFile = new LogFile("lucene.log","rw");
-		logFile.gotoStart();  // for now
+		this.logDir = logDir;
+		logDir.mkdirs();
+		if( !logDir.isDirectory() )
+			throw new RuntimeException();
+		index = new File(logDir,"index");
+		if( index.exists() ) {
+			DataInputStream dis = new DataInputStream(new FileInputStream(index));
+			try {
+				if( dis.readInt() == version ) {
+					final int n = dis.readInt();
+					for( int i=0; i<n; i++ ) {
+						File file = new File( logDir, dis.readUTF() );
+						logs.add( new LogFile(file,"rwd") );
+					}
+					deleteUnusedFiles();
+					log().gotoEnd();
+					return;
+				}
+			} finally {
+				dis.close();
+			}
+		}
+		for( int i=0; i<2; i++ ) {
+			logs.add( newLogFile() );
+		}
+		isMerging = true;
+		new Thread(new Runnable(){public void run(){
+			try {
+				logLucene( System.currentTimeMillis(), logs.get(0), indexWriter );
+				synchronized(LoggingIndexWriter.this) {
+					writeIndex();
+				}
+			} catch(IOException e) {
+				throw new RuntimeException(e);
+			} finally {
+				synchronized(LoggingIndexWriter.this) {
+					isMerging = false;
+				}
+			}
+		}}).start();
 	}
 
-	public void close() throws IOException {
-		indexWriter.close();
-		logFile.commit();
+	private static void logLucene(long time,LogFile log,LuceneIndexWriter indexWriter) throws IOException {
+		IndexReader reader = indexWriter.openReader();
+		final IndexSearcher searcher = new IndexSearcher(reader);
+		Query query = new MatchAllDocsQuery();
+		searcher.search( query, new GoodCollector(){
+			public void collectDoc(int iDoc) throws IOException {
+				Document doc = searcher.doc(iDoc);
+				Map<String,Object> storedFields = LuceneUtils.toMap(doc);
+				log.writeLong(time);
+				log.writeByte(OP_ADD_DOCUMENT);
+				log.writeMap(storedFields);
+			}
+		});
+		reader.close();
+		log.commit();
+	}
+
+	private LogFile newLogFile() throws IOException {
+		File file;
+		do {
+			file = new File(logDir,"_"+rnd.nextInt(100)+".log");
+		} while( file.exists() );
+		return new LogFile(file,"rwd");
 	}
 
-	public void commit() throws IOException {
-		indexWriter.commit();
-		logFile.commit();
+	private void deleteUnusedFiles() {
+		Set<String> used = new HashSet<String>();
+		used.add( index.getName() );
+		for( LogFile lf : logs ) {
+			used.add( lf.file.getName() );
+		}
+		for( File f : logDir.listFiles() ) {
+			if( !used.contains(f.getName()) ) {
+				deleteFile(f);
+			}
+		}
 	}
 
-	public void rollback() throws IOException {
-		indexWriter.rollback();
-		logFile.gotoEnd();
+	private static void deleteFile(File file) {
+		if( file.isDirectory() ) {
+			for( File f : file.listFiles() ) {
+				deleteFile(f);
+			}
+		}
+		if( !file.delete() )
+			throw new RuntimeException(file.getName());
+	}
+
+	private void writeIndex() throws IOException {
+		ByteArrayOutputStream baos = new ByteArrayOutputStream();
+		DataOutputStream dos = new DataOutputStream(baos);
+		dos.writeInt(version);
+		dos.writeInt(logs.size());
+		for( LogFile lf : logs ) {
+			String fileName = lf.file.getName();
+			dos.writeUTF(fileName);
+		}
+		dos.close();
+		RandomAccessFile raf = new RandomAccessFile( index, "rwd" );
+		raf.write( baos.toByteArray() );
+		raf.close();
+		deleteUnusedFiles();
+		logger.info("writeIndex "+logs.toString());
 	}
 
-	public void deleteAll() throws IOException {
-		indexWriter.deleteAll();
-		logFile.writeByte(OP_DELETE_ALL);
+	private void mergeLogs() throws IOException {
+		logger.info("merge");
+		LogFile first = logs.get(0);
+		LogFile second = logs.get(1);
+		second.gotoEnd();
+		long lastTime = second.readLong();
+		File dirFile = new File(logDir,"merge");
+		if( dirFile.exists() )
+			throw new RuntimeException();
+		Directory dir = FSDirectory.open(dirFile);
+		LuceneIndexWriter mergeWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig );
+		playLog(first,mergeWriter);
+		playLog(second,mergeWriter);
+		mergeWriter.commit();
+		LogFile merge = newLogFile();
+		logLucene( lastTime, merge, mergeWriter );
+		mergeWriter.close();
+		synchronized(this) {
+			check();
+			logs.remove(0);
+			logs.set(0,merge);
+			writeIndex();
+			check();
+		}
 	}
+	private final Runnable mergeLogs = new Runnable() { public void run() {
+		try {
+			mergeLogs();
+/*
+		} catch(IOException e) {
+			throw new RuntimeException(e);
+*/
+		} catch(Exception e) {
+			e.printStackTrace();
+			System.exit(-1);
+		} finally {
+			synchronized(LoggingIndexWriter.this) {
+				isMerging = false;
+			}
+		}
+	} };
 
-	public void deleteDocuments(Query query) throws IOException {
-		indexWriter.deleteDocuments(query);
-		logFile.writeByte(OP_DELETE_DOCUMENTS);
-		logFile.writeQuery(query);
+	private void check() throws IOException {
+		File dirFile = new File(logDir,"check");
+		if( dirFile.exists() )
+			throw new RuntimeException();
+		Directory dir = FSDirectory.open(dirFile);
+		LuceneIndexWriter checkWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig );
+		playLog(checkWriter);
+		int nCheck = numDocs(checkWriter);
+		int nOrig = numDocs(indexWriter);
+		if( nCheck != nOrig ) {
+			logger.error("nCheck = "+nCheck);
+			logger.error("nOrig = "+nOrig);
+			//new Exception().printStackTrace();
+			Thread.dumpStack();
+			System.out.println();
+			System.out.println("indexWriter");
+			dump(indexWriter);
+			System.out.println("checkWriter");
+			dump(checkWriter);
+			System.exit(-1);
+		}
+		checkWriter.close();
+		deleteFile(dirFile);
 	}
 
-	public void addDocument(Map<String,Object> storedFields) throws IOException {
-		indexWriter.addDocument(storedFields);
-		logFile.writeByte(OP_ADD_DOCUMENT);
-		logFile.writeMap(storedFields);
+	private LogFile log() {
+		return logs.get(logs.size()-1);
+	}
+
+	public synchronized void close() throws IOException {
+		indexWriter.close();
+		LogFile log = log();
+		log.commit();
+	}
+
+	public synchronized void commit() throws IOException {
+		indexWriter.commit();
+		LogFile log = log();
+		log.commit();
+		if( isMerging )
+			return;
+		if( log.length() > logs.get(0).length() ) {
+			log.writeLong( System.currentTimeMillis() );
+			logs.add( newLogFile() );
+			writeIndex();
+		}
+		if( logs.size() > 3 ) {
+			isMerging = true;
+//			new Thread(mergeLogs).start();
+			mergeLogs.run();
+		}
 	}
 
-	public void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException {
-		indexWriter.updateDocument(keyFieldName,storedFields);
-		logFile.writeByte(OP_UPDATE_DOCUMENT);
-		logFile.writeUTF(keyFieldName);
-		logFile.writeMap(storedFields);
+	public synchronized void rollback() throws IOException {
+		indexWriter.rollback();
+		LogFile log = log();
+		log.gotoEnd();
+	}
+
+	public synchronized void deleteAll() throws IOException {
+		indexWriter.deleteAll();
+		LogFile log = log();
+		writeOp(log,OP_DELETE_ALL);
 	}
 
-	public void reindexDocuments(String keyFieldName,Query query) throws IOException {
+	public synchronized void deleteDocuments(Query query) throws IOException {
+		indexWriter.deleteDocuments(query);
+		LogFile log = log();
+		writeOp(log,OP_DELETE_DOCUMENTS);
+		log.writeQuery(query);
+	}
+
+	public synchronized void addDocument(Map<String,Object> storedFields) throws IOException {
+		indexWriter.addDocument(storedFields);
+		LogFile log = log();
+		writeOp(log,OP_ADD_DOCUMENT);
+		log.writeMap(storedFields);
+	}
+
+	public synchronized void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException {
+		indexWriter.updateDocument(keyFieldName,storedFields);
+		LogFile log = log();
+		writeOp(log,OP_UPDATE_DOCUMENT);
+		log.writeUTF(keyFieldName);
+		log.writeMap(storedFields);
+	}
+
+	public synchronized void reindexDocuments(String keyFieldName,Query query) throws IOException {
 		indexWriter.reindexDocuments(keyFieldName,query);
 	}
 
-	private void playOp() throws IOException {
-		int op = logFile.readByte();
+	private void writeOp(LogFile log,int op) throws IOException {
+		log.writeLong(System.currentTimeMillis());
+		log.writeByte(op);
+	}
+
+	public synchronized void playLog() throws IOException {
+		playLog(indexWriter);
+	}
+
+	private void playLog(LuceneIndexWriter indexWriter) throws IOException {
+		if( numDocs(indexWriter) != 0 )
+			throw new RuntimeException ("not empty");
+		for( LogFile log : logs ) {
+			playLog(log,indexWriter);
+		}
+		indexWriter.commit();
+	}
+
+	private static int numDocs(LuceneIndexWriter indexWriter) throws IOException {
+		IndexReader reader = indexWriter.openReader();
+		int n = reader.numDocs();
+		reader.close();
+		return n;
+	}
+
+	private static void playLog(LogFile log,LuceneIndexWriter indexWriter) throws IOException {
+		log.gotoStart();
+		while( log.hasMore() ) {
+			playOp(log,indexWriter);
+		}
+	}
+
+	private static void playOp(LogFile log,LuceneIndexWriter indexWriter) throws IOException {
+		log.readLong();  // time
+		int op = log.readByte();
 		switch(op) {
 		case OP_DELETE_ALL:
 			indexWriter.deleteAll();
 			return;
 		case OP_DELETE_DOCUMENTS:
-			indexWriter.deleteDocuments( logFile.readQuery() );
+			indexWriter.deleteDocuments( log.readQuery() );
 			return;
 		case OP_ADD_DOCUMENT:
-			indexWriter.addDocument( logFile.readMap() );
-			return;
+			{
+				Map storedFields = log.readMap();
+				indexWriter.addDocument(storedFields);
+				return;
+			}
 		case OP_UPDATE_DOCUMENT:
-			indexWriter.updateDocument( logFile.readUTF(), logFile.readMap() );
-			return;
+			{
+				String keyFieldName = log.readUTF();
+				Map storedFields = log.readMap();
+				indexWriter.updateDocument(keyFieldName,storedFields);
+				return;
+			}
 		default:
 			throw new RuntimeException("invalid op "+op);
 		}
 	}
 
-	public void playLog() throws IOException {
-		logFile.gotoStart();
-		while( logFile.hasMore() ) {
-			playOp();
+	private static void dump(LuceneIndexWriter indexWriter) throws IOException {
+		IndexReader reader = indexWriter.openReader();
+		IndexSearcher searcher = new IndexSearcher(reader);
+		Query query = new MatchAllDocsQuery();
+		TopDocs td = searcher.search(query,100);
+		System.out.println("totalHits = "+td.totalHits);
+		for( int i=0; i<td.scoreDocs.length; i++ ) {
+			Document doc = searcher.doc(td.scoreDocs[i].doc);
+			System.out.println(LuceneUtils.toMap(doc));
 		}
-		indexWriter.commit();
+		System.out.println();
+		reader.close();
 	}
+ 
 }
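
Taken together, one plausible way to wire this up: the application owns a LuceneIndexWriter, wraps it in a LoggingIndexWriter pointed at a log directory, replays the logs when the Lucene index is empty, and from then on routes every write through the wrapper so that commit() can rotate the current log and merge older ones as they grow. The paths, the Version constant, the goodConfig instance and the storedFields map below are assumptions, not values from this changeset.

	// sketch: startup plus a single logged write; playLog() is only legal on an
	// empty Lucene index and throws "not empty" otherwise
	static void startAndWrite( File indexDir, File logDir, GoodIndexWriterConfig goodConfig,
			Map<String,Object> storedFields ) throws IOException {
		Directory dir = FSDirectory.open(indexDir);
		LuceneIndexWriter luceneWriter = new LuceneIndexWriter( Version.LUCENE_4_9, dir, goodConfig );
		LoggingIndexWriter writer = new LoggingIndexWriter( luceneWriter, logDir );

		IndexReader reader = luceneWriter.openReader();
		boolean indexIsEmpty = reader.numDocs() == 0;
		reader.close();
		if( indexIsEmpty )
			writer.playLog();              // rebuild the Lucene index from the committed logs

		writer.addDocument(storedFields);  // applied to Lucene and appended to the newest log
		writer.commit();                   // may rotate the current log or trigger a merge
		writer.close();
	}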