view src/goodjava/lucene/logging/LoggingIndexWriter.java @ 1474:13cbce740e1e

LowercaseAnalyzer
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 17 Apr 2020 21:53:09 -0600
parents 6c6ce14db6a8
children c7b86342857f
line wrap: on
line source

package goodjava.lucene.logging;

import java.io.File;
import java.io.RandomAccessFile;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.DataInputStream;
import java.io.FileInputStream;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.HashSet;
import java.util.List;
import java.util.ArrayList;
import java.util.Random;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import goodjava.io.IoUtils;
import goodjava.lucene.api.GoodIndexWriter;
import goodjava.lucene.api.LuceneIndexWriter;
import goodjava.lucene.api.GoodCollector;
import goodjava.lucene.api.LuceneUtils;
import goodjava.logging.Logger;
import goodjava.logging.LoggerFactory;


/**
 * A {@link GoodIndexWriter} that forwards every operation to a wrapped
 * {@link LuceneIndexWriter} and additionally records each add, update and delete
 * in a sequence of log files kept in a log directory, so that the index contents
 * can be rebuilt by replaying the logs (see {@link #playLog()}).
 *
 * <p>The log directory contains an {@code index} file listing the active log files
 * (an int format version, an int count, then one UTF file name per log, oldest first).
 * Any other entry in the directory is deleted whenever the index file is read or
 * rewritten.
 *
 * <p>All public mutating methods are {@code synchronized} on this instance.
 */
public final class LoggingIndexWriter implements GoodIndexWriter {
	private static final Logger logger = LoggerFactory.getLogger(LoggingIndexWriter.class);

	/** Format version stored as the first int of the {@code index} file. */
	private static final int version = 1;

	// Operation codes; each log entry is a long timestamp followed by one of these bytes.
	private static final int OP_DELETE_ALL = 1;
	private static final int OP_DELETE_DOCUMENTS = 2;
	private static final int OP_ADD_DOCUMENT = 3;
	private static final int OP_UPDATE_DOCUMENT = 4;

	/** Source of the numeric part of new log file names. */
	private static final Random rnd = new Random();

	/** The wrapped writer that performs the actual Lucene operations. */
	public final LuceneIndexWriter indexWriter;
	private final File logDir;
	/** Active log files, oldest first; new operations are appended to the last one. */
	private final List<LogFile> logs = new ArrayList<LogFile>();
	/** The {@code index} file inside {@link #logDir} that lists the active logs. */
	private final File index;
	/** True while the initial dump or a merge is in progress; suppresses log rotation in commit(). */
	private boolean isMerging = false;

	/**
	 * Opens the logs found in {@code logDir}, or starts fresh ones.
	 *
	 * <p>If an {@code index} file with the current format version exists, the log files
	 * it names are opened and the last one is positioned at its end. Otherwise two new
	 * log files are created and a background thread dumps the current contents of
	 * {@code indexWriter} into the first of them, then writes the {@code index} file.
	 *
	 * @param indexWriter the Lucene writer to wrap
	 * @param logDir directory holding the log files; created if missing
	 * @throws IOException if the index file or a log file cannot be read or created
	 * @throws RuntimeException if {@code logDir} is not a directory after creation was attempted
	 */
	public LoggingIndexWriter(LuceneIndexWriter indexWriter,File logDir) throws IOException {
		this.indexWriter = indexWriter;
		this.logDir = logDir;
		logDir.mkdirs();
		if( !logDir.isDirectory() )
			throw new RuntimeException("not a directory: "+logDir);
		index = new File(logDir,"index");
		if( index.exists() ) {
			DataInputStream dis = new DataInputStream(new FileInputStream(index));
			try {
				if( dis.readInt() == version ) {
					final int n = dis.readInt();
					for( int i=0; i<n; i++ ) {
						File file = new File( logDir, dis.readUTF() );
						logs.add( new LogFile(file,"rwd") );
					}
					deleteUnusedFiles();
					log().gotoEnd();
					return;
				}
				// Version mismatch: fall through and start over with fresh logs.
			} finally {
				dis.close();
			}
		}
		for( int i=0; i<2; i++ ) {
			logs.add( newLogFile() );
		}
		// Block log rotation/merging until the initial dump has produced the index file.
		isMerging = true;
		new Thread(new Runnable(){public void run(){
			try {
				logLucene( System.currentTimeMillis(), logs.get(0), indexWriter );
				synchronized(LoggingIndexWriter.this) {
					writeIndex();
				}
			} catch(IOException e) {
				throw new RuntimeException(e);
			} finally {
				synchronized(LoggingIndexWriter.this) {
					isMerging = false;
				}
			}
		}}).start();
	}

	/**
	 * Writes every document currently visible in {@code indexWriter} to {@code log}
	 * as an add-document entry stamped with {@code time}, then commits the log.
	 * The reader is closed even if the search or a log write fails.
	 */
	private static void logLucene(long time,LogFile log,LuceneIndexWriter indexWriter) throws IOException {
		IndexReader reader = indexWriter.openReader();
		try {
			final IndexSearcher searcher = new IndexSearcher(reader);
			Query query = new MatchAllDocsQuery();
			searcher.search( query, new GoodCollector(){
				public void collectDoc(int iDoc) throws IOException {
					Document doc = searcher.doc(iDoc);
					Map<String,Object> storedFields = LuceneUtils.toMap(doc);
					log.writeLong(time);
					log.writeByte(OP_ADD_DOCUMENT);
					log.writeMap(storedFields);
				}
			});
		} finally {
			reader.close();
		}
		log.commit();
	}

	/**
	 * Opens a log file under a name of the form {@code _N.log} (N in 0..99) that does
	 * not yet exist in the log directory. Loops until a free name is found.
	 */
	private LogFile newLogFile() throws IOException {
		File file;
		do {
			file = new File(logDir,"_"+rnd.nextInt(100)+".log");
		} while( file.exists() );
		return new LogFile(file,"rwd");
	}

	/**
	 * Deletes every entry of the log directory that is neither the index file nor one
	 * of the active log files. This also removes leftover temporary index directories.
	 *
	 * @throws IOException if the directory cannot be listed or an entry cannot be deleted
	 */
	private void deleteUnusedFiles() throws IOException {
		Set<String> used = new HashSet<String>();
		used.add( index.getName() );
		for( LogFile lf : logs ) {
			used.add( lf.file.getName() );
		}
		File[] files = logDir.listFiles();
		if( files == null )  // listFiles() returns null on I/O error
			throw new IOException("cannot list directory "+logDir);
		for( File f : files ) {
			if( !used.contains(f.getName()) ) {
				deleteFile(f);
			}
		}
	}

	/** Deletes {@code file}; a directory is emptied recursively first. */
	private static void deleteFile(File file) throws IOException {
		if( file.isDirectory() ) {
			File[] children = file.listFiles();
			if( children == null )  // listFiles() returns null on I/O error
				throw new IOException("cannot list directory "+file);
			for( File f : children ) {
				deleteFile(f);
			}
		}
		IoUtils.delete(file);
	}

	/**
	 * Rewrites the index file from the current list of logs, then deletes every file
	 * in the log directory that is no longer referenced.
	 */
	private void writeIndex() throws IOException {
		ByteArrayOutputStream baos = new ByteArrayOutputStream();
		DataOutputStream dos = new DataOutputStream(baos);
		dos.writeInt(version);
		dos.writeInt(logs.size());
		for( LogFile lf : logs ) {
			String fileName = lf.file.getName();
			dos.writeUTF(fileName);
		}
		dos.close();
		byte[] bytes = baos.toByteArray();
		RandomAccessFile raf = new RandomAccessFile( index, "rwd" );
		try {
			raf.write(bytes);
			// The file is overwritten in place; drop any tail left by a longer previous index.
			raf.setLength(bytes.length);
		} finally {
			raf.close();
		}
		deleteUnusedFiles();
		logger.info("writeIndex "+logs.toString());
	}

	/**
	 * Merges the two oldest logs into one.
	 *
	 * <p>Both logs are replayed into a temporary Lucene index in the {@code merge}
	 * subdirectory, the resulting documents are dumped into a new log file, and the
	 * new file then replaces the two old ones in the list of logs. The rewritten
	 * index file no longer references the old logs or the temporary directory, so
	 * {@link #writeIndex()} deletes them.
	 *
	 * @throws RuntimeException if the {@code merge} directory already exists
	 */
	private void mergeLogs() throws IOException {
		logger.info("merge");
		LogFile first = logs.get(0);
		LogFile second = logs.get(1);
		// NOTE(review): presumably gotoEnd() moves to the committed end, so this reads the
		// timestamp commit() appends after committing when it rotates logs - confirm in LogFile.
		second.gotoEnd();
		long lastTime = second.readLong();
		File dirFile = new File(logDir,"merge");
		if( dirFile.exists() )
			throw new RuntimeException("merge directory already exists: "+dirFile);
		Directory dir = FSDirectory.open(dirFile);
		LuceneIndexWriter mergeWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig );
		LogFile merge;
		try {
			playLog(first,mergeWriter);
			playLog(second,mergeWriter);
			mergeWriter.commit();
			merge = newLogFile();
			logLucene( lastTime, merge, mergeWriter );
		} finally {
			mergeWriter.close();
		}
		synchronized(this) {
			check();
			// Drop the oldest log and put the merged log in place of the second oldest.
			logs.remove(0);
			logs.set(0,merge);
			writeIndex();
			check();
		}
	}

	/**
	 * Runs {@link #mergeLogs()} and clears {@link #isMerging} afterwards.
	 * Any failure is printed and terminates the JVM.
	 */
	private final Runnable mergeLogs = new Runnable() { public void run() {
		try {
			mergeLogs();
		} catch(Exception e) {
			e.printStackTrace();
			System.exit(-1);
		} finally {
			synchronized(LoggingIndexWriter.this) {
				isMerging = false;
			}
		}
	} };

	/**
	 * Consistency check: replays all logs into a temporary Lucene index in the
	 * {@code check} subdirectory and compares its document count with that of the
	 * wrapped writer. On a mismatch both indexes are dumped to standard output and
	 * the JVM is terminated.
	 *
	 * @throws RuntimeException if the {@code check} directory already exists
	 */
	private void check() throws IOException {
		File dirFile = new File(logDir,"check");
		if( dirFile.exists() )
			throw new RuntimeException("check directory already exists: "+dirFile);
		Directory dir = FSDirectory.open(dirFile);
		LuceneIndexWriter checkWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig );
		try {
			playLog(checkWriter);
			int nCheck = numDocs(checkWriter);
			int nOrig = numDocs(indexWriter);
			if( nCheck != nOrig ) {
				logger.error("nCheck = "+nCheck);
				logger.error("nOrig = "+nOrig);
				Thread.dumpStack();
				System.out.println();
				System.out.println("indexWriter");
				dump(indexWriter);
				System.out.println("checkWriter");
				dump(checkWriter);
				System.exit(-1);
			}
		} finally {
			checkWriter.close();
		}
		deleteFile(dirFile);
	}

	/** Returns the newest log, the one that new operations are appended to. */
	private LogFile log() {
		return logs.get(logs.size()-1);
	}

	/** Closes the wrapped writer and commits the current log. */
	public synchronized void close() throws IOException {
		indexWriter.close();
		LogFile log = log();
		log.commit();
	}

	/**
	 * Commits the wrapped writer and the current log.
	 *
	 * <p>Unless a merge or the initial dump is in progress, this also rotates logs:
	 * when the current log has grown longer than the oldest one, a timestamp is
	 * appended to it and a new log file is started. When that leaves more than three
	 * logs, the two oldest are merged before this method returns.
	 */
	public synchronized void commit() throws IOException {
		indexWriter.commit();
		LogFile log = log();
		log.commit();
		if( isMerging )
			return;
		if( log.length() > logs.get(0).length() ) {
			// Written after the commit; mergeLogs() reads this value back as the merge time.
			log.writeLong( System.currentTimeMillis() );
			logs.add( newLogFile() );
			writeIndex();
		}
		if( logs.size() > 3 ) {
			isMerging = true;
			// The merge runs on the calling thread, while this instance's lock is held.
			mergeLogs.run();
		}
	}

	/**
	 * Rolls back the wrapped writer and repositions the current log with
	 * {@code gotoEnd()} (presumably discarding uncommitted entries - confirm in LogFile).
	 */
	public synchronized void rollback() throws IOException {
		indexWriter.rollback();
		LogFile log = log();
		log.gotoEnd();
	}

	/** Deletes all documents and logs the operation. */
	public synchronized void deleteAll() throws IOException {
		indexWriter.deleteAll();
		LogFile log = log();
		writeOp(log,OP_DELETE_ALL);
	}

	/** Deletes the documents matching {@code query} and logs the operation with its query. */
	public synchronized void deleteDocuments(Query query) throws IOException {
		indexWriter.deleteDocuments(query);
		LogFile log = log();
		writeOp(log,OP_DELETE_DOCUMENTS);
		log.writeQuery(query);
	}

	/** Adds a document and logs the operation with its stored fields. */
	public synchronized void addDocument(Map<String,Object> storedFields) throws IOException {
		indexWriter.addDocument(storedFields);
		LogFile log = log();
		writeOp(log,OP_ADD_DOCUMENT);
		log.writeMap(storedFields);
	}

	/** Updates a document and logs the operation with its key field name and stored fields. */
	public synchronized void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException {
		indexWriter.updateDocument(keyFieldName,storedFields);
		LogFile log = log();
		writeOp(log,OP_UPDATE_DOCUMENT);
		log.writeUTF(keyFieldName);
		log.writeMap(storedFields);
	}

	/** Forwards to the wrapped writer; nothing is written to the log for this operation. */
	public synchronized void reindexDocuments(String keyFieldName,Query query) throws IOException {
		indexWriter.reindexDocuments(keyFieldName,query);
	}

	/** Writes the common entry header: the current time followed by the operation code. */
	private void writeOp(LogFile log,int op) throws IOException {
		log.writeLong(System.currentTimeMillis());
		log.writeByte(op);
	}

	/**
	 * Rebuilds the wrapped index by replaying all logs into it.
	 *
	 * @throws RuntimeException if the wrapped index is not empty
	 */
	public synchronized void playLog() throws IOException {
		playLog(indexWriter);
	}

	/** Replays every log, oldest first, into {@code indexWriter}, which must be empty, then commits it. */
	private void playLog(LuceneIndexWriter indexWriter) throws IOException {
		if( numDocs(indexWriter) != 0 )
			throw new RuntimeException ("not empty");
		for( LogFile log : logs ) {
			playLog(log,indexWriter);
		}
		indexWriter.commit();
	}

	/** Returns the number of documents visible through a fresh reader of {@code indexWriter}. */
	private static int numDocs(LuceneIndexWriter indexWriter) throws IOException {
		IndexReader reader = indexWriter.openReader();
		try {
			return reader.numDocs();
		} finally {
			reader.close();
		}
	}

	/** Replays {@code log} from its start into {@code indexWriter}, one entry at a time. */
	private static void playLog(LogFile log,LuceneIndexWriter indexWriter) throws IOException {
		log.gotoStart();
		while( log.hasMore() ) {
			playOp(log,indexWriter);
		}
	}

	/**
	 * Reads one entry from {@code log} and applies it to {@code indexWriter}.
	 *
	 * @throws RuntimeException if the entry carries an unknown operation code
	 */
	private static void playOp(LogFile log,LuceneIndexWriter indexWriter) throws IOException {
		log.readLong();  // time
		int op = log.readByte();
		switch(op) {
		case OP_DELETE_ALL:
			indexWriter.deleteAll();
			return;
		case OP_DELETE_DOCUMENTS:
			indexWriter.deleteDocuments( log.readQuery() );
			return;
		case OP_ADD_DOCUMENT:
			{
				Map storedFields = log.readMap();
				indexWriter.addDocument(storedFields);
				return;
			}
		case OP_UPDATE_DOCUMENT:
			{
				String keyFieldName = log.readUTF();
				Map storedFields = log.readMap();
				indexWriter.updateDocument(keyFieldName,storedFields);
				return;
			}
		default:
			throw new RuntimeException("invalid op "+op);
		}
	}

	/** Debugging aid: prints the hit count and up to 100 documents of {@code indexWriter} to standard output. */
	private static void dump(LuceneIndexWriter indexWriter) throws IOException {
		IndexReader reader = indexWriter.openReader();
		try {
			IndexSearcher searcher = new IndexSearcher(reader);
			Query query = new MatchAllDocsQuery();
			TopDocs td = searcher.search(query,100);
			System.out.println("totalHits = "+td.totalHits);
			for( int i=0; i<td.scoreDocs.length; i++ ) {
				Document doc = searcher.doc(td.scoreDocs[i].doc);
				System.out.println(LuceneUtils.toMap(doc));
			}
			System.out.println();
		} finally {
			reader.close();
		}
	}

}