changeset 1391:94f48cc76de8

add lucene check
author Franklin Schmidt <fschmidt@gmail.com>
date Thu, 05 Sep 2019 01:29:57 -0600 (2019-09-05)
parents 179c4882c6b6
children 002152af497a
files examples/blog/src/lib/Db.luan src/luan/modules/lucene/Lucene.luan src/luan/modules/lucene/LuceneIndex.java src/luan/modules/lucene/PostgresBackup.java
diffstat 4 files changed, 188 insertions(+), 38 deletions(-) [+]
line wrap: on
line diff
--- a/examples/blog/src/lib/Db.luan	Wed Sep 04 00:06:42 2019 -0600
+++ b/examples/blog/src/lib/Db.luan	Thu Sep 05 01:29:57 2019 -0600
@@ -21,4 +21,5 @@
 Db.db = Db.new("site:/private/local/lucene")
 
 --Db.db.restore_from_postgres()
+Db.db.check()
 return Db
--- a/src/luan/modules/lucene/Lucene.luan	Wed Sep 04 00:06:42 2019 -0600
+++ b/src/luan/modules/lucene/Lucene.luan	Thu Sep 05 01:29:57 2019 -0600
@@ -35,14 +35,15 @@
 
 Lucene.literal = SaneQueryParser.literal
 
-function Lucene.index(index_dir,default_type,default_fields,postgres_backup)
+function Lucene.index(index_dir,default_type,default_fields,completer)
 	type(index_dir)=="table" or error "index_dir must be table"
 	index_dir.to_uri_string and matches(index_dir.to_uri_string(),"^file:") or error "must be file"
 	local index = {}
 	index.dir = index_dir
-	local java_index, closer = LuceneIndex.getLuceneIndex(index_dir.java.file,default_type,default_fields,postgres_backup)
+	local java_index, closer = LuceneIndex.getLuceneIndex(index_dir.java.file,default_type,default_fields,completer)
 	index.java = java_index
 	index.closer = closer or error()
+	index.completer = completer
 
 	index.indexed_fields = {}
 	local mt = {}
@@ -70,8 +71,13 @@
 	index.close = closer.close
 
 	index.has_postgres_backup = java_index.hasPostgresBackup()
-	index.rebuild_postgres_backup = java_index.rebuild_postgres_backup
+	function index.rebuild_postgres_backup()
+		java_index.rebuild_postgres_backup(index.completer)
+	end
 	index.restore_from_postgres = java_index.restore_from_postgres
+	function index.check()
+		java_index.check(index.completer)
+	end
 
 	function index.search( query, from, to, options )
 		from or error "missing 'from' parameter"
--- a/src/luan/modules/lucene/LuceneIndex.java	Wed Sep 04 00:06:42 2019 -0600
+++ b/src/luan/modules/lucene/LuceneIndex.java	Thu Sep 05 01:29:57 2019 -0600
@@ -5,6 +5,7 @@
 import java.io.FileOutputStream;
 import java.io.FileInputStream;
 import java.io.IOException;
+import java.sql.SQLException;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.Map;
@@ -39,6 +40,7 @@
 import org.apache.lucene.index.SnapshotDeletionPolicy;
 import org.apache.lucene.index.IndexCommit;
 import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.index.CheckIndex;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
 import org.apache.lucene.util.Version;
@@ -76,6 +78,7 @@
 import luan.LuanFunction;
 import luan.LuanException;
 import luan.LuanRuntimeException;
+import luan.modules.parsers.LuanToString;
 import luan.lib.logging.Logger;
 import luan.lib.logging.LoggerFactory;
 
@@ -147,7 +150,7 @@
 	private final MultiFieldParser mfp;
 	private final Analyzer analyzer;
 
-	private File fileDir;
+	private FSDirectory fsDir;
 	private int writeCount;
 	private AtomicInteger writeCounter = new AtomicInteger();
 
@@ -193,12 +196,11 @@
 		IndexWriterConfig conf = new IndexWriterConfig(version,analyzer);
 		snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy());
 		conf.setIndexDeletionPolicy(snapshotDeletionPolicy);
-		FSDirectory dir = FSDirectory.open(indexDir);
-		fileDir = dir.getDirectory();
-		boolean wasCreated = !fileDir.exists();
-		writer = new IndexWriter(dir,conf);
+		fsDir = FSDirectory.open(indexDir);
+		boolean wasCreated = !fsDir.getDirectory().exists();
+		writer = new IndexWriter(fsDir,conf);
 		writer.commit();  // commit index creation
-		reader = DirectoryReader.open(dir);
+		reader = DirectoryReader.open(fsDir);
 		searcher = new IndexSearcher(reader);
 		initId();
 		return wasCreated;
@@ -235,14 +237,18 @@
 		if( postgresBackup != null ) {
 			final List<Long> ids = new ArrayList<Long>();
 			IndexSearcher searcher = openSearcher();
-			MyCollector col = new MyCollector() {
-				@Override public void collect(int iDoc) throws IOException {
-					Document doc = searcher.doc( docBase + iDoc );
-					Long id = (Long)doc.getField("id").numericValue();
-					ids.add(id);
-				}
-			};
-			searcher.search(query,col);
+			try {
+				MyCollector col = new MyCollector() {
+					@Override public void collect(int iDoc) throws IOException {
+						Document doc = searcher.doc( docBase + iDoc );
+						Long id = (Long)doc.getField("id").numericValue();
+						ids.add(id);
+					}
+				};
+				searcher.search(query,col);
+			} finally {
+				close(searcher);
+			}
 			postgresBackup.begin();
 			for( Long id : ids ) {
 				postgresBackup.delete(id);
@@ -292,13 +298,13 @@
 			if( id == null ) {
 				id = nextId();
 				doc.put("id",id);
+				if( postgresBackup != null )
+					postgresBackup.add(doc);
 				writer.addDocument(toLucene(doc,boosts));
-				if( postgresBackup != null )
-					postgresBackup.add(id,doc);
 			} else {
+				if( postgresBackup != null )
+					postgresBackup.update(doc);
 				writer.updateDocument( term("id",id), toLucene(doc,boosts) );
-				if( postgresBackup != null )
-					postgresBackup.update(id,doc);
 			}
 			if(commit) writer.commit();
 		} finally {
@@ -413,7 +419,7 @@
 	public Object snapshot(LuanFunction fn) throws LuanException, IOException {
 		IndexCommit ic = snapshotDeletionPolicy.snapshot();
 		try {
-			String dir = fileDir.toString();
+			String dir = fsDir.getDirectory().toString();
 			LuanTable fileNames = new LuanTable(fn.luan(),new ArrayList(ic.getFileNames()));
 			return fn.call(dir,fileNames);
 		} finally {
@@ -803,21 +809,19 @@
 		throws IOException, LuanException
 	{
 		writeLock.lock();
+		IndexSearcher searcher = openSearcher();
 		boolean ok = false;
 		try {
 			postgresBackup.begin();
 			postgresBackup.deleteAll();
 			Query query = new PrefixQuery(new Term("id"));
-			IndexSearcher searcher = openSearcher();
 			MyCollector col = new MyCollector() {
 				@Override public void collect(int iDoc) throws IOException {
 					try {
 						Document doc = searcher.doc( docBase + iDoc );
 						LuanTable tbl = toTable(completer.luan(),doc);
 						tbl = (LuanTable)completer.call(tbl);
-						Long id = (Long)tbl.get("id");
-						//logger.info("id = "+id);
-						postgresBackup.add(id,tbl);
+						postgresBackup.add(tbl);
 					} catch(LuanException e) {
 						throw new LuanRuntimeException(e);
 					}
@@ -831,6 +835,7 @@
 			ok = true;
 			postgresBackup.commit();
 		} finally {
+			close(searcher);
 			if( !ok )
 				postgresBackup.rollback();
 			writeLock.unlock();
@@ -869,4 +874,87 @@
 		writer.addDocument(toLucene(doc,null));
 	}
 
+	public void check(LuanFunction completer) throws IOException, SQLException, LuanException {
+		logger.info("start check");
+		CheckIndex.Status status = new CheckIndex(fsDir).checkIndex();
+		if( !status.clean )
+			logger.error("index not clean");
+		if( postgresBackup != null )
+			checkPostgres(completer);
+		logger.info("end check");
+	}
+
+	private void checkPostgres(LuanFunction completer) throws IOException, SQLException, LuanException {
+		final PostgresBackup.Checker postgresChecker;
+		final IndexSearcher searcher;
+		writeLock.lock();
+		try {
+			postgresChecker = postgresBackup.newChecker();
+			searcher = openSearcher();
+		} finally {
+			writeLock.unlock();
+		}
+		try {
+			final List<Long> idsLucene = new ArrayList<Long>();
+			Query query = new PrefixQuery(new Term("id"));
+			MyCollector col = new MyCollector() {
+				@Override public void collect(int iDoc) throws IOException {
+					Document doc = searcher.doc( docBase + iDoc );
+					Long id = (Long)doc.getField("id").numericValue();
+					idsLucene.add(id);
+				}
+			};
+			searcher.search(query,col);
+			Collections.sort(idsLucene);
+			final List<Long> idsPostgres = postgresChecker.getIds();
+			final int nLucene = idsLucene.size();
+			final int nPostgres = idsPostgres.size();
+			int iLucene = 0;
+			int iPostgres = 0;
+			LuanToString lts = new LuanToString();
+			lts.strict = true;
+			lts.numberTypes = true;
+			while( iLucene < nLucene && iPostgres < nPostgres ) {
+				long idLucene = idsLucene.get(iLucene);
+				long idPostgres = idsPostgres.get(iPostgres);
+				if( idLucene < idPostgres ) {
+					iLucene++;
+					logger.error("id "+idLucene+" found in lucene but not postgres");
+				} else if( idLucene > idPostgres ) {
+					iPostgres++;
+					logger.error("id "+idPostgres+" found in postgres but not lucene");
+				} else {  // ==
+					LuanTable docPostgres = postgresChecker.getDoc(idPostgres);
+					TopDocs td = searcher.search(new TermQuery(term("id",idLucene)),1);
+					if( td.totalHits != 1 )  throw new RuntimeException();
+					Document doc = searcher.doc( td.scoreDocs[0].doc );
+					LuanTable docLucene = toTable(completer.luan(),doc);
+					docLucene = (LuanTable)completer.call(docLucene);
+					if( !equal(docPostgres,docLucene) ) {
+						logger.error("id "+idLucene+" not equal");
+						logger.error("lucene = "+lts.toString(docLucene));
+						logger.error("postgres = "+lts.toString(docPostgres));
+					}
+					iLucene++;
+					iPostgres++;
+				}
+			}
+			while( iLucene < nLucene ) {
+				long idLucene = idsLucene.get(iLucene++);
+				logger.error("id "+idLucene+" found in lucene but not postgres");
+			}
+			while( iPostgres < nPostgres ) {
+				long idPostgres = idsPostgres.get(iPostgres++);
+				logger.error("id "+idPostgres+" found in postgres but not lucene");
+			}
+		} finally {
+			close(searcher);
+			postgresChecker.close();
+		}
+	}
+
+	private boolean equal(LuanTable t1,LuanTable t2) throws LuanException {
+		return t1.asMap().equals(t2.asMap());
+	}
+
 }
--- a/src/luan/modules/lucene/PostgresBackup.java	Wed Sep 04 00:06:42 2019 -0600
+++ b/src/luan/modules/lucene/PostgresBackup.java	Thu Sep 05 01:29:57 2019 -0600
@@ -8,6 +8,8 @@
 import java.sql.SQLException;
 import java.sql.ResultSet;
 import java.util.Properties;
+import java.util.List;
+import java.util.ArrayList;
 import luan.Luan;
 import luan.LuanTable;
 import luan.LuanFunction;
@@ -33,25 +35,25 @@
 	}
 
 	final boolean wasCreated;
+	private final String url;
+	private final Properties props = new Properties();
 	private final Connection con;
 	private final PreparedStatement insertStmt;
 	private final PreparedStatement updateStmt;
 	private final PreparedStatement deleteStmt;
 	private int trans = 0;
 	private final LuanToString luanToString = new LuanToString();
-	private final LuanTable env = new LuanTable(new Luan());
 
 	private PostgresBackup()
 		throws ClassNotFoundException, SQLException
 	{
 		Class.forName("org.postgresql.Driver");
 
-		String url = "jdbc:postgresql://localhost:5432/luan";
-		Properties props = new Properties();
+		url = "jdbc:postgresql://localhost:5432/luan";
 		props.setProperty("user","postgres");
 		props.setProperty("password","");
 
-		con = DriverManager.getConnection(url,props);
+		con = newConnection();
 
 		Statement stmt = con.createStatement();
 		boolean hasTable = stmt.executeQuery(
@@ -80,7 +82,10 @@
 
 		luanToString.strict = true;
 		luanToString.numberTypes = true;
-		LuanToString.addNumberTypes(env);
+	}
+
+	Connection newConnection() throws SQLException {
+		return DriverManager.getConnection(url,props);
 	}
 
 	void close() {
@@ -102,9 +107,9 @@
 		}
 	}
 
-	void add(long id,LuanTable doc) throws LuanException {
+	void add(LuanTable doc) throws LuanException {
 		try {
-//logger.info("getAutoCommit="+con.getAutoCommit());
+			Long id = (Long)doc.get("id");
 			String data = luanToString.toString(doc);
 			insertStmt.setLong(1,id);
 			insertStmt.setString(2,data);
@@ -114,15 +119,16 @@
 		}
 	}
 
-	void update(long id,LuanTable doc) throws LuanException {
+	void update(LuanTable doc) throws LuanException {
 		try {
+			Long id = (Long)doc.get("id");
 			String data = luanToString.toString(doc);
 			updateStmt.setString(1,data);
 			updateStmt.setLong(2,id);
 			int n = updateStmt.executeUpdate();
 			if( n==0 ) {
 				logger.error("update not found for id="+id+", trying add");
-				add(id,doc);
+				add(doc);
 			} else if( n!=1 )
 				throw new RuntimeException();
 		} catch(SQLException e) {
@@ -187,9 +193,13 @@
 		}
 	}
 
-	private final Luan luanEval = new Luan();
+	private static LuanTable newEnv() {
+		LuanTable env = new LuanTable(new Luan());
+		LuanToString.addNumberTypes(env);
+		return env;
+	}
 
-	private Object eval(String s) throws LuanException {
+	private static Object eval(String s,LuanTable env) throws LuanException {
 		LuanFunction fn = env.luan().load( "return "+s, "PostgresBackup", env );
 		return fn.call();
 	}
@@ -198,11 +208,12 @@
 		throws LuanException, IOException
 	{
 		try {
+			LuanTable env = newEnv();
 			Statement stmt = con.createStatement();
 			ResultSet rs = stmt.executeQuery("select data from lucene");
 			while( rs.next() ) {
 				String data = rs.getString("data");
-				LuanTable doc = (LuanTable)eval(data);
+				LuanTable doc = (LuanTable)eval(data,env);
 				li.restore(doc);
 			}
 			stmt.close();
@@ -228,4 +239,48 @@
 		}
 	}
 
+	final class Checker {
+		private final Connection con;
+		private final PreparedStatement pstmt;
+		private final LuanTable env = newEnv();
+
+		Checker() throws SQLException {
+			con = newConnection();
+			con.setAutoCommit(false);
+			pstmt = con.prepareStatement(
+				"select data from lucene where id=?"
+			);
+		}
+
+		void close() throws SQLException {
+			pstmt.close();
+			con.close();
+		}
+
+		List<Long> getIds() throws SQLException {
+			List<Long> ids = new ArrayList<Long>();
+			Statement stmt = con.createStatement();
+			ResultSet rs = stmt.executeQuery("select id from lucene order by id");
+			while( rs.next() ) {
+				long id = rs.getLong("id");
+				ids.add(id);
+			}
+			stmt.close();
+			return ids;
+		}
+
+		LuanTable getDoc(long id) throws SQLException, LuanException {
+			pstmt.setLong(1,id);
+			ResultSet rs = pstmt.executeQuery();
+			rs.next();
+			String data = rs.getString("data");
+			LuanTable doc = (LuanTable)eval(data,env);
+			return doc;
+		}
+	}
+
+	Checker newChecker() throws SQLException {
+		return new Checker();
+	}
+
 }