Mercurial Hosting > luan
changeset 1528:3bd4d7963456
use goodjava/lucene/api
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sun, 26 Jul 2020 23:11:53 -0600 (2020-07-27) |
parents | fa1e3adbebfb |
children | e6d808f40bbc |
files | src/goodjava/lucene/api/GoodIndexWriter.java src/goodjava/lucene/api/GoodIndexWriterConfig.java src/goodjava/lucene/api/LuceneIndexWriter.java src/goodjava/lucene/api/MultiFieldParserConfig.java src/goodjava/lucene/logging/LoggingIndexWriter.java src/luan/modules/lucene/Lucene.luan src/luan/modules/lucene/LuceneIndex.java src/luan/modules/lucene/PostgresBackup.java src/luan/modules/lucene/SupplementingConfig.java |
diffstat | 9 files changed, 142 insertions(+), 183 deletions(-) [+] |
line wrap: on
line diff
--- a/src/goodjava/lucene/api/GoodIndexWriter.java Sun Jul 26 15:06:15 2020 -0600 +++ b/src/goodjava/lucene/api/GoodIndexWriter.java Sun Jul 26 23:11:53 2020 -0600 @@ -2,7 +2,9 @@ import java.io.IOException; import java.util.Map; +import org.apache.lucene.index.LiveIndexWriterConfig; import org.apache.lucene.search.Query; +import org.apache.lucene.store.Directory; public interface GoodIndexWriter { @@ -14,4 +16,6 @@ public void addDocument(Map<String,Object> storedFields) throws IOException; public void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException; public void reindexDocuments(String keyFieldName,Query query) throws IOException; + public Directory getDirectory(); + public LiveIndexWriterConfig getLuceneConfig(); }
--- a/src/goodjava/lucene/api/GoodIndexWriterConfig.java Sun Jul 26 15:06:15 2020 -0600 +++ b/src/goodjava/lucene/api/GoodIndexWriterConfig.java Sun Jul 26 23:11:53 2020 -0600 @@ -2,9 +2,11 @@ import java.util.Map; import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.index.IndexWriterConfig; public interface GoodIndexWriterConfig { + public IndexWriterConfig newLuceneConfig(); public boolean isIndexed(String fieldName); public Analyzer getAnalyzer(String fieldName); public MoreFieldInfo getMoreFieldInfo(Map<String,Object> storedFields);
--- a/src/goodjava/lucene/api/LuceneIndexWriter.java Sun Jul 26 15:06:15 2020 -0600 +++ b/src/goodjava/lucene/api/LuceneIndexWriter.java Sun Jul 26 23:11:53 2020 -0600 @@ -16,6 +16,7 @@ import org.apache.lucene.document.FloatField; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.LiveIndexWriterConfig; import org.apache.lucene.index.Term; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; @@ -23,28 +24,36 @@ import org.apache.lucene.search.Query; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.Directory; -import org.apache.lucene.util.Version; import goodjava.logging.Logger; import goodjava.logging.LoggerFactory; public final class LuceneIndexWriter implements GoodIndexWriter { private static final Logger logger = LoggerFactory.getLogger(LuceneIndexWriter.class); - private final FieldAnalyzer fieldAnalyzer = new FieldAnalyzer(); - public final Version luceneVersion; - public final IndexWriterConfig luceneConfig; + private final FieldAnalyzer fieldAnalyzer; public final IndexWriter luceneWriter; public final GoodIndexWriterConfig goodConfig; private final Map<String,Boolean> indexedMap = new HashMap<String,Boolean>(); - public LuceneIndexWriter(Version luceneVersion,Directory dir,GoodIndexWriterConfig goodConfig) throws IOException { - this.luceneVersion = luceneVersion; - this.luceneConfig = new IndexWriterConfig(luceneVersion,fieldAnalyzer); + public LuceneIndexWriter(Directory dir,GoodIndexWriterConfig goodConfig) throws IOException { + IndexWriterConfig luceneConfig = goodConfig.newLuceneConfig(); + Analyzer analyzer = luceneConfig.getAnalyzer(); + if( !(analyzer instanceof FieldAnalyzer) ) + throw new RuntimeException("analyzer must be FieldAnalyzer"); + this.fieldAnalyzer = (FieldAnalyzer)analyzer; this.luceneWriter = new IndexWriter(dir,luceneConfig); this.goodConfig = goodConfig; luceneWriter.commit(); // commit index creation } + public Directory getDirectory() { + return luceneWriter.getDirectory(); + } + + public LiveIndexWriterConfig getLuceneConfig() { + return luceneWriter.getConfig(); + } + public void close() throws IOException { luceneWriter.close(); } @@ -74,7 +83,7 @@ if( !isIndexed(keyFieldName) ) throw new RuntimeException("can't update using unindexed field "+keyFieldName); if( fieldAnalyzer.isAdded(keyFieldName) ) - throw new RuntimeException("can't update using analyzeed field "+keyFieldName); + throw new RuntimeException("can't update using analyzed field "+keyFieldName); Document doc = newDocument(storedFields); Object keyValue = storedFields.get(keyFieldName); if( keyValue==null )
--- a/src/goodjava/lucene/api/MultiFieldParserConfig.java Sun Jul 26 15:06:15 2020 -0600 +++ b/src/goodjava/lucene/api/MultiFieldParserConfig.java Sun Jul 26 23:11:53 2020 -0600 @@ -4,18 +4,26 @@ import java.util.Collections; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.core.KeywordAnalyzer; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.util.Version; import goodjava.lucene.queryparser.MultiFieldParser; import goodjava.lucene.queryparser.FieldParser; import goodjava.lucene.queryparser.StringFieldParser; public class MultiFieldParserConfig implements GoodIndexWriterConfig { + private final Version luceneVersion; private final MultiFieldParser mfp; - public MultiFieldParserConfig(MultiFieldParser mfp) { + public MultiFieldParserConfig(Version luceneVersion,MultiFieldParser mfp) { + this.luceneVersion = luceneVersion; this.mfp = mfp; } + public IndexWriterConfig newLuceneConfig() { + return new IndexWriterConfig(luceneVersion,new FieldAnalyzer()); + } + public final boolean isIndexed(String fieldName) { return mfp.fields.containsKey(fieldName); }
--- a/src/goodjava/lucene/logging/LoggingIndexWriter.java Sun Jul 26 15:06:15 2020 -0600 +++ b/src/goodjava/lucene/logging/LoggingIndexWriter.java Sun Jul 26 23:11:53 2020 -0600 @@ -17,6 +17,7 @@ import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.Term; +import org.apache.lucene.index.LiveIndexWriterConfig; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.MatchAllDocsQuery; @@ -78,6 +79,14 @@ newLogs(); } + public Directory getDirectory() { + return indexWriter.getDirectory(); + } + + public LiveIndexWriterConfig getLuceneConfig() { + return indexWriter.getLuceneConfig(); + } + private void setLog() throws IOException { if( log != null ) log.close(); @@ -180,7 +189,7 @@ if( dirFile.exists() ) throw new RuntimeException(); Directory dir = FSDirectory.open(dirFile); - LuceneIndexWriter mergeWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig ); + LuceneIndexWriter mergeWriter = new LuceneIndexWriter( dir, indexWriter.goodConfig ); playLog( first.input(), mergeWriter ); playLog( second.input(), mergeWriter ); mergeWriter.commit(); @@ -251,7 +260,7 @@ File dirFile = new File(logDir,"check"); IoUtils.deleteRecursively(dirFile); Directory dir = FSDirectory.open(dirFile); - LuceneIndexWriter checkWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig ); + LuceneIndexWriter checkWriter = new LuceneIndexWriter( dir, indexWriter.goodConfig ); playLogs(logReaders,checkWriter); logger.info("check lucene"); IndexReader checkReader = checkWriter.openReader();
--- a/src/luan/modules/lucene/Lucene.luan Sun Jul 26 15:06:15 2020 -0600 +++ b/src/luan/modules/lucene/Lucene.luan Sun Jul 26 23:11:53 2020 -0600 @@ -40,9 +40,7 @@ options = options or {} local index = {} index.dir = index_dir - index.completer = options.completer - options.completer = nil - options.supplementer = nil + options.completer = nil -- remove local java_index = LuceneIndex.getLuceneIndex(index_dir.java.file,options) index.java = java_index @@ -62,28 +60,19 @@ index.search_in_transaction = java_index.search_in_transaction index.delete_all = java_index.delete_all index.delete = java_index.delete - --index.save = java_index.save + index.save = java_index.save index.run_in_transaction = java_index.run_in_transaction index.ensure_open = java_index.ensure_open index.next_id = java_index.nextId index.highlighter = java_index.highlighter - index.indexed_only_fields = java_index.indexed_only_fields + function index.indexed_only_fields(fields) end -- remove index.count_tokens = java_index.count_tokens --index.close = java_index.close - local java_save = java_index.save - function index.save(doc,boosts) - java_save(index.completer,doc,boosts) - end - index.has_postgres_backup = java_index.hasPostgresBackup() index.rebuild_postgres_backup = java_index.rebuild_postgres_backup - function index.restore_from_postgres() - java_index.restore_from_postgres(index.completer) - end - function index.force_restore_from_postgres() - java_index.force_restore_from_postgres(index.completer) - end + index.restore_from_postgres = java_index.restore_from_postgres + index.force_restore_from_postgres = java_index.force_restore_from_postgres index.check = java_index.check function index.search( query, from, to, options )
--- a/src/luan/modules/lucene/LuceneIndex.java Sun Jul 26 15:06:15 2020 -0600 +++ b/src/luan/modules/lucene/LuceneIndex.java Sun Jul 26 23:11:53 2020 -0600 @@ -12,6 +12,7 @@ import java.util.Iterator; import java.util.Map; import java.util.HashMap; +import java.util.LinkedHashMap; import java.util.List; import java.util.ArrayList; import java.util.Set; @@ -74,6 +75,10 @@ import goodjava.lucene.queryparser.MultiFieldParser; import goodjava.lucene.queryparser.StringFieldParser; import goodjava.lucene.queryparser.NumberFieldParser; +import goodjava.lucene.api.GoodIndexWriter; +import goodjava.lucene.api.LuceneIndexWriter; +import goodjava.lucene.api.GoodIndexWriterConfig; +import goodjava.lucene.api.LuceneUtils; import goodjava.parser.ParseException; import luan.modules.Utils; import luan.Luan; @@ -122,21 +127,17 @@ private final ReentrantLock writeLock = new ReentrantLock(); private final File indexDir; - private SnapshotDeletionPolicy snapshotDeletionPolicy; - private IndexWriter writer; + private GoodIndexWriter writer; private DirectoryReader reader; private IndexSearcher searcher; private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); private final MultiFieldParser mfp; - private final Analyzer analyzer; + private final Analyzer analyzer; // ??? private FSDirectory fsDir; private int writeCount; private AtomicInteger writeCounter = new AtomicInteger(); - - private Set<String> indexOnly = new HashSet<String>(); -// private final FieldParser defaultFieldParser; -// private final String[] defaultFields; + private final GoodIndexWriterConfig config; private final PostgresBackup postgresBackup; private boolean wasCreated; @@ -150,10 +151,9 @@ LuanTable defaultFieldsTbl = Utils.removeTable(options,"default_fields"); String[] defaultFields = defaultFieldsTbl==null ? null : (String[])defaultFieldsTbl.asList().toArray(new String[0]); LuanTable postgresSpec = Utils.removeTable(options,"postgres_spec"); + LuanFunction supplementer = Utils.removeFunction(options,"supplementer"); Utils.checkEmpty(options); -// this.defaultFieldParser = defaultFieldParser; -// this.defaultFields = defaultFields; mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields); mfp.fields.put( "type", STRING_FIELD_PARSER ); mfp.fields.put( "id", NumberFieldParser.LONG ); @@ -164,6 +164,7 @@ analyzer = sfp.analyzer; } this.analyzer = analyzer; + this.config = new SupplementingConfig(luceneVersion,mfp,supplementer); wasCreated = reopen(); if( postgresSpec == null ) { postgresBackup = null; @@ -182,13 +183,9 @@ } public boolean reopen() throws IOException { - IndexWriterConfig conf = new IndexWriterConfig(luceneVersion,analyzer); - snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy()); - conf.setIndexDeletionPolicy(snapshotDeletionPolicy); fsDir = FSDirectory.open(indexDir); boolean wasCreated = !fsDir.getDirectory().exists(); - writer = new IndexWriter(fsDir,conf); - writer.commit(); // commit index creation + writer = new LuceneIndexWriter(fsDir,config); reader = DirectoryReader.open(fsDir); searcher = new IndexSearcher(reader); initId(); @@ -263,16 +260,9 @@ } } - public void indexed_only_fields(List<String> fields) { - indexOnly.addAll(fields); - } - - public void save(LuanFunction completer,LuanTable doc,LuanTable boosts) + public void save(LuanTable doc) throws LuanException, IOException, SQLException { - if( boosts!=null && postgresBackup!=null ) - throw new LuanException("boosts are not saved to postgres backup"); - Object obj = doc.get("id"); Long id; try { @@ -289,11 +279,11 @@ doc.put("id",id); if( postgresBackup != null ) postgresBackup.add(doc); - writer.addDocument(toLucene(completer,doc,boosts)); + writer.addDocument(toLucene(doc)); } else { if( postgresBackup != null ) postgresBackup.update(doc); - writer.updateDocument( term("id",id), toLucene(completer,doc,boosts) ); + writer.updateDocument( "id", toLucene(doc) ); } if(commit) writer.commit(); } finally { @@ -368,10 +358,10 @@ } private void saveNextId(long nextId) throws LuanException, IOException { - Map doc = new HashMap(); + Map<String,Object> doc = new HashMap(); doc.put( "type", "next_id" ); doc.put( FLD_NEXT_ID, idLim ); - writer.updateDocument(new Term("type","next_id"),toLucene(doc.entrySet(),null)); + writer.updateDocument("type",doc); } public synchronized long nextId() throws LuanException, IOException { @@ -404,10 +394,11 @@ } */ public SnapshotDeletionPolicy snapshotDeletionPolicy() { - return snapshotDeletionPolicy; + return (SnapshotDeletionPolicy)writer.getLuceneConfig().getIndexDeletionPolicy(); } public Object snapshot(LuanFunction fn) throws LuanException, IOException { + SnapshotDeletionPolicy snapshotDeletionPolicy = snapshotDeletionPolicy(); IndexCommit ic = snapshotDeletionPolicy.snapshot(); try { String dir = fsDir.getDirectory().toString(); @@ -587,136 +578,34 @@ mfp.fields.put( field, fp ); } - - private IndexableField newField(String name,Object value,Set<String> indexed,Float boost) - throws LuanException - { - boolean hasBoost = boost!=null; - IndexableField fld = newField2(name,value,indexed,hasBoost); - if( hasBoost ) - ((Field)fld).setBoost(boost); - return fld; - } - - private IndexableField newField2(String name,Object value,Set<String> indexed,boolean hasBoost) - throws LuanException - { - Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES; - if( value instanceof String ) { - String s = (String)value; - FieldParser fp = mfp.fields.get(name); - if( fp != null ) { - if( fp instanceof StringFieldParser && fp != STRING_FIELD_PARSER ) { - return new TextField(name, s, store); - } else if (hasBoost) { - // fuck you modern lucene developers - return new Field(name, s, store, Field.Index.NOT_ANALYZED); - } else { - return new StringField(name, s, store); - } - } else { - return new StoredField(name, s); - } - } else if( value instanceof Integer ) { - int i = (Integer)value; - if( indexed.contains(name) ) { - return new IntField(name, i, store); - } else { - return new StoredField(name, i); - } - } else if( value instanceof Long ) { - long i = (Long)value; - if( indexed.contains(name) ) { - return new LongField(name, i, store); - } else { - return new StoredField(name, i); - } - } else if( value instanceof Double ) { - double i = (Double)value; - if( indexed.contains(name) ) { - return new DoubleField(name, i, store); - } else { - return new StoredField(name, i); - } - } else if( value instanceof byte[] ) { - byte[] b = (byte[])value; - return new StoredField(name, b); - } else - throw new LuanException("invalid value type "+value.getClass()+"' for '"+name+"'"); - } - - private Document toLucene(LuanFunction completer,LuanTable table,LuanTable boosts) throws LuanException { - if( completer != null ) - table = (LuanTable)completer.call(table); - return toLucene(table.iterable(),boosts); - } - - private Document toLucene(Iterable<Map.Entry> iterable,LuanTable boosts) throws LuanException { - Set<String> indexed = mfp.fields.keySet(); - Document doc = new Document(); - for( Map.Entry<Object,Object> entry : iterable ) { - Object key = entry.getKey(); - if( !(key instanceof String) ) - throw new LuanException("key must be string"); - String name = (String)key; - Object value = entry.getValue(); - Float boost = null; - if( boosts != null ) { - Object obj = boosts.get(name); - if( obj != null ) { - if( !(obj instanceof Number) ) - throw new LuanException("boost '"+name+"' must be number"); - boost = ((Number)obj).floatValue(); - } - } - if( !(value instanceof LuanTable) ) { - doc.add(newField( name, value, indexed, boost )); - } else { // list + static Map<String,Object> toLucene(LuanTable table) throws LuanException { + Map<String,Object> map = new LinkedHashMap<String,Object>(); + for( Map.Entry<Object,Object> entry : table.iterable() ) { + String name = (String)entry.getKey(); + Object value = entry.getValue(); + if( value instanceof LuanTable ) { LuanTable list = (LuanTable)value; if( !list.isList() ) throw new LuanException("table value for '"+name+"' must be a list"); - for( Object el : list.asList() ) { - doc.add(newField( name, el, indexed, boost )); - } + value = list.asList(); } + map.put(name,value); } - return doc; - } - - private static Object getValue(IndexableField ifld) throws LuanException { - BytesRef br = ifld.binaryValue(); - if( br != null ) - return br.bytes; - Number n = ifld.numericValue(); - if( n != null ) - return n; - String s = ifld.stringValue(); - if( s != null ) - return s; - throw new LuanException("invalid field type for "+ifld); + return map; } private static LuanTable toTable(Luan luan,Document doc) throws LuanException { - if( doc==null ) - return null; + return doc==null ? null : toTable(luan,LuceneUtils.toMap(doc)); + } + + static LuanTable toTable(Luan luan,Map map) throws LuanException { LuanTable table = new LuanTable(luan); - for( IndexableField ifld : doc ) { - String name = ifld.name(); - Object value = getValue(ifld); - Object old = table.rawGet(name); - if( old == null ) { - table.rawPut(name,value); - } else { - LuanTable list; - if( old instanceof LuanTable ) { - list = (LuanTable)old; - } else { - list = new LuanTable(luan); - list.rawPut(1,old); - table.rawPut(name,list); - } - list.rawPut(list.rawLength()+1,value); - } + for( Object obj : map.entrySet() ) { + Map.Entry entry = (Map.Entry)obj; + Object value = entry.getValue(); + if( value instanceof List ) + value = new LuanTable(luan,(List)value); + table.rawPut( entry.getKey(), value ); } return table; } @@ -843,16 +732,16 @@ logger.info("end rebuild_postgres_backup"); } - public void restore_from_postgres(LuanFunction completer) + public void restore_from_postgres() throws IOException, LuanException, SQLException, ParseException { if( postgresBackup!=null && wasCreated && !postgresBackup.wasCreated ) { logger.error("restoring from postgres"); - force_restore_from_postgres(completer); + force_restore_from_postgres(); } } - public void force_restore_from_postgres(LuanFunction completer) + public void force_restore_from_postgres() throws IOException, LuanException, SQLException, ParseException { logger.warn("start restore_from_postgres"); @@ -865,7 +754,7 @@ try { writer.deleteAll(); long nextId = postgresBackup.maxId() + 1; - postgresBackup.restoreLucene(this,completer); + postgresBackup.restoreLucene(this); id = idLim = nextId; saveNextId(nextId); ok = true; @@ -882,10 +771,10 @@ logger.warn("end restore_from_postgres"); } - void restore(LuanFunction completer,LuanTable doc) + void restore(LuanTable doc) throws LuanException, IOException { - writer.addDocument(toLucene(completer,doc,null)); + writer.addDocument(toLucene(doc)); } public void check(Luan luan) throws IOException, SQLException, LuanException, ParseException {
--- a/src/luan/modules/lucene/PostgresBackup.java Sun Jul 26 15:06:15 2020 -0600 +++ b/src/luan/modules/lucene/PostgresBackup.java Sun Jul 26 23:11:53 2020 -0600 @@ -155,7 +155,7 @@ con.setAutoCommit(true); } - void restoreLucene(LuceneIndex li,LuanFunction completer) + void restoreLucene(LuceneIndex li) throws LuanException, IOException, SQLException, ParseException { Luan luan = new Luan(); @@ -164,7 +164,7 @@ while( rs.next() ) { String data = rs.getString("data"); LuanTable doc = (LuanTable)LuanParser.parse(luan,data); - li.restore(completer,doc); + li.restore(doc); } stmt.close(); }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/luan/modules/lucene/SupplementingConfig.java Sun Jul 26 23:11:53 2020 -0600 @@ -0,0 +1,49 @@ +package luan.modules.lucene; + +import java.util.Map; +import java.util.Collections; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.SnapshotDeletionPolicy; +import org.apache.lucene.util.Version; +import goodjava.lucene.queryparser.MultiFieldParser; +import goodjava.lucene.api.MultiFieldParserConfig; +import goodjava.lucene.api.MoreFieldInfo; +import luan.LuanFunction; +import luan.LuanTable; +import luan.LuanCloner; +import luan.LuanException; +import luan.LuanRuntimeException; + + +final class SupplementingConfig extends MultiFieldParserConfig { + private final LuanFunction supplementer; + + SupplementingConfig(Version luceneVersion,MultiFieldParser mfp,LuanFunction supplementer) { + super(luceneVersion,mfp); + LuanCloner cloner = new LuanCloner(LuanCloner.Type.COMPLETE); + this.supplementer = (LuanFunction)cloner.get(supplementer); + } + + public IndexWriterConfig newLuceneConfig() { + IndexWriterConfig luceneConfig = super.newLuceneConfig(); + SnapshotDeletionPolicy snapshotDeletionPolicy = new SnapshotDeletionPolicy(luceneConfig.getIndexDeletionPolicy()); + luceneConfig.setIndexDeletionPolicy(snapshotDeletionPolicy); + return luceneConfig; + } + + public MoreFieldInfo getMoreFieldInfo(Map<String,Object> storedFields) { + if( supplementer == null ) + return super.getMoreFieldInfo(storedFields); + try { + LuanTable tbl = LuceneIndex.toTable(supplementer.luan(),storedFields); + tbl = (LuanTable)supplementer.call(tbl); + if( tbl == null ) { + return super.getMoreFieldInfo(storedFields); + } else { + return new MoreFieldInfo(LuceneIndex.toLucene(tbl),Collections.emptyMap()); + } + } catch(LuanException e) { + throw new LuanRuntimeException(e); + } + } +}