comparison src/luan/modules/lucene/LuceneIndex.java @ 1528:3bd4d7963456

use goodjava/lucene/api
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 26 Jul 2020 23:11:53 -0600
parents efbc3720d3f3
children e6d808f40bbc
comparison
equal deleted inserted replaced
1527:fa1e3adbebfb 1528:3bd4d7963456
10 import java.sql.SQLException; 10 import java.sql.SQLException;
11 import java.util.Arrays; 11 import java.util.Arrays;
12 import java.util.Iterator; 12 import java.util.Iterator;
13 import java.util.Map; 13 import java.util.Map;
14 import java.util.HashMap; 14 import java.util.HashMap;
15 import java.util.LinkedHashMap;
15 import java.util.List; 16 import java.util.List;
16 import java.util.ArrayList; 17 import java.util.ArrayList;
17 import java.util.Set; 18 import java.util.Set;
18 import java.util.HashSet; 19 import java.util.HashSet;
19 import java.util.Collections; 20 import java.util.Collections;
72 import goodjava.lucene.queryparser.GoodQueryParser; 73 import goodjava.lucene.queryparser.GoodQueryParser;
73 import goodjava.lucene.queryparser.FieldParser; 74 import goodjava.lucene.queryparser.FieldParser;
74 import goodjava.lucene.queryparser.MultiFieldParser; 75 import goodjava.lucene.queryparser.MultiFieldParser;
75 import goodjava.lucene.queryparser.StringFieldParser; 76 import goodjava.lucene.queryparser.StringFieldParser;
76 import goodjava.lucene.queryparser.NumberFieldParser; 77 import goodjava.lucene.queryparser.NumberFieldParser;
78 import goodjava.lucene.api.GoodIndexWriter;
79 import goodjava.lucene.api.LuceneIndexWriter;
80 import goodjava.lucene.api.GoodIndexWriterConfig;
81 import goodjava.lucene.api.LuceneUtils;
77 import goodjava.parser.ParseException; 82 import goodjava.parser.ParseException;
78 import luan.modules.Utils; 83 import luan.modules.Utils;
79 import luan.Luan; 84 import luan.Luan;
80 import luan.LuanTable; 85 import luan.LuanTable;
81 import luan.LuanFunction; 86 import luan.LuanFunction;
120 125
121 private final Object version; 126 private final Object version;
122 127
123 private final ReentrantLock writeLock = new ReentrantLock(); 128 private final ReentrantLock writeLock = new ReentrantLock();
124 private final File indexDir; 129 private final File indexDir;
125 private SnapshotDeletionPolicy snapshotDeletionPolicy; 130 private GoodIndexWriter writer;
126 private IndexWriter writer;
127 private DirectoryReader reader; 131 private DirectoryReader reader;
128 private IndexSearcher searcher; 132 private IndexSearcher searcher;
129 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>(); 133 private final ThreadLocal<IndexSearcher> threadLocalSearcher = new ThreadLocal<IndexSearcher>();
130 private final MultiFieldParser mfp; 134 private final MultiFieldParser mfp;
131 private final Analyzer analyzer; 135 private final Analyzer analyzer; // ???
132 136
133 private FSDirectory fsDir; 137 private FSDirectory fsDir;
134 private int writeCount; 138 private int writeCount;
135 private AtomicInteger writeCounter = new AtomicInteger(); 139 private AtomicInteger writeCounter = new AtomicInteger();
136 140 private final GoodIndexWriterConfig config;
137 private Set<String> indexOnly = new HashSet<String>();
138 // private final FieldParser defaultFieldParser;
139 // private final String[] defaultFields;
140 141
141 private final PostgresBackup postgresBackup; 142 private final PostgresBackup postgresBackup;
142 private boolean wasCreated; 143 private boolean wasCreated;
143 144
144 private LuceneIndex(Luan luan,File indexDir,LuanTable options) 145 private LuceneIndex(Luan luan,File indexDir,LuanTable options)
148 this.version = options.remove("version"); 149 this.version = options.remove("version");
149 FieldParser defaultFieldParser = (FieldParser)options.remove("default_type"); 150 FieldParser defaultFieldParser = (FieldParser)options.remove("default_type");
150 LuanTable defaultFieldsTbl = Utils.removeTable(options,"default_fields"); 151 LuanTable defaultFieldsTbl = Utils.removeTable(options,"default_fields");
151 String[] defaultFields = defaultFieldsTbl==null ? null : (String[])defaultFieldsTbl.asList().toArray(new String[0]); 152 String[] defaultFields = defaultFieldsTbl==null ? null : (String[])defaultFieldsTbl.asList().toArray(new String[0]);
152 LuanTable postgresSpec = Utils.removeTable(options,"postgres_spec"); 153 LuanTable postgresSpec = Utils.removeTable(options,"postgres_spec");
154 LuanFunction supplementer = Utils.removeFunction(options,"supplementer");
153 Utils.checkEmpty(options); 155 Utils.checkEmpty(options);
154 156
155 // this.defaultFieldParser = defaultFieldParser;
156 // this.defaultFields = defaultFields;
157 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields); 157 mfp = defaultFieldParser==null ? new MultiFieldParser() : new MultiFieldParser(defaultFieldParser,defaultFields);
158 mfp.fields.put( "type", STRING_FIELD_PARSER ); 158 mfp.fields.put( "type", STRING_FIELD_PARSER );
159 mfp.fields.put( "id", NumberFieldParser.LONG ); 159 mfp.fields.put( "id", NumberFieldParser.LONG );
160 this.indexDir = indexDir; 160 this.indexDir = indexDir;
161 Analyzer analyzer = STRING_FIELD_PARSER.analyzer; 161 Analyzer analyzer = STRING_FIELD_PARSER.analyzer;
162 if( defaultFieldParser instanceof StringFieldParser ) { 162 if( defaultFieldParser instanceof StringFieldParser ) {
163 StringFieldParser sfp = (StringFieldParser)defaultFieldParser; 163 StringFieldParser sfp = (StringFieldParser)defaultFieldParser;
164 analyzer = sfp.analyzer; 164 analyzer = sfp.analyzer;
165 } 165 }
166 this.analyzer = analyzer; 166 this.analyzer = analyzer;
167 this.config = new SupplementingConfig(luceneVersion,mfp,supplementer);
167 wasCreated = reopen(); 168 wasCreated = reopen();
168 if( postgresSpec == null ) { 169 if( postgresSpec == null ) {
169 postgresBackup = null; 170 postgresBackup = null;
170 } else { 171 } else {
171 postgresBackup = new PostgresBackup(luan,postgresSpec); 172 postgresBackup = new PostgresBackup(luan,postgresSpec);
180 } 181 }
181 } 182 }
182 } 183 }
183 184
184 public boolean reopen() throws IOException { 185 public boolean reopen() throws IOException {
185 IndexWriterConfig conf = new IndexWriterConfig(luceneVersion,analyzer);
186 snapshotDeletionPolicy = new SnapshotDeletionPolicy(conf.getIndexDeletionPolicy());
187 conf.setIndexDeletionPolicy(snapshotDeletionPolicy);
188 fsDir = FSDirectory.open(indexDir); 186 fsDir = FSDirectory.open(indexDir);
189 boolean wasCreated = !fsDir.getDirectory().exists(); 187 boolean wasCreated = !fsDir.getDirectory().exists();
190 writer = new IndexWriter(fsDir,conf); 188 writer = new LuceneIndexWriter(fsDir,config);
191 writer.commit(); // commit index creation
192 reader = DirectoryReader.open(fsDir); 189 reader = DirectoryReader.open(fsDir);
193 searcher = new IndexSearcher(reader); 190 searcher = new IndexSearcher(reader);
194 initId(); 191 initId();
195 return wasCreated; 192 return wasCreated;
196 } 193 }
261 wrote(); 258 wrote();
262 writeLock.unlock(); 259 writeLock.unlock();
263 } 260 }
264 } 261 }
265 262
266 public void indexed_only_fields(List<String> fields) { 263 public void save(LuanTable doc)
267 indexOnly.addAll(fields);
268 }
269
270 public void save(LuanFunction completer,LuanTable doc,LuanTable boosts)
271 throws LuanException, IOException, SQLException 264 throws LuanException, IOException, SQLException
272 { 265 {
273 if( boosts!=null && postgresBackup!=null )
274 throw new LuanException("boosts are not saved to postgres backup");
275
276 Object obj = doc.get("id"); 266 Object obj = doc.get("id");
277 Long id; 267 Long id;
278 try { 268 try {
279 id = (Long)obj; 269 id = (Long)obj;
280 } catch(ClassCastException e) { 270 } catch(ClassCastException e) {
287 if( id == null ) { 277 if( id == null ) {
288 id = nextId(); 278 id = nextId();
289 doc.put("id",id); 279 doc.put("id",id);
290 if( postgresBackup != null ) 280 if( postgresBackup != null )
291 postgresBackup.add(doc); 281 postgresBackup.add(doc);
292 writer.addDocument(toLucene(completer,doc,boosts)); 282 writer.addDocument(toLucene(doc));
293 } else { 283 } else {
294 if( postgresBackup != null ) 284 if( postgresBackup != null )
295 postgresBackup.update(doc); 285 postgresBackup.update(doc);
296 writer.updateDocument( term("id",id), toLucene(completer,doc,boosts) ); 286 writer.updateDocument( "id", toLucene(doc) );
297 } 287 }
298 if(commit) writer.commit(); 288 if(commit) writer.commit();
299 } finally { 289 } finally {
300 wrote(); 290 wrote();
301 writeLock.unlock(); 291 writeLock.unlock();
366 throw new RuntimeException(); 356 throw new RuntimeException();
367 } 357 }
368 } 358 }
369 359
370 private void saveNextId(long nextId) throws LuanException, IOException { 360 private void saveNextId(long nextId) throws LuanException, IOException {
371 Map doc = new HashMap(); 361 Map<String,Object> doc = new HashMap();
372 doc.put( "type", "next_id" ); 362 doc.put( "type", "next_id" );
373 doc.put( FLD_NEXT_ID, idLim ); 363 doc.put( FLD_NEXT_ID, idLim );
374 writer.updateDocument(new Term("type","next_id"),toLucene(doc.entrySet(),null)); 364 writer.updateDocument("type",doc);
375 } 365 }
376 366
377 public synchronized long nextId() throws LuanException, IOException { 367 public synchronized long nextId() throws LuanException, IOException {
378 if( ++id > idLim ) { 368 if( ++id > idLim ) {
379 idLim += idBatch; 369 idLim += idBatch;
402 snapshotDeletionPolicy.release(ic); 392 snapshotDeletionPolicy.release(ic);
403 } 393 }
404 } 394 }
405 */ 395 */
406 public SnapshotDeletionPolicy snapshotDeletionPolicy() { 396 public SnapshotDeletionPolicy snapshotDeletionPolicy() {
407 return snapshotDeletionPolicy; 397 return (SnapshotDeletionPolicy)writer.getLuceneConfig().getIndexDeletionPolicy();
408 } 398 }
409 399
410 public Object snapshot(LuanFunction fn) throws LuanException, IOException { 400 public Object snapshot(LuanFunction fn) throws LuanException, IOException {
401 SnapshotDeletionPolicy snapshotDeletionPolicy = snapshotDeletionPolicy();
411 IndexCommit ic = snapshotDeletionPolicy.snapshot(); 402 IndexCommit ic = snapshotDeletionPolicy.snapshot();
412 try { 403 try {
413 String dir = fsDir.getDirectory().toString(); 404 String dir = fsDir.getDirectory().toString();
414 LuanTable fileNames = new LuanTable(fn.luan(),new ArrayList(ic.getFileNames())); 405 LuanTable fileNames = new LuanTable(fn.luan(),new ArrayList(ic.getFileNames()));
415 return fn.call(dir,fileNames); 406 return fn.call(dir,fileNames);
585 return; 576 return;
586 } 577 }
587 mfp.fields.put( field, fp ); 578 mfp.fields.put( field, fp );
588 } 579 }
589 580
590 581 static Map<String,Object> toLucene(LuanTable table) throws LuanException {
591 private IndexableField newField(String name,Object value,Set<String> indexed,Float boost) 582 Map<String,Object> map = new LinkedHashMap<String,Object>();
592 throws LuanException 583 for( Map.Entry<Object,Object> entry : table.iterable() ) {
593 { 584 String name = (String)entry.getKey();
594 boolean hasBoost = boost!=null; 585 Object value = entry.getValue();
595 IndexableField fld = newField2(name,value,indexed,hasBoost); 586 if( value instanceof LuanTable ) {
596 if( hasBoost )
597 ((Field)fld).setBoost(boost);
598 return fld;
599 }
600
601 private IndexableField newField2(String name,Object value,Set<String> indexed,boolean hasBoost)
602 throws LuanException
603 {
604 Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES;
605 if( value instanceof String ) {
606 String s = (String)value;
607 FieldParser fp = mfp.fields.get(name);
608 if( fp != null ) {
609 if( fp instanceof StringFieldParser && fp != STRING_FIELD_PARSER ) {
610 return new TextField(name, s, store);
611 } else if (hasBoost) {
612 // fuck you modern lucene developers
613 return new Field(name, s, store, Field.Index.NOT_ANALYZED);
614 } else {
615 return new StringField(name, s, store);
616 }
617 } else {
618 return new StoredField(name, s);
619 }
620 } else if( value instanceof Integer ) {
621 int i = (Integer)value;
622 if( indexed.contains(name) ) {
623 return new IntField(name, i, store);
624 } else {
625 return new StoredField(name, i);
626 }
627 } else if( value instanceof Long ) {
628 long i = (Long)value;
629 if( indexed.contains(name) ) {
630 return new LongField(name, i, store);
631 } else {
632 return new StoredField(name, i);
633 }
634 } else if( value instanceof Double ) {
635 double i = (Double)value;
636 if( indexed.contains(name) ) {
637 return new DoubleField(name, i, store);
638 } else {
639 return new StoredField(name, i);
640 }
641 } else if( value instanceof byte[] ) {
642 byte[] b = (byte[])value;
643 return new StoredField(name, b);
644 } else
645 throw new LuanException("invalid value type "+value.getClass()+"' for '"+name+"'");
646 }
647
648 private Document toLucene(LuanFunction completer,LuanTable table,LuanTable boosts) throws LuanException {
649 if( completer != null )
650 table = (LuanTable)completer.call(table);
651 return toLucene(table.iterable(),boosts);
652 }
653
654 private Document toLucene(Iterable<Map.Entry> iterable,LuanTable boosts) throws LuanException {
655 Set<String> indexed = mfp.fields.keySet();
656 Document doc = new Document();
657 for( Map.Entry<Object,Object> entry : iterable ) {
658 Object key = entry.getKey();
659 if( !(key instanceof String) )
660 throw new LuanException("key must be string");
661 String name = (String)key;
662 Object value = entry.getValue();
663 Float boost = null;
664 if( boosts != null ) {
665 Object obj = boosts.get(name);
666 if( obj != null ) {
667 if( !(obj instanceof Number) )
668 throw new LuanException("boost '"+name+"' must be number");
669 boost = ((Number)obj).floatValue();
670 }
671 }
672 if( !(value instanceof LuanTable) ) {
673 doc.add(newField( name, value, indexed, boost ));
674 } else { // list
675 LuanTable list = (LuanTable)value; 587 LuanTable list = (LuanTable)value;
676 if( !list.isList() ) 588 if( !list.isList() )
677 throw new LuanException("table value for '"+name+"' must be a list"); 589 throw new LuanException("table value for '"+name+"' must be a list");
678 for( Object el : list.asList() ) { 590 value = list.asList();
679 doc.add(newField( name, el, indexed, boost )); 591 }
680 } 592 map.put(name,value);
681 } 593 }
682 } 594 return map;
683 return doc;
684 }
685
686 private static Object getValue(IndexableField ifld) throws LuanException {
687 BytesRef br = ifld.binaryValue();
688 if( br != null )
689 return br.bytes;
690 Number n = ifld.numericValue();
691 if( n != null )
692 return n;
693 String s = ifld.stringValue();
694 if( s != null )
695 return s;
696 throw new LuanException("invalid field type for "+ifld);
697 } 595 }
698 596
699 private static LuanTable toTable(Luan luan,Document doc) throws LuanException { 597 private static LuanTable toTable(Luan luan,Document doc) throws LuanException {
700 if( doc==null ) 598 return doc==null ? null : toTable(luan,LuceneUtils.toMap(doc));
701 return null; 599 }
600
601 static LuanTable toTable(Luan luan,Map map) throws LuanException {
702 LuanTable table = new LuanTable(luan); 602 LuanTable table = new LuanTable(luan);
703 for( IndexableField ifld : doc ) { 603 for( Object obj : map.entrySet() ) {
704 String name = ifld.name(); 604 Map.Entry entry = (Map.Entry)obj;
705 Object value = getValue(ifld); 605 Object value = entry.getValue();
706 Object old = table.rawGet(name); 606 if( value instanceof List )
707 if( old == null ) { 607 value = new LuanTable(luan,(List)value);
708 table.rawPut(name,value); 608 table.rawPut( entry.getKey(), value );
709 } else {
710 LuanTable list;
711 if( old instanceof LuanTable ) {
712 list = (LuanTable)old;
713 } else {
714 list = new LuanTable(luan);
715 list.rawPut(1,old);
716 table.rawPut(name,list);
717 }
718 list.rawPut(list.rawLength()+1,value);
719 }
720 } 609 }
721 return table; 610 return table;
722 } 611 }
723 612
724 613
841 writeLock.unlock(); 730 writeLock.unlock();
842 } 731 }
843 logger.info("end rebuild_postgres_backup"); 732 logger.info("end rebuild_postgres_backup");
844 } 733 }
845 734
846 public void restore_from_postgres(LuanFunction completer) 735 public void restore_from_postgres()
847 throws IOException, LuanException, SQLException, ParseException 736 throws IOException, LuanException, SQLException, ParseException
848 { 737 {
849 if( postgresBackup!=null && wasCreated && !postgresBackup.wasCreated ) { 738 if( postgresBackup!=null && wasCreated && !postgresBackup.wasCreated ) {
850 logger.error("restoring from postgres"); 739 logger.error("restoring from postgres");
851 force_restore_from_postgres(completer); 740 force_restore_from_postgres();
852 } 741 }
853 } 742 }
854 743
855 public void force_restore_from_postgres(LuanFunction completer) 744 public void force_restore_from_postgres()
856 throws IOException, LuanException, SQLException, ParseException 745 throws IOException, LuanException, SQLException, ParseException
857 { 746 {
858 logger.warn("start restore_from_postgres"); 747 logger.warn("start restore_from_postgres");
859 if( postgresBackup==null ) 748 if( postgresBackup==null )
860 throw new NullPointerException(); 749 throw new NullPointerException();
863 writeLock.lock(); 752 writeLock.lock();
864 boolean ok = false; 753 boolean ok = false;
865 try { 754 try {
866 writer.deleteAll(); 755 writer.deleteAll();
867 long nextId = postgresBackup.maxId() + 1; 756 long nextId = postgresBackup.maxId() + 1;
868 postgresBackup.restoreLucene(this,completer); 757 postgresBackup.restoreLucene(this);
869 id = idLim = nextId; 758 id = idLim = nextId;
870 saveNextId(nextId); 759 saveNextId(nextId);
871 ok = true; 760 ok = true;
872 writer.commit(); 761 writer.commit();
873 wasCreated = false; 762 wasCreated = false;
880 writeLock.unlock(); 769 writeLock.unlock();
881 } 770 }
882 logger.warn("end restore_from_postgres"); 771 logger.warn("end restore_from_postgres");
883 } 772 }
884 773
885 void restore(LuanFunction completer,LuanTable doc) 774 void restore(LuanTable doc)
886 throws LuanException, IOException 775 throws LuanException, IOException
887 { 776 {
888 writer.addDocument(toLucene(completer,doc,null)); 777 writer.addDocument(toLucene(doc));
889 } 778 }
890 779
891 public void check(Luan luan) throws IOException, SQLException, LuanException, ParseException { 780 public void check(Luan luan) throws IOException, SQLException, LuanException, ParseException {
892 boolean hasPostgres = postgresBackup != null; 781 boolean hasPostgres = postgresBackup != null;
893 String msg = "start check"; 782 String msg = "start check";