Mercurial Hosting > luan
changeset 1460:3ab0d043370f
start lucene.api
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Mon, 23 Mar 2020 00:04:42 -0600 |
parents | b04b8fc5f4f4 |
children | e5d48b85351c |
files | src/goodjava/lucene/api/FieldAnalyzer.java src/goodjava/lucene/api/GoodCollector.java src/goodjava/lucene/api/GoodIndexWriter.java src/goodjava/lucene/api/GoodIndexWriterConfig.java src/goodjava/lucene/api/LuceneIndexWriter.java src/goodjava/lucene/api/LuceneUtils.java src/goodjava/lucene/api/MultiFieldParserConfig.java |
diffstat | 7 files changed, 363 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
diff -r b04b8fc5f4f4 -r 3ab0d043370f src/goodjava/lucene/api/FieldAnalyzer.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/api/FieldAnalyzer.java Mon Mar 23 00:04:42 2020 -0600 @@ -0,0 +1,31 @@ +package goodjava.lucene.api; + +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.AnalyzerWrapper; +import org.apache.lucene.analysis.core.KeywordAnalyzer; + + +public final class FieldAnalyzer extends AnalyzerWrapper { + private static final Analyzer defaultAnalyzer = new KeywordAnalyzer(); + private final Map<String,Analyzer> fieldAnalyzers = new ConcurrentHashMap<String,Analyzer>(); + + public void add(String fieldName,Analyzer analyzer) { + fieldAnalyzers.put(fieldName,analyzer); + } + + public boolean isAdded(String fieldName) { + return fieldAnalyzers.containsKey(fieldName); + } + + protected Analyzer getWrappedAnalyzer(String fieldName) { + Analyzer analyzer = fieldAnalyzers.get(fieldName); +/* + if( analyzer == null ) + throw new RuntimeException("no analyzer for field: "+fieldName); + return analyzer; +*/ + return analyzer!=null ? analyzer : defaultAnalyzer; + } +}
diff -r b04b8fc5f4f4 -r 3ab0d043370f src/goodjava/lucene/api/GoodCollector.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/api/GoodCollector.java Mon Mar 23 00:04:42 2020 -0600 @@ -0,0 +1,27 @@ +package goodjava.lucene.api; + +import java.io.IOException; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.search.Collector; +import org.apache.lucene.search.Scorer; + + +public abstract class GoodCollector extends Collector { + private int docBase; + + public boolean acceptsDocsOutOfOrder() { + return true; + } + + public void setScorer(Scorer scorer) {} + + public void setNextReader(AtomicReaderContext context) { + this.docBase = context.docBase; + } + + public void collect(int doc) throws IOException { + collectDoc(docBase+doc); + } + + public abstract void collectDoc(int doc) throws IOException; +}
diff -r b04b8fc5f4f4 -r 3ab0d043370f src/goodjava/lucene/api/GoodIndexWriter.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/api/GoodIndexWriter.java Mon Mar 23 00:04:42 2020 -0600 @@ -0,0 +1,17 @@ +package goodjava.lucene.api; + +import java.io.IOException; +import java.util.Map; +import org.apache.lucene.search.Query; + + +public interface GoodIndexWriter { + public void close() throws IOException; + public void commit() throws IOException; + public void rollback() throws IOException; + public void deleteAll() throws IOException; + public void deleteDocuments(Query query) throws IOException; + public void addDocument(Map<String,Object> storedFields) throws IOException; + public void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException; + public void reindexDocuments(String keyFieldName,Query query) throws IOException; +}
diff -r b04b8fc5f4f4 -r 3ab0d043370f src/goodjava/lucene/api/GoodIndexWriterConfig.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/api/GoodIndexWriterConfig.java Mon Mar 23 00:04:42 2020 -0600 @@ -0,0 +1,11 @@ +package goodjava.lucene.api; + +import java.util.Map; +import org.apache.lucene.analysis.Analyzer; + + +public interface GoodIndexWriterConfig { + public boolean isIndexed(String fieldName); + public Analyzer getAnalyzer(String fieldName); + public Map<String,Object> getUnstoredFields(Map<String,Object> storedFields); +}
diff -r b04b8fc5f4f4 -r 3ab0d043370f src/goodjava/lucene/api/LuceneIndexWriter.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/api/LuceneIndexWriter.java Mon Mar 23 00:04:42 2020 -0600 @@ -0,0 +1,176 @@ +package goodjava.lucene.api; + +import java.io.IOException; +import java.util.Map; +import java.util.HashMap; +import java.util.List; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.document.StringField; +import org.apache.lucene.document.TextField; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.LongField; +import org.apache.lucene.document.DoubleField; +import org.apache.lucene.document.FloatField; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.index.IndexWriterConfig; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.DirectoryReader; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.search.Query; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.store.Directory; +import org.apache.lucene.util.Version; + + +public final class LuceneIndexWriter implements GoodIndexWriter { + final FieldAnalyzer fieldAnalyzer = new FieldAnalyzer(); + public final IndexWriterConfig luceneConfig; + public final GoodIndexWriterConfig goodConfig; + public final IndexWriter luceneWriter; + private final Map<String,Boolean> indexedMap = new HashMap<String,Boolean>(); + + public LuceneIndexWriter(Version matchVersion,Directory dir,GoodIndexWriterConfig goodConfig) throws IOException { + luceneConfig = new IndexWriterConfig(matchVersion,fieldAnalyzer); + luceneWriter = new IndexWriter(dir,luceneConfig); + this.goodConfig = goodConfig; + } + + public void close() throws IOException { + luceneWriter.close(); + } + + public void commit() throws IOException { + luceneWriter.commit(); + } + + public void rollback() throws IOException { + luceneWriter.rollback(); + } + + public void deleteAll() throws IOException { + luceneWriter.deleteAll(); + } + + public void deleteDocuments(Query query) throws IOException { + luceneWriter.deleteDocuments(query); + } + + public void addDocument(Map<String,Object> storedFields) throws IOException { + Document doc = newDocument(storedFields); + luceneWriter.addDocument(doc); + } + + public void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException { + if( !isIndexed(keyFieldName) ) + throw new RuntimeException("can't update using unindexed field "+keyFieldName); + if( fieldAnalyzer.isAdded(keyFieldName) ) + throw new RuntimeException("can't update using analyzeed field "+keyFieldName); + Document doc = newDocument(storedFields); + Object keyValue = storedFields.get(keyFieldName); + if( keyValue==null ) + throw new RuntimeException("no value for field "+keyFieldName); + Term term = LuceneUtils.term(keyFieldName,keyValue); + luceneWriter.updateDocument(term,doc); + } + + private Document newDocument(Map<String,Object> storedFields) { + Document doc = new Document(); + addFields(doc,storedFields,Field.Store.YES); + Map<String,Object> unstoredFields = goodConfig.getUnstoredFields(storedFields); + addFields(doc,unstoredFields,Field.Store.NO); + return doc; + } + + private void addFields( Document doc, Map<String,Object> fields, Field.Store store ) { + for( Map.Entry<String,Object> entry : fields.entrySet() ) { + String name = entry.getKey(); + Object value = entry.getValue(); + if( value instanceof List ) { + for( Object v : (List)value ) { + doc.add( newField(name,v,store) ); + } + } else { + doc.add( newField(name,value,store) ); + } + } + } + + private Field newField( String name, Object value, Field.Store store ) { + boolean isIndexed = isIndexed(name); + if( store==Field.Store.NO && !isIndexed ) + throw new RuntimeException("field '"+name+"' is unstored and unindexed"); + if( value instanceof String ) { + String s = (String)value; + if( !isIndexed ) { + return new StoredField(name,s); + } else if( !fieldAnalyzer.isAdded(name) ) { + return new StringField(name,s,store); + } else { + return new TextField(name,s,store); + } + } else if( value instanceof Integer ) { + int i = (Integer)value; + if( !isIndexed ) { + return new StoredField(name,i); + } else { + return new IntField(name,i,store); + } + } else if( value instanceof Long ) { + long i = (Long)value; + if( !isIndexed ) { + return new StoredField(name,i); + } else { + return new LongField(name,i,store); + } + } else if( value instanceof Double ) { + double i = (Double)value; + if( !isIndexed ) { + return new StoredField(name,i); + } else { + return new DoubleField(name,i,store); + } + } else if( value instanceof Float ) { + float i = (Float)value; + if( !isIndexed ) { + return new StoredField(name,i); + } else { + return new FloatField(name,i,store); + } + } else if( value instanceof byte[] ) { + if( isIndexed ) + throw new RuntimeException("can't index byte field "+name); + byte[] b = (byte[])value; + return new StoredField(name, b); + } else + throw new RuntimeException("invalid value type "+value.getClass()+"' for field '"+name+"'"); + } + + private synchronized boolean isIndexed(String fieldName) { + Boolean b = indexedMap.get(fieldName); + if( b==null ) { + b = goodConfig.isIndexed(fieldName); + indexedMap.put(fieldName,b); + Analyzer analyzer = goodConfig.getAnalyzer(fieldName); + if( analyzer!=null ) + fieldAnalyzer.add(fieldName,analyzer); + } + return b; + } + + + public void reindexDocuments(final String keyFieldName,Query query) throws IOException { + IndexReader reader = DirectoryReader.open(luceneWriter.getDirectory()); + final IndexSearcher searcher = new IndexSearcher(reader); + searcher.search( query, new GoodCollector(){ + public void collectDoc(int iDoc) throws IOException { + Document doc = searcher.doc(iDoc); + Map<String,Object> storedFields = LuceneUtils.toMap(doc); + updateDocument(keyFieldName,storedFields); + } + }); + reader.close(); + } +}
diff -r b04b8fc5f4f4 -r 3ab0d043370f src/goodjava/lucene/api/LuceneUtils.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/api/LuceneUtils.java Mon Mar 23 00:04:42 2020 -0600 @@ -0,0 +1,66 @@ +package goodjava.lucene.api; + +import java.util.Map; +import java.util.HashMap; +import java.util.List; +import java.util.ArrayList; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.IndexableField; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; + + +public final class LuceneUtils { + private LuceneUtils() {} // never + + public static Object getValue(IndexableField ifld) { + BytesRef br = ifld.binaryValue(); + if( br != null ) + return br.bytes; + Number n = ifld.numericValue(); + if( n != null ) + return n; + String s = ifld.stringValue(); + if( s != null ) + return s; + throw new RuntimeException("invalid field type for "+ifld); + } + + public static Map<String,Object> toMap(Document doc) { + if( doc==null ) + return null; + Map<String,Object> map = new HashMap<String,Object>(); + for( IndexableField ifld : doc ) { + String name = ifld.name(); + Object value = getValue(ifld); + Object old = map.get(name); + if( old == null ) { + map.put(name,value); + } else { + List list; + if( old instanceof List ) { + list = (List)old; + } else { + list = new ArrayList(); + list.add(old); + map.put(name,list); + } + list.add(value); + } + } + return map; + } + + public static Term term(String name,Object value) { + if( value instanceof String ) { + return new Term(name,(String)value); + } else if( value instanceof Long ) { + BytesRef br = new BytesRef(); + NumericUtils.longToPrefixCoded((Long)value,0,br); + return new Term(name,br); + } else + throw new RuntimeException("invalid value type "+value.getClass()+"' for term '"+name+"'"); + } + +}
diff -r b04b8fc5f4f4 -r 3ab0d043370f src/goodjava/lucene/api/MultiFieldParserConfig.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/goodjava/lucene/api/MultiFieldParserConfig.java Mon Mar 23 00:04:42 2020 -0600 @@ -0,0 +1,35 @@ +package goodjava.lucene.api; + +import java.util.Map; +import java.util.Collections; +import org.apache.lucene.analysis.Analyzer; +import org.apache.lucene.analysis.core.KeywordAnalyzer; +import goodjava.lucene.queryparser.MultiFieldParser; +import goodjava.lucene.queryparser.FieldParser; +import goodjava.lucene.queryparser.StringFieldParser; + + +public class MultiFieldParserConfig implements GoodIndexWriterConfig { + private final MultiFieldParser mfp; + + public MultiFieldParserConfig(MultiFieldParser mfp) { + this.mfp = mfp; + } + + public final boolean isIndexed(String fieldName) { + return mfp.fields.containsKey(fieldName); + } + + public final Analyzer getAnalyzer(String fieldName) { + FieldParser fp = mfp.fields.get(fieldName); + if( !(fp instanceof StringFieldParser) ) + return null; + StringFieldParser sfp = (StringFieldParser)fp; + Analyzer analyzer = sfp.analyzer; + return analyzer instanceof KeywordAnalyzer ? null : analyzer; + } + + public Map<String,Object> getUnstoredFields(Map<String,Object> storedFields) { + return Collections.emptyMap(); + } +}