changeset 1460:3ab0d043370f

start lucene.api
author Franklin Schmidt <fschmidt@gmail.com>
date Mon, 23 Mar 2020 00:04:42 -0600
parents b04b8fc5f4f4
children e5d48b85351c
files src/goodjava/lucene/api/FieldAnalyzer.java src/goodjava/lucene/api/GoodCollector.java src/goodjava/lucene/api/GoodIndexWriter.java src/goodjava/lucene/api/GoodIndexWriterConfig.java src/goodjava/lucene/api/LuceneIndexWriter.java src/goodjava/lucene/api/LuceneUtils.java src/goodjava/lucene/api/MultiFieldParserConfig.java
diffstat 7 files changed, 363 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/goodjava/lucene/api/FieldAnalyzer.java	Mon Mar 23 00:04:42 2020 -0600
@@ -0,0 +1,31 @@
+package goodjava.lucene.api;
+
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.AnalyzerWrapper;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+
+
+/**
+ * Analyzer wrapper that picks the analyzer per field name.  Fields registered
+ * through {@link #add} use their registered analyzer; every other field uses a
+ * shared {@link KeywordAnalyzer}.
+ */
+public final class FieldAnalyzer extends AnalyzerWrapper {
+	private static final Analyzer defaultAnalyzer = new KeywordAnalyzer();
+	private final Map<String,Analyzer> analyzersByField = new ConcurrentHashMap<String,Analyzer>();
+
+	/** Registers {@code analyzer} for {@code fieldName}, replacing any earlier registration. */
+	public void add(String fieldName,Analyzer analyzer) {
+		analyzersByField.put(fieldName,analyzer);
+	}
+
+	/** Returns true if an analyzer has been registered for {@code fieldName}. */
+	public boolean isAdded(String fieldName) {
+		return analyzersByField.containsKey(fieldName);
+	}
+
+	/**
+	 * Returns the analyzer registered for {@code fieldName}.  An unregistered
+	 * field is not an error: it falls back to the shared keyword analyzer.
+	 */
+	@Override
+	protected Analyzer getWrappedAnalyzer(String fieldName) {
+		Analyzer registered = analyzersByField.get(fieldName);
+		if( registered == null )
+			return defaultAnalyzer;
+		return registered;
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/goodjava/lucene/api/GoodCollector.java	Mon Mar 23 00:04:42 2020 -0600
@@ -0,0 +1,27 @@
+package goodjava.lucene.api;
+
+import java.io.IOException;
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.search.Collector;
+import org.apache.lucene.search.Scorer;
+
+
+public abstract class GoodCollector extends Collector {
+	private int docBase;
+
+	public boolean acceptsDocsOutOfOrder() {
+		return true;
+	}
+
+	public void setScorer(Scorer scorer) {}
+
+	public void setNextReader(AtomicReaderContext context) {
+		this.docBase = context.docBase;
+	}
+
+	public void collect(int doc) throws IOException {
+		collectDoc(docBase+doc);
+	}
+
+	public abstract void collectDoc(int doc) throws IOException;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/goodjava/lucene/api/GoodIndexWriter.java	Mon Mar 23 00:04:42 2020 -0600
@@ -0,0 +1,17 @@
+package goodjava.lucene.api;
+
+import java.io.IOException;
+import java.util.Map;
+import org.apache.lucene.search.Query;
+
+
+/**
+ * Index writer whose documents are expressed as maps from field name to value
+ * instead of Lucene document objects.
+ */
+public interface GoodIndexWriter {
+
+	/** Closes the writer. */
+	void close() throws IOException;
+
+	/** Commits pending changes. */
+	void commit() throws IOException;
+
+	/** Rolls back pending changes. */
+	void rollback() throws IOException;
+
+	/** Deletes every document. */
+	void deleteAll() throws IOException;
+
+	/** Deletes the documents matching {@code query}. */
+	void deleteDocuments(Query query) throws IOException;
+
+	/**
+	 * Adds a document.
+	 *
+	 * @param storedFields field name to value map holding the document's stored fields
+	 */
+	void addDocument(Map<String,Object> storedFields) throws IOException;
+
+	/**
+	 * Adds a document in place of the existing one identified by the key field.
+	 *
+	 * @param keyFieldName name of the field whose value in {@code storedFields} identifies the document
+	 * @param storedFields field name to value map holding the document's stored fields
+	 */
+	void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException;
+
+	/**
+	 * Re-indexes the documents matching {@code query}.
+	 *
+	 * @param keyFieldName name of the field used to identify each document being re-indexed
+	 * @param query selects the documents to re-index
+	 */
+	void reindexDocuments(String keyFieldName,Query query) throws IOException;
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/goodjava/lucene/api/GoodIndexWriterConfig.java	Mon Mar 23 00:04:42 2020 -0600
@@ -0,0 +1,11 @@
+package goodjava.lucene.api;
+
+import java.util.Map;
+import org.apache.lucene.analysis.Analyzer;
+
+
+/**
+ * Per-field indexing policy consulted by a {@link GoodIndexWriter} implementation.
+ */
+public interface GoodIndexWriterConfig {
+
+	/** Returns true if {@code fieldName} should be indexed. */
+	boolean isIndexed(String fieldName);
+
+	/** Returns the analyzer for {@code fieldName}; implementations may return null when there is none. */
+	Analyzer getAnalyzer(String fieldName);
+
+	/**
+	 * Returns additional fields, keyed by field name, to add to a document
+	 * without storing them.
+	 *
+	 * @param storedFields the stored fields of the document being written
+	 */
+	Map<String,Object> getUnstoredFields(Map<String,Object> storedFields);
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/goodjava/lucene/api/LuceneIndexWriter.java	Mon Mar 23 00:04:42 2020 -0600
@@ -0,0 +1,176 @@
+package goodjava.lucene.api;
+
+import java.io.IOException;
+import java.util.Map;
+import java.util.HashMap;
+import java.util.List;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Field;
+import org.apache.lucene.document.StoredField;
+import org.apache.lucene.document.StringField;
+import org.apache.lucene.document.TextField;
+import org.apache.lucene.document.IntField;
+import org.apache.lucene.document.LongField;
+import org.apache.lucene.document.DoubleField;
+import org.apache.lucene.document.FloatField;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.IndexWriterConfig;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.index.DirectoryReader;
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.Version;
+
+
+/**
+ * {@link GoodIndexWriter} implemented on top of a Lucene {@link IndexWriter}.
+ * Documents are given as maps from field name to value (or to a List of
+ * values for multi-valued fields); the Lucene field type for each value is
+ * chosen from the value's Java type and from the {@link GoodIndexWriterConfig}.
+ */
+public final class LuceneIndexWriter implements GoodIndexWriter {
+	final FieldAnalyzer fieldAnalyzer = new FieldAnalyzer();
+	public final IndexWriterConfig luceneConfig;
+	public final GoodIndexWriterConfig goodConfig;
+	public final IndexWriter luceneWriter;
+	// Cache of goodConfig.isIndexed() answers; only accessed inside the synchronized isIndexed().
+	private final Map<String,Boolean> indexedMap = new HashMap<String,Boolean>();
+
+	/**
+	 * Opens a Lucene writer on {@code dir} that analyzes through this object's {@link FieldAnalyzer}.
+	 *
+	 * @param matchVersion version passed to the {@link IndexWriterConfig}
+	 * @param dir directory to write to
+	 * @param goodConfig per-field indexing policy
+	 */
+	public LuceneIndexWriter(Version matchVersion,Directory dir,GoodIndexWriterConfig goodConfig) throws IOException {
+		luceneConfig = new IndexWriterConfig(matchVersion,fieldAnalyzer);
+		luceneWriter = new IndexWriter(dir,luceneConfig);
+		this.goodConfig = goodConfig;
+	}
+
+	@Override
+	public void close() throws IOException {
+		luceneWriter.close();
+	}
+
+	@Override
+	public void commit() throws IOException {
+		luceneWriter.commit();
+	}
+
+	@Override
+	public void rollback() throws IOException {
+		luceneWriter.rollback();
+	}
+
+	@Override
+	public void deleteAll() throws IOException {
+		luceneWriter.deleteAll();
+	}
+
+	@Override
+	public void deleteDocuments(Query query) throws IOException {
+		luceneWriter.deleteDocuments(query);
+	}
+
+	/** Builds a document from {@code storedFields} (plus the config's unstored fields) and adds it. */
+	@Override
+	public void addDocument(Map<String,Object> storedFields) throws IOException {
+		Document doc = newDocument(storedFields);
+		luceneWriter.addDocument(doc);
+	}
+
+	/**
+	 * Builds a document from {@code storedFields} and passes it to the Lucene
+	 * writer's updateDocument with a term made from the key field's value.
+	 *
+	 * @throws RuntimeException if the key field is unindexed, has an analyzer
+	 *         registered, or has no value in {@code storedFields}
+	 */
+	@Override
+	public void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException {
+		// isIndexed() must run first: it lazily registers the field's analyzer, which isAdded() then sees.
+		if( !isIndexed(keyFieldName) )
+			throw new RuntimeException("can't update using unindexed field "+keyFieldName);
+		if( fieldAnalyzer.isAdded(keyFieldName) )
+			throw new RuntimeException("can't update using analyzed field "+keyFieldName);
+		Document doc = newDocument(storedFields);
+		Object keyValue = storedFields.get(keyFieldName);
+		if( keyValue==null )
+			throw new RuntimeException("no value for field "+keyFieldName);
+		Term term = LuceneUtils.term(keyFieldName,keyValue);
+		luceneWriter.updateDocument(term,doc);
+	}
+
+	// Creates a document holding the stored fields followed by the config-supplied unstored fields.
+	private Document newDocument(Map<String,Object> storedFields) {
+		Document doc = new Document();
+		addFields(doc,storedFields,Field.Store.YES);
+		Map<String,Object> unstoredFields = goodConfig.getUnstoredFields(storedFields);
+		addFields(doc,unstoredFields,Field.Store.NO);
+		return doc;
+	}
+
+	// Adds one Lucene field per value; a List value contributes one field per element.
+	private void addFields( Document doc, Map<String,Object> fields, Field.Store store ) {
+		for( Map.Entry<String,Object> entry : fields.entrySet() ) {
+			String name = entry.getKey();
+			Object value = entry.getValue();
+			if( value instanceof List ) {
+				for( Object v : (List<?>)value ) {
+					doc.add( newField(name,v,store) );
+				}
+			} else {
+				doc.add( newField(name,value,store) );
+			}
+		}
+	}
+
+	/**
+	 * Chooses the Lucene field for one value.  Unindexed values always become
+	 * a StoredField.  Indexed Strings become a StringField, or a TextField
+	 * when an analyzer is registered for the field; indexed Integer, Long,
+	 * Double and Float values become the matching numeric field.  byte[]
+	 * values can only be stored.
+	 *
+	 * @throws RuntimeException if the value is null or of an unsupported type,
+	 *         if the field would be neither stored nor indexed, or if a
+	 *         byte[] field is configured as indexed
+	 */
+	private Field newField( String name, Object value, Field.Store store ) {
+		// Called before fieldAnalyzer.isAdded(): it lazily registers the field's analyzer.
+		boolean indexed = isIndexed(name);
+		if( store==Field.Store.NO && !indexed )
+			throw new RuntimeException("field '"+name+"' is unstored and unindexed");
+		// Report a null value explicitly instead of failing with a bare NullPointerException below.
+		if( value == null )
+			throw new RuntimeException("null value for field '"+name+"'");
+		if( value instanceof String ) {
+			String s = (String)value;
+			if( !indexed ) {
+				return new StoredField(name,s);
+			} else if( !fieldAnalyzer.isAdded(name) ) {
+				return new StringField(name,s,store);
+			} else {
+				return new TextField(name,s,store);
+			}
+		} else if( value instanceof Integer ) {
+			int i = (Integer)value;
+			if( !indexed ) {
+				return new StoredField(name,i);
+			} else {
+				return new IntField(name,i,store);
+			}
+		} else if( value instanceof Long ) {
+			long l = (Long)value;
+			if( !indexed ) {
+				return new StoredField(name,l);
+			} else {
+				return new LongField(name,l,store);
+			}
+		} else if( value instanceof Double ) {
+			double d = (Double)value;
+			if( !indexed ) {
+				return new StoredField(name,d);
+			} else {
+				return new DoubleField(name,d,store);
+			}
+		} else if( value instanceof Float ) {
+			float f = (Float)value;
+			if( !indexed ) {
+				return new StoredField(name,f);
+			} else {
+				return new FloatField(name,f,store);
+			}
+		} else if( value instanceof byte[] ) {
+			if( indexed )
+				throw new RuntimeException("can't index byte field "+name);
+			byte[] b = (byte[])value;
+			return new StoredField(name, b);
+		} else
+			throw new RuntimeException("invalid value type '"+value.getClass()+"' for field '"+name+"'");
+	}
+
+	// Answers from the cache; on first sight of a field asks goodConfig and also
+	// registers the field's analyzer (if any) with fieldAnalyzer.
+	private synchronized boolean isIndexed(String fieldName) {
+		Boolean b = indexedMap.get(fieldName);
+		if( b==null ) {
+			b = goodConfig.isIndexed(fieldName);
+			indexedMap.put(fieldName,b);
+			Analyzer analyzer = goodConfig.getAnalyzer(fieldName);
+			if( analyzer!=null )
+				fieldAnalyzer.add(fieldName,analyzer);
+		}
+		return b;
+	}
+
+
+	/**
+	 * Searches for {@code query}, converts each hit's loaded document back
+	 * into a map and feeds it through {@link #updateDocument}.
+	 * NOTE(review): the reader is opened on the directory rather than on the
+	 * writer, so presumably only committed documents are visited - confirm.
+	 */
+	@Override
+	public void reindexDocuments(final String keyFieldName,Query query) throws IOException {
+		IndexReader reader = DirectoryReader.open(luceneWriter.getDirectory());
+		try {
+			final IndexSearcher searcher = new IndexSearcher(reader);
+			searcher.search( query, new GoodCollector(){
+				public void collectDoc(int iDoc) throws IOException {
+					Document doc = searcher.doc(iDoc);
+					Map<String,Object> storedFields = LuceneUtils.toMap(doc);
+					updateDocument(keyFieldName,storedFields);
+				}
+			});
+		} finally {
+			// Close even when the search or an update throws, so the reader is not leaked.
+			reader.close();
+		}
+	}
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/goodjava/lucene/api/LuceneUtils.java	Mon Mar 23 00:04:42 2020 -0600
@@ -0,0 +1,66 @@
+package goodjava.lucene.api;
+
+import java.util.Map;
+import java.util.HashMap;
+import java.util.List;
+import java.util.ArrayList;
+import org.apache.lucene.document.Document;
+import org.apache.lucene.index.IndexableField;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
+
+
+public final class LuceneUtils {
+	private LuceneUtils() {}  // never
+
+	public static Object getValue(IndexableField ifld) {
+		BytesRef br = ifld.binaryValue();
+		if( br != null )
+			return br.bytes;
+		Number n = ifld.numericValue();
+		if( n != null )
+			return n;
+		String s = ifld.stringValue();
+		if( s != null )
+			return s;
+		throw new RuntimeException("invalid field type for "+ifld);
+	}
+
+	public static Map<String,Object> toMap(Document doc) {
+		if( doc==null )
+			return null;
+		Map<String,Object> map = new HashMap<String,Object>();
+		for( IndexableField ifld : doc ) {
+			String name = ifld.name();
+			Object value = getValue(ifld);
+			Object old = map.get(name);
+			if( old == null ) {
+				map.put(name,value);
+			} else {
+				List list;
+				if( old instanceof List ) {
+					list = (List)old;
+				} else {
+					list = new ArrayList();
+					list.add(old);
+					map.put(name,list);
+				}
+				list.add(value);
+			}
+		}
+		return map;
+	}
+
+	public static Term term(String name,Object value) {
+		if( value instanceof String ) {
+			return new Term(name,(String)value);
+		} else if( value instanceof Long ) {
+			BytesRef br = new BytesRef();
+			NumericUtils.longToPrefixCoded((Long)value,0,br);
+			return new Term(name,br);
+		} else
+			throw new RuntimeException("invalid value type "+value.getClass()+"' for term '"+name+"'");
+	}
+
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/goodjava/lucene/api/MultiFieldParserConfig.java	Mon Mar 23 00:04:42 2020 -0600
@@ -0,0 +1,35 @@
+package goodjava.lucene.api;
+
+import java.util.Map;
+import java.util.Collections;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.core.KeywordAnalyzer;
+import goodjava.lucene.queryparser.MultiFieldParser;
+import goodjava.lucene.queryparser.FieldParser;
+import goodjava.lucene.queryparser.StringFieldParser;
+
+
+/**
+ * {@link GoodIndexWriterConfig} derived from the field table of a
+ * {@link MultiFieldParser}.
+ */
+public class MultiFieldParserConfig implements GoodIndexWriterConfig {
+	private final MultiFieldParser mfp;
+
+	/** @param mfp parser whose {@code fields} table drives this configuration */
+	public MultiFieldParserConfig(MultiFieldParser mfp) {
+		this.mfp = mfp;
+	}
+
+	/** A field is indexed exactly when the parser's field table has an entry for it. */
+	@Override
+	public final boolean isIndexed(String fieldName) {
+		return mfp.fields.containsKey(fieldName);
+	}
+
+	/**
+	 * Returns the analyzer of the field's {@link StringFieldParser}, or null
+	 * when the field's parser is not a StringFieldParser or its analyzer is a
+	 * {@link KeywordAnalyzer}.
+	 */
+	@Override
+	public final Analyzer getAnalyzer(String fieldName) {
+		FieldParser parser = mfp.fields.get(fieldName);
+		if( parser instanceof StringFieldParser ) {
+			Analyzer candidate = ((StringFieldParser)parser).analyzer;
+			if( !(candidate instanceof KeywordAnalyzer) )
+				return candidate;
+		}
+		return null;
+	}
+
+	/** Returns an empty map: this base configuration adds no unstored fields. */
+	@Override
+	public Map<String,Object> getUnstoredFields(Map<String,Object> storedFields) {
+		return Collections.emptyMap();
+	}
+}