Mercurial Hosting > luan
annotate src/goodjava/lucene/api/LuceneIndexWriter.java @ 1528:3bd4d7963456
use goodjava/lucene/api
| author | Franklin Schmidt <fschmidt@gmail.com> |
|---|---|
| date | Sun, 26 Jul 2020 23:11:53 -0600 |
| parents | f848d40b3b07 |
| children | c27dc6af87ca |
| rev | line source |
|---|---|
| 1460 | 1 package goodjava.lucene.api; |
| 2 | |
| 3 import java.io.IOException; | |
| 4 import java.util.Map; | |
| 5 import java.util.HashMap; | |
| 6 import java.util.List; | |
| 7 import org.apache.lucene.analysis.Analyzer; | |
| 8 import org.apache.lucene.document.Document; | |
| 9 import org.apache.lucene.document.Field; | |
| 10 import org.apache.lucene.document.StoredField; | |
| 11 import org.apache.lucene.document.StringField; | |
| 12 import org.apache.lucene.document.TextField; | |
| 13 import org.apache.lucene.document.IntField; | |
| 14 import org.apache.lucene.document.LongField; | |
| 15 import org.apache.lucene.document.DoubleField; | |
| 16 import org.apache.lucene.document.FloatField; | |
| 17 import org.apache.lucene.index.IndexWriter; | |
| 18 import org.apache.lucene.index.IndexWriterConfig; | |
|
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
19 import org.apache.lucene.index.LiveIndexWriterConfig; |
| 1460 | 20 import org.apache.lucene.index.Term; |
| 21 import org.apache.lucene.index.DirectoryReader; | |
| 22 import org.apache.lucene.index.IndexReader; | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
23 import org.apache.lucene.index.CheckIndex; |
| 1460 | 24 import org.apache.lucene.search.Query; |
| 25 import org.apache.lucene.search.IndexSearcher; | |
| 26 import org.apache.lucene.store.Directory; | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
27 import goodjava.logging.Logger; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
28 import goodjava.logging.LoggerFactory; |
| 1460 | 29 |
| 30 | |
| 31 public final class LuceneIndexWriter implements GoodIndexWriter { | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
32 private static final Logger logger = LoggerFactory.getLogger(LuceneIndexWriter.class); |
|
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
33 private final FieldAnalyzer fieldAnalyzer; |
| 1465 | 34 public final IndexWriter luceneWriter; |
| 1460 | 35 public final GoodIndexWriterConfig goodConfig; |
| 36 private final Map<String,Boolean> indexedMap = new HashMap<String,Boolean>(); | |
| 37 | |
|
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
38 public LuceneIndexWriter(Directory dir,GoodIndexWriterConfig goodConfig) throws IOException { |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
39 IndexWriterConfig luceneConfig = goodConfig.newLuceneConfig(); |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
40 Analyzer analyzer = luceneConfig.getAnalyzer(); |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
41 if( !(analyzer instanceof FieldAnalyzer) ) |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
42 throw new RuntimeException("analyzer must be FieldAnalyzer"); |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
43 this.fieldAnalyzer = (FieldAnalyzer)analyzer; |
| 1465 | 44 this.luceneWriter = new IndexWriter(dir,luceneConfig); |
| 1460 | 45 this.goodConfig = goodConfig; |
| 1465 | 46 luceneWriter.commit(); // commit index creation |
| 1460 | 47 } |
| 48 | |
|
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
49 public Directory getDirectory() { |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
50 return luceneWriter.getDirectory(); |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
51 } |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
52 |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
53 public LiveIndexWriterConfig getLuceneConfig() { |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
54 return luceneWriter.getConfig(); |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
55 } |
|
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
56 |
| 1460 | 57 public void close() throws IOException { |
| 58 luceneWriter.close(); | |
| 59 } | |
| 60 | |
| 61 public void commit() throws IOException { | |
| 62 luceneWriter.commit(); | |
| 63 } | |
| 64 | |
| 65 public void rollback() throws IOException { | |
| 66 luceneWriter.rollback(); | |
| 67 } | |
| 68 | |
| 69 public void deleteAll() throws IOException { | |
| 70 luceneWriter.deleteAll(); | |
| 71 } | |
| 72 | |
| 73 public void deleteDocuments(Query query) throws IOException { | |
| 74 luceneWriter.deleteDocuments(query); | |
| 75 } | |
| 76 | |
| 77 public void addDocument(Map<String,Object> storedFields) throws IOException { | |
| 78 Document doc = newDocument(storedFields); | |
| 79 luceneWriter.addDocument(doc); | |
| 80 } | |
| 81 | |
| 82 public void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException { | |
| 83 if( !isIndexed(keyFieldName) ) | |
| 84 throw new RuntimeException("can't update using unindexed field "+keyFieldName); | |
| 85 if( fieldAnalyzer.isAdded(keyFieldName) ) | |
|
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
86 throw new RuntimeException("can't update using analyzed field "+keyFieldName); |
| 1460 | 87 Document doc = newDocument(storedFields); |
| 88 Object keyValue = storedFields.get(keyFieldName); | |
| 89 if( keyValue==null ) | |
| 90 throw new RuntimeException("no value for field "+keyFieldName); | |
| 91 Term term = LuceneUtils.term(keyFieldName,keyValue); | |
| 92 luceneWriter.updateDocument(term,doc); | |
| 93 } | |
| 94 | |
| 95 private Document newDocument(Map<String,Object> storedFields) { | |
| 96 Document doc = new Document(); | |
|
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
97 MoreFieldInfo more = goodConfig.getMoreFieldInfo(storedFields); |
|
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
98 addFields(doc,storedFields,Field.Store.YES,more.boosts); |
|
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
99 addFields(doc,more.unstoredFields,Field.Store.NO,more.boosts); |
| 1460 | 100 return doc; |
| 101 } | |
| 102 | |
|
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
103 private void addFields( Document doc, Map<String,Object> fields, Field.Store store, Map<String,Float> boosts ) { |
| 1460 | 104 for( Map.Entry<String,Object> entry : fields.entrySet() ) { |
| 105 String name = entry.getKey(); | |
| 106 Object value = entry.getValue(); | |
|
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
107 Float boost = boosts.get(name); |
| 1460 | 108 if( value instanceof List ) { |
| 109 for( Object v : (List)value ) { | |
|
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
110 doc.add( newField(name,v,store,boost) ); |
| 1460 | 111 } |
| 112 } else { | |
|
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
113 doc.add( newField(name,value,store,boost) ); |
| 1460 | 114 } |
| 115 } | |
| 116 } | |
| 117 | |
|
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
118 private Field newField( String name, Object value, Field.Store store, Float boost ) { |
|
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
119 Field field = newField(name,value,store); |
|
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
120 if( boost != null ) |
|
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
121 field.setBoost(boost); |
|
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
122 return field; |
|
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
123 } |
|
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
124 |
| 1460 | 125 private Field newField( String name, Object value, Field.Store store ) { |
| 126 boolean isIndexed = isIndexed(name); | |
| 127 if( store==Field.Store.NO && !isIndexed ) | |
| 128 throw new RuntimeException("field '"+name+"' is unstored and unindexed"); | |
| 129 if( value instanceof String ) { | |
| 130 String s = (String)value; | |
| 131 if( !isIndexed ) { | |
| 132 return new StoredField(name,s); | |
| 133 } else if( !fieldAnalyzer.isAdded(name) ) { | |
| 134 return new StringField(name,s,store); | |
| 135 } else { | |
| 136 return new TextField(name,s,store); | |
| 137 } | |
| 138 } else if( value instanceof Integer ) { | |
| 139 int i = (Integer)value; | |
| 140 if( !isIndexed ) { | |
| 141 return new StoredField(name,i); | |
| 142 } else { | |
| 143 return new IntField(name,i,store); | |
| 144 } | |
| 145 } else if( value instanceof Long ) { | |
| 146 long i = (Long)value; | |
| 147 if( !isIndexed ) { | |
| 148 return new StoredField(name,i); | |
| 149 } else { | |
| 150 return new LongField(name,i,store); | |
| 151 } | |
| 152 } else if( value instanceof Double ) { | |
| 153 double i = (Double)value; | |
| 154 if( !isIndexed ) { | |
| 155 return new StoredField(name,i); | |
| 156 } else { | |
| 157 return new DoubleField(name,i,store); | |
| 158 } | |
| 159 } else if( value instanceof Float ) { | |
| 160 float i = (Float)value; | |
| 161 if( !isIndexed ) { | |
| 162 return new StoredField(name,i); | |
| 163 } else { | |
| 164 return new FloatField(name,i,store); | |
| 165 } | |
| 166 } else if( value instanceof byte[] ) { | |
| 167 if( isIndexed ) | |
| 168 throw new RuntimeException("can't index byte field "+name); | |
| 169 byte[] b = (byte[])value; | |
| 170 return new StoredField(name, b); | |
| 171 } else | |
| 172 throw new RuntimeException("invalid value type "+value.getClass()+"' for field '"+name+"'"); | |
| 173 } | |
| 174 | |
| 175 private synchronized boolean isIndexed(String fieldName) { | |
| 176 Boolean b = indexedMap.get(fieldName); | |
| 177 if( b==null ) { | |
| 178 b = goodConfig.isIndexed(fieldName); | |
| 179 indexedMap.put(fieldName,b); | |
| 180 Analyzer analyzer = goodConfig.getAnalyzer(fieldName); | |
| 181 if( analyzer!=null ) | |
| 182 fieldAnalyzer.add(fieldName,analyzer); | |
| 183 } | |
| 184 return b; | |
| 185 } | |
| 186 | |
| 187 | |
| 188 public void reindexDocuments(final String keyFieldName,Query query) throws IOException { | |
| 1465 | 189 IndexReader reader = openReader(); |
| 1460 | 190 final IndexSearcher searcher = new IndexSearcher(reader); |
| 191 searcher.search( query, new GoodCollector(){ | |
| 192 public void collectDoc(int iDoc) throws IOException { | |
| 193 Document doc = searcher.doc(iDoc); | |
| 194 Map<String,Object> storedFields = LuceneUtils.toMap(doc); | |
| 195 updateDocument(keyFieldName,storedFields); | |
| 196 } | |
| 197 }); | |
| 198 reader.close(); | |
| 199 } | |
| 1465 | 200 |
| 201 public IndexReader openReader() throws IOException { | |
| 202 return DirectoryReader.open(luceneWriter.getDirectory()); | |
| 203 } | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
204 |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
205 public void check() throws IOException { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
206 CheckIndex.Status status = new CheckIndex(luceneWriter.getDirectory()).checkIndex(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
207 if( !status.clean ) |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
208 logger.error("index not clean"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
209 } |
| 1460 | 210 } |
