Mercurial Hosting > luan
annotate src/goodjava/lucene/api/LuceneIndexWriter.java @ 1722:7d2ab44f7a59
remove String regex fns
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 29 Jul 2022 14:12:01 -0600 |
parents | f48db13ae2d9 |
children |
rev | line source |
---|---|
1460 | 1 package goodjava.lucene.api; |
2 | |
3 import java.io.IOException; | |
4 import java.util.Map; | |
5 import java.util.HashMap; | |
6 import java.util.List; | |
7 import org.apache.lucene.analysis.Analyzer; | |
8 import org.apache.lucene.document.Document; | |
9 import org.apache.lucene.document.Field; | |
10 import org.apache.lucene.document.StoredField; | |
11 import org.apache.lucene.document.StringField; | |
12 import org.apache.lucene.document.TextField; | |
13 import org.apache.lucene.document.IntField; | |
14 import org.apache.lucene.document.LongField; | |
15 import org.apache.lucene.document.DoubleField; | |
16 import org.apache.lucene.document.FloatField; | |
17 import org.apache.lucene.index.IndexWriter; | |
18 import org.apache.lucene.index.IndexWriterConfig; | |
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
19 import org.apache.lucene.index.LiveIndexWriterConfig; |
1460 | 20 import org.apache.lucene.index.Term; |
21 import org.apache.lucene.index.DirectoryReader; | |
22 import org.apache.lucene.index.IndexReader; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
23 import org.apache.lucene.index.CheckIndex; |
1460 | 24 import org.apache.lucene.search.Query; |
25 import org.apache.lucene.search.IndexSearcher; | |
26 import org.apache.lucene.store.Directory; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
27 import goodjava.logging.Logger; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
28 import goodjava.logging.LoggerFactory; |
1460 | 29 |
30 | |
31 public final class LuceneIndexWriter implements GoodIndexWriter { | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
32 private static final Logger logger = LoggerFactory.getLogger(LuceneIndexWriter.class); |
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
33 private final FieldAnalyzer fieldAnalyzer; |
1465 | 34 public final IndexWriter luceneWriter; |
1460 | 35 public final GoodIndexWriterConfig goodConfig; |
36 private final Map<String,Boolean> indexedMap = new HashMap<String,Boolean>(); | |
37 | |
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
38 public LuceneIndexWriter(Directory dir,GoodIndexWriterConfig goodConfig) throws IOException { |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
39 IndexWriterConfig luceneConfig = goodConfig.newLuceneConfig(); |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
40 Analyzer analyzer = luceneConfig.getAnalyzer(); |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
41 if( !(analyzer instanceof FieldAnalyzer) ) |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
42 throw new RuntimeException("analyzer must be FieldAnalyzer"); |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
43 this.fieldAnalyzer = (FieldAnalyzer)analyzer; |
1465 | 44 this.luceneWriter = new IndexWriter(dir,luceneConfig); |
1460 | 45 this.goodConfig = goodConfig; |
1465 | 46 luceneWriter.commit(); // commit index creation |
1460 | 47 } |
48 | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
49 @Override public IndexWriter getLuceneIndexWriter() { |
1539 | 50 return luceneWriter; |
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
51 } |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
52 |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
53 @Override public void close() throws IOException { |
1460 | 54 luceneWriter.close(); |
55 } | |
56 | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
57 @Override public void commit() throws IOException { |
1460 | 58 luceneWriter.commit(); |
59 } | |
60 | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
61 @Override public void rollback() throws IOException { |
1460 | 62 luceneWriter.rollback(); |
63 } | |
64 | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
65 @Override public void deleteAll() throws IOException { |
1460 | 66 luceneWriter.deleteAll(); |
67 } | |
68 | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
69 @Override public void deleteDocuments(Query query) throws IOException { |
1460 | 70 luceneWriter.deleteDocuments(query); |
71 } | |
72 | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
73 @Override public void addDocument(Map<String,Object> storedFields) throws IOException { |
1460 | 74 Document doc = newDocument(storedFields); |
75 luceneWriter.addDocument(doc); | |
76 } | |
77 | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
78 @Override public void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException { |
1460 | 79 if( !isIndexed(keyFieldName) ) |
80 throw new RuntimeException("can't update using unindexed field "+keyFieldName); | |
81 if( fieldAnalyzer.isAdded(keyFieldName) ) | |
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1525
diff
changeset
|
82 throw new RuntimeException("can't update using analyzed field "+keyFieldName); |
1460 | 83 Document doc = newDocument(storedFields); |
84 Object keyValue = storedFields.get(keyFieldName); | |
85 if( keyValue==null ) | |
86 throw new RuntimeException("no value for field "+keyFieldName); | |
87 Term term = LuceneUtils.term(keyFieldName,keyValue); | |
88 luceneWriter.updateDocument(term,doc); | |
89 } | |
90 | |
91 private Document newDocument(Map<String,Object> storedFields) { | |
92 Document doc = new Document(); | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
93 MoreFieldInfo more = goodConfig.getMoreFieldInfo(storedFields); |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
94 addFields( doc, storedFields, Field.Store.YES, more.boosts ); |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
95 addFields( doc, more.unstoredFields, Field.Store.NO, more.boosts ); |
1460 | 96 return doc; |
97 } | |
98 | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
99 public void addDocument( Map<String,Object> storedFields, Map<String,Object> unstoredFields, Map<String,Float> boosts ) |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
100 throws IOException |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
101 { |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
102 Document doc = new Document(); |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
103 addFields( doc, storedFields, Field.Store.YES, boosts ); |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
104 addFields( doc, unstoredFields, Field.Store.NO, boosts ); |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
105 luceneWriter.addDocument(doc); |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
106 } |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
107 |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
108 private void addFields( Document doc, Map<String,Object> fields, Field.Store store, Map<String,Float> boosts ) { |
1460 | 109 for( Map.Entry<String,Object> entry : fields.entrySet() ) { |
110 String name = entry.getKey(); | |
111 Object value = entry.getValue(); | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
112 Float boost = boosts.get(name); |
1460 | 113 if( value instanceof List ) { |
114 for( Object v : (List)value ) { | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
115 doc.add( newField(name,v,store,boost) ); |
1460 | 116 } |
117 } else { | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
118 doc.add( newField(name,value,store,boost) ); |
1460 | 119 } |
120 } | |
121 } | |
122 | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
123 private Field newField( String name, Object value, Field.Store store, Float boost ) { |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
124 Field field = newField2(name,value,store,boost); |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
125 if( boost != null ) |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
126 field.setBoost(boost); |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
127 return field; |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
128 } |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
129 |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
130 private Field newField2( String name, Object value, Field.Store store, Float boost ) { |
1460 | 131 boolean isIndexed = isIndexed(name); |
132 if( store==Field.Store.NO && !isIndexed ) | |
133 throw new RuntimeException("field '"+name+"' is unstored and unindexed"); | |
134 if( value instanceof String ) { | |
135 String s = (String)value; | |
136 if( !isIndexed ) { | |
137 return new StoredField(name,s); | |
138 } else if( !fieldAnalyzer.isAdded(name) ) { | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
139 if( boost == null ) { |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
140 return new StringField(name,s,store); |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
141 } else { |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
142 return new Field( name, s, Field.Store.NO, Field.Index.NOT_ANALYZED); |
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
143 } |
1460 | 144 } else { |
145 return new TextField(name,s,store); | |
146 } | |
147 } else if( value instanceof Integer ) { | |
148 int i = (Integer)value; | |
149 if( !isIndexed ) { | |
150 return new StoredField(name,i); | |
151 } else { | |
152 return new IntField(name,i,store); | |
153 } | |
154 } else if( value instanceof Long ) { | |
155 long i = (Long)value; | |
156 if( !isIndexed ) { | |
157 return new StoredField(name,i); | |
158 } else { | |
159 return new LongField(name,i,store); | |
160 } | |
161 } else if( value instanceof Double ) { | |
162 double i = (Double)value; | |
163 if( !isIndexed ) { | |
164 return new StoredField(name,i); | |
165 } else { | |
166 return new DoubleField(name,i,store); | |
167 } | |
168 } else if( value instanceof Float ) { | |
169 float i = (Float)value; | |
170 if( !isIndexed ) { | |
171 return new StoredField(name,i); | |
172 } else { | |
173 return new FloatField(name,i,store); | |
174 } | |
175 } else if( value instanceof byte[] ) { | |
176 if( isIndexed ) | |
177 throw new RuntimeException("can't index byte field "+name); | |
178 byte[] b = (byte[])value; | |
179 return new StoredField(name, b); | |
180 } else | |
181 throw new RuntimeException("invalid value type "+value.getClass()+"' for field '"+name+"'"); | |
182 } | |
183 | |
184 private synchronized boolean isIndexed(String fieldName) { | |
185 Boolean b = indexedMap.get(fieldName); | |
186 if( b==null ) { | |
187 b = goodConfig.isIndexed(fieldName); | |
188 indexedMap.put(fieldName,b); | |
189 Analyzer analyzer = goodConfig.getAnalyzer(fieldName); | |
190 if( analyzer!=null ) | |
191 fieldAnalyzer.add(fieldName,analyzer); | |
192 } | |
193 return b; | |
194 } | |
195 | |
196 | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
197 @Override public void reindexDocuments(final String keyFieldName,Query query) throws IOException { |
1465 | 198 IndexReader reader = openReader(); |
1460 | 199 final IndexSearcher searcher = new IndexSearcher(reader); |
200 searcher.search( query, new GoodCollector(){ | |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
201 @Override public void collectDoc(int iDoc) throws IOException { |
1460 | 202 Document doc = searcher.doc(iDoc); |
203 Map<String,Object> storedFields = LuceneUtils.toMap(doc); | |
204 updateDocument(keyFieldName,storedFields); | |
205 } | |
206 }); | |
207 reader.close(); | |
208 } | |
1465 | 209 |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
210 @Override public IndexReader openReader() throws IOException { |
1465 | 211 return DirectoryReader.open(luceneWriter.getDirectory()); |
212 } | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
213 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
214 public void check() throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
215 CheckIndex.Status status = new CheckIndex(luceneWriter.getDirectory()).checkIndex(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
216 if( !status.clean ) |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
217 logger.error("index not clean"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
218 } |
1544
35601f15ecc3
add lucene log tag and restore_from_log
Franklin Schmidt <fschmidt@gmail.com>
parents:
1539
diff
changeset
|
219 |
1687
f48db13ae2d9
unlogged lucene support
Franklin Schmidt <fschmidt@gmail.com>
parents:
1544
diff
changeset
|
220 @Override public void tag(String tag) throws IOException {} |
1544
35601f15ecc3
add lucene log tag and restore_from_log
Franklin Schmidt <fschmidt@gmail.com>
parents:
1539
diff
changeset
|
221 |
1460 | 222 } |