Mercurial Hosting > luan
annotate src/goodjava/lucene/api/LuceneIndexWriter.java @ 1525:f848d40b3b07
lucene.api add boosts
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sat, 18 Jul 2020 20:41:47 -0600 |
parents | 7d145095cc0b |
children | 3bd4d7963456 |
rev | line source |
---|---|
1460 | 1 package goodjava.lucene.api; |
2 | |
3 import java.io.IOException; | |
4 import java.util.Map; | |
5 import java.util.HashMap; | |
6 import java.util.List; | |
7 import org.apache.lucene.analysis.Analyzer; | |
8 import org.apache.lucene.document.Document; | |
9 import org.apache.lucene.document.Field; | |
10 import org.apache.lucene.document.StoredField; | |
11 import org.apache.lucene.document.StringField; | |
12 import org.apache.lucene.document.TextField; | |
13 import org.apache.lucene.document.IntField; | |
14 import org.apache.lucene.document.LongField; | |
15 import org.apache.lucene.document.DoubleField; | |
16 import org.apache.lucene.document.FloatField; | |
17 import org.apache.lucene.index.IndexWriter; | |
18 import org.apache.lucene.index.IndexWriterConfig; | |
19 import org.apache.lucene.index.Term; | |
20 import org.apache.lucene.index.DirectoryReader; | |
21 import org.apache.lucene.index.IndexReader; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
22 import org.apache.lucene.index.CheckIndex; |
1460 | 23 import org.apache.lucene.search.Query; |
24 import org.apache.lucene.search.IndexSearcher; | |
25 import org.apache.lucene.store.Directory; | |
26 import org.apache.lucene.util.Version; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
27 import goodjava.logging.Logger; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
28 import goodjava.logging.LoggerFactory; |
1460 | 29 |
30 | |
31 public final class LuceneIndexWriter implements GoodIndexWriter { | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
32 private static final Logger logger = LoggerFactory.getLogger(LuceneIndexWriter.class); |
1465 | 33 private final FieldAnalyzer fieldAnalyzer = new FieldAnalyzer(); |
34 public final Version luceneVersion; | |
1460 | 35 public final IndexWriterConfig luceneConfig; |
1465 | 36 public final IndexWriter luceneWriter; |
1460 | 37 public final GoodIndexWriterConfig goodConfig; |
38 private final Map<String,Boolean> indexedMap = new HashMap<String,Boolean>(); | |
39 | |
1465 | 40 public LuceneIndexWriter(Version luceneVersion,Directory dir,GoodIndexWriterConfig goodConfig) throws IOException { |
41 this.luceneVersion = luceneVersion; | |
42 this.luceneConfig = new IndexWriterConfig(luceneVersion,fieldAnalyzer); | |
43 this.luceneWriter = new IndexWriter(dir,luceneConfig); | |
1460 | 44 this.goodConfig = goodConfig; |
1465 | 45 luceneWriter.commit(); // commit index creation |
1460 | 46 } |
47 | |
48 public void close() throws IOException { | |
49 luceneWriter.close(); | |
50 } | |
51 | |
52 public void commit() throws IOException { | |
53 luceneWriter.commit(); | |
54 } | |
55 | |
56 public void rollback() throws IOException { | |
57 luceneWriter.rollback(); | |
58 } | |
59 | |
60 public void deleteAll() throws IOException { | |
61 luceneWriter.deleteAll(); | |
62 } | |
63 | |
64 public void deleteDocuments(Query query) throws IOException { | |
65 luceneWriter.deleteDocuments(query); | |
66 } | |
67 | |
68 public void addDocument(Map<String,Object> storedFields) throws IOException { | |
69 Document doc = newDocument(storedFields); | |
70 luceneWriter.addDocument(doc); | |
71 } | |
72 | |
73 public void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException { | |
74 if( !isIndexed(keyFieldName) ) | |
75 throw new RuntimeException("can't update using unindexed field "+keyFieldName); | |
76 if( fieldAnalyzer.isAdded(keyFieldName) ) | |
77 throw new RuntimeException("can't update using analyzeed field "+keyFieldName); | |
78 Document doc = newDocument(storedFields); | |
79 Object keyValue = storedFields.get(keyFieldName); | |
80 if( keyValue==null ) | |
81 throw new RuntimeException("no value for field "+keyFieldName); | |
82 Term term = LuceneUtils.term(keyFieldName,keyValue); | |
83 luceneWriter.updateDocument(term,doc); | |
84 } | |
85 | |
86 private Document newDocument(Map<String,Object> storedFields) { | |
87 Document doc = new Document(); | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
88 MoreFieldInfo more = goodConfig.getMoreFieldInfo(storedFields); |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
89 addFields(doc,storedFields,Field.Store.YES,more.boosts); |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
90 addFields(doc,more.unstoredFields,Field.Store.NO,more.boosts); |
1460 | 91 return doc; |
92 } | |
93 | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
94 private void addFields( Document doc, Map<String,Object> fields, Field.Store store, Map<String,Float> boosts ) { |
1460 | 95 for( Map.Entry<String,Object> entry : fields.entrySet() ) { |
96 String name = entry.getKey(); | |
97 Object value = entry.getValue(); | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
98 Float boost = boosts.get(name); |
1460 | 99 if( value instanceof List ) { |
100 for( Object v : (List)value ) { | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
101 doc.add( newField(name,v,store,boost) ); |
1460 | 102 } |
103 } else { | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
104 doc.add( newField(name,value,store,boost) ); |
1460 | 105 } |
106 } | |
107 } | |
108 | |
1525
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
109 private Field newField( String name, Object value, Field.Store store, Float boost ) { |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
110 Field field = newField(name,value,store); |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
111 if( boost != null ) |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
112 field.setBoost(boost); |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
113 return field; |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
114 } |
f848d40b3b07
lucene.api add boosts
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
115 |
1460 | 116 private Field newField( String name, Object value, Field.Store store ) { |
117 boolean isIndexed = isIndexed(name); | |
118 if( store==Field.Store.NO && !isIndexed ) | |
119 throw new RuntimeException("field '"+name+"' is unstored and unindexed"); | |
120 if( value instanceof String ) { | |
121 String s = (String)value; | |
122 if( !isIndexed ) { | |
123 return new StoredField(name,s); | |
124 } else if( !fieldAnalyzer.isAdded(name) ) { | |
125 return new StringField(name,s,store); | |
126 } else { | |
127 return new TextField(name,s,store); | |
128 } | |
129 } else if( value instanceof Integer ) { | |
130 int i = (Integer)value; | |
131 if( !isIndexed ) { | |
132 return new StoredField(name,i); | |
133 } else { | |
134 return new IntField(name,i,store); | |
135 } | |
136 } else if( value instanceof Long ) { | |
137 long i = (Long)value; | |
138 if( !isIndexed ) { | |
139 return new StoredField(name,i); | |
140 } else { | |
141 return new LongField(name,i,store); | |
142 } | |
143 } else if( value instanceof Double ) { | |
144 double i = (Double)value; | |
145 if( !isIndexed ) { | |
146 return new StoredField(name,i); | |
147 } else { | |
148 return new DoubleField(name,i,store); | |
149 } | |
150 } else if( value instanceof Float ) { | |
151 float i = (Float)value; | |
152 if( !isIndexed ) { | |
153 return new StoredField(name,i); | |
154 } else { | |
155 return new FloatField(name,i,store); | |
156 } | |
157 } else if( value instanceof byte[] ) { | |
158 if( isIndexed ) | |
159 throw new RuntimeException("can't index byte field "+name); | |
160 byte[] b = (byte[])value; | |
161 return new StoredField(name, b); | |
162 } else | |
163 throw new RuntimeException("invalid value type "+value.getClass()+"' for field '"+name+"'"); | |
164 } | |
165 | |
166 private synchronized boolean isIndexed(String fieldName) { | |
167 Boolean b = indexedMap.get(fieldName); | |
168 if( b==null ) { | |
169 b = goodConfig.isIndexed(fieldName); | |
170 indexedMap.put(fieldName,b); | |
171 Analyzer analyzer = goodConfig.getAnalyzer(fieldName); | |
172 if( analyzer!=null ) | |
173 fieldAnalyzer.add(fieldName,analyzer); | |
174 } | |
175 return b; | |
176 } | |
177 | |
178 | |
179 public void reindexDocuments(final String keyFieldName,Query query) throws IOException { | |
1465 | 180 IndexReader reader = openReader(); |
1460 | 181 final IndexSearcher searcher = new IndexSearcher(reader); |
182 searcher.search( query, new GoodCollector(){ | |
183 public void collectDoc(int iDoc) throws IOException { | |
184 Document doc = searcher.doc(iDoc); | |
185 Map<String,Object> storedFields = LuceneUtils.toMap(doc); | |
186 updateDocument(keyFieldName,storedFields); | |
187 } | |
188 }); | |
189 reader.close(); | |
190 } | |
1465 | 191 |
192 public IndexReader openReader() throws IOException { | |
193 return DirectoryReader.open(luceneWriter.getDirectory()); | |
194 } | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
195 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
196 public void check() throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
197 CheckIndex.Status status = new CheckIndex(luceneWriter.getDirectory()).checkIndex(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
198 if( !status.clean ) |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
199 logger.error("index not clean"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1465
diff
changeset
|
200 } |
1460 | 201 } |