Mercurial Hosting > luan
annotate src/goodjava/lucene/logging/LoggingIndexWriter.java @ 1535:e73b72a510b4
Lucene reindex
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sun, 02 Aug 2020 21:30:47 -0600 |
parents | 3bd4d7963456 |
children | 634f6765830e |
rev | line source |
---|---|
1461 | 1 package goodjava.lucene.logging; |
2 | |
1465 | 3 import java.io.File; |
4 import java.io.RandomAccessFile; | |
5 import java.io.ByteArrayOutputStream; | |
6 import java.io.DataOutputStream; | |
7 import java.io.DataInputStream; | |
8 import java.io.FileInputStream; | |
1461 | 9 import java.io.IOException; |
10 import java.util.Map; | |
1465 | 11 import java.util.Set; |
12 import java.util.HashSet; | |
13 import java.util.List; | |
14 import java.util.ArrayList; | |
15 import java.util.Random; | |
16 import org.apache.lucene.document.Document; | |
17 import org.apache.lucene.index.DirectoryReader; | |
18 import org.apache.lucene.index.IndexReader; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
19 import org.apache.lucene.index.Term; |
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
20 import org.apache.lucene.index.LiveIndexWriterConfig; |
1465 | 21 import org.apache.lucene.search.IndexSearcher; |
1461 | 22 import org.apache.lucene.search.Query; |
1465 | 23 import org.apache.lucene.search.MatchAllDocsQuery; |
24 import org.apache.lucene.search.TopDocs; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
25 import org.apache.lucene.search.PrefixQuery; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
26 import org.apache.lucene.search.SortField; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
27 import org.apache.lucene.search.Sort; |
1465 | 28 import org.apache.lucene.store.Directory; |
29 import org.apache.lucene.store.FSDirectory; | |
1473 | 30 import goodjava.io.IoUtils; |
1461 | 31 import goodjava.lucene.api.GoodIndexWriter; |
1465 | 32 import goodjava.lucene.api.LuceneIndexWriter; |
33 import goodjava.lucene.api.GoodCollector; | |
34 import goodjava.lucene.api.LuceneUtils; | |
35 import goodjava.logging.Logger; | |
36 import goodjava.logging.LoggerFactory; | |
1461 | 37 |
38 | |
1488 | 39 public class LoggingIndexWriter implements GoodIndexWriter { |
1465 | 40 private static final Logger logger = LoggerFactory.getLogger(LoggingIndexWriter.class); |
41 private static final int version = 1; | |
1461 | 42 private static final int OP_DELETE_ALL = 1; |
43 private static final int OP_DELETE_DOCUMENTS = 2; | |
44 private static final int OP_ADD_DOCUMENT = 3; | |
45 private static final int OP_UPDATE_DOCUMENT = 4; | |
1465 | 46 private static final Random rnd = new Random(); |
1461 | 47 |
1465 | 48 public final LuceneIndexWriter indexWriter; |
49 private final File logDir; | |
1488 | 50 protected final List<LogFile> logs = new ArrayList<LogFile>(); |
1486 | 51 private LogOutputStream log; |
1465 | 52 private final File index; |
53 private boolean isMerging = false; | |
1461 | 54 |
/**
 * Wraps {@code indexWriter} and keeps its log files in {@code logDir}.
 *
 * If {@code logDir} contains an index file whose version matches, the log files it names
 * are loaded, every other file in {@code logDir} is deleted, and the last log is opened
 * for output.  Otherwise a new set of logs is built by {@link #newLogs()}.
 *
 * @param indexWriter the writer that operations are applied to
 * @param logDir directory for the log files; created if needed
 * @throws IOException if reading the index file or creating the logs fails
 * @throws RuntimeException if {@code logDir} is not a directory after the mkdirs call
 */
public LoggingIndexWriter(LuceneIndexWriter indexWriter,File logDir) throws IOException {
	this.indexWriter = indexWriter;
	this.logDir = logDir;
	IoUtils.mkdirs(logDir);
	if( !logDir.isDirectory() )
		throw new RuntimeException("log dir is not a directory: "+logDir);
	index = new File(logDir,"index");
	if( index.exists() ) {
		// try-with-resources closes the index file on every path, including the early return
		try( DataInputStream dis = new DataInputStream(new FileInputStream(index)) ) {
			if( dis.readInt() == version ) {
				final int n = dis.readInt();
				for( int i=0; i<n; i++ ) {
					File file = new File( logDir, dis.readUTF() );
					logs.add( new LogFile(file) );
				}
				deleteUnusedFiles();
				setLog();
				return;
			}
		}
	}
	// no index file, or one written with a different version: rebuild the logs from the index
	newLogs();
}
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
81 |
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
82 public Directory getDirectory() { |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
83 return indexWriter.getDirectory(); |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
84 } |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
85 |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
86 public LiveIndexWriterConfig getLuceneConfig() { |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
87 return indexWriter.getLuceneConfig(); |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
88 } |
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
89 |
1486 | 90 private void setLog() throws IOException { |
91 if( log != null ) | |
92 log.close(); | |
93 log = logs.get(logs.size()-1).output(); | |
94 } | |
95 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
96 public synchronized boolean isMerging() { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
97 return isMerging; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
98 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
99 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
100 private synchronized void isNotMerging() { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
101 isMerging = false; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
102 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
103 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
104 public synchronized void newLogs() throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
105 if( isMerging ) |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
106 throw new RuntimeException("merging"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
107 logger.info("building new logs"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
108 logs.clear(); |
1465 | 109 for( int i=0; i<2; i++ ) { |
110 logs.add( newLogFile() ); | |
111 } | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
112 logLucene( System.currentTimeMillis(), logs.get(0), indexWriter ); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
113 writeIndex(); |
1486 | 114 setLog(); |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
115 logger.info("done building new logs"); |
1461 | 116 } |
117 | |
1486 | 118 private static void logLucene(long time,LogFile logLucene,LuceneIndexWriter indexWriter) throws IOException { |
119 LogOutputStream log = logLucene.output(); | |
1465 | 120 IndexReader reader = indexWriter.openReader(); |
121 final IndexSearcher searcher = new IndexSearcher(reader); | |
122 Query query = new MatchAllDocsQuery(); | |
123 searcher.search( query, new GoodCollector(){ | |
124 public void collectDoc(int iDoc) throws IOException { | |
125 Document doc = searcher.doc(iDoc); | |
126 Map<String,Object> storedFields = LuceneUtils.toMap(doc); | |
127 log.writeLong(time); | |
128 log.writeByte(OP_ADD_DOCUMENT); | |
129 log.writeMap(storedFields); | |
130 } | |
131 }); | |
132 reader.close(); | |
133 log.commit(); | |
1486 | 134 log.close(); |
1465 | 135 } |
136 | |
137 private LogFile newLogFile() throws IOException { | |
138 File file; | |
139 do { | |
140 file = new File(logDir,"_"+rnd.nextInt(100)+".log"); | |
141 } while( file.exists() ); | |
1486 | 142 return new LogFile(file); |
1461 | 143 } |
144 | |
1473 | 145 private void deleteUnusedFiles() throws IOException { |
1499 | 146 deleteUnusedFiles(logs,index); |
147 } | |
148 | |
149 private static void deleteUnusedFiles(List<LogFile> logs,File index) throws IOException { | |
1465 | 150 Set<String> used = new HashSet<String>(); |
151 used.add( index.getName() ); | |
152 for( LogFile lf : logs ) { | |
153 used.add( lf.file.getName() ); | |
154 } | |
1499 | 155 for( File f : index.getParentFile().listFiles() ) { |
1465 | 156 if( !used.contains(f.getName()) ) { |
1475 | 157 IoUtils.deleteRecursively(f); |
1465 | 158 } |
159 } | |
1461 | 160 } |
161 | |
1465 | 162 private void writeIndex() throws IOException { |
1499 | 163 writeIndex(logs,index); |
164 } | |
165 | |
166 public static void writeIndex(List<LogFile> logs,File index) throws IOException { | |
1465 | 167 ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
168 DataOutputStream dos = new DataOutputStream(baos); | |
169 dos.writeInt(version); | |
170 dos.writeInt(logs.size()); | |
171 for( LogFile lf : logs ) { | |
172 String fileName = lf.file.getName(); | |
173 dos.writeUTF(fileName); | |
174 } | |
175 dos.close(); | |
176 RandomAccessFile raf = new RandomAccessFile( index, "rwd" ); | |
177 raf.write( baos.toByteArray() ); | |
178 raf.close(); | |
1499 | 179 deleteUnusedFiles(logs,index); |
180 //logger.info("writeIndex "+logs.toString()); | |
1461 | 181 } |
182 | |
1465 | 183 private void mergeLogs() throws IOException { |
1512 | 184 //logger.info("merge"); |
1465 | 185 LogFile first = logs.get(0); |
186 LogFile second = logs.get(1); | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
187 long lastTime = second.file.lastModified(); |
1465 | 188 File dirFile = new File(logDir,"merge"); |
189 if( dirFile.exists() ) | |
190 throw new RuntimeException(); | |
191 Directory dir = FSDirectory.open(dirFile); | |
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
192 LuceneIndexWriter mergeWriter = new LuceneIndexWriter( dir, indexWriter.goodConfig ); |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
193 playLog( first.input(), mergeWriter ); |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
194 playLog( second.input(), mergeWriter ); |
1465 | 195 mergeWriter.commit(); |
196 LogFile merge = newLogFile(); | |
197 logLucene( lastTime, merge, mergeWriter ); | |
198 mergeWriter.close(); | |
199 synchronized(this) { | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
200 //check(); |
1465 | 201 logs.remove(0); |
202 logs.set(0,merge); | |
203 writeIndex(); | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
204 //check(null); |
1465 | 205 } |
1461 | 206 } |
1465 | 207 private final Runnable mergeLogs = new Runnable() { public void run() { |
208 try { | |
209 mergeLogs(); | |
210 } catch(IOException e) { | |
211 throw new RuntimeException(e); | |
212 } finally { | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
213 isNotMerging(); |
1465 | 214 } |
215 } }; | |
1461 | 216 |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
217 private static class DocIter { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
218 final IndexReader reader; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
219 final TopDocs td; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
220 final int n; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
221 int i = 0; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
222 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
223 DocIter(IndexReader reader,Query query,Sort sort) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
224 this.reader = reader; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
225 IndexSearcher searcher = new IndexSearcher(reader); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
226 this.td = searcher.search(query,10000000,sort); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
227 this.n = td.scoreDocs.length; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
228 if( td.totalHits != n ) |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
229 throw new RuntimeException(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
230 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
231 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
232 Document next() throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
233 return i < n ? reader.document(td.scoreDocs[i++].doc) : null; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
234 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
235 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
236 |
1487 | 237 private volatile boolean isChecking = false; |
238 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
239 public void check(SortField sortField) throws IOException { |
1487 | 240 if( isChecking ) |
241 throw new RuntimeException("another check is running"); | |
1508
86c5e7000ecf
lucene.backup checksum
Franklin Schmidt <fschmidt@gmail.com>
parents:
1504
diff
changeset
|
242 isChecking = true; |
86c5e7000ecf
lucene.backup checksum
Franklin Schmidt <fschmidt@gmail.com>
parents:
1504
diff
changeset
|
243 try { |
86c5e7000ecf
lucene.backup checksum
Franklin Schmidt <fschmidt@gmail.com>
parents:
1504
diff
changeset
|
244 doCheck(sortField); |
86c5e7000ecf
lucene.backup checksum
Franklin Schmidt <fschmidt@gmail.com>
parents:
1504
diff
changeset
|
245 } finally { |
86c5e7000ecf
lucene.backup checksum
Franklin Schmidt <fschmidt@gmail.com>
parents:
1504
diff
changeset
|
246 isChecking = false; |
86c5e7000ecf
lucene.backup checksum
Franklin Schmidt <fschmidt@gmail.com>
parents:
1504
diff
changeset
|
247 } |
86c5e7000ecf
lucene.backup checksum
Franklin Schmidt <fschmidt@gmail.com>
parents:
1504
diff
changeset
|
248 } |
86c5e7000ecf
lucene.backup checksum
Franklin Schmidt <fschmidt@gmail.com>
parents:
1504
diff
changeset
|
249 |
86c5e7000ecf
lucene.backup checksum
Franklin Schmidt <fschmidt@gmail.com>
parents:
1504
diff
changeset
|
250 protected void doCheck(SortField sortField) throws IOException { |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
251 IndexReader indexReader; |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
252 List<LogInputStream> logReaders; |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
253 synchronized(this) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
254 indexReader = indexWriter.openReader(); |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
255 logReaders = logReaders(logs); |
1465 | 256 } |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
257 try { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
258 logger.info("check start"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
259 indexWriter.check(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
260 File dirFile = new File(logDir,"check"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
261 IoUtils.deleteRecursively(dirFile); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
262 Directory dir = FSDirectory.open(dirFile); |
1528
3bd4d7963456
use goodjava/lucene/api
Franklin Schmidt <fschmidt@gmail.com>
parents:
1512
diff
changeset
|
263 LuceneIndexWriter checkWriter = new LuceneIndexWriter( dir, indexWriter.goodConfig ); |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
264 playLogs(logReaders,checkWriter); |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
265 logger.info("check lucene"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
266 IndexReader checkReader = checkWriter.openReader(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
267 if( sortField == null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
268 int nCheck = checkReader.numDocs(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
269 int nOrig = indexReader.numDocs(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
270 if( nCheck != nOrig ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
271 logger.error("numDocs mismatch: lucene="+nOrig+" logs="+nCheck); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
272 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
273 logger.info("numDocs="+nOrig); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
274 if( hash(indexReader) != hash(checkReader) ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
275 logger.error("hash mismatch"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
276 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
277 } else { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
278 Sort sort = new Sort(sortField); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
279 String sortFieldName = sortField.getField(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
280 Query query = new PrefixQuery(new Term(sortFieldName)); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
281 DocIter origIter = new DocIter(indexReader,query,sort); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
282 DocIter checkIter = new DocIter(checkReader,query,sort); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
283 Map<String,Object> origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
284 Map<String,Object> checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
285 while( origFields!=null && checkFields!=null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
286 Comparable origFld = (Comparable)origFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
287 Comparable checkFld = (Comparable)checkFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
288 int cmp = origFld.compareTo(checkFld); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
289 if( cmp==0 ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
290 if( !origFields.equals(checkFields) ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
291 logger.error(sortFieldName+" "+origFld+" not equal"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
292 logger.error("lucene = "+origFields); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
293 logger.error("logs = "+checkFields); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
294 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
295 origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
296 checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
297 } else if( cmp < 0 ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
298 logger.error(sortFieldName+" "+origFld+" found in lucene but not logs"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
299 origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
300 } else { // > |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
301 logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
302 checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
303 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
304 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
305 while( origFields!=null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
306 Comparable origFld = (Comparable)origFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
307 logger.error(sortFieldName+" "+origFld+" found in lucene but not logs"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
308 origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
309 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
310 while( checkFields!=null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
311 Comparable checkFld = (Comparable)checkFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
312 logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
313 checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
314 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
315 //logger.info("check done"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
316 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
317 checkReader.close(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
318 checkWriter.close(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
319 IoUtils.deleteRecursively(dirFile); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
320 logger.info("check done"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
321 } finally { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
322 indexReader.close(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
323 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
324 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
325 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
326 private static abstract class HashCollector extends GoodCollector { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
327 int total = 0; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
328 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
329 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
330 private static int hash(IndexReader reader) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
331 final IndexSearcher searcher = new IndexSearcher(reader); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
332 Query query = new MatchAllDocsQuery(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
333 HashCollector col = new HashCollector() { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
334 public void collectDoc(int iDoc) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
335 Document doc = searcher.doc(iDoc); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
336 Map<String,Object> storedFields = LuceneUtils.toMap(doc); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
337 total += storedFields.hashCode(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
338 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
339 }; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
340 searcher.search(query,col); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
341 return col.total; |
1461 | 342 } |
343 | |
1465 | 344 public synchronized void close() throws IOException { |
345 indexWriter.close(); | |
346 log.commit(); | |
1486 | 347 log.close(); |
1465 | 348 } |
349 | |
350 public synchronized void commit() throws IOException { | |
351 indexWriter.commit(); | |
352 log.commit(); | |
353 if( isMerging ) | |
354 return; | |
1486 | 355 if( log.logFile.end() > logs.get(0).end() ) { |
1465 | 356 logs.add( newLogFile() ); |
357 writeIndex(); | |
1486 | 358 setLog(); |
1465 | 359 } |
360 if( logs.size() > 3 ) { | |
361 isMerging = true; | |
1504 | 362 new Thread(mergeLogs).start(); |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
363 // mergeLogs.run(); |
1465 | 364 } |
1461 | 365 } |
366 | |
1465 | 367 public synchronized void rollback() throws IOException { |
368 indexWriter.rollback(); | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
369 log.rollback(); |
1465 | 370 } |
371 | |
372 public synchronized void deleteAll() throws IOException { | |
373 indexWriter.deleteAll(); | |
1486 | 374 writeOp(OP_DELETE_ALL); |
1461 | 375 } |
376 | |
1465 | 377 public synchronized void deleteDocuments(Query query) throws IOException { |
378 indexWriter.deleteDocuments(query); | |
1486 | 379 writeOp(OP_DELETE_DOCUMENTS); |
1465 | 380 log.writeQuery(query); |
381 } | |
382 | |
383 public synchronized void addDocument(Map<String,Object> storedFields) throws IOException { | |
384 indexWriter.addDocument(storedFields); | |
1486 | 385 writeOp(OP_ADD_DOCUMENT); |
1465 | 386 log.writeMap(storedFields); |
387 } | |
388 | |
389 public synchronized void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException { | |
390 indexWriter.updateDocument(keyFieldName,storedFields); | |
1486 | 391 writeOp(OP_UPDATE_DOCUMENT); |
1465 | 392 log.writeUTF(keyFieldName); |
393 log.writeMap(storedFields); | |
394 } | |
395 | |
396 public synchronized void reindexDocuments(String keyFieldName,Query query) throws IOException { | |
1461 | 397 indexWriter.reindexDocuments(keyFieldName,query); |
398 } | |
399 | |
1486 | 400 private void writeOp(int op) throws IOException { |
1465 | 401 log.writeLong(System.currentTimeMillis()); |
402 log.writeByte(op); | |
403 } | |
404 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
405 public synchronized void playLogs() throws IOException { |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
406 playLogs( logReaders(logs), indexWriter ); |
1465 | 407 } |
408 | |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
409 private static List<LogInputStream> logReaders(List<LogFile> logs) throws IOException { |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
410 List<LogInputStream> logReaders = new ArrayList<LogInputStream>(); |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
411 for( LogFile log : logs ) { |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
412 logReaders.add( log.input() ); |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
413 } |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
414 return logReaders; |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
415 } |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
416 |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
417 private static void playLogs(List<LogInputStream> logReaders,LuceneIndexWriter indexWriter) throws IOException { |
1465 | 418 if( numDocs(indexWriter) != 0 ) |
419 throw new RuntimeException ("not empty"); | |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
420 for( LogInputStream reader : logReaders ) { |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
421 playLog(reader,indexWriter); |
1465 | 422 } |
423 indexWriter.commit(); | |
424 } | |
425 | |
426 private static int numDocs(LuceneIndexWriter indexWriter) throws IOException { | |
427 IndexReader reader = indexWriter.openReader(); | |
428 int n = reader.numDocs(); | |
429 reader.close(); | |
430 return n; | |
431 } | |
432 | |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
433 private static void playLog(LogInputStream in,LuceneIndexWriter indexWriter) throws IOException { |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
434 while( in.available() > 0 ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
435 playOp(in,indexWriter); |
1465 | 436 } |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
437 in.close(); |
1465 | 438 } |
439 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
440 private static void playOp(LogInputStream in,LuceneIndexWriter indexWriter) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
441 in.readLong(); // time |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
442 int op = in.readByte(); |
1461 | 443 switch(op) { |
444 case OP_DELETE_ALL: | |
445 indexWriter.deleteAll(); | |
446 return; | |
447 case OP_DELETE_DOCUMENTS: | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
448 indexWriter.deleteDocuments( in.readQuery() ); |
1461 | 449 return; |
450 case OP_ADD_DOCUMENT: | |
1465 | 451 { |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
452 Map storedFields = in.readMap(); |
1465 | 453 indexWriter.addDocument(storedFields); |
454 return; | |
455 } | |
1461 | 456 case OP_UPDATE_DOCUMENT: |
1465 | 457 { |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
458 String keyFieldName = in.readUTF(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
459 Map storedFields = in.readMap(); |
1465 | 460 indexWriter.updateDocument(keyFieldName,storedFields); |
461 return; | |
462 } | |
1461 | 463 default: |
464 throw new RuntimeException("invalid op "+op); | |
465 } | |
466 } | |
467 | |
1465 | 468 private static void dump(LuceneIndexWriter indexWriter) throws IOException { |
469 IndexReader reader = indexWriter.openReader(); | |
470 IndexSearcher searcher = new IndexSearcher(reader); | |
471 Query query = new MatchAllDocsQuery(); | |
472 TopDocs td = searcher.search(query,100); | |
473 System.out.println("totalHits = "+td.totalHits); | |
474 for( int i=0; i<td.scoreDocs.length; i++ ) { | |
475 Document doc = searcher.doc(td.scoreDocs[i].doc); | |
476 System.out.println(LuceneUtils.toMap(doc)); | |
1461 | 477 } |
1465 | 478 System.out.println(); |
479 reader.close(); | |
1461 | 480 } |
1465 | 481 |
1461 | 482 } |