Mercurial Hosting > luan
annotate src/goodjava/lucene/logging/LoggingIndexWriter.java @ 1480:1f41e5921090
input buffering
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 24 Apr 2020 14:32:20 -0600 |
parents | 7d145095cc0b |
children | 1fa6e8ec2d53 |
rev | line source |
---|---|
1461 | 1 package goodjava.lucene.logging; |
2 | |
1465 | 3 import java.io.File; |
4 import java.io.RandomAccessFile; | |
5 import java.io.ByteArrayOutputStream; | |
6 import java.io.DataOutputStream; | |
7 import java.io.DataInputStream; | |
8 import java.io.FileInputStream; | |
1461 | 9 import java.io.IOException; |
10 import java.util.Map; | |
1465 | 11 import java.util.Set; |
12 import java.util.HashSet; | |
13 import java.util.List; | |
14 import java.util.ArrayList; | |
15 import java.util.Random; | |
16 import org.apache.lucene.document.Document; | |
17 import org.apache.lucene.index.DirectoryReader; | |
18 import org.apache.lucene.index.IndexReader; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
19 import org.apache.lucene.index.Term; |
1465 | 20 import org.apache.lucene.search.IndexSearcher; |
1461 | 21 import org.apache.lucene.search.Query; |
1465 | 22 import org.apache.lucene.search.MatchAllDocsQuery; |
23 import org.apache.lucene.search.TopDocs; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
24 import org.apache.lucene.search.PrefixQuery; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
25 import org.apache.lucene.search.SortField; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
26 import org.apache.lucene.search.Sort; |
1465 | 27 import org.apache.lucene.store.Directory; |
28 import org.apache.lucene.store.FSDirectory; | |
1473 | 29 import goodjava.io.IoUtils; |
1461 | 30 import goodjava.lucene.api.GoodIndexWriter; |
1465 | 31 import goodjava.lucene.api.LuceneIndexWriter; |
32 import goodjava.lucene.api.GoodCollector; | |
33 import goodjava.lucene.api.LuceneUtils; | |
34 import goodjava.logging.Logger; | |
35 import goodjava.logging.LoggerFactory; | |
1461 | 36 |
37 | |
1465 | 38 public final class LoggingIndexWriter implements GoodIndexWriter { |
39 private static final Logger logger = LoggerFactory.getLogger(LoggingIndexWriter.class); | |
40 private static final int version = 1; | |
1461 | 41 private static final int OP_DELETE_ALL = 1; |
42 private static final int OP_DELETE_DOCUMENTS = 2; | |
43 private static final int OP_ADD_DOCUMENT = 3; | |
44 private static final int OP_UPDATE_DOCUMENT = 4; | |
1465 | 45 private static final Random rnd = new Random(); |
1461 | 46 |
1465 | 47 public final LuceneIndexWriter indexWriter; |
48 private final File logDir; | |
49 private final List<LogFile> logs = new ArrayList<LogFile>(); | |
50 private final File index; | |
51 private boolean isMerging = false; | |
1461 | 52 |
/**
 * Wraps {@code indexWriter} so that every change is also appended to log files in {@code logDir}.
 * The directory is created if missing.  If it already holds an "index" file with the current
 * format version, the log files it names are reopened and every other entry in the directory
 * is deleted; otherwise a fresh set of logs is built with {@link #newLogs()}.
 *
 * @param indexWriter the Lucene writer that receives every operation
 * @param logDir directory holding the "index" file and the log files
 * @throws IOException if the index file or a log file cannot be read or created
 * @throws RuntimeException if {@code logDir} is not a directory after {@code mkdirs()}
 */
public LoggingIndexWriter(LuceneIndexWriter indexWriter,File logDir) throws IOException {
	this.indexWriter = indexWriter;
	this.logDir = logDir;
	logDir.mkdirs();
	if( !logDir.isDirectory() )
		throw new RuntimeException("not a directory: "+logDir);
	index = new File(logDir,"index");
	if( index.exists() ) {
		// try-with-resources: a failure while closing can no longer hide the original read error
		try( DataInputStream dis = new DataInputStream(new FileInputStream(index)) ) {
			if( dis.readInt() == version ) {
				final int n = dis.readInt();
				for( int i=0; i<n; i++ ) {
					File file = new File( logDir, dis.readUTF() );
					logs.add( new LogFile(file,"rwd") );
				}
				deleteUnusedFiles();
				return;
			}
			// version mismatch: fall through and rebuild the logs from the Lucene index
		}
	}
	newLogs();
}
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
78 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
79 public synchronized boolean isMerging() { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
80 return isMerging; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
81 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
82 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
83 private synchronized void isNotMerging() { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
84 isMerging = false; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
85 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
86 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
87 public synchronized void newLogs() throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
88 if( isMerging ) |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
89 throw new RuntimeException("merging"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
90 logger.info("building new logs"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
91 logs.clear(); |
1465 | 92 for( int i=0; i<2; i++ ) { |
93 logs.add( newLogFile() ); | |
94 } | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
95 logLucene( System.currentTimeMillis(), logs.get(0), indexWriter ); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
96 writeIndex(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
97 logger.info("done building new logs"); |
1461 | 98 } |
99 | |
1465 | 100 private static void logLucene(long time,LogFile log,LuceneIndexWriter indexWriter) throws IOException { |
101 IndexReader reader = indexWriter.openReader(); | |
102 final IndexSearcher searcher = new IndexSearcher(reader); | |
103 Query query = new MatchAllDocsQuery(); | |
104 searcher.search( query, new GoodCollector(){ | |
105 public void collectDoc(int iDoc) throws IOException { | |
106 Document doc = searcher.doc(iDoc); | |
107 Map<String,Object> storedFields = LuceneUtils.toMap(doc); | |
108 log.writeLong(time); | |
109 log.writeByte(OP_ADD_DOCUMENT); | |
110 log.writeMap(storedFields); | |
111 } | |
112 }); | |
113 reader.close(); | |
114 log.commit(); | |
115 } | |
116 | |
117 private LogFile newLogFile() throws IOException { | |
118 File file; | |
119 do { | |
120 file = new File(logDir,"_"+rnd.nextInt(100)+".log"); | |
121 } while( file.exists() ); | |
122 return new LogFile(file,"rwd"); | |
1461 | 123 } |
124 | |
1473 | 125 private void deleteUnusedFiles() throws IOException { |
1465 | 126 Set<String> used = new HashSet<String>(); |
127 used.add( index.getName() ); | |
128 for( LogFile lf : logs ) { | |
129 used.add( lf.file.getName() ); | |
130 } | |
131 for( File f : logDir.listFiles() ) { | |
132 if( !used.contains(f.getName()) ) { | |
1475 | 133 IoUtils.deleteRecursively(f); |
1465 | 134 } |
135 } | |
1461 | 136 } |
137 | |
1465 | 138 private void writeIndex() throws IOException { |
139 ByteArrayOutputStream baos = new ByteArrayOutputStream(); | |
140 DataOutputStream dos = new DataOutputStream(baos); | |
141 dos.writeInt(version); | |
142 dos.writeInt(logs.size()); | |
143 for( LogFile lf : logs ) { | |
144 String fileName = lf.file.getName(); | |
145 dos.writeUTF(fileName); | |
146 } | |
147 dos.close(); | |
148 RandomAccessFile raf = new RandomAccessFile( index, "rwd" ); | |
149 raf.write( baos.toByteArray() ); | |
150 raf.close(); | |
151 deleteUnusedFiles(); | |
152 logger.info("writeIndex "+logs.toString()); | |
1461 | 153 } |
154 | |
1465 | 155 private void mergeLogs() throws IOException { |
156 logger.info("merge"); | |
157 LogFile first = logs.get(0); | |
158 LogFile second = logs.get(1); | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
159 long lastTime = second.file.lastModified(); |
1465 | 160 File dirFile = new File(logDir,"merge"); |
161 if( dirFile.exists() ) | |
162 throw new RuntimeException(); | |
163 Directory dir = FSDirectory.open(dirFile); | |
164 LuceneIndexWriter mergeWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig ); | |
165 playLog(first,mergeWriter); | |
166 playLog(second,mergeWriter); | |
167 mergeWriter.commit(); | |
168 LogFile merge = newLogFile(); | |
169 logLucene( lastTime, merge, mergeWriter ); | |
170 mergeWriter.close(); | |
171 synchronized(this) { | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
172 //check(); |
1465 | 173 logs.remove(0); |
174 logs.set(0,merge); | |
175 writeIndex(); | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
176 //check(null); |
1465 | 177 } |
1461 | 178 } |
1465 | 179 private final Runnable mergeLogs = new Runnable() { public void run() { |
180 try { | |
181 mergeLogs(); | |
182 } catch(IOException e) { | |
183 throw new RuntimeException(e); | |
184 } finally { | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
185 isNotMerging(); |
1465 | 186 } |
187 } }; | |
1461 | 188 |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
189 private static class DocIter { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
190 final IndexReader reader; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
191 final TopDocs td; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
192 final int n; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
193 int i = 0; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
194 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
195 DocIter(IndexReader reader,Query query,Sort sort) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
196 this.reader = reader; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
197 IndexSearcher searcher = new IndexSearcher(reader); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
198 this.td = searcher.search(query,10000000,sort); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
199 this.n = td.scoreDocs.length; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
200 if( td.totalHits != n ) |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
201 throw new RuntimeException(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
202 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
203 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
204 Document next() throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
205 return i < n ? reader.document(td.scoreDocs[i++].doc) : null; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
206 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
207 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
208 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
209 public void check(SortField sortField) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
210 IndexReader indexReader; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
211 List<LogFile> logs; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
212 synchronized(this) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
213 if( isMerging ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
214 logger.warn("is merging, check aborted"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
215 return; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
216 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
217 isMerging = true; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
218 indexReader = indexWriter.openReader(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
219 logs = new ArrayList<LogFile>(this.logs); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
220 int i = logs.size() - 1; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
221 LogFile last = logs.get(i); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
222 logs.set(i,last.snapshot()); |
1465 | 223 } |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
224 try { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
225 logger.info("check start"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
226 indexWriter.check(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
227 File dirFile = new File(logDir,"check"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
228 IoUtils.deleteRecursively(dirFile); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
229 Directory dir = FSDirectory.open(dirFile); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
230 LuceneIndexWriter checkWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig ); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
231 playLogs(logs,checkWriter); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
232 logger.info("check lucene"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
233 IndexReader checkReader = checkWriter.openReader(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
234 if( sortField == null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
235 int nCheck = checkReader.numDocs(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
236 int nOrig = indexReader.numDocs(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
237 if( nCheck != nOrig ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
238 logger.error("numDocs mismatch: lucene="+nOrig+" logs="+nCheck); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
239 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
240 logger.info("numDocs="+nOrig); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
241 if( hash(indexReader) != hash(checkReader) ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
242 logger.error("hash mismatch"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
243 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
244 } else { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
245 Sort sort = new Sort(sortField); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
246 String sortFieldName = sortField.getField(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
247 Query query = new PrefixQuery(new Term(sortFieldName)); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
248 DocIter origIter = new DocIter(indexReader,query,sort); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
249 DocIter checkIter = new DocIter(checkReader,query,sort); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
250 Map<String,Object> origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
251 Map<String,Object> checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
252 while( origFields!=null && checkFields!=null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
253 Comparable origFld = (Comparable)origFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
254 Comparable checkFld = (Comparable)checkFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
255 int cmp = origFld.compareTo(checkFld); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
256 if( cmp==0 ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
257 if( !origFields.equals(checkFields) ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
258 logger.error(sortFieldName+" "+origFld+" not equal"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
259 logger.error("lucene = "+origFields); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
260 logger.error("logs = "+checkFields); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
261 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
262 origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
263 checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
264 } else if( cmp < 0 ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
265 logger.error(sortFieldName+" "+origFld+" found in lucene but not logs"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
266 origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
267 } else { // > |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
268 logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
269 checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
270 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
271 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
272 while( origFields!=null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
273 Comparable origFld = (Comparable)origFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
274 logger.error(sortFieldName+" "+origFld+" found in lucene but not logs"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
275 origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
276 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
277 while( checkFields!=null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
278 Comparable checkFld = (Comparable)checkFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
279 logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
280 checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
281 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
282 //logger.info("check done"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
283 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
284 checkReader.close(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
285 checkWriter.close(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
286 IoUtils.deleteRecursively(dirFile); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
287 logger.info("check done"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
288 } finally { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
289 indexReader.close(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
290 isNotMerging(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
291 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
292 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
293 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
294 private static abstract class HashCollector extends GoodCollector { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
295 int total = 0; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
296 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
297 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
298 private static int hash(IndexReader reader) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
299 final IndexSearcher searcher = new IndexSearcher(reader); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
300 Query query = new MatchAllDocsQuery(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
301 HashCollector col = new HashCollector() { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
302 public void collectDoc(int iDoc) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
303 Document doc = searcher.doc(iDoc); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
304 Map<String,Object> storedFields = LuceneUtils.toMap(doc); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
305 total += storedFields.hashCode(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
306 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
307 }; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
308 searcher.search(query,col); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
309 return col.total; |
1461 | 310 } |
311 | |
1465 | 312 private LogFile log() { |
313 return logs.get(logs.size()-1); | |
314 } | |
315 | |
316 public synchronized void close() throws IOException { | |
317 indexWriter.close(); | |
318 LogFile log = log(); | |
319 log.commit(); | |
320 } | |
321 | |
322 public synchronized void commit() throws IOException { | |
323 indexWriter.commit(); | |
324 LogFile log = log(); | |
325 log.commit(); | |
326 if( isMerging ) | |
327 return; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
328 if( log.end() > logs.get(0).end() ) { |
1465 | 329 logs.add( newLogFile() ); |
330 writeIndex(); | |
331 } | |
332 if( logs.size() > 3 ) { | |
333 isMerging = true; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
334 new Thread(mergeLogs).start(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
335 // mergeLogs.run(); |
1465 | 336 } |
1461 | 337 } |
338 | |
1465 | 339 public synchronized void rollback() throws IOException { |
340 indexWriter.rollback(); | |
341 LogFile log = log(); | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
342 log.rollback(); |
1465 | 343 } |
344 | |
345 public synchronized void deleteAll() throws IOException { | |
346 indexWriter.deleteAll(); | |
347 LogFile log = log(); | |
348 writeOp(log,OP_DELETE_ALL); | |
1461 | 349 } |
350 | |
1465 | 351 public synchronized void deleteDocuments(Query query) throws IOException { |
352 indexWriter.deleteDocuments(query); | |
353 LogFile log = log(); | |
354 writeOp(log,OP_DELETE_DOCUMENTS); | |
355 log.writeQuery(query); | |
356 } | |
357 | |
358 public synchronized void addDocument(Map<String,Object> storedFields) throws IOException { | |
359 indexWriter.addDocument(storedFields); | |
360 LogFile log = log(); | |
361 writeOp(log,OP_ADD_DOCUMENT); | |
362 log.writeMap(storedFields); | |
363 } | |
364 | |
365 public synchronized void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException { | |
366 indexWriter.updateDocument(keyFieldName,storedFields); | |
367 LogFile log = log(); | |
368 writeOp(log,OP_UPDATE_DOCUMENT); | |
369 log.writeUTF(keyFieldName); | |
370 log.writeMap(storedFields); | |
371 } | |
372 | |
373 public synchronized void reindexDocuments(String keyFieldName,Query query) throws IOException { | |
1461 | 374 indexWriter.reindexDocuments(keyFieldName,query); |
375 } | |
376 | |
1465 | 377 private void writeOp(LogFile log,int op) throws IOException { |
378 log.writeLong(System.currentTimeMillis()); | |
379 log.writeByte(op); | |
380 } | |
381 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
382 public synchronized void playLogs() throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
383 playLogs(logs,indexWriter); |
1465 | 384 } |
385 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
386 private static void playLogs(List<LogFile> logs,LuceneIndexWriter indexWriter) throws IOException { |
1465 | 387 if( numDocs(indexWriter) != 0 ) |
388 throw new RuntimeException ("not empty"); | |
389 for( LogFile log : logs ) { | |
390 playLog(log,indexWriter); | |
391 } | |
392 indexWriter.commit(); | |
393 } | |
394 | |
395 private static int numDocs(LuceneIndexWriter indexWriter) throws IOException { | |
396 IndexReader reader = indexWriter.openReader(); | |
397 int n = reader.numDocs(); | |
398 reader.close(); | |
399 return n; | |
400 } | |
401 | |
402 private static void playLog(LogFile log,LuceneIndexWriter indexWriter) throws IOException { | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
403 LogInputStream in = log.input(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
404 while( in.available() > 0 ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
405 playOp(in,indexWriter); |
1465 | 406 } |
407 } | |
408 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
409 private static void playOp(LogInputStream in,LuceneIndexWriter indexWriter) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
410 in.readLong(); // time |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
411 int op = in.readByte(); |
1461 | 412 switch(op) { |
413 case OP_DELETE_ALL: | |
414 indexWriter.deleteAll(); | |
415 return; | |
416 case OP_DELETE_DOCUMENTS: | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
417 indexWriter.deleteDocuments( in.readQuery() ); |
1461 | 418 return; |
419 case OP_ADD_DOCUMENT: | |
1465 | 420 { |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
421 Map storedFields = in.readMap(); |
1465 | 422 indexWriter.addDocument(storedFields); |
423 return; | |
424 } | |
1461 | 425 case OP_UPDATE_DOCUMENT: |
1465 | 426 { |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
427 String keyFieldName = in.readUTF(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
428 Map storedFields = in.readMap(); |
1465 | 429 indexWriter.updateDocument(keyFieldName,storedFields); |
430 return; | |
431 } | |
1461 | 432 default: |
433 throw new RuntimeException("invalid op "+op); | |
434 } | |
435 } | |
436 | |
1465 | 437 private static void dump(LuceneIndexWriter indexWriter) throws IOException { |
438 IndexReader reader = indexWriter.openReader(); | |
439 IndexSearcher searcher = new IndexSearcher(reader); | |
440 Query query = new MatchAllDocsQuery(); | |
441 TopDocs td = searcher.search(query,100); | |
442 System.out.println("totalHits = "+td.totalHits); | |
443 for( int i=0; i<td.scoreDocs.length; i++ ) { | |
444 Document doc = searcher.doc(td.scoreDocs[i].doc); | |
445 System.out.println(LuceneUtils.toMap(doc)); | |
1461 | 446 } |
1465 | 447 System.out.println(); |
448 reader.close(); | |
1461 | 449 } |
1465 | 450 |
1461 | 451 } |