Mercurial Hosting > luan
annotate src/goodjava/lucene/logging/LoggingIndexWriter.java @ 1502:8a7b6b32c691
minor threads
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Sun, 10 May 2020 22:48:15 -0600 |
parents | e66e3d50b289 |
children | f443542d8650 |
rev | line source |
---|---|
1461 | 1 package goodjava.lucene.logging; |
2 | |
1465 | 3 import java.io.File; |
4 import java.io.RandomAccessFile; | |
5 import java.io.ByteArrayOutputStream; | |
6 import java.io.DataOutputStream; | |
7 import java.io.DataInputStream; | |
8 import java.io.FileInputStream; | |
1461 | 9 import java.io.IOException; |
10 import java.util.Map; | |
1465 | 11 import java.util.Set; |
12 import java.util.HashSet; | |
13 import java.util.List; | |
14 import java.util.ArrayList; | |
15 import java.util.Random; | |
1502 | 16 import java.util.concurrent.Executors; |
17 import java.util.concurrent.ExecutorService; | |
1465 | 18 import org.apache.lucene.document.Document; |
19 import org.apache.lucene.index.DirectoryReader; | |
20 import org.apache.lucene.index.IndexReader; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
21 import org.apache.lucene.index.Term; |
1465 | 22 import org.apache.lucene.search.IndexSearcher; |
1461 | 23 import org.apache.lucene.search.Query; |
1465 | 24 import org.apache.lucene.search.MatchAllDocsQuery; |
25 import org.apache.lucene.search.TopDocs; | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
26 import org.apache.lucene.search.PrefixQuery; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
27 import org.apache.lucene.search.SortField; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
28 import org.apache.lucene.search.Sort; |
1465 | 29 import org.apache.lucene.store.Directory; |
30 import org.apache.lucene.store.FSDirectory; | |
1473 | 31 import goodjava.io.IoUtils; |
1461 | 32 import goodjava.lucene.api.GoodIndexWriter; |
1465 | 33 import goodjava.lucene.api.LuceneIndexWriter; |
34 import goodjava.lucene.api.GoodCollector; | |
35 import goodjava.lucene.api.LuceneUtils; | |
36 import goodjava.logging.Logger; | |
37 import goodjava.logging.LoggerFactory; | |
1461 | 38 |
39 | |
1488 | 40 public class LoggingIndexWriter implements GoodIndexWriter { |
1465 | 41 private static final Logger logger = LoggerFactory.getLogger(LoggingIndexWriter.class); |
42 private static final int version = 1; | |
1461 | 43 private static final int OP_DELETE_ALL = 1; |
44 private static final int OP_DELETE_DOCUMENTS = 2; | |
45 private static final int OP_ADD_DOCUMENT = 3; | |
46 private static final int OP_UPDATE_DOCUMENT = 4; | |
1465 | 47 private static final Random rnd = new Random(); |
1502 | 48 protected static final ExecutorService threadPool = Executors.newCachedThreadPool(); |
1461 | 49 |
1465 | 50 public final LuceneIndexWriter indexWriter; |
51 private final File logDir; | |
1488 | 52 protected final List<LogFile> logs = new ArrayList<LogFile>(); |
1486 | 53 private LogOutputStream log; |
1465 | 54 private final File index; |
55 private boolean isMerging = false; | |
1461 | 56 |
/**
 * Wraps {@code indexWriter} so that changes are also recorded in log files kept in {@code logDir}.
 * The directory is created if needed.  When it already contains an "index" file whose
 * version matches, the log files it lists are reused; otherwise new logs are built
 * from the current content of the Lucene index.
 *
 * @param indexWriter the Lucene writer to wrap
 * @param logDir directory for the "index" file and the log files
 * @throws IOException if the index file or the logs cannot be read or written
 * @throws RuntimeException if {@code logDir} is not a directory after creation
 */
public LoggingIndexWriter(LuceneIndexWriter indexWriter,File logDir) throws IOException {
	this.indexWriter = indexWriter;
	this.logDir = logDir;
	IoUtils.mkdirs(logDir);
	if( !logDir.isDirectory() )
		throw new RuntimeException("not a directory: "+logDir);
	index = new File(logDir,"index");
	if( index.exists() ) {
		// try-with-resources so a failing close() cannot hide the original read error
		try( DataInputStream dis = new DataInputStream(new FileInputStream(index)) ) {
			if( dis.readInt() == version ) {
				final int n = dis.readInt();
				for( int i=0; i<n; i++ ) {
					File file = new File( logDir, dis.readUTF() );
					logs.add( new LogFile(file) );
				}
				deleteUnusedFiles();
				setLog();
				return;
			}
		}
	}
	// no index file, or one written with a different version: start over
	newLogs();
}
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
83 |
1486 | 84 private void setLog() throws IOException { |
85 if( log != null ) | |
86 log.close(); | |
87 log = logs.get(logs.size()-1).output(); | |
88 } | |
89 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
90 public synchronized boolean isMerging() { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
91 return isMerging; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
92 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
93 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
94 private synchronized void isNotMerging() { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
95 isMerging = false; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
96 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
97 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
98 public synchronized void newLogs() throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
99 if( isMerging ) |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
100 throw new RuntimeException("merging"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
101 logger.info("building new logs"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
102 logs.clear(); |
1465 | 103 for( int i=0; i<2; i++ ) { |
104 logs.add( newLogFile() ); | |
105 } | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
106 logLucene( System.currentTimeMillis(), logs.get(0), indexWriter ); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
107 writeIndex(); |
1486 | 108 setLog(); |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
109 logger.info("done building new logs"); |
1461 | 110 } |
111 | |
1486 | 112 private static void logLucene(long time,LogFile logLucene,LuceneIndexWriter indexWriter) throws IOException { |
113 LogOutputStream log = logLucene.output(); | |
1465 | 114 IndexReader reader = indexWriter.openReader(); |
115 final IndexSearcher searcher = new IndexSearcher(reader); | |
116 Query query = new MatchAllDocsQuery(); | |
117 searcher.search( query, new GoodCollector(){ | |
118 public void collectDoc(int iDoc) throws IOException { | |
119 Document doc = searcher.doc(iDoc); | |
120 Map<String,Object> storedFields = LuceneUtils.toMap(doc); | |
121 log.writeLong(time); | |
122 log.writeByte(OP_ADD_DOCUMENT); | |
123 log.writeMap(storedFields); | |
124 } | |
125 }); | |
126 reader.close(); | |
127 log.commit(); | |
1486 | 128 log.close(); |
1465 | 129 } |
130 | |
131 private LogFile newLogFile() throws IOException { | |
132 File file; | |
133 do { | |
134 file = new File(logDir,"_"+rnd.nextInt(100)+".log"); | |
135 } while( file.exists() ); | |
1486 | 136 return new LogFile(file); |
1461 | 137 } |
138 | |
1473 | 139 private void deleteUnusedFiles() throws IOException { |
1499 | 140 deleteUnusedFiles(logs,index); |
141 } | |
142 | |
143 private static void deleteUnusedFiles(List<LogFile> logs,File index) throws IOException { | |
1465 | 144 Set<String> used = new HashSet<String>(); |
145 used.add( index.getName() ); | |
146 for( LogFile lf : logs ) { | |
147 used.add( lf.file.getName() ); | |
148 } | |
1499 | 149 for( File f : index.getParentFile().listFiles() ) { |
1465 | 150 if( !used.contains(f.getName()) ) { |
1475 | 151 IoUtils.deleteRecursively(f); |
1465 | 152 } |
153 } | |
1461 | 154 } |
155 | |
1465 | 156 private void writeIndex() throws IOException { |
1499 | 157 writeIndex(logs,index); |
158 } | |
159 | |
160 public static void writeIndex(List<LogFile> logs,File index) throws IOException { | |
1465 | 161 ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
162 DataOutputStream dos = new DataOutputStream(baos); | |
163 dos.writeInt(version); | |
164 dos.writeInt(logs.size()); | |
165 for( LogFile lf : logs ) { | |
166 String fileName = lf.file.getName(); | |
167 dos.writeUTF(fileName); | |
168 } | |
169 dos.close(); | |
170 RandomAccessFile raf = new RandomAccessFile( index, "rwd" ); | |
171 raf.write( baos.toByteArray() ); | |
172 raf.close(); | |
1499 | 173 deleteUnusedFiles(logs,index); |
174 //logger.info("writeIndex "+logs.toString()); | |
1461 | 175 } |
176 | |
1465 | 177 private void mergeLogs() throws IOException { |
178 logger.info("merge"); | |
179 LogFile first = logs.get(0); | |
180 LogFile second = logs.get(1); | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
181 long lastTime = second.file.lastModified(); |
1465 | 182 File dirFile = new File(logDir,"merge"); |
183 if( dirFile.exists() ) | |
184 throw new RuntimeException(); | |
185 Directory dir = FSDirectory.open(dirFile); | |
186 LuceneIndexWriter mergeWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig ); | |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
187 playLog( first.input(), mergeWriter ); |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
188 playLog( second.input(), mergeWriter ); |
1465 | 189 mergeWriter.commit(); |
190 LogFile merge = newLogFile(); | |
191 logLucene( lastTime, merge, mergeWriter ); | |
192 mergeWriter.close(); | |
193 synchronized(this) { | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
194 //check(); |
1465 | 195 logs.remove(0); |
196 logs.set(0,merge); | |
197 writeIndex(); | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
198 //check(null); |
1465 | 199 } |
1461 | 200 } |
1465 | 201 private final Runnable mergeLogs = new Runnable() { public void run() { |
202 try { | |
203 mergeLogs(); | |
204 } catch(IOException e) { | |
205 throw new RuntimeException(e); | |
206 } finally { | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
207 isNotMerging(); |
1465 | 208 } |
209 } }; | |
1461 | 210 |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
211 private static class DocIter { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
212 final IndexReader reader; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
213 final TopDocs td; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
214 final int n; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
215 int i = 0; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
216 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
217 DocIter(IndexReader reader,Query query,Sort sort) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
218 this.reader = reader; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
219 IndexSearcher searcher = new IndexSearcher(reader); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
220 this.td = searcher.search(query,10000000,sort); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
221 this.n = td.scoreDocs.length; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
222 if( td.totalHits != n ) |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
223 throw new RuntimeException(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
224 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
225 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
226 Document next() throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
227 return i < n ? reader.document(td.scoreDocs[i++].doc) : null; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
228 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
229 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
230 |
1487 | 231 private volatile boolean isChecking = false; |
232 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
233 public void check(SortField sortField) throws IOException { |
1487 | 234 if( isChecking ) |
235 throw new RuntimeException("another check is running"); | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
236 IndexReader indexReader; |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
237 List<LogInputStream> logReaders; |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
238 synchronized(this) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
239 indexReader = indexWriter.openReader(); |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
240 logReaders = logReaders(logs); |
1465 | 241 } |
1487 | 242 isChecking = true; |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
243 try { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
244 logger.info("check start"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
245 indexWriter.check(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
246 File dirFile = new File(logDir,"check"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
247 IoUtils.deleteRecursively(dirFile); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
248 Directory dir = FSDirectory.open(dirFile); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
249 LuceneIndexWriter checkWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig ); |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
250 playLogs(logReaders,checkWriter); |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
251 logger.info("check lucene"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
252 IndexReader checkReader = checkWriter.openReader(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
253 if( sortField == null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
254 int nCheck = checkReader.numDocs(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
255 int nOrig = indexReader.numDocs(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
256 if( nCheck != nOrig ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
257 logger.error("numDocs mismatch: lucene="+nOrig+" logs="+nCheck); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
258 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
259 logger.info("numDocs="+nOrig); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
260 if( hash(indexReader) != hash(checkReader) ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
261 logger.error("hash mismatch"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
262 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
263 } else { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
264 Sort sort = new Sort(sortField); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
265 String sortFieldName = sortField.getField(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
266 Query query = new PrefixQuery(new Term(sortFieldName)); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
267 DocIter origIter = new DocIter(indexReader,query,sort); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
268 DocIter checkIter = new DocIter(checkReader,query,sort); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
269 Map<String,Object> origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
270 Map<String,Object> checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
271 while( origFields!=null && checkFields!=null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
272 Comparable origFld = (Comparable)origFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
273 Comparable checkFld = (Comparable)checkFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
274 int cmp = origFld.compareTo(checkFld); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
275 if( cmp==0 ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
276 if( !origFields.equals(checkFields) ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
277 logger.error(sortFieldName+" "+origFld+" not equal"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
278 logger.error("lucene = "+origFields); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
279 logger.error("logs = "+checkFields); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
280 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
281 origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
282 checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
283 } else if( cmp < 0 ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
284 logger.error(sortFieldName+" "+origFld+" found in lucene but not logs"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
285 origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
286 } else { // > |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
287 logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
288 checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
289 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
290 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
291 while( origFields!=null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
292 Comparable origFld = (Comparable)origFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
293 logger.error(sortFieldName+" "+origFld+" found in lucene but not logs"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
294 origFields = LuceneUtils.toMap(origIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
295 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
296 while( checkFields!=null ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
297 Comparable checkFld = (Comparable)checkFields.get(sortFieldName); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
298 logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
299 checkFields = LuceneUtils.toMap(checkIter.next()); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
300 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
301 //logger.info("check done"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
302 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
303 checkReader.close(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
304 checkWriter.close(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
305 IoUtils.deleteRecursively(dirFile); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
306 logger.info("check done"); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
307 } finally { |
1487 | 308 isChecking = false; |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
309 indexReader.close(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
310 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
311 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
312 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
313 private static abstract class HashCollector extends GoodCollector { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
314 int total = 0; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
315 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
316 |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
317 private static int hash(IndexReader reader) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
318 final IndexSearcher searcher = new IndexSearcher(reader); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
319 Query query = new MatchAllDocsQuery(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
320 HashCollector col = new HashCollector() { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
321 public void collectDoc(int iDoc) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
322 Document doc = searcher.doc(iDoc); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
323 Map<String,Object> storedFields = LuceneUtils.toMap(doc); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
324 total += storedFields.hashCode(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
325 } |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
326 }; |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
327 searcher.search(query,col); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
328 return col.total; |
1461 | 329 } |
330 | |
1465 | 331 public synchronized void close() throws IOException { |
332 indexWriter.close(); | |
333 log.commit(); | |
1486 | 334 log.close(); |
1465 | 335 } |
336 | |
337 public synchronized void commit() throws IOException { | |
338 indexWriter.commit(); | |
339 log.commit(); | |
340 if( isMerging ) | |
341 return; | |
1486 | 342 if( log.logFile.end() > logs.get(0).end() ) { |
1465 | 343 logs.add( newLogFile() ); |
344 writeIndex(); | |
1486 | 345 setLog(); |
1465 | 346 } |
347 if( logs.size() > 3 ) { | |
348 isMerging = true; | |
1502 | 349 threadPool.execute(mergeLogs); |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
350 // mergeLogs.run(); |
1465 | 351 } |
1461 | 352 } |
353 | |
1465 | 354 public synchronized void rollback() throws IOException { |
355 indexWriter.rollback(); | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
356 log.rollback(); |
1465 | 357 } |
358 | |
359 public synchronized void deleteAll() throws IOException { | |
360 indexWriter.deleteAll(); | |
1486 | 361 writeOp(OP_DELETE_ALL); |
1461 | 362 } |
363 | |
1465 | 364 public synchronized void deleteDocuments(Query query) throws IOException { |
365 indexWriter.deleteDocuments(query); | |
1486 | 366 writeOp(OP_DELETE_DOCUMENTS); |
1465 | 367 log.writeQuery(query); |
368 } | |
369 | |
370 public synchronized void addDocument(Map<String,Object> storedFields) throws IOException { | |
371 indexWriter.addDocument(storedFields); | |
1486 | 372 writeOp(OP_ADD_DOCUMENT); |
1465 | 373 log.writeMap(storedFields); |
374 } | |
375 | |
376 public synchronized void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException { | |
377 indexWriter.updateDocument(keyFieldName,storedFields); | |
1486 | 378 writeOp(OP_UPDATE_DOCUMENT); |
1465 | 379 log.writeUTF(keyFieldName); |
380 log.writeMap(storedFields); | |
381 } | |
382 | |
383 public synchronized void reindexDocuments(String keyFieldName,Query query) throws IOException { | |
1461 | 384 indexWriter.reindexDocuments(keyFieldName,query); |
385 } | |
386 | |
1486 | 387 private void writeOp(int op) throws IOException { |
1465 | 388 log.writeLong(System.currentTimeMillis()); |
389 log.writeByte(op); | |
390 } | |
391 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
392 public synchronized void playLogs() throws IOException { |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
393 playLogs( logReaders(logs), indexWriter ); |
1465 | 394 } |
395 | |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
396 private static List<LogInputStream> logReaders(List<LogFile> logs) throws IOException { |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
397 List<LogInputStream> logReaders = new ArrayList<LogInputStream>(); |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
398 for( LogFile log : logs ) { |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
399 logReaders.add( log.input() ); |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
400 } |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
401 return logReaders; |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
402 } |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
403 |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
404 private static void playLogs(List<LogInputStream> logReaders,LuceneIndexWriter indexWriter) throws IOException { |
1465 | 405 if( numDocs(indexWriter) != 0 ) |
406 throw new RuntimeException ("not empty"); | |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
407 for( LogInputStream reader : logReaders ) { |
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
408 playLog(reader,indexWriter); |
1465 | 409 } |
410 indexWriter.commit(); | |
411 } | |
412 | |
413 private static int numDocs(LuceneIndexWriter indexWriter) throws IOException { | |
414 IndexReader reader = indexWriter.openReader(); | |
415 int n = reader.numDocs(); | |
416 reader.close(); | |
417 return n; | |
418 } | |
419 | |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
420 private static void playLog(LogInputStream in,LuceneIndexWriter indexWriter) throws IOException { |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
421 while( in.available() > 0 ) { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
422 playOp(in,indexWriter); |
1465 | 423 } |
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
424 in.close(); |
1465 | 425 } |
426 | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
427 private static void playOp(LogInputStream in,LuceneIndexWriter indexWriter) throws IOException { |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
428 in.readLong(); // time |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
429 int op = in.readByte(); |
1461 | 430 switch(op) { |
431 case OP_DELETE_ALL: | |
432 indexWriter.deleteAll(); | |
433 return; | |
434 case OP_DELETE_DOCUMENTS: | |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
435 indexWriter.deleteDocuments( in.readQuery() ); |
1461 | 436 return; |
437 case OP_ADD_DOCUMENT: | |
1465 | 438 { |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
439 Map storedFields = in.readMap(); |
1465 | 440 indexWriter.addDocument(storedFields); |
441 return; | |
442 } | |
1461 | 443 case OP_UPDATE_DOCUMENT: |
1465 | 444 { |
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
445 String keyFieldName = in.readUTF(); |
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
446 Map storedFields = in.readMap(); |
1465 | 447 indexWriter.updateDocument(keyFieldName,storedFields); |
448 return; | |
449 } | |
1461 | 450 default: |
451 throw new RuntimeException("invalid op "+op); | |
452 } | |
453 } | |
454 | |
1465 | 455 private static void dump(LuceneIndexWriter indexWriter) throws IOException { |
456 IndexReader reader = indexWriter.openReader(); | |
457 IndexSearcher searcher = new IndexSearcher(reader); | |
458 Query query = new MatchAllDocsQuery(); | |
459 TopDocs td = searcher.search(query,100); | |
460 System.out.println("totalHits = "+td.totalHits); | |
461 for( int i=0; i<td.scoreDocs.length; i++ ) { | |
462 Document doc = searcher.doc(td.scoreDocs[i].doc); | |
463 System.out.println(LuceneUtils.toMap(doc)); | |
1461 | 464 } |
1465 | 465 System.out.println(); |
466 reader.close(); | |
1461 | 467 } |
1465 | 468 |
1461 | 469 } |