Mercurial Hosting > luan
annotate src/goodjava/lucene/logging/LoggingIndexWriter.java @ 1490:9a2a2181a58f
FixedLengthInputStream
| author | Franklin Schmidt <fschmidt@gmail.com> |
|---|---|
| date | Sat, 02 May 2020 20:42:28 -0600 |
| parents | af55cfad6e12 |
| children | 22e15cf73040 |
| rev | line source |
|---|---|
| 1461 | 1 package goodjava.lucene.logging; |
| 2 | |
| 1465 | 3 import java.io.File; |
| 4 import java.io.RandomAccessFile; | |
| 5 import java.io.ByteArrayOutputStream; | |
| 6 import java.io.DataOutputStream; | |
| 7 import java.io.DataInputStream; | |
| 8 import java.io.FileInputStream; | |
| 1461 | 9 import java.io.IOException; |
| 10 import java.util.Map; | |
| 1465 | 11 import java.util.Set; |
| 12 import java.util.HashSet; | |
| 13 import java.util.List; | |
| 14 import java.util.ArrayList; | |
| 15 import java.util.Random; | |
| 16 import org.apache.lucene.document.Document; | |
| 17 import org.apache.lucene.index.DirectoryReader; | |
| 18 import org.apache.lucene.index.IndexReader; | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
19 import org.apache.lucene.index.Term; |
| 1465 | 20 import org.apache.lucene.search.IndexSearcher; |
| 1461 | 21 import org.apache.lucene.search.Query; |
| 1465 | 22 import org.apache.lucene.search.MatchAllDocsQuery; |
| 23 import org.apache.lucene.search.TopDocs; | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
24 import org.apache.lucene.search.PrefixQuery; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
25 import org.apache.lucene.search.SortField; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
26 import org.apache.lucene.search.Sort; |
| 1465 | 27 import org.apache.lucene.store.Directory; |
| 28 import org.apache.lucene.store.FSDirectory; | |
| 1473 | 29 import goodjava.io.IoUtils; |
| 1461 | 30 import goodjava.lucene.api.GoodIndexWriter; |
| 1465 | 31 import goodjava.lucene.api.LuceneIndexWriter; |
| 32 import goodjava.lucene.api.GoodCollector; | |
| 33 import goodjava.lucene.api.LuceneUtils; | |
| 34 import goodjava.logging.Logger; | |
| 35 import goodjava.logging.LoggerFactory; | |
| 1461 | 36 |
| 37 | |
| 1488 | 38 public class LoggingIndexWriter implements GoodIndexWriter { |
| 1465 | 39 private static final Logger logger = LoggerFactory.getLogger(LoggingIndexWriter.class); |
| 40 private static final int version = 1; | |
| 1461 | 41 private static final int OP_DELETE_ALL = 1; |
| 42 private static final int OP_DELETE_DOCUMENTS = 2; | |
| 43 private static final int OP_ADD_DOCUMENT = 3; | |
| 44 private static final int OP_UPDATE_DOCUMENT = 4; | |
| 1465 | 45 private static final Random rnd = new Random(); |
| 1461 | 46 |
| 1465 | 47 public final LuceneIndexWriter indexWriter; |
| 48 private final File logDir; | |
| 1488 | 49 protected final List<LogFile> logs = new ArrayList<LogFile>(); |
| 1486 | 50 private LogOutputStream log; |
| 1465 | 51 private final File index; |
| 52 private boolean isMerging = false; | |
| 1461 | 53 |
/**
 * Wraps {@code indexWriter} so that changes are also recorded in log files kept in {@code logDir}.
 *
 * If {@code logDir} contains an "index" file whose version matches, the log files
 * named in it are loaded, every other file in {@code logDir} is deleted, and the
 * last listed log is opened for output.  Otherwise a fresh set of logs is built
 * from the current content of the Lucene index via {@link #newLogs()}.
 *
 * @param indexWriter the Lucene writer that receives every operation
 * @param logDir directory holding the log files; created if missing
 * @throws IOException if the index file or a log file cannot be read or written
 * @throws RuntimeException if {@code logDir} cannot be created or is not a directory
 */
public LoggingIndexWriter(LuceneIndexWriter indexWriter,File logDir) throws IOException {
	this.indexWriter = indexWriter;
	this.logDir = logDir;
	logDir.mkdirs();
	if( !logDir.isDirectory() )
		throw new RuntimeException("cannot create log directory "+logDir);
	index = new File(logDir,"index");
	if( index.exists() ) {
		// try-with-resources closes the stream on every path, including the early return
		try( DataInputStream dis = new DataInputStream(new FileInputStream(index)) ) {
			if( dis.readInt() == version ) {
				final int n = dis.readInt();
				for( int i=0; i<n; i++ ) {
					File file = new File( logDir, dis.readUTF() );
					logs.add( new LogFile(file) );
				}
				deleteUnusedFiles();
				setLog();
				return;
			}
		}
	}
	// no index file, or one written by a different version: rebuild the logs
	newLogs();
}
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
80 |
| 1486 | 81 private void setLog() throws IOException { |
| 82 if( log != null ) | |
| 83 log.close(); | |
| 84 log = logs.get(logs.size()-1).output(); | |
| 85 } | |
| 86 | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
87 public synchronized boolean isMerging() { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
88 return isMerging; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
89 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
90 |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
91 private synchronized void isNotMerging() { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
92 isMerging = false; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
93 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
94 |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
95 public synchronized void newLogs() throws IOException { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
96 if( isMerging ) |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
97 throw new RuntimeException("merging"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
98 logger.info("building new logs"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
99 logs.clear(); |
| 1465 | 100 for( int i=0; i<2; i++ ) { |
| 101 logs.add( newLogFile() ); | |
| 102 } | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
103 logLucene( System.currentTimeMillis(), logs.get(0), indexWriter ); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
104 writeIndex(); |
| 1486 | 105 setLog(); |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
106 logger.info("done building new logs"); |
| 1461 | 107 } |
| 108 | |
| 1486 | 109 private static void logLucene(long time,LogFile logLucene,LuceneIndexWriter indexWriter) throws IOException { |
| 110 LogOutputStream log = logLucene.output(); | |
| 1465 | 111 IndexReader reader = indexWriter.openReader(); |
| 112 final IndexSearcher searcher = new IndexSearcher(reader); | |
| 113 Query query = new MatchAllDocsQuery(); | |
| 114 searcher.search( query, new GoodCollector(){ | |
| 115 public void collectDoc(int iDoc) throws IOException { | |
| 116 Document doc = searcher.doc(iDoc); | |
| 117 Map<String,Object> storedFields = LuceneUtils.toMap(doc); | |
| 118 log.writeLong(time); | |
| 119 log.writeByte(OP_ADD_DOCUMENT); | |
| 120 log.writeMap(storedFields); | |
| 121 } | |
| 122 }); | |
| 123 reader.close(); | |
| 124 log.commit(); | |
| 1486 | 125 log.close(); |
| 1465 | 126 } |
| 127 | |
| 128 private LogFile newLogFile() throws IOException { | |
| 129 File file; | |
| 130 do { | |
| 131 file = new File(logDir,"_"+rnd.nextInt(100)+".log"); | |
| 132 } while( file.exists() ); | |
| 1486 | 133 return new LogFile(file); |
| 1461 | 134 } |
| 135 | |
| 1473 | 136 private void deleteUnusedFiles() throws IOException { |
| 1465 | 137 Set<String> used = new HashSet<String>(); |
| 138 used.add( index.getName() ); | |
| 139 for( LogFile lf : logs ) { | |
| 140 used.add( lf.file.getName() ); | |
| 141 } | |
| 142 for( File f : logDir.listFiles() ) { | |
| 143 if( !used.contains(f.getName()) ) { | |
| 1475 | 144 IoUtils.deleteRecursively(f); |
| 1465 | 145 } |
| 146 } | |
| 1461 | 147 } |
| 148 | |
| 1465 | 149 private void writeIndex() throws IOException { |
| 150 ByteArrayOutputStream baos = new ByteArrayOutputStream(); | |
| 151 DataOutputStream dos = new DataOutputStream(baos); | |
| 152 dos.writeInt(version); | |
| 153 dos.writeInt(logs.size()); | |
| 154 for( LogFile lf : logs ) { | |
| 155 String fileName = lf.file.getName(); | |
| 156 dos.writeUTF(fileName); | |
| 157 } | |
| 158 dos.close(); | |
| 159 RandomAccessFile raf = new RandomAccessFile( index, "rwd" ); | |
| 160 raf.write( baos.toByteArray() ); | |
| 161 raf.close(); | |
| 162 deleteUnusedFiles(); | |
| 163 logger.info("writeIndex "+logs.toString()); | |
| 1461 | 164 } |
| 165 | |
| 1465 | 166 private void mergeLogs() throws IOException { |
| 167 logger.info("merge"); | |
| 168 LogFile first = logs.get(0); | |
| 169 LogFile second = logs.get(1); | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
170 long lastTime = second.file.lastModified(); |
| 1465 | 171 File dirFile = new File(logDir,"merge"); |
| 172 if( dirFile.exists() ) | |
| 173 throw new RuntimeException(); | |
| 174 Directory dir = FSDirectory.open(dirFile); | |
| 175 LuceneIndexWriter mergeWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig ); | |
|
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
176 playLog( first.input(), mergeWriter ); |
|
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
177 playLog( second.input(), mergeWriter ); |
| 1465 | 178 mergeWriter.commit(); |
| 179 LogFile merge = newLogFile(); | |
| 180 logLucene( lastTime, merge, mergeWriter ); | |
| 181 mergeWriter.close(); | |
| 182 synchronized(this) { | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
183 //check(); |
| 1465 | 184 logs.remove(0); |
| 185 logs.set(0,merge); | |
| 186 writeIndex(); | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
187 //check(null); |
| 1465 | 188 } |
| 1461 | 189 } |
| 1465 | 190 private final Runnable mergeLogs = new Runnable() { public void run() { |
| 191 try { | |
| 192 mergeLogs(); | |
| 193 } catch(IOException e) { | |
| 194 throw new RuntimeException(e); | |
| 195 } finally { | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
196 isNotMerging(); |
| 1465 | 197 } |
| 198 } }; | |
| 1461 | 199 |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
200 private static class DocIter { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
201 final IndexReader reader; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
202 final TopDocs td; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
203 final int n; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
204 int i = 0; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
205 |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
206 DocIter(IndexReader reader,Query query,Sort sort) throws IOException { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
207 this.reader = reader; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
208 IndexSearcher searcher = new IndexSearcher(reader); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
209 this.td = searcher.search(query,10000000,sort); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
210 this.n = td.scoreDocs.length; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
211 if( td.totalHits != n ) |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
212 throw new RuntimeException(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
213 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
214 |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
215 Document next() throws IOException { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
216 return i < n ? reader.document(td.scoreDocs[i++].doc) : null; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
217 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
218 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
219 |
| 1487 | 220 private volatile boolean isChecking = false; |
| 221 | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
222 public void check(SortField sortField) throws IOException { |
| 1487 | 223 if( isChecking ) |
| 224 throw new RuntimeException("another check is running"); | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
225 IndexReader indexReader; |
|
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
226 List<LogInputStream> logReaders; |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
227 synchronized(this) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
228 indexReader = indexWriter.openReader(); |
|
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
229 logReaders = logReaders(logs); |
| 1465 | 230 } |
| 1487 | 231 isChecking = true; |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
232 try { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
233 logger.info("check start"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
234 indexWriter.check(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
235 File dirFile = new File(logDir,"check"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
236 IoUtils.deleteRecursively(dirFile); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
237 Directory dir = FSDirectory.open(dirFile); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
238 LuceneIndexWriter checkWriter = new LuceneIndexWriter( indexWriter.luceneVersion, dir, indexWriter.goodConfig ); |
|
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
239 playLogs(logReaders,checkWriter); |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
240 logger.info("check lucene"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
241 IndexReader checkReader = checkWriter.openReader(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
242 if( sortField == null ) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
243 int nCheck = checkReader.numDocs(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
244 int nOrig = indexReader.numDocs(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
245 if( nCheck != nOrig ) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
246 logger.error("numDocs mismatch: lucene="+nOrig+" logs="+nCheck); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
247 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
248 logger.info("numDocs="+nOrig); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
249 if( hash(indexReader) != hash(checkReader) ) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
250 logger.error("hash mismatch"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
251 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
252 } else { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
253 Sort sort = new Sort(sortField); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
254 String sortFieldName = sortField.getField(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
255 Query query = new PrefixQuery(new Term(sortFieldName)); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
256 DocIter origIter = new DocIter(indexReader,query,sort); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
257 DocIter checkIter = new DocIter(checkReader,query,sort); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
258 Map<String,Object> origFields = LuceneUtils.toMap(origIter.next()); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
259 Map<String,Object> checkFields = LuceneUtils.toMap(checkIter.next()); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
260 while( origFields!=null && checkFields!=null ) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
261 Comparable origFld = (Comparable)origFields.get(sortFieldName); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
262 Comparable checkFld = (Comparable)checkFields.get(sortFieldName); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
263 int cmp = origFld.compareTo(checkFld); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
264 if( cmp==0 ) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
265 if( !origFields.equals(checkFields) ) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
266 logger.error(sortFieldName+" "+origFld+" not equal"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
267 logger.error("lucene = "+origFields); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
268 logger.error("logs = "+checkFields); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
269 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
270 origFields = LuceneUtils.toMap(origIter.next()); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
271 checkFields = LuceneUtils.toMap(checkIter.next()); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
272 } else if( cmp < 0 ) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
273 logger.error(sortFieldName+" "+origFld+" found in lucene but not logs"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
274 origFields = LuceneUtils.toMap(origIter.next()); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
275 } else { // > |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
276 logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
277 checkFields = LuceneUtils.toMap(checkIter.next()); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
278 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
279 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
280 while( origFields!=null ) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
281 Comparable origFld = (Comparable)origFields.get(sortFieldName); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
282 logger.error(sortFieldName+" "+origFld+" found in lucene but not logs"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
283 origFields = LuceneUtils.toMap(origIter.next()); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
284 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
285 while( checkFields!=null ) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
286 Comparable checkFld = (Comparable)checkFields.get(sortFieldName); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
287 logger.error(sortFieldName+" "+checkFld+" found in logs but not lucene"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
288 checkFields = LuceneUtils.toMap(checkIter.next()); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
289 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
290 //logger.info("check done"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
291 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
292 checkReader.close(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
293 checkWriter.close(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
294 IoUtils.deleteRecursively(dirFile); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
295 logger.info("check done"); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
296 } finally { |
| 1487 | 297 isChecking = false; |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
298 indexReader.close(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
299 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
300 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
301 |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
302 private static abstract class HashCollector extends GoodCollector { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
303 int total = 0; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
304 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
305 |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
306 private static int hash(IndexReader reader) throws IOException { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
307 final IndexSearcher searcher = new IndexSearcher(reader); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
308 Query query = new MatchAllDocsQuery(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
309 HashCollector col = new HashCollector() { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
310 public void collectDoc(int iDoc) throws IOException { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
311 Document doc = searcher.doc(iDoc); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
312 Map<String,Object> storedFields = LuceneUtils.toMap(doc); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
313 total += storedFields.hashCode(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
314 } |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
315 }; |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
316 searcher.search(query,col); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
317 return col.total; |
| 1461 | 318 } |
| 319 | |
| 1465 | 320 public synchronized void close() throws IOException { |
| 321 indexWriter.close(); | |
| 322 log.commit(); | |
| 1486 | 323 log.close(); |
| 1465 | 324 } |
| 325 | |
| 326 public synchronized void commit() throws IOException { | |
| 327 indexWriter.commit(); | |
| 328 log.commit(); | |
| 329 if( isMerging ) | |
| 330 return; | |
| 1486 | 331 if( log.logFile.end() > logs.get(0).end() ) { |
| 1465 | 332 logs.add( newLogFile() ); |
| 333 writeIndex(); | |
| 1486 | 334 setLog(); |
| 1465 | 335 } |
| 336 if( logs.size() > 3 ) { | |
| 337 isMerging = true; | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
338 new Thread(mergeLogs).start(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
339 // mergeLogs.run(); |
| 1465 | 340 } |
| 1461 | 341 } |
| 342 | |
| 1465 | 343 public synchronized void rollback() throws IOException { |
| 344 indexWriter.rollback(); | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
345 log.rollback(); |
| 1465 | 346 } |
| 347 | |
| 348 public synchronized void deleteAll() throws IOException { | |
| 349 indexWriter.deleteAll(); | |
| 1486 | 350 writeOp(OP_DELETE_ALL); |
| 1461 | 351 } |
| 352 | |
| 1465 | 353 public synchronized void deleteDocuments(Query query) throws IOException { |
| 354 indexWriter.deleteDocuments(query); | |
| 1486 | 355 writeOp(OP_DELETE_DOCUMENTS); |
| 1465 | 356 log.writeQuery(query); |
| 357 } | |
| 358 | |
| 359 public synchronized void addDocument(Map<String,Object> storedFields) throws IOException { | |
| 360 indexWriter.addDocument(storedFields); | |
| 1486 | 361 writeOp(OP_ADD_DOCUMENT); |
| 1465 | 362 log.writeMap(storedFields); |
| 363 } | |
| 364 | |
| 365 public synchronized void updateDocument(String keyFieldName,Map<String,Object> storedFields) throws IOException { | |
| 366 indexWriter.updateDocument(keyFieldName,storedFields); | |
| 1486 | 367 writeOp(OP_UPDATE_DOCUMENT); |
| 1465 | 368 log.writeUTF(keyFieldName); |
| 369 log.writeMap(storedFields); | |
| 370 } | |
| 371 | |
| 372 public synchronized void reindexDocuments(String keyFieldName,Query query) throws IOException { | |
| 1461 | 373 indexWriter.reindexDocuments(keyFieldName,query); |
| 374 } | |
| 375 | |
| 1486 | 376 private void writeOp(int op) throws IOException { |
| 1465 | 377 log.writeLong(System.currentTimeMillis()); |
| 378 log.writeByte(op); | |
| 379 } | |
| 380 | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
381 public synchronized void playLogs() throws IOException { |
|
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
382 playLogs( logReaders(logs), indexWriter ); |
| 1465 | 383 } |
| 384 | |
|
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
385 private static List<LogInputStream> logReaders(List<LogFile> logs) throws IOException { |
|
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
386 List<LogInputStream> logReaders = new ArrayList<LogInputStream>(); |
|
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
387 for( LogFile log : logs ) { |
|
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
388 logReaders.add( log.input() ); |
|
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
389 } |
|
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
390 return logReaders; |
|
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
391 } |
|
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
392 |
|
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
393 private static void playLogs(List<LogInputStream> logReaders,LuceneIndexWriter indexWriter) throws IOException { |
| 1465 | 394 if( numDocs(indexWriter) != 0 ) |
| 395 throw new RuntimeException ("not empty"); | |
|
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
396 for( LogInputStream reader : logReaders ) { |
|
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
397 playLog(reader,indexWriter); |
| 1465 | 398 } |
| 399 indexWriter.commit(); | |
| 400 } | |
| 401 | |
| 402 private static int numDocs(LuceneIndexWriter indexWriter) throws IOException { | |
| 403 IndexReader reader = indexWriter.openReader(); | |
| 404 int n = reader.numDocs(); | |
| 405 reader.close(); | |
| 406 return n; | |
| 407 } | |
| 408 | |
|
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
409 private static void playLog(LogInputStream in,LuceneIndexWriter indexWriter) throws IOException { |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
410 while( in.available() > 0 ) { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
411 playOp(in,indexWriter); |
| 1465 | 412 } |
|
1484
1fa6e8ec2d53
lucene.logging cleanup
Franklin Schmidt <fschmidt@gmail.com>
parents:
1476
diff
changeset
|
413 in.close(); |
| 1465 | 414 } |
| 415 | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
416 private static void playOp(LogInputStream in,LuceneIndexWriter indexWriter) throws IOException { |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
417 in.readLong(); // time |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
418 int op = in.readByte(); |
| 1461 | 419 switch(op) { |
| 420 case OP_DELETE_ALL: | |
| 421 indexWriter.deleteAll(); | |
| 422 return; | |
| 423 case OP_DELETE_DOCUMENTS: | |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
424 indexWriter.deleteDocuments( in.readQuery() ); |
| 1461 | 425 return; |
| 426 case OP_ADD_DOCUMENT: | |
| 1465 | 427 { |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
428 Map storedFields = in.readMap(); |
| 1465 | 429 indexWriter.addDocument(storedFields); |
| 430 return; | |
| 431 } | |
| 1461 | 432 case OP_UPDATE_DOCUMENT: |
| 1465 | 433 { |
|
1476
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
434 String keyFieldName = in.readUTF(); |
|
7d145095cc0b
lucene.logging check
Franklin Schmidt <fschmidt@gmail.com>
parents:
1475
diff
changeset
|
435 Map storedFields = in.readMap(); |
| 1465 | 436 indexWriter.updateDocument(keyFieldName,storedFields); |
| 437 return; | |
| 438 } | |
| 1461 | 439 default: |
| 440 throw new RuntimeException("invalid op "+op); | |
| 441 } | |
| 442 } | |
| 443 | |
| 1465 | 444 private static void dump(LuceneIndexWriter indexWriter) throws IOException { |
| 445 IndexReader reader = indexWriter.openReader(); | |
| 446 IndexSearcher searcher = new IndexSearcher(reader); | |
| 447 Query query = new MatchAllDocsQuery(); | |
| 448 TopDocs td = searcher.search(query,100); | |
| 449 System.out.println("totalHits = "+td.totalHits); | |
| 450 for( int i=0; i<td.scoreDocs.length; i++ ) { | |
| 451 Document doc = searcher.doc(td.scoreDocs[i].doc); | |
| 452 System.out.println(LuceneUtils.toMap(doc)); | |
| 1461 | 453 } |
| 1465 | 454 System.out.println(); |
| 455 reader.close(); | |
| 1461 | 456 } |
| 1465 | 457 |
| 1461 | 458 } |
