Mercurial Hosting > junotu
view src/junotu/Database.java @ 58:9cef3d27f2e8
Improved search
Now collecting title, content and tags into an indexed (but not stored) 'search' field and searching that field by default. The search query is no longer doubled (before, it searched title and content separately), but each card-to-document conversion might take double the memory because of the title, content and tag concatenation. This shouldn't be a big deal, since cards usually don't contain a lot of text.
author | Fox |
---|---|
date | Fri, 23 Dec 2022 15:01:23 +0100 |
parents | 2c1f4dc0513f |
children | 1304d3d5b4a4 |
line wrap: on
line source
package junotu;

import java.lang.RuntimeException;

import java.io.File;
import java.util.Set;

import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.util.Version;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.document.Document;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause;

import junotu.Card;

/**
 * Card storage backed by a Lucene index located in {@link #DATABASE_DIRECTORY}.
 *
 * Every card is stored as one Lucene document: each tag value becomes a stored
 * field, and all tags and values are additionally concatenated into the
 * indexed-only {@code Card.TAG_SEARCH} field that {@link #searchSimple(String)}
 * queries by default.
 *
 * Changes become visible to searches right after each add/update/delete
 * (the searcher is reopened from the writer), but they are only written out by
 * {@link #databaseCommit()}.
 *
 * This class performs no synchronization of its own. Reopening the searcher
 * closes the previous reader, so an instance shared between threads needs
 * external serialization.
 */
public class Database {

    public static final String DATABASE_DIRECTORY = "./database";
    public static final Version LUCENE_VERSION = Version.LUCENE_30;

    /** Maximum number of hits returned by {@link #searchSimple(String)}. */
    private static final int SEARCH_RESULT_LIMIT = 32;

    /** Shared by the index writer and the query parser so both tokenize identically. */
    private final StandardAnalyzer analyzer = new StandardAnalyzer( LUCENE_VERSION );

    private IndexWriter luceneWriter;
    private IndexSearcher luceneSearcher;

    /** Largest identifier handed out so far; 0 when the index is empty. */
    private long highestIdentifier;

    /**
     * Opens the index in {@link #DATABASE_DIRECTORY} and determines the highest
     * card identifier already present.
     *
     * @throws Exception if the index cannot be opened or searched.
     */
    public Database() throws Exception {
        Directory indexDirectory = FSDirectory.open( new File( DATABASE_DIRECTORY ) );
        // NOTE(review): the null argument is the IndexDeletionPolicy slot; presumably
        // this makes Lucene fall back to its default policy - confirm.
        luceneWriter = new IndexWriter(
            indexDirectory,
            analyzer,
            null,
            IndexWriter.MaxFieldLength.UNLIMITED
        );
        searcherRefresh();

        /* Find highest unique identifier. */
        TopDocs topDocuments = luceneSearcher.search(
            new MatchAllDocsQuery(),
            null,
            1,
            new Sort( new SortField( Card.TAG_IDENTIFIER, SortField.LONG, true ) )
        );

        if( topDocuments.scoreDocs.length == 0 ) {
            highestIdentifier = 0;
        } else {
            /** TODO: Find a way to get NumericField from document. */
            highestIdentifier = Long.parseLong(
                luceneSearcher.doc( topDocuments.scoreDocs[0].doc ).get( Card.TAG_IDENTIFIER )
            );
        }
    }

    /**
     * Commits all pending index changes through the writer.
     *
     * @throws Exception if the commit fails.
     */
    public void databaseCommit() throws Exception {
        System.out.print( "Saving database to disk..\n" );
        luceneWriter.commit();
    }

    /**
     * Reopens the searcher on a fresh reader obtained from the writer and
     * releases the previous searcher together with its reader.
     *
     * The new reader is opened first, so a failure leaves the old searcher in
     * place. The old reader has to be closed explicitly: a searcher built from
     * an existing reader does not own that reader.
     */
    private void searcherRefresh() throws Exception {
        IndexSearcher previous = luceneSearcher;
        luceneSearcher = new IndexSearcher( luceneWriter.getReader() );
        if( previous != null ) {
            previous.close();
            previous.getIndexReader().close();
        }
    }

    /** Builds a query matching exactly the document whose identifier field equals 'identifier'. */
    private Query identifierQuery( long identifier ) {
        return NumericRangeQuery.newLongRange( Card.TAG_IDENTIFIER, identifier, identifier, true, true );
    }

    /**
     * Looks up the stored document for the given card identifier.
     *
     * @return the first matching document, or null when there is none.
     */
    private Document documentByIdentifier( long identifier ) throws Exception {
        TopDocs topDocuments = luceneSearcher.search( identifierQuery( identifier ), 1 );
        if( topDocuments.scoreDocs.length == 0 ) {
            return null;
        }
        return luceneSearcher.doc( topDocuments.scoreDocs[0].doc );
    }

    /**
     * Converts a card into a Lucene document.
     *
     * Null values become empty, non-analyzed fields (skipped for the empty tag
     * name), strings become analyzed fields and Long values become numeric
     * fields. Values of any other non-Number type get no field of their own but
     * still contribute to the search text.
     *
     * @throws RuntimeException if a Number value is not a Long.
     */
    private Document cardToDocument( Card card ) throws Exception {
        Document document = new Document();
        // StringBuilder avoids re-copying the whole search text for every value.
        StringBuilder search = new StringBuilder();

        for( String tag : card.tags.keySet() ) {
            Set<Object> values = card.tags.get( tag );

            for( Object value : values ) {
                search.append( tag ).append( ' ' );

                if( value == null ) {
                    if( !tag.equals("") ) {
                        document.add( new Field( tag, "", Field.Store.YES, Field.Index.NOT_ANALYZED ) );
                    }
                    continue;
                }

                search.append( value.toString() ).append( ' ' );

                if( value instanceof String ) {
                    document.add( new Field( tag, (String)value, Field.Store.YES, Field.Index.ANALYZED ) );
                } else if( value instanceof Number ) {
                    if( !( value instanceof Long ) ) {
                        throw new RuntimeException( "Unknown tag number type." );
                    }
                    NumericField field = new NumericField( tag, Field.Store.YES, true );
                    field.setLongValue( ((Long)value).longValue() );
                    document.add( field );
                }
            }
        }

        document.add( new Field( Card.TAG_SEARCH, search.toString(), Field.Store.NO, Field.Index.ANALYZED ) );
        return document;
    }

    /**
     * Rebuilds a card from a document retrieved from the index.
     *
     * All field values are read back through their string form; an empty string
     * is mapped to a null tag value. The identifier tag is afterwards replaced
     * by a single Long value.
     */
    private Card cardFromDocument( Document document ) throws Exception {
        Card card = new Card();

        for( Fieldable field : document.getFields() ) {
            /** TODO: Find how to get NumericField from document. */
            String value = field.stringValue();
            // "".equals(value) also tolerates a field without a string value.
            card.tagValueAdd( field.name(), "".equals( value ) ? null : value );
        }

        card.tagValueSetOnly( Card.TAG_IDENTIFIER, Long.valueOf( document.get( Card.TAG_IDENTIFIER ) ) );
        return card;
    }

    /** Loads the document behind every hit and converts it to a card, keeping hit order. */
    private Card[] cardsFromHits( TopDocs hits ) throws Exception {
        Card[] cards = new Card[hits.scoreDocs.length];
        for( int i = 0; i < cards.length; i++ ) {
            cards[i] = cardFromDocument( luceneSearcher.doc( hits.scoreDocs[i].doc ) );
        }
        return cards;
    }

    /**
     * Adds a new card to the index.
     *
     * The card is modified: it receives the next free identifier and the
     * current time as its last-edit stamp. The change is not committed here;
     * call {@link #databaseCommit()} for that.
     *
     * @param card card to store.
     * @return the identifier assigned to the card.
     * @throws Exception if indexing or reopening the searcher fails.
     */
    public long cardAdd( Card card ) throws Exception {
        highestIdentifier++;
        card.tagValueSetOnly( Card.TAG_IDENTIFIER, Long.valueOf( highestIdentifier ) );
        card.tagValueSetOnly( Card.TAG_LAST_EDIT, Long.valueOf( System.currentTimeMillis() ) );

        luceneWriter.addDocument( cardToDocument( card ) );
        System.out.print( "Added card with identifier "+Long.toString(highestIdentifier)+": '"+card.titleGet()+"'\n" );

        searcherRefresh();
        return highestIdentifier;
    }

    /**
     * Replaces the stored version of an existing card.
     *
     * The card's last-edit stamp is set to the current time. The change is not
     * committed here; call {@link #databaseCommit()} for that.
     *
     * @param card card whose identifier selects the document to replace.
     * @throws RuntimeException if no card with that identifier is stored.
     * @throws Exception if indexing or reopening the searcher fails.
     */
    public void cardUpdate( Card card ) throws Exception {
        long identifier = card.identifierGet();

        if( documentByIdentifier( identifier ) == null ) {
            throw new RuntimeException( "Failed to update card with identifier "+Long.toString( identifier )+", not found." );
        }

        card.tagValueSetOnly( Card.TAG_LAST_EDIT, Long.valueOf( System.currentTimeMillis() ) );

        luceneWriter.deleteDocuments( identifierQuery( identifier ) );
        luceneWriter.addDocument( cardToDocument( card ) );
        System.out.print( "Updated card with identifier "+Long.toString(identifier)+": '"+card.titleGet()+"'\n" );

        searcherRefresh();
    }

    /**
     * Removes the card with the given identifier from the index.
     *
     * @param identifier identifier of the card to delete.
     * @throws RuntimeException if no card with that identifier is stored.
     * @throws Exception if the deletion or reopening the searcher fails.
     */
    public void cardDelete( long identifier ) throws Exception {
        if( documentByIdentifier( identifier ) == null ) {
            throw new RuntimeException( "Failed to delete card with identifier "+Long.toString( identifier )+", not found." );
        }

        luceneWriter.deleteDocuments( identifierQuery( identifier ) );
        System.out.print("Deleted card with identifier "+Long.toString(identifier)+"\n");

        searcherRefresh();
    }

    /**
     * Fetches a single card.
     *
     * @param identifier identifier of the wanted card.
     * @return the card, or null when no card has that identifier.
     * @throws Exception if the lookup fails.
     */
    public Card cardGetByIdentifier( long identifier ) throws Exception {
        Document document = documentByIdentifier( identifier );
        if( document == null ) {
            return null;
        }
        return cardFromDocument( document );
    }

    /**
     * Return up to 'amount' of recently modified cards.
     *
     * @param amount maximum number of cards to return.
     * @return cards sorted by last-edit stamp, newest first.
     * @throws Exception if the search fails.
     */
    public Card[] searchTopRecent( int amount ) throws Exception {
        TopDocs topDocuments = luceneSearcher.search(
            new MatchAllDocsQuery(),
            null,
            amount,
            new Sort( new SortField( Card.TAG_LAST_EDIT, SortField.LONG, true ) )
        );
        return cardsFromHits( topDocuments );
    }

    /**
     * Runs a Lucene query-parser query against the combined search field.
     *
     * Leading wildcards are allowed. A query that fails to parse is reported on
     * standard output and yields an empty result instead of an exception.
     *
     * @param query query text in Lucene query-parser syntax.
     * @return up to 32 matching cards, in the order the searcher returned them.
     * @throws Exception if the search itself fails.
     */
    public Card[] searchSimple( String query ) throws Exception {
        Query parsedQuery;
        try {
            // A fresh parser per call; only the analyzer is shared.
            QueryParser queryParser = new QueryParser( LUCENE_VERSION, Card.TAG_SEARCH, analyzer );
            queryParser.setAllowLeadingWildcard( true );
            parsedQuery = queryParser.parse( query );
        } catch( ParseException e ) {
            // Deliberate best effort: a malformed user query is not an error condition.
            System.out.print( "Search query parsing exception, returning zero results: "+e.getMessage()+"\n" );
            return new Card[0];
        }

        return cardsFromHits( luceneSearcher.search( parsedQuery, SEARCH_RESULT_LIMIT ) );
    }

}