Mercurial Hosting > luan
annotate src/goodjava/lucene/queryparser/StringFieldParser.java @ 2018:5203eb9b0e02
http utf-8
| author | Franklin Schmidt <fschmidt@gmail.com> | 
|---|---|
| date | Fri, 03 Oct 2025 16:45:33 -0600 | 
| parents | 83caee2ed4ad | 
| children | 
| rev | line source | 
|---|---|
| 1458 | 1 package goodjava.lucene.queryparser; | 
| 730 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 2 | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 3 import java.io.StringReader; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 4 import java.io.IOException; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 5 import org.apache.lucene.analysis.Analyzer; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 6 import org.apache.lucene.analysis.TokenStream; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 7 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 8 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 9 import org.apache.lucene.search.Query; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 10 import org.apache.lucene.search.TermQuery; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 11 import org.apache.lucene.search.TermRangeQuery; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 12 import org.apache.lucene.search.PhraseQuery; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 13 import org.apache.lucene.search.WildcardQuery; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 14 import org.apache.lucene.search.PrefixQuery; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 15 import org.apache.lucene.search.SortField; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 16 import org.apache.lucene.index.Term; | 
| 1402 
27efb1fcbcb5
move luan.lib to goodjava
 Franklin Schmidt <fschmidt@gmail.com> parents: 
1344diff
changeset | 17 import goodjava.parser.ParseException; | 
| 730 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 18 | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 19 | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 20 public class StringFieldParser implements FieldParser { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 21 public int slop = 0; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 22 public final Analyzer analyzer; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 23 | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 24 public StringFieldParser(Analyzer analyzer) { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 25 this.analyzer = analyzer; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 26 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 27 | 
| 1459 | 28 @Override public Query getQuery(GoodQueryParser qp,String field,String query) throws ParseException { | 
| 730 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 29 String wildcard = wildcard(qp,query); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 30 if( wildcard != null ) | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 31 return new WildcardQuery(new Term(field,wildcard)); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 32 if( query.endsWith("*") && !query.endsWith("\\*") ) | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 33 return new PrefixQuery(new Term(field,query.substring(0,query.length()-1))); | 
| 1619 | 34 query = unescape(qp,query); | 
| 730 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 35 PhraseQuery pq = new PhraseQuery(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 36 try { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 37 TokenStream ts = analyzer.tokenStream(field,new StringReader(query)); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 38 CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 39 PositionIncrementAttribute posAttr = ts.addAttribute(PositionIncrementAttribute.class); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 40 ts.reset(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 41 int pos = -1; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 42 while( ts.incrementToken() ) { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 43 pos += posAttr.getPositionIncrement(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 44 pq.add( new Term(field,termAttr.toString()), pos ); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 45 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 46 ts.end(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 47 ts.close(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 48 } catch(IOException e) { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 49 throw new RuntimeException(e); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 50 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 51 Term[] terms = pq.getTerms(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 52 if( terms.length==1 && pq.getPositions()[0]==0 ) | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 53 return new TermQuery(terms[0]); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 54 return pq; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 55 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 56 | 
| 1459 | 57 @Override public Query getRangeQuery(GoodQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { | 
| 1619 | 58 minQuery = minQuery.equals("*") ? null : unescape(qp,minQuery); | 
| 59 maxQuery = maxQuery.equals("*") ? null : unescape(qp,maxQuery); | |
| 730 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 60 return TermRangeQuery.newStringRange(field,minQuery,maxQuery,includeMin,includeMax); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 61 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 62 | 
| 1619 | 63 static String unescape(GoodQueryParser qp,String s) throws ParseException { | 
| 730 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 64 final char[] a = s.toCharArray(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 65 int i, n; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 66 if( a[0] == '"' ) { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 67 if( a[a.length-1] != '"' ) throw new RuntimeException(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 68 i = 1; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 69 n = a.length - 1; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 70 } else { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 71 i = 0; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 72 n = a.length; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 73 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 74 StringBuilder sb = new StringBuilder(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 75 for( ; i<n; i++ ) { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 76 char c = a[i]; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 77 if( c == '\\' ) { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 78 if( ++i == a.length ) | 
| 1110 
38a42f437fd2
queryparser now uses parsers.Parser
 Franklin Schmidt <fschmidt@gmail.com> parents: 
775diff
changeset | 79 throw qp.exception("ends with '\\'"); | 
| 730 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 80 c = a[i]; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 81 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 82 sb.append(c); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 83 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 84 return sb.toString(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 85 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 86 | 
| 1459 | 87 private static String wildcard(GoodQueryParser qp,String s) throws ParseException { | 
| 730 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 88 final char[] a = s.toCharArray(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 89 if( a[0] == '"' ) | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 90 return null; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 91 boolean hasWildcard = false; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 92 StringBuilder sb = new StringBuilder(); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 93 for( int i=0; i<a.length; i++ ) { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 94 char c = a[i]; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 95 if( c=='?' || c=='*' && i<a.length-1 ) | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 96 hasWildcard = true; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 97 if( c == '\\' ) { | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 98 if( ++i == a.length ) | 
| 1110 
38a42f437fd2
queryparser now uses parsers.Parser
 Franklin Schmidt <fschmidt@gmail.com> parents: 
775diff
changeset | 99 throw qp.exception("ends with '\\'"); | 
| 730 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 100 c = a[i]; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 101 if( c=='?' || c=='*' ) | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 102 sb.append('\\'); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 103 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 104 sb.append(c); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 105 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 106 return hasWildcard ? sb.toString() : null; | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 107 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 108 | 
| 1459 | 109 @Override public SortField getSortField(GoodQueryParser qp,String field,boolean reverse) { | 
| 730 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 110 return new SortField( field, SortField.Type.STRING, reverse ); | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 111 } | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 112 | 
| 
01e68da6983b
add sane-lucene-queryparser source to luan
 Franklin Schmidt <fschmidt@gmail.com> parents: diff
changeset | 113 } | 
