annotate src/goodjava/lucene/queryparser/StringFieldParser.java @ 1764:527c53b91a50

lucene error handling
author Franklin Schmidt <fschmidt@gmail.com>
date Mon, 22 May 2023 20:43:52 -0600
parents 83caee2ed4ad
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1458
6b6c11c9164e goodjava.lucene
Franklin Schmidt <fschmidt@gmail.com>
parents: 1445
diff changeset
1 package goodjava.lucene.queryparser;
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
2
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
3 import java.io.StringReader;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
4 import java.io.IOException;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
5 import org.apache.lucene.analysis.Analyzer;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
6 import org.apache.lucene.analysis.TokenStream;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
7 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
8 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
9 import org.apache.lucene.search.Query;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
10 import org.apache.lucene.search.TermQuery;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
11 import org.apache.lucene.search.TermRangeQuery;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
12 import org.apache.lucene.search.PhraseQuery;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
13 import org.apache.lucene.search.WildcardQuery;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
14 import org.apache.lucene.search.PrefixQuery;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
15 import org.apache.lucene.search.SortField;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
16 import org.apache.lucene.index.Term;
1402
27efb1fcbcb5 move luan.lib to goodjava
Franklin Schmidt <fschmidt@gmail.com>
parents: 1344
diff changeset
17 import goodjava.parser.ParseException;
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
18
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
19
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
20 public class StringFieldParser implements FieldParser {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
21 public int slop = 0;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
22 public final Analyzer analyzer;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
23
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
24 public StringFieldParser(Analyzer analyzer) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
25 this.analyzer = analyzer;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
26 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
27
1459
b04b8fc5f4f4 GoodQueryParser
Franklin Schmidt <fschmidt@gmail.com>
parents: 1458
diff changeset
28 @Override public Query getQuery(GoodQueryParser qp,String field,String query) throws ParseException {
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
29 String wildcard = wildcard(qp,query);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
30 if( wildcard != null )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
31 return new WildcardQuery(new Term(field,wildcard));
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
32 if( query.endsWith("*") && !query.endsWith("\\*") )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
33 return new PrefixQuery(new Term(field,query.substring(0,query.length()-1)));
1619
83caee2ed4ad Lucene.escape
Franklin Schmidt <fschmidt@gmail.com>
parents: 1459
diff changeset
34 query = unescape(qp,query);
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
35 PhraseQuery pq = new PhraseQuery();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
36 try {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
37 TokenStream ts = analyzer.tokenStream(field,new StringReader(query));
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
38 CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
39 PositionIncrementAttribute posAttr = ts.addAttribute(PositionIncrementAttribute.class);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
40 ts.reset();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
41 int pos = -1;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
42 while( ts.incrementToken() ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
43 pos += posAttr.getPositionIncrement();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
44 pq.add( new Term(field,termAttr.toString()), pos );
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
45 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
46 ts.end();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
47 ts.close();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
48 } catch(IOException e) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
49 throw new RuntimeException(e);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
50 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
51 Term[] terms = pq.getTerms();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
52 if( terms.length==1 && pq.getPositions()[0]==0 )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
53 return new TermQuery(terms[0]);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
54 return pq;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
55 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
56
1459
b04b8fc5f4f4 GoodQueryParser
Franklin Schmidt <fschmidt@gmail.com>
parents: 1458
diff changeset
57 @Override public Query getRangeQuery(GoodQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException {
1619
83caee2ed4ad Lucene.escape
Franklin Schmidt <fschmidt@gmail.com>
parents: 1459
diff changeset
58 minQuery = minQuery.equals("*") ? null : unescape(qp,minQuery);
83caee2ed4ad Lucene.escape
Franklin Schmidt <fschmidt@gmail.com>
parents: 1459
diff changeset
59 maxQuery = maxQuery.equals("*") ? null : unescape(qp,maxQuery);
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
60 return TermRangeQuery.newStringRange(field,minQuery,maxQuery,includeMin,includeMax);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
61 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
62
1619
83caee2ed4ad Lucene.escape
Franklin Schmidt <fschmidt@gmail.com>
parents: 1459
diff changeset
63 static String unescape(GoodQueryParser qp,String s) throws ParseException {
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
64 final char[] a = s.toCharArray();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
65 int i, n;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
66 if( a[0] == '"' ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
67 if( a[a.length-1] != '"' ) throw new RuntimeException();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
68 i = 1;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
69 n = a.length - 1;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
70 } else {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
71 i = 0;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
72 n = a.length;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
73 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
74 StringBuilder sb = new StringBuilder();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
75 for( ; i<n; i++ ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
76 char c = a[i];
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
77 if( c == '\\' ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
78 if( ++i == a.length )
1110
38a42f437fd2 queryparser now uses parsers.Parser
Franklin Schmidt <fschmidt@gmail.com>
parents: 775
diff changeset
79 throw qp.exception("ends with '\\'");
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
80 c = a[i];
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
81 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
82 sb.append(c);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
83 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
84 return sb.toString();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
85 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
86
1459
b04b8fc5f4f4 GoodQueryParser
Franklin Schmidt <fschmidt@gmail.com>
parents: 1458
diff changeset
87 private static String wildcard(GoodQueryParser qp,String s) throws ParseException {
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
88 final char[] a = s.toCharArray();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
89 if( a[0] == '"' )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
90 return null;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
91 boolean hasWildcard = false;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
92 StringBuilder sb = new StringBuilder();
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
93 for( int i=0; i<a.length; i++ ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
94 char c = a[i];
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
95 if( c=='?' || c=='*' && i<a.length-1 )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
96 hasWildcard = true;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
97 if( c == '\\' ) {
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
98 if( ++i == a.length )
1110
38a42f437fd2 queryparser now uses parsers.Parser
Franklin Schmidt <fschmidt@gmail.com>
parents: 775
diff changeset
99 throw qp.exception("ends with '\\'");
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
100 c = a[i];
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
101 if( c=='?' || c=='*' )
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
102 sb.append('\\');
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
103 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
104 sb.append(c);
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
105 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
106 return hasWildcard ? sb.toString() : null;
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
107 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
108
1459
b04b8fc5f4f4 GoodQueryParser
Franklin Schmidt <fschmidt@gmail.com>
parents: 1458
diff changeset
109 @Override public SortField getSortField(GoodQueryParser qp,String field,boolean reverse) {
730
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
110 return new SortField( field, SortField.Type.STRING, reverse );
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
111 }
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
112
01e68da6983b add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
113 }