Mercurial Hosting > luan
annotate src/goodjava/lucene/queryparser/StringFieldParser.java @ 1474:13cbce740e1e
LowercaseAnalyzer
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 17 Apr 2020 21:53:09 -0600 |
parents | b04b8fc5f4f4 |
children | 83caee2ed4ad |
rev | line source |
---|---|
1458 | 1 package goodjava.lucene.queryparser; |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
2 |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
3 import java.io.StringReader; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
4 import java.io.IOException; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
5 import org.apache.lucene.analysis.Analyzer; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
6 import org.apache.lucene.analysis.TokenStream; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
7 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
8 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
9 import org.apache.lucene.search.Query; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
10 import org.apache.lucene.search.TermQuery; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
11 import org.apache.lucene.search.TermRangeQuery; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
12 import org.apache.lucene.search.PhraseQuery; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
13 import org.apache.lucene.search.WildcardQuery; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
14 import org.apache.lucene.search.PrefixQuery; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
15 import org.apache.lucene.search.SortField; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
16 import org.apache.lucene.index.Term; |
1402
27efb1fcbcb5
move luan.lib to goodjava
Franklin Schmidt <fschmidt@gmail.com>
parents:
1344
diff
changeset
|
17 import goodjava.parser.ParseException; |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
18 |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
19 |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
20 public class StringFieldParser implements FieldParser { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
21 public int slop = 0; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
22 public final Analyzer analyzer; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
23 |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
24 public StringFieldParser(Analyzer analyzer) { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
25 this.analyzer = analyzer; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
26 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
27 |
1459 | 28 @Override public Query getQuery(GoodQueryParser qp,String field,String query) throws ParseException { |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
29 String wildcard = wildcard(qp,query); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
30 if( wildcard != null ) |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
31 return new WildcardQuery(new Term(field,wildcard)); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
32 if( query.endsWith("*") && !query.endsWith("\\*") ) |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
33 return new PrefixQuery(new Term(field,query.substring(0,query.length()-1))); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
34 query = escape(qp,query); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
35 PhraseQuery pq = new PhraseQuery(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
36 try { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
37 TokenStream ts = analyzer.tokenStream(field,new StringReader(query)); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
38 CharTermAttribute termAttr = ts.addAttribute(CharTermAttribute.class); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
39 PositionIncrementAttribute posAttr = ts.addAttribute(PositionIncrementAttribute.class); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
40 ts.reset(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
41 int pos = -1; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
42 while( ts.incrementToken() ) { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
43 pos += posAttr.getPositionIncrement(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
44 pq.add( new Term(field,termAttr.toString()), pos ); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
45 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
46 ts.end(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
47 ts.close(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
48 } catch(IOException e) { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
49 throw new RuntimeException(e); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
50 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
51 Term[] terms = pq.getTerms(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
52 if( terms.length==1 && pq.getPositions()[0]==0 ) |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
53 return new TermQuery(terms[0]); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
54 return pq; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
55 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
56 |
1459 | 57 @Override public Query getRangeQuery(GoodQueryParser qp,String field,String minQuery,String maxQuery,boolean includeMin,boolean includeMax) throws ParseException { |
1445 | 58 minQuery = minQuery.equals("*") ? null : escape(qp,minQuery); |
59 maxQuery = maxQuery.equals("*") ? null : escape(qp,maxQuery); | |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
60 return TermRangeQuery.newStringRange(field,minQuery,maxQuery,includeMin,includeMax); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
61 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
62 |
1459 | 63 static String escape(GoodQueryParser qp,String s) throws ParseException { |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
64 final char[] a = s.toCharArray(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
65 int i, n; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
66 if( a[0] == '"' ) { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
67 if( a[a.length-1] != '"' ) throw new RuntimeException(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
68 i = 1; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
69 n = a.length - 1; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
70 } else { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
71 i = 0; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
72 n = a.length; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
73 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
74 StringBuilder sb = new StringBuilder(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
75 for( ; i<n; i++ ) { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
76 char c = a[i]; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
77 if( c == '\\' ) { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
78 if( ++i == a.length ) |
1110
38a42f437fd2
queryparser now uses parsers.Parser
Franklin Schmidt <fschmidt@gmail.com>
parents:
775
diff
changeset
|
79 throw qp.exception("ends with '\\'"); |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
80 c = a[i]; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
81 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
82 sb.append(c); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
83 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
84 return sb.toString(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
85 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
86 |
1459 | 87 private static String wildcard(GoodQueryParser qp,String s) throws ParseException { |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
88 final char[] a = s.toCharArray(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
89 if( a[0] == '"' ) |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
90 return null; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
91 boolean hasWildcard = false; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
92 StringBuilder sb = new StringBuilder(); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
93 for( int i=0; i<a.length; i++ ) { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
94 char c = a[i]; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
95 if( c=='?' || c=='*' && i<a.length-1 ) |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
96 hasWildcard = true; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
97 if( c == '\\' ) { |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
98 if( ++i == a.length ) |
1110
38a42f437fd2
queryparser now uses parsers.Parser
Franklin Schmidt <fschmidt@gmail.com>
parents:
775
diff
changeset
|
99 throw qp.exception("ends with '\\'"); |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
100 c = a[i]; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
101 if( c=='?' || c=='*' ) |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
102 sb.append('\\'); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
103 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
104 sb.append(c); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
105 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
106 return hasWildcard ? sb.toString() : null; |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
107 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
108 |
1459 | 109 @Override public SortField getSortField(GoodQueryParser qp,String field,boolean reverse) { |
730
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
110 return new SortField( field, SortField.Type.STRING, reverse ); |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
111 } |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
112 |
01e68da6983b
add sane-lucene-queryparser source to luan
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
113 } |