annotate src/luan/modules/HtmlLuan.java @ 1341:a015a0b5c388

add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
author Franklin Schmidt <fschmidt@gmail.com>
date Tue, 19 Feb 2019 08:14:40 -0700
parents 25746915a241
children 21f5edab1fbf
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
167
4c0131c2b650 merge luan/lib into modules
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 142
diff changeset
1 package luan.modules;
49
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
2
318
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
3 import java.util.List;
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
4 import java.util.ArrayList;
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
5 import java.util.Arrays;
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
6 import java.util.Set;
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
7 import java.util.HashSet;
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
8 import java.util.Map;
1341
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
9 import java.util.regex.Pattern;
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
10 import java.util.regex.Matcher;
221
ec016471c6eb make LuanTable an interface
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 171
diff changeset
11 import luan.Luan;
49
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
12 import luan.LuanTable;
124
f537ff5e511d minor cleanup
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 112
diff changeset
13 import luan.LuanException;
49
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
14
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
15
168
ebe9db183eb7 rename *Lib.java to *Luan.java
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 167
diff changeset
16 public final class HtmlLuan {
49
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
17
646
cdc70de628b5 simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents: 625
diff changeset
18 public static String encode(String s) throws LuanException {
cdc70de628b5 simplify LuanException
Franklin Schmidt <fschmidt@gmail.com>
parents: 625
diff changeset
19 Utils.checkNotNull(s);
49
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
20 char[] a = s.toCharArray();
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
21 StringBuilder buf = new StringBuilder();
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
22 for( int i=0; i<a.length; i++ ) {
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
23 char c = a[i];
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
24 switch(c) {
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
25 case '&':
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
26 buf.append("&amp;");
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
27 break;
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
28 case '<':
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
29 buf.append("&lt;");
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
30 break;
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
31 case '>':
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
32 buf.append("&gt;");
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
33 break;
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
34 case '"':
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
35 buf.append("&quot;");
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
36 break;
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
37 default:
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
38 buf.append(c);
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
39 }
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
40 }
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
41 return buf.toString();
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
42 }
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
43
1341
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
44 private static final Pattern entityPtn = Pattern.compile(
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
45 "&#(\\d+);"
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
46 );
318
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
47
1341
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
48 public static String decode(String s) {
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
49 StringBuffer buf = new StringBuffer();
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
50 Matcher m = entityPtn.matcher(s);
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
51 while( m.find() ) {
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
52 String entity = new String(new char[]{(char)Integer.parseInt(m.group(1))});
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
53 m.appendReplacement(buf,entity);
318
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
54 }
1341
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
55 m.appendTail(buf);
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
56 s = buf.toString();
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
57 s = s.replace("&nbsp;"," ");
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
58 s = s.replace("&quot;","\"");
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
59 s = s.replace("&gt;",">");
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
60 s = s.replace("&lt;","<");
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
61 s = s.replace("&amp;","&");
a015a0b5c388 add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
Franklin Schmidt <fschmidt@gmail.com>
parents: 1333
diff changeset
62 return s;
318
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
63 }
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
64
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
65 public static String quote(String s) {
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
66 StringBuilder buf = new StringBuilder();
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
67 buf.append('"');
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
68 int i = 0;
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
69 while(true) {
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
70 int i2 = s.indexOf('"',i);
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
71 if( i2 == -1 ) {
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
72 buf.append(s.substring(i));
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
73 break;
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
74 } else {
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
75 buf.append(s.substring(i,i2));
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
76 buf.append("&quot;");
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
77 i = i2 + 1;
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
78 }
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
79 }
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
80 buf.append('"');
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
81 return buf.toString();
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
82 }
4fe6c9fed486 add html processing
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents: 302
diff changeset
83
49
8ede219cd111 add WebShell
fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
parents:
diff changeset
84 }