annotate src/goodjava/html/Html.java @ 1827:50e570b598b2

security
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 15 Sep 2024 10:36:46 -0600
parents a045f30fa67d
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1712
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
1 package goodjava.html;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
2
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
3 import java.util.List;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
4 import java.util.ArrayList;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
5 import java.util.Set;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
6 import java.util.HashSet;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
7 import java.util.Map;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
8 import java.util.HashMap;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
9 import java.util.Collections;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
10 import java.util.regex.Pattern;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
11 import java.util.regex.Matcher;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
12 import goodjava.parser.Parser;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
13
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
14
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
15 public final class Html {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
16
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
17 private static final Pattern entityPtn = Pattern.compile(
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
18 "&(#?[0-9a-zA-Z]+;)"
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
19 );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
20
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
21 public static String encode(String s) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
22 //s = s.replace("&","&amp;");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
23 s = entityPtn.matcher(s).replaceAll("&amp;$1");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
24 s = s.replace("<","&lt;");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
25 s = s.replace(">","&gt;");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
26 s = s.replace("\"","&quot;");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
27 return s;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
28 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
29
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
30 private static final Pattern entityNumPtn = Pattern.compile(
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
31 "&#(\\d+);"
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
32 );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
33
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
34 public static String decode(String s) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
35 Matcher m = entityNumPtn.matcher(s);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
36 if( m.find() ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
37 StringBuffer buf = new StringBuffer();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
38 do {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
39 String entity = new String(new char[]{(char)Integer.parseInt(m.group(1))});
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
40 m.appendReplacement(buf,entity);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
41 } while( m.find() );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
42 m.appendTail(buf);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
43 s = buf.toString();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
44 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
45 s = s.replace("&nbsp;"," ");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
46 s = s.replace("&quot;","\"");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
47 s = s.replace("&gt;",">");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
48 s = s.replace("&lt;","<");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
49 s = s.replace("&amp;","&");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
50 return s;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
51 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
52
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
53
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
54 public static final class Comment {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
55 public final String text;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
56
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
57 private Comment(String text) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
58 this.text = text;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
59 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
60 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
61
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
62 public static final class CData {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
63 public final String text;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
64
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
65 private CData(String text) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
66 this.text = text;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
67 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
68 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
69
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
70 public static final class Tag {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
71 public final String name;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
72 public final Map<String,Object> attributes;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
73 public final boolean isEmpty;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
74 public final String raw;
1714
31a82b0d0a87 bbcode and html work
Franklin Schmidt <fschmidt@gmail.com>
parents: 1712
diff changeset
75 public final Map<String,String> style;
1712
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
76
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
77 private Tag(String name,Map<String,Object> attributes,boolean isEmpty,String raw) {
1714
31a82b0d0a87 bbcode and html work
Franklin Schmidt <fschmidt@gmail.com>
parents: 1712
diff changeset
78 this(name,attributes,isEmpty,raw,null);
31a82b0d0a87 bbcode and html work
Franklin Schmidt <fschmidt@gmail.com>
parents: 1712
diff changeset
79 }
31a82b0d0a87 bbcode and html work
Franklin Schmidt <fschmidt@gmail.com>
parents: 1712
diff changeset
80
31a82b0d0a87 bbcode and html work
Franklin Schmidt <fschmidt@gmail.com>
parents: 1712
diff changeset
81 private Tag(String name,Map<String,Object> attributes,boolean isEmpty,String raw,Map<String,String> style) {
1712
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
82 this.name = name;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
83 this.attributes = attributes;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
84 this.isEmpty = isEmpty;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
85 this.raw = raw;
1714
31a82b0d0a87 bbcode and html work
Franklin Schmidt <fschmidt@gmail.com>
parents: 1712
diff changeset
86 this.style = style;
1712
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
87 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
88 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
89
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
90 public static final class Container {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
91 public final Tag tag;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
92 public final String text;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
93
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
94 private Container(Tag tag,String text) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
95 this.tag = tag;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
96 this.text = text;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
97 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
98 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
99
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
100 private static Set<String> defaultContainerTags = new HashSet<String>();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
101 static {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
102 Collections.addAll( defaultContainerTags, "script", "style", "textarea" );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
103 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
104
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
105 public static List parse(String text) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
106 return parse(text,defaultContainerTags);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
107 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
108
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
109 public static List parse(String text,Set<String> containerTags) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
110 return new Html(text,containerTags).parse();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
111 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
112
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
/** Cursor over the text being parsed. */
private final Parser parser;
/** Names of tags whose content is captured verbatim rather than parsed. */
private final Set<String> containerTags;

/** Creates a parser for one input text; used only by the static parse methods. */
private Html(String source,Set<String> containers) {
	parser = new Parser(source);
	containerTags = containers;
}
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
120
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
121 private List parse() {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
122 List list = new ArrayList();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
123 StringBuilder sb = new StringBuilder();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
124 while( !parser.endOfInput() ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
125 if( parser.test('<') ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
126 Tag tag = parseTag();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
127 if( tag != null ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
128 Object el = tag;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
129 String tagName = tag.name;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
130 if( containerTags.contains(tagName) ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
131 Container container = parseContainer(tag);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
132 if( container != null )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
133 el = container;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
134 }
1800
a045f30fa67d html fix
Franklin Schmidt <fschmidt@gmail.com>
parents: 1714
diff changeset
135 add(list,sb);
a045f30fa67d html fix
Franklin Schmidt <fschmidt@gmail.com>
parents: 1714
diff changeset
136 list.add(el);
a045f30fa67d html fix
Franklin Schmidt <fschmidt@gmail.com>
parents: 1714
diff changeset
137 continue;
a045f30fa67d html fix
Franklin Schmidt <fschmidt@gmail.com>
parents: 1714
diff changeset
138 } else {
a045f30fa67d html fix
Franklin Schmidt <fschmidt@gmail.com>
parents: 1714
diff changeset
139 Object el = parseComment();
a045f30fa67d html fix
Franklin Schmidt <fschmidt@gmail.com>
parents: 1714
diff changeset
140 if( el == null )
a045f30fa67d html fix
Franklin Schmidt <fschmidt@gmail.com>
parents: 1714
diff changeset
141 el = parseCdata();
a045f30fa67d html fix
Franklin Schmidt <fschmidt@gmail.com>
parents: 1714
diff changeset
142 if( el != null ) {
1712
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
143 add(list,sb);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
144 list.add(el);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
145 continue;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
146 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
147 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
148 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
149 sb.append( parser.currentChar() );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
150 parser.anyChar();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
151 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
152 add(list,sb);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
153 return list;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
154 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
155
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
156 private static void add(List list,StringBuilder sb) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
157 if( sb.length() > 0 ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
158 list.add(decode(sb.toString()));
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
159 sb.setLength(0);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
160 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
161 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
162
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
163 private Comment parseComment() {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
164 parser.begin();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
165 if( !parser.match("<!--") )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
166 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
167 int start = parser.currentIndex();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
168 while( !parser.test("-->") ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
169 if( !parser.anyChar() )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
170 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
171 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
172 String text = parser.textFrom(start);
1800
a045f30fa67d html fix
Franklin Schmidt <fschmidt@gmail.com>
parents: 1714
diff changeset
173 parser.match("-->");
1712
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
174 Comment comment = new Comment(text);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
175 return parser.success(comment);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
176 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
177
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
178 private CData parseCdata() {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
179 parser.begin();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
180 if( !parser.match("<![CDATA[") )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
181 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
182 int start = parser.currentIndex();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
183 while( !parser.test("]]>") ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
184 if( !parser.anyChar() )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
185 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
186 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
187 String text = parser.textFrom(start);
1800
a045f30fa67d html fix
Franklin Schmidt <fschmidt@gmail.com>
parents: 1714
diff changeset
188 parser.match("]]>");
1712
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
189 CData cdata = new CData(text);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
190 return parser.success(cdata);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
191 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
192
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
193 private Container parseContainer(Tag tag) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
194 String endTagName = '/' + tag.name;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
195 int start = parser.begin();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
196 int end;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
197 while(true) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
198 if( parser.test('<') ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
199 end = parser.currentIndex();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
200 Tag tag2 = parseTag();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
201 if( tag2.name.equals(endTagName) )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
202 break;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
203 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
204 if( !parser.anyChar() )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
205 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
206 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
207 String text = parser.text.substring(start,end);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
208 Container container = new Container(tag,text);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
209 return parser.success(container);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
210 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
211
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
212 private Tag parseTag() {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
213 int tagStart = parser.begin();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
214 if( !parser.match('<') )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
215 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
216 int start = parser.currentIndex();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
217 parser.match('/');
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
218 if( !matchNameChar() )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
219 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
220 while( matchNameChar() );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
221 String name = parser.textFrom(start).toLowerCase();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
222 Map<String,Object> attributes = new HashMap<String,Object>();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
223 String attrName;
1714
31a82b0d0a87 bbcode and html work
Franklin Schmidt <fschmidt@gmail.com>
parents: 1712
diff changeset
224 Map<String,String> style = null;
1712
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
225 while( (attrName = parseAttrName()) != null ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
226 String attrValue = parseAttrValue();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
227 attributes.put( attrName, attrValue!=null ? attrValue : true );
1714
31a82b0d0a87 bbcode and html work
Franklin Schmidt <fschmidt@gmail.com>
parents: 1712
diff changeset
228 if( attrName.equals("style") && attrValue!=null && style==null ) {
31a82b0d0a87 bbcode and html work
Franklin Schmidt <fschmidt@gmail.com>
parents: 1712
diff changeset
229 style = Css.style(attrValue);
1712
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
230 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
231 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
232 while( matchSpace() );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
233 boolean isEmpty = parser.match('/');
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
234 if( !parser.match('>') )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
235 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
236 String raw = parser.textFrom(tagStart);
1714
31a82b0d0a87 bbcode and html work
Franklin Schmidt <fschmidt@gmail.com>
parents: 1712
diff changeset
237 Tag tag = new Tag(name,attributes,isEmpty,raw,style);
1712
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
238 return parser.success(tag);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
239 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
240
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
241 private String parseAttrName() {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
242 parser.begin();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
243 if( !matchSpace() )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
244 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
245 while( matchSpace() );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
246 int start = parser.currentIndex();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
247 if( !matchNameChar() )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
248 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
249 while( matchNameChar() );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
250 String name = parser.textFrom(start).toLowerCase();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
251 return parser.success(name);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
252 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
253
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
254 private String parseAttrValue() {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
255 parser.begin();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
256 while( matchSpace() );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
257 if( !parser.match('=') )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
258 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
259 while( matchSpace() );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
260 if( parser.anyOf("\"'") ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
261 char quote = parser.lastChar();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
262 int start = parser.currentIndex();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
263 while( !parser.test(quote) ) {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
264 if( !parser.anyChar() )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
265 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
266 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
267 String value = parser.textFrom(start);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
268 parser.match(quote);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
269 value = decode(value);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
270 return parser.success(value);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
271 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
272 int start = parser.currentIndex();
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
273 if( !matchValueChar() )
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
274 return parser.failure(null);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
275 while( matchValueChar() );
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
276 String value = parser.textFrom(start);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
277 value = decode(value);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
278 return parser.success(value);
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
279 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
280
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
281 private boolean matchNameChar() {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
282 return parser.inCharRange('a','z')
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
283 || parser.inCharRange('A','Z')
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
284 || parser.inCharRange('0','9')
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
285 || parser.anyOf("_.-:")
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
286 ;
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
287 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
288
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
289 private boolean matchValueChar() {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
290 return parser.noneOf(" \t\r\n\"'>/=");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
291 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
292
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
293 private boolean matchSpace() {
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
294 return parser.anyOf(" \t\r\n");
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
295 }
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
296
36c28be6d432 improve html and bbcode
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
297 }