Mercurial Hosting > luan
comparison src/goodjava/html/Html.java @ 1712:36c28be6d432
improve html and bbcode
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 14 Jul 2022 22:14:21 -0600 |
parents | |
children | 31a82b0d0a87 |
comparison
equal
deleted
inserted
replaced
1711:05d14db623b6 | 1712:36c28be6d432 |
---|---|
1 package goodjava.html; | |
2 | |
3 import java.util.List; | |
4 import java.util.ArrayList; | |
5 import java.util.Set; | |
6 import java.util.HashSet; | |
7 import java.util.Map; | |
8 import java.util.HashMap; | |
9 import java.util.Collections; | |
10 import java.util.regex.Pattern; | |
11 import java.util.regex.Matcher; | |
12 import goodjava.parser.Parser; | |
13 | |
14 | |
15 public final class Html { | |
16 | |
17 private static final Pattern entityPtn = Pattern.compile( | |
18 "&(#?[0-9a-zA-Z]+;)" | |
19 ); | |
20 | |
21 public static String encode(String s) { | |
22 //s = s.replace("&","&amp;"); | |
23 s = entityPtn.matcher(s).replaceAll("&amp;$1"); | |
24 s = s.replace("<","&lt;"); | |
25 s = s.replace(">","&gt;"); | |
26 s = s.replace("\"","&quot;"); | |
27 return s; | |
28 } | |
29 | |
30 private static final Pattern entityNumPtn = Pattern.compile( | |
31 "&#(\\d+);" | |
32 ); | |
33 | |
34 public static String decode(String s) { | |
35 Matcher m = entityNumPtn.matcher(s); | |
36 if( m.find() ) { | |
37 StringBuffer buf = new StringBuffer(); | |
38 do { | |
39 String entity = new String(new char[]{(char)Integer.parseInt(m.group(1))}); | |
40 m.appendReplacement(buf,entity); | |
41 } while( m.find() ); | |
42 m.appendTail(buf); | |
43 s = buf.toString(); | |
44 } | |
45 s = s.replace("&nbsp;"," "); | |
46 s = s.replace("&quot;","\""); | |
47 s = s.replace("&gt;",">"); | |
48 s = s.replace("&lt;","<"); | |
49 s = s.replace("&amp;","&"); | |
50 return s; | |
51 } | |
52 | |
53 | |
54 public static final class Comment { | |
55 public final String text; | |
56 | |
57 private Comment(String text) { | |
58 this.text = text; | |
59 } | |
60 } | |
61 | |
62 public static final class CData { | |
63 public final String text; | |
64 | |
65 private CData(String text) { | |
66 this.text = text; | |
67 } | |
68 } | |
69 | |
70 public static final class Tag { | |
71 public final String name; | |
72 public final Map<String,Object> attributes; | |
73 public final boolean isEmpty; | |
74 public final String raw; | |
75 | |
76 private Tag(String name,Map<String,Object> attributes,boolean isEmpty,String raw) { | |
77 this.name = name; | |
78 this.attributes = attributes; | |
79 this.isEmpty = isEmpty; | |
80 this.raw = raw; | |
81 } | |
82 } | |
83 | |
84 public static final class Container { | |
85 public final Tag tag; | |
86 public final String text; | |
87 | |
88 private Container(Tag tag,String text) { | |
89 this.tag = tag; | |
90 this.text = text; | |
91 } | |
92 } | |
93 | |
94 private static Set<String> defaultContainerTags = new HashSet<String>(); | |
95 static { | |
96 Collections.addAll( defaultContainerTags, "script", "style", "textarea" ); | |
97 } | |
98 | |
99 public static List parse(String text) { | |
100 return parse(text,defaultContainerTags); | |
101 } | |
102 | |
103 public static List parse(String text,Set<String> containerTags) { | |
104 return new Html(text,containerTags).parse(); | |
105 } | |
106 | |
107 private final Parser parser; | |
108 private final Set<String> containerTags; | |
109 | |
110 private Html(String text,Set<String> containerTags) { | |
111 this.parser = new Parser(text); | |
112 this.containerTags = containerTags; | |
113 } | |
114 | |
115 private List parse() { | |
116 List list = new ArrayList(); | |
117 StringBuilder sb = new StringBuilder(); | |
118 while( !parser.endOfInput() ) { | |
119 if( parser.test('<') ) { | |
120 Tag tag = parseTag(); | |
121 if( tag != null ) { | |
122 Object el = tag; | |
123 String tagName = tag.name; | |
124 if( containerTags.contains(tagName) ) { | |
125 Container container = parseContainer(tag); | |
126 if( container != null ) | |
127 el = container; | |
128 } | |
129 if( el != null | |
130 || (el = parseComment()) != null | |
131 || (el = parseCdata()) != null | |
132 ) { | |
133 add(list,sb); | |
134 list.add(el); | |
135 continue; | |
136 } | |
137 } | |
138 } | |
139 sb.append( parser.currentChar() ); | |
140 parser.anyChar(); | |
141 } | |
142 add(list,sb); | |
143 return list; | |
144 } | |
145 | |
146 private static void add(List list,StringBuilder sb) { | |
147 if( sb.length() > 0 ) { | |
148 list.add(decode(sb.toString())); | |
149 sb.setLength(0); | |
150 } | |
151 } | |
152 | |
153 private Comment parseComment() { | |
154 parser.begin(); | |
155 if( !parser.match("<!--") ) | |
156 return parser.failure(null); | |
157 int start = parser.currentIndex(); | |
158 while( !parser.test("-->") ) { | |
159 if( !parser.anyChar() ) | |
160 return parser.failure(null); | |
161 } | |
162 String text = parser.textFrom(start); | |
163 Comment comment = new Comment(text); | |
164 return parser.success(comment); | |
165 } | |
166 | |
167 private CData parseCdata() { | |
168 parser.begin(); | |
169 if( !parser.match("<![CDATA[") ) | |
170 return parser.failure(null); | |
171 int start = parser.currentIndex(); | |
172 while( !parser.test("]]>") ) { | |
173 if( !parser.anyChar() ) | |
174 return parser.failure(null); | |
175 } | |
176 String text = parser.textFrom(start); | |
177 CData cdata = new CData(text); | |
178 return parser.success(cdata); | |
179 } | |
180 | |
181 private Container parseContainer(Tag tag) { | |
182 String endTagName = '/' + tag.name; | |
183 int start = parser.begin(); | |
184 int end; | |
185 while(true) { | |
186 if( parser.test('<') ) { | |
187 end = parser.currentIndex(); | |
188 Tag tag2 = parseTag(); | |
189 if( tag2.name.equals(endTagName) ) | |
190 break; | |
191 } | |
192 if( !parser.anyChar() ) | |
193 return parser.failure(null); | |
194 } | |
195 String text = parser.text.substring(start,end); | |
196 Container container = new Container(tag,text); | |
197 return parser.success(container); | |
198 } | |
199 | |
200 private Tag parseTag() { | |
201 int tagStart = parser.begin(); | |
202 if( !parser.match('<') ) | |
203 return parser.failure(null); | |
204 int start = parser.currentIndex(); | |
205 parser.match('/'); | |
206 if( !matchNameChar() ) | |
207 return parser.failure(null); | |
208 while( matchNameChar() ); | |
209 String name = parser.textFrom(start).toLowerCase(); | |
210 Map<String,Object> attributes = new HashMap<String,Object>(); | |
211 String attrName; | |
212 while( (attrName = parseAttrName()) != null ) { | |
213 String attrValue = parseAttrValue(); | |
214 attributes.put( attrName, attrValue!=null ? attrValue : true ); | |
215 /* | |
216 if( attrName.equals("style") && attrValue!=null ) { | |
217 LuanTable style = Css.style(attrValue); | |
218 if( style!=null ) | |
219 tbl.rawPut("style",style); | |
220 } | |
221 */ | |
222 } | |
223 while( matchSpace() ); | |
224 boolean isEmpty = parser.match('/'); | |
225 if( !parser.match('>') ) | |
226 return parser.failure(null); | |
227 String raw = parser.textFrom(tagStart); | |
228 Tag tag = new Tag(name,attributes,isEmpty,raw); | |
229 return parser.success(tag); | |
230 } | |
231 | |
232 private String parseAttrName() { | |
233 parser.begin(); | |
234 if( !matchSpace() ) | |
235 return parser.failure(null); | |
236 while( matchSpace() ); | |
237 int start = parser.currentIndex(); | |
238 if( !matchNameChar() ) | |
239 return parser.failure(null); | |
240 while( matchNameChar() ); | |
241 String name = parser.textFrom(start).toLowerCase(); | |
242 return parser.success(name); | |
243 } | |
244 | |
245 private String parseAttrValue() { | |
246 parser.begin(); | |
247 while( matchSpace() ); | |
248 if( !parser.match('=') ) | |
249 return parser.failure(null); | |
250 while( matchSpace() ); | |
251 if( parser.anyOf("\"'") ) { | |
252 char quote = parser.lastChar(); | |
253 int start = parser.currentIndex(); | |
254 while( !parser.test(quote) ) { | |
255 if( !parser.anyChar() ) | |
256 return parser.failure(null); | |
257 } | |
258 String value = parser.textFrom(start); | |
259 parser.match(quote); | |
260 value = decode(value); | |
261 return parser.success(value); | |
262 } | |
263 int start = parser.currentIndex(); | |
264 if( !matchValueChar() ) | |
265 return parser.failure(null); | |
266 while( matchValueChar() ); | |
267 String value = parser.textFrom(start); | |
268 value = decode(value); | |
269 return parser.success(value); | |
270 } | |
271 | |
272 private boolean matchNameChar() { | |
273 return parser.inCharRange('a','z') | |
274 || parser.inCharRange('A','Z') | |
275 || parser.inCharRange('0','9') | |
276 || parser.anyOf("_.-:") | |
277 ; | |
278 } | |
279 | |
280 private boolean matchValueChar() { | |
281 return parser.noneOf(" \t\r\n\"'>/="); | |
282 } | |
283 | |
284 private boolean matchSpace() { | |
285 return parser.anyOf(" \t\r\n"); | |
286 } | |
287 | |
288 } |