comparison src/goodjava/html/Html.java @ 1712:36c28be6d432

improve html and bbcode
author Franklin Schmidt <fschmidt@gmail.com>
date Thu, 14 Jul 2022 22:14:21 -0600
parents
children 31a82b0d0a87
comparison
equal deleted inserted replaced
1711:05d14db623b6 1712:36c28be6d432
1 package goodjava.html;
2
3 import java.util.List;
4 import java.util.ArrayList;
5 import java.util.Set;
6 import java.util.HashSet;
7 import java.util.Map;
8 import java.util.HashMap;
9 import java.util.Collections;
10 import java.util.regex.Pattern;
11 import java.util.regex.Matcher;
12 import goodjava.parser.Parser;
13
14
15 public final class Html {
16
17 private static final Pattern entityPtn = Pattern.compile(
18 "&(#?[0-9a-zA-Z]+;)"
19 );
20
21 public static String encode(String s) {
22 //s = s.replace("&","&amp;");
23 s = entityPtn.matcher(s).replaceAll("&amp;$1");
24 s = s.replace("<","&lt;");
25 s = s.replace(">","&gt;");
26 s = s.replace("\"","&quot;");
27 return s;
28 }
29
30 private static final Pattern entityNumPtn = Pattern.compile(
31 "&#(\\d+);"
32 );
33
34 public static String decode(String s) {
35 Matcher m = entityNumPtn.matcher(s);
36 if( m.find() ) {
37 StringBuffer buf = new StringBuffer();
38 do {
39 String entity = new String(new char[]{(char)Integer.parseInt(m.group(1))});
40 m.appendReplacement(buf,entity);
41 } while( m.find() );
42 m.appendTail(buf);
43 s = buf.toString();
44 }
45 s = s.replace("&nbsp;"," ");
46 s = s.replace("&quot;","\"");
47 s = s.replace("&gt;",">");
48 s = s.replace("&lt;","<");
49 s = s.replace("&amp;","&");
50 return s;
51 }
52
53
54 public static final class Comment {
55 public final String text;
56
57 private Comment(String text) {
58 this.text = text;
59 }
60 }
61
62 public static final class CData {
63 public final String text;
64
65 private CData(String text) {
66 this.text = text;
67 }
68 }
69
70 public static final class Tag {
71 public final String name;
72 public final Map<String,Object> attributes;
73 public final boolean isEmpty;
74 public final String raw;
75
76 private Tag(String name,Map<String,Object> attributes,boolean isEmpty,String raw) {
77 this.name = name;
78 this.attributes = attributes;
79 this.isEmpty = isEmpty;
80 this.raw = raw;
81 }
82 }
83
84 public static final class Container {
85 public final Tag tag;
86 public final String text;
87
88 private Container(Tag tag,String text) {
89 this.tag = tag;
90 this.text = text;
91 }
92 }
93
94 private static Set<String> defaultContainerTags = new HashSet<String>();
95 static {
96 Collections.addAll( defaultContainerTags, "script", "style", "textarea" );
97 }
98
99 public static List parse(String text) {
100 return parse(text,defaultContainerTags);
101 }
102
103 public static List parse(String text,Set<String> containerTags) {
104 return new Html(text,containerTags).parse();
105 }
106
// Parser over the text handed to the constructor; all parsing methods advance it.
private final Parser parser;
// Tag names (compared against lower-cased tag names) whose content is captured raw.
private final Set<String> containerTags;

/** Creates a parser instance for one input text; use the static parse methods instead. */
private Html(String text,Set<String> containerTags) {
    this.containerTags = containerTags;
    this.parser = new Parser(text);
}
114
115 private List parse() {
116 List list = new ArrayList();
117 StringBuilder sb = new StringBuilder();
118 while( !parser.endOfInput() ) {
119 if( parser.test('<') ) {
120 Tag tag = parseTag();
121 if( tag != null ) {
122 Object el = tag;
123 String tagName = tag.name;
124 if( containerTags.contains(tagName) ) {
125 Container container = parseContainer(tag);
126 if( container != null )
127 el = container;
128 }
129 if( el != null
130 || (el = parseComment()) != null
131 || (el = parseCdata()) != null
132 ) {
133 add(list,sb);
134 list.add(el);
135 continue;
136 }
137 }
138 }
139 sb.append( parser.currentChar() );
140 parser.anyChar();
141 }
142 add(list,sb);
143 return list;
144 }
145
146 private static void add(List list,StringBuilder sb) {
147 if( sb.length() > 0 ) {
148 list.add(decode(sb.toString()));
149 sb.setLength(0);
150 }
151 }
152
153 private Comment parseComment() {
154 parser.begin();
155 if( !parser.match("<!--") )
156 return parser.failure(null);
157 int start = parser.currentIndex();
158 while( !parser.test("-->") ) {
159 if( !parser.anyChar() )
160 return parser.failure(null);
161 }
162 String text = parser.textFrom(start);
163 Comment comment = new Comment(text);
164 return parser.success(comment);
165 }
166
167 private CData parseCdata() {
168 parser.begin();
169 if( !parser.match("<![CDATA[") )
170 return parser.failure(null);
171 int start = parser.currentIndex();
172 while( !parser.test("]]>") ) {
173 if( !parser.anyChar() )
174 return parser.failure(null);
175 }
176 String text = parser.textFrom(start);
177 CData cdata = new CData(text);
178 return parser.success(cdata);
179 }
180
181 private Container parseContainer(Tag tag) {
182 String endTagName = '/' + tag.name;
183 int start = parser.begin();
184 int end;
185 while(true) {
186 if( parser.test('<') ) {
187 end = parser.currentIndex();
188 Tag tag2 = parseTag();
189 if( tag2.name.equals(endTagName) )
190 break;
191 }
192 if( !parser.anyChar() )
193 return parser.failure(null);
194 }
195 String text = parser.text.substring(start,end);
196 Container container = new Container(tag,text);
197 return parser.success(container);
198 }
199
200 private Tag parseTag() {
201 int tagStart = parser.begin();
202 if( !parser.match('<') )
203 return parser.failure(null);
204 int start = parser.currentIndex();
205 parser.match('/');
206 if( !matchNameChar() )
207 return parser.failure(null);
208 while( matchNameChar() );
209 String name = parser.textFrom(start).toLowerCase();
210 Map<String,Object> attributes = new HashMap<String,Object>();
211 String attrName;
212 while( (attrName = parseAttrName()) != null ) {
213 String attrValue = parseAttrValue();
214 attributes.put( attrName, attrValue!=null ? attrValue : true );
215 /*
216 if( attrName.equals("style") && attrValue!=null ) {
217 LuanTable style = Css.style(attrValue);
218 if( style!=null )
219 tbl.rawPut("style",style);
220 }
221 */
222 }
223 while( matchSpace() );
224 boolean isEmpty = parser.match('/');
225 if( !parser.match('>') )
226 return parser.failure(null);
227 String raw = parser.textFrom(tagStart);
228 Tag tag = new Tag(name,attributes,isEmpty,raw);
229 return parser.success(tag);
230 }
231
232 private String parseAttrName() {
233 parser.begin();
234 if( !matchSpace() )
235 return parser.failure(null);
236 while( matchSpace() );
237 int start = parser.currentIndex();
238 if( !matchNameChar() )
239 return parser.failure(null);
240 while( matchNameChar() );
241 String name = parser.textFrom(start).toLowerCase();
242 return parser.success(name);
243 }
244
245 private String parseAttrValue() {
246 parser.begin();
247 while( matchSpace() );
248 if( !parser.match('=') )
249 return parser.failure(null);
250 while( matchSpace() );
251 if( parser.anyOf("\"'") ) {
252 char quote = parser.lastChar();
253 int start = parser.currentIndex();
254 while( !parser.test(quote) ) {
255 if( !parser.anyChar() )
256 return parser.failure(null);
257 }
258 String value = parser.textFrom(start);
259 parser.match(quote);
260 value = decode(value);
261 return parser.success(value);
262 }
263 int start = parser.currentIndex();
264 if( !matchValueChar() )
265 return parser.failure(null);
266 while( matchValueChar() );
267 String value = parser.textFrom(start);
268 value = decode(value);
269 return parser.success(value);
270 }
271
272 private boolean matchNameChar() {
273 return parser.inCharRange('a','z')
274 || parser.inCharRange('A','Z')
275 || parser.inCharRange('0','9')
276 || parser.anyOf("_.-:")
277 ;
278 }
279
280 private boolean matchValueChar() {
281 return parser.noneOf(" \t\r\n\"'>/=");
282 }
283
284 private boolean matchSpace() {
285 return parser.anyOf(" \t\r\n");
286 }
287
288 }