comparison core/src/luan/modules/HtmlLuan.java @ 318:4fe6c9fed486

add html processing git-svn-id: https://luan-java.googlecode.com/svn/trunk@319 21e917c8-12df-6dd8-5cb6-c86387c605b9
author fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9>
date Thu, 05 Feb 2015 03:28:23 +0000
parents 2f8938fc518c
children 23b99a5039b5
comparison
equal deleted inserted replaced
317:c730ff1e4bae 318:4fe6c9fed486
1 package luan.modules; 1 package luan.modules;
2 2
3 import java.util.List;
4 import java.util.ArrayList;
5 import java.util.Arrays;
6 import java.util.Set;
7 import java.util.HashSet;
8 import java.util.Map;
3 import luan.Luan; 9 import luan.Luan;
4 import luan.LuanState; 10 import luan.LuanState;
5 import luan.LuanTable; 11 import luan.LuanTable;
6 import luan.LuanFunction;
7 import luan.LuanException; 12 import luan.LuanException;
8 13
9 14
10 public final class HtmlLuan { 15 public final class HtmlLuan {
11 16
12 public static String encode(String s) { 17 public static String encode(LuanState luan,String s) throws LuanException {
18 Utils.checkNotNull(luan,s);
13 char[] a = s.toCharArray(); 19 char[] a = s.toCharArray();
14 StringBuilder buf = new StringBuilder(); 20 StringBuilder buf = new StringBuilder();
15 for( int i=0; i<a.length; i++ ) { 21 for( int i=0; i<a.length; i++ ) {
16 char c = a[i]; 22 char c = a[i];
17 switch(c) { 23 switch(c) {
32 } 38 }
33 } 39 }
34 return buf.toString(); 40 return buf.toString();
35 } 41 }
36 42
43 /*
44 // public static final String TEXTAREA = "textarea";
45 public static final String SCRIPT = "script";
46 public static final String STYLE = "style";
47
48 public static Set<String> containerTags = new HashSet<String>(Arrays.asList(SCRIPT,STYLE));
49 */
50 public static LuanTable parse(LuanState luan,String text,LuanTable containerTagsTbl)
51 throws LuanException
52 {
53 Utils.checkNotNull(luan,text);
54 Utils.checkNotNull(luan,containerTagsTbl);
55 Set<String> containerTags = new HashSet<String>();
56 for( Object v : containerTagsTbl.asList() ) {
57 containerTags.add((String)v);
58 }
59 List<Object> html = new ArrayList<Object>();
60 int len = text.length();
61 int i = 0;
62 outer:
63 while( i < len ) {
64 int i2 = text.indexOf('<',i);
65 while( i2 != -1 && i2+1 < len ) {
66 char c = text.charAt(i2+1);
67 if( Character.isLetter(c) || c=='/' || c=='!' )
68 break;
69 i2 = text.indexOf('<',i2+1);
70 }
71 if( i2 == -1 ) {
72 html.add( text.substring(i) );
73 break;
74 }
75 if( i < i2 )
76 html.add( text.substring(i,i2) );
77 if( text.startsWith("<!--",i2) ) {
78 i = text.indexOf("-->",i2+4);
79 if( i == -1 ) {
80 html.add( text.substring(i2) );
81 break;
82 }
83 html.add( comment( text.substring(i2+4,i) ) );
84 i += 3;
85 } else if( text.startsWith("<![CDATA[",i2) ) {
86 i = text.indexOf("]]>",i2+9);
87 if( i == -1 ) {
88 html.add( text.substring(i2) );
89 break;
90 }
91 html.add( cdata( text.substring(i2+9,i) ) );
92 i += 3;
93 } else {
94 i = text.indexOf('>',i2);
95 if( i == -1 ) {
96 html.add( text.substring(i2) );
97 break;
98 }
99 String tagText = text.substring(i2+1,i);
100 try {
101 LuanTable tag = parseTag(tagText);
102 String tagName = (String)tag.get("name");
103 if( containerTags.contains(tagName) ) {
104 i2 = i;
105 String endTagName = '/' + tagName;
106 while(true) {
107 i2 = text.indexOf('<',i2+1);
108 if( i2 == -1 )
109 break;
110 int i3 = text.indexOf('>',i2);
111 if( i3 == -1 )
112 break;
113 int j = i2+1;
114 while( j<i3 && !Character.isWhitespace(text.charAt(j)) ) j++;
115 String s = text.substring(i2+1,j);
116 if( s.equalsIgnoreCase(endTagName) ) {
117 String text2 = text.substring(i+1,i2);
118 LuanTable textContainer = textContainer(tag,text2);
119 html.add( textContainer );
120 i = i3 + 1;
121 continue outer;
122 }
123 }
124 // logger.warn("unclosed "+tagName);
125 }
126 i += 1;
127 html.add( tag );
128 } catch(BadTag e) {
129 // logger.debug("bad tag",e);
130 i += 1;
131 // if( !removeBadTags ) {
132 html.add( "&lt;" );
133 html.add( encode(luan,tagText) );
134 html.add( "&gt;" );
135 // }
136 }
137 }
138 }
139 return Luan.newTable(html);
140 }
141
142 static LuanTable comment(String text) {
143 LuanTable tbl = Luan.newTable();
144 tbl.put("type","comment");
145 tbl.put("text",text);
146 return tbl;
147 }
148
149 static LuanTable cdata(String text) {
150 LuanTable tbl = Luan.newTable();
151 tbl.put("type","cdata");
152 tbl.put("text",text);
153 return tbl;
154 }
155
156 static LuanTable textContainer(LuanTable tag,String text) {
157 LuanTable tbl = Luan.newTable();
158 tbl.put("type","container");
159 tbl.put("tag",tag);
160 tbl.put("text",text);
161 return tbl;
162 }
163
164
165
166 static final class BadTag extends RuntimeException {
167 private BadTag(String msg) {
168 super(msg);
169 }
170 }
171
172 static LuanTable parseTag(String text) {
173 LuanTable tbl = Luan.newTable();
174 tbl.put("type","tag");
175 if( text.endsWith("/") ) {
176 text = text.substring(0,text.length()-1);
177 tbl.put("is_empty",true);
178 } else {
179 tbl.put("is_empty",false);
180 }
181 int len = text.length();
182 int i = 0;
183 int i2 = i;
184 if( i2<len && text.charAt(i2)=='/' )
185 i2++;
186 while( i2<len ) {
187 char c = text.charAt(i2);
188 if( Character.isWhitespace(c) )
189 break;
190 if( !( Character.isLetterOrDigit(c) || c=='_' || c=='.' || c=='-' || c==':' ) )
191 throw new BadTag("invalid tag name for <"+text+">");
192 i2++;
193 }
194 String name = text.substring(i,i2).toLowerCase();
195 tbl.put("name",name);
196 LuanTable attributes = Luan.newTable();
197 tbl.put("attributes",attributes);
198 i = i2;
199 while( i<len && Character.isWhitespace(text.charAt(i)) ) i++;
200 while( i<len ) {
201 i2 = toEndName(text,i,len);
202 String attrName = unquote(text.substring(i,i2).toLowerCase());
203 if( attributes.get(attrName) != null )
204 throw new BadTag("duplicate attribute: "+attrName);
205 i = i2;
206 while( i<len && Character.isWhitespace(text.charAt(i)) ) i++;
207 if( i<len && text.charAt(i) == '=' ) {
208 i++;
209 i2 = i;
210 while( i<len && Character.isWhitespace(text.charAt(i)) ) i++;
211 i2 = toEndValue(text,i,len);
212 String attrValue = text.substring(i,i2);
213 if( attrValue.indexOf('<') != -1 || attrValue.indexOf('>') != -1 )
214 throw new BadTag("invalid attribute value: "+attrValue);
215 attrValue = unquote(attrValue);
216 attributes.put(attrName,attrValue);
217 i = i2;
218 while( i<len && Character.isWhitespace(text.charAt(i)) ) i++;
219 } else {
220 attributes.put(attrName,true);
221 }
222 }
223 return tbl;
224 }
225
226 private static int toEndName(String text,int i,int len) {
227 if( i==len )
228 return i;
229 char c = text.charAt(i);
230 switch(c) {
231 case '"':
232 case '\'':
233 i = text.indexOf(c,i+1);
234 return i==-1 ? len : i+1;
235 default:
236 if( Character.isWhitespace(c) ) {
237 throw new RuntimeException("text="+text+" i="+i);
238 }
239 do {
240 i++;
241 } while( i<len && (c=text.charAt(i))!='=' && !Character.isWhitespace(c) );
242 return i;
243 }
244 }
245
246 private static int toEndValue(String text,int i,int len) {
247 if( i==len )
248 return i;
249 char c = text.charAt(i);
250 switch(c) {
251 case '"':
252 case '\'':
253 i = text.indexOf(c,i+1);
254 return i==-1 ? len : i+1;
255 default:
256 if( Character.isWhitespace(c) ) {
257 throw new RuntimeException("text="+text+" i="+i);
258 }
259 do {
260 i++;
261 } while( i<len && !Character.isWhitespace(text.charAt(i)) );
262 return i;
263 }
264 }
265
266 public static String unquote(String s) {
267 if( s==null || s.length()<=1 )
268 return s;
269 char c = s.charAt(0);
270 return (c=='"' || c=='\'') && s.charAt(s.length()-1)==c
271 ? s.substring(1,s.length()-1) : s;
272 }
273
274
275
276
277 public static String to_string(LuanState luan,LuanTable tbl) throws LuanException {
278 List<Object> html = tbl.asList();
279 StringBuilder buf = new StringBuilder();
280 for( Object o : html ) {
281 if( o instanceof String ) {
282 buf.append( o );
283 } else if( o instanceof LuanTable ) {
284 LuanTable t = (LuanTable)o;
285 String type = (String)t.get("type");
286 if( type==null )
287 throw luan.exception( "no type in element of table for 'Html.to_string'" );
288 if( type.equals("comment") ) {
289 buf.append( "<!--" ).append( t.get("text") ).append( "-->" );
290 } else if( type.equals("cdata") ) {
291 buf.append( "<![CDATA[" ).append( t.get("text") ).append( "]]" );
292 } else if( type.equals("tag") ) {
293 buf.append( tagToString(t) );
294 } else if( type.equals("container") ) {
295 LuanTable tag = (LuanTable)t.get("tag");
296 buf.append( tagToString(tag) );
297 buf.append( t.get("text") );
298 buf.append( "</" ).append( tag.get("name") ).append( ">" );
299 } else {
300 throw luan.exception( "invalid element type for 'Html.to_string'" );
301 }
302 } else
303 throw luan.exception( "invalid value ("+Luan.type(o)+") in table for 'Html.to_string'" );
304 }
305 return buf.toString();
306 }
307
308 private static String tagToString(LuanTable tbl) {
309 StringBuilder buf = new StringBuilder();
310 buf.append('<');
311 buf.append(tbl.get("name"));
312 LuanTable attributes = (LuanTable)tbl.get("attributes");
313 for( Map.Entry<Object,Object> attr : attributes ) {
314 buf.append( ' ' );
315 buf.append( attr.getKey() );
316 Object val = attr.getValue();
317 if( !val.equals(Boolean.TRUE) ) {
318 buf.append( '=' );
319 buf.append( quote((String)val) );
320 }
321 }
322 if( tbl.get("is_empty").equals(Boolean.TRUE) )
323 buf.append('/');
324 buf.append('>');
325 return buf.toString();
326 }
327
328 public static String quote(String s) {
329 StringBuilder buf = new StringBuilder();
330 buf.append('"');
331 int i = 0;
332 while(true) {
333 int i2 = s.indexOf('"',i);
334 if( i2 == -1 ) {
335 buf.append(s.substring(i));
336 break;
337 } else {
338 buf.append(s.substring(i,i2));
339 buf.append("&quot;");
340 i = i2 + 1;
341 }
342 }
343 buf.append('"');
344 return buf.toString();
345 }
346
37 } 347 }