Mercurial Hosting > luan
comparison src/luan/modules/HtmlLuan.java @ 1341:a015a0b5c388
add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Tue, 19 Feb 2019 08:14:40 -0700 |
parents | 25746915a241 |
children | 21f5edab1fbf |
comparison
equal
deleted
inserted
replaced
1340:b3c4fcf29a53 | 1341:a015a0b5c388 |
---|---|
4 import java.util.ArrayList; | 4 import java.util.ArrayList; |
5 import java.util.Arrays; | 5 import java.util.Arrays; |
6 import java.util.Set; | 6 import java.util.Set; |
7 import java.util.HashSet; | 7 import java.util.HashSet; |
8 import java.util.Map; | 8 import java.util.Map; |
9 import java.util.regex.Pattern; | |
10 import java.util.regex.Matcher; | |
9 import luan.Luan; | 11 import luan.Luan; |
10 import luan.LuanTable; | 12 import luan.LuanTable; |
11 import luan.LuanException; | 13 import luan.LuanException; |
12 | 14 |
13 | 15 |
37 } | 39 } |
38 } | 40 } |
39 return buf.toString(); | 41 return buf.toString(); |
40 } | 42 } |
41 | 43 |
42 /* | 44 private static final Pattern entityPtn = Pattern.compile( |
43 // public static final String TEXTAREA = "textarea"; | 45 "&#(\\d+);" |
44 public static final String SCRIPT = "script"; | 46 ); |
45 public static final String STYLE = "style"; | |
46 | 47 |
47 public static Set<String> containerTags = new HashSet<String>(Arrays.asList(SCRIPT,STYLE)); | 48 public static String decode(String s) { |
48 */ | 49 StringBuffer buf = new StringBuffer(); |
49 /* | 50 Matcher m = entityPtn.matcher(s); |
50 public static LuanTable parse(Luan luan,String text,LuanTable containerTagsTbl) | 51 while( m.find() ) { |
51 throws LuanException | 52 String entity = new String(new char[]{(char)Integer.parseInt(m.group(1))}); |
52 { | 53 m.appendReplacement(buf,entity); |
53 Utils.checkNotNull(luan,text); | |
54 Utils.checkNotNull(luan,containerTagsTbl); | |
55 Set<String> containerTags = new HashSet<String>(); | |
56 for( Object v : containerTagsTbl.asList() ) { | |
57 containerTags.add((String)v); | |
58 } | 54 } |
59 List<Object> html = new ArrayList<Object>(); | 55 m.appendTail(buf); |
60 int len = text.length(); | 56 s = buf.toString(); |
61 int i = 0; | 57 s = s.replace("&nbsp;"," "); |
62 outer: | 58 s = s.replace("&quot;","\""); |
63 while( i < len ) { | 59 s = s.replace("&gt;",">"); |
64 int i2 = text.indexOf('<',i); | 60 s = s.replace("&lt;","<"); |
65 while( i2 != -1 && i2+1 < len ) { | 61 s = s.replace("&amp;","&"); |
66 char c = text.charAt(i2+1); | 62 return s; |
67 if( Character.isLetter(c) || c=='/' || c=='!' ) | |
68 break; | |
69 i2 = text.indexOf('<',i2+1); | |
70 } | |
71 if( i2 == -1 ) { | |
72 html.add( text.substring(i) ); | |
73 break; | |
74 } | |
75 if( i < i2 ) | |
76 html.add( text.substring(i,i2) ); | |
77 if( text.startsWith("<!--",i2) ) { | |
78 i = text.indexOf("-->",i2+4); | |
79 if( i == -1 ) { | |
80 html.add( text.substring(i2) ); | |
81 break; | |
82 } | |
83 html.add( comment( text.substring(i2+4,i) ) ); | |
84 i += 3; | |
85 } else if( text.startsWith("<![CDATA[",i2) ) { | |
86 i = text.indexOf("]]>",i2+9); | |
87 if( i == -1 ) { | |
88 html.add( text.substring(i2) ); | |
89 break; | |
90 } | |
91 html.add( cdata( text.substring(i2+9,i) ) ); | |
92 i += 3; | |
93 } else { | |
94 i = text.indexOf('>',i2); | |
95 if( i == -1 ) { | |
96 html.add( text.substring(i2) ); | |
97 break; | |
98 } | |
99 String tagText = text.substring(i2+1,i); | |
100 try { | |
101 LuanTable tag = parseTag(tagText); | |
102 String tagName = (String)tag.rawGet("name"); | |
103 if( containerTags.contains(tagName) ) { | |
104 i2 = i; | |
105 String endTagName = '/' + tagName; | |
106 while(true) { | |
107 i2 = text.indexOf('<',i2+1); | |
108 if( i2 == -1 ) | |
109 break; | |
110 int i3 = text.indexOf('>',i2); | |
111 if( i3 == -1 ) | |
112 break; | |
113 int j = i2+1; | |
114 while( j<i3 && !Character.isWhitespace(text.charAt(j)) ) j++; | |
115 String s = text.substring(i2+1,j); | |
116 if( s.equalsIgnoreCase(endTagName) ) { | |
117 String text2 = text.substring(i+1,i2); | |
118 LuanTable textContainer = textContainer(tag,text2); | |
119 html.add( textContainer ); | |
120 i = i3 + 1; | |
121 continue outer; | |
122 } | |
123 } | |
124 // logger.warn("unclosed "+tagName); | |
125 } | |
126 i += 1; | |
127 html.add( tag ); | |
128 } catch(BadTag e) { | |
129 // logger.debug("bad tag",e); | |
130 i += 1; | |
131 // if( !removeBadTags ) { | |
132 html.add( "<" ); | |
133 html.add( encode(luan,tagText) ); | |
134 html.add( ">" ); | |
135 // } | |
136 } | |
137 } | |
138 } | |
139 return new LuanTable(html); | |
140 } | 63 } |
141 | 64 |
142 static LuanTable comment(String text) { | |
143 LuanTable tbl = new LuanTable(); | |
144 tbl.rawPut("type","comment"); | |
145 tbl.rawPut("text",text); | |
146 return tbl; | |
147 } | |
148 | |
149 static LuanTable cdata(String text) { | |
150 LuanTable tbl = new LuanTable(); | |
151 tbl.rawPut("type","cdata"); | |
152 tbl.rawPut("text",text); | |
153 return tbl; | |
154 } | |
155 | |
156 static LuanTable textContainer(LuanTable tag,String text) { | |
157 LuanTable tbl = new LuanTable(); | |
158 tbl.rawPut("type","container"); | |
159 tbl.rawPut("tag",tag); | |
160 tbl.rawPut("text",text); | |
161 return tbl; | |
162 } | |
163 | |
164 | |
165 | |
166 static final class BadTag extends RuntimeException { | |
167 private BadTag(String msg) { | |
168 super(msg); | |
169 } | |
170 } | |
171 | |
172 static LuanTable parseTag(String text) { | |
173 LuanTable tbl = new LuanTable(); | |
174 tbl.rawPut("type","tag"); | |
175 if( text.endsWith("/") ) { | |
176 text = text.substring(0,text.length()-1); | |
177 tbl.rawPut("is_empty",true); | |
178 } else { | |
179 tbl.rawPut("is_empty",false); | |
180 } | |
181 int len = text.length(); | |
182 int i = 0; | |
183 int i2 = i; | |
184 if( i2<len && text.charAt(i2)=='/' ) | |
185 i2++; | |
186 while( i2<len ) { | |
187 char c = text.charAt(i2); | |
188 if( Character.isWhitespace(c) ) | |
189 break; | |
190 if( !( Character.isLetterOrDigit(c) || c=='_' || c=='.' || c=='-' || c==':' ) ) | |
191 throw new BadTag("invalid tag name for <"+text+">"); | |
192 i2++; | |
193 } | |
194 String name = text.substring(i,i2).toLowerCase(); | |
195 tbl.rawPut("name",name); | |
196 LuanTable attributes = new LuanTable(); | |
197 tbl.rawPut("attributes",attributes); | |
198 i = i2; | |
199 while( i<len && Character.isWhitespace(text.charAt(i)) ) i++; | |
200 while( i<len ) { | |
201 i2 = toEndName(text,i,len); | |
202 String attrName = unquote(text.substring(i,i2).toLowerCase()); | |
203 if( attributes.rawGet(attrName) != null ) | |
204 throw new BadTag("duplicate attribute: "+attrName); | |
205 i = i2; | |
206 while( i<len && Character.isWhitespace(text.charAt(i)) ) i++; | |
207 if( i<len && text.charAt(i) == '=' ) { | |
208 i++; | |
209 i2 = i; | |
210 while( i<len && Character.isWhitespace(text.charAt(i)) ) i++; | |
211 i2 = toEndValue(text,i,len); | |
212 String attrValue = text.substring(i,i2); | |
213 if( attrValue.indexOf('<') != -1 || attrValue.indexOf('>') != -1 ) | |
214 throw new BadTag("invalid attribute value: "+attrValue); | |
215 attrValue = unquote(attrValue); | |
216 attributes.rawPut(attrName,attrValue); | |
217 i = i2; | |
218 while( i<len && Character.isWhitespace(text.charAt(i)) ) i++; | |
219 } else { | |
220 attributes.rawPut(attrName,true); | |
221 } | |
222 } | |
223 return tbl; | |
224 } | |
225 | |
226 private static int toEndName(String text,int i,int len) { | |
227 if( i==len ) | |
228 return i; | |
229 char c = text.charAt(i); | |
230 switch(c) { | |
231 case '"': | |
232 case '\'': | |
233 i = text.indexOf(c,i+1); | |
234 return i==-1 ? len : i+1; | |
235 default: | |
236 if( Character.isWhitespace(c) ) { | |
237 throw new RuntimeException("text="+text+" i="+i); | |
238 } | |
239 do { | |
240 i++; | |
241 } while( i<len && (c=text.charAt(i))!='=' && !Character.isWhitespace(c) ); | |
242 return i; | |
243 } | |
244 } | |
245 | |
246 private static int toEndValue(String text,int i,int len) { | |
247 if( i==len ) | |
248 return i; | |
249 char c = text.charAt(i); | |
250 switch(c) { | |
251 case '"': | |
252 case '\'': | |
253 i = text.indexOf(c,i+1); | |
254 return i==-1 ? len : i+1; | |
255 default: | |
256 if( Character.isWhitespace(c) ) { | |
257 throw new RuntimeException("text="+text+" i="+i); | |
258 } | |
259 do { | |
260 i++; | |
261 } while( i<len && !Character.isWhitespace(text.charAt(i)) ); | |
262 return i; | |
263 } | |
264 } | |
265 | |
266 public static String unquote(String s) { | |
267 if( s==null || s.length()<=1 ) | |
268 return s; | |
269 char c = s.charAt(0); | |
270 return (c=='"' || c=='\'') && s.charAt(s.length()-1)==c | |
271 ? s.substring(1,s.length()-1) : s; | |
272 } | |
273 */ | |
274 | |
275 | |
276 /* | |
277 public static String to_string(Luan luan,LuanTable tbl) throws LuanException { | |
278 List<Object> html = tbl.asList(); | |
279 StringBuilder buf = new StringBuilder(); | |
280 for( Object o : html ) { | |
281 if( o instanceof String ) { | |
282 buf.append( o ); | |
283 } else if( o instanceof LuanTable ) { | |
284 LuanTable t = (LuanTable)o; | |
285 String type = (String)t.get(luan,"type"); | |
286 if( type==null ) | |
287 throw new LuanException(luan, "no type in element of table for 'Html.to_string'" ); | |
288 if( type.equals("comment") ) { | |
289 buf.append( "<!--" ).append( t.get(luan,"text") ).append( "-->" ); | |
290 } else if( type.equals("cdata") ) { | |
291 buf.append( "<![CDATA[" ).append( t.get(luan,"text") ).append( "]]" ); | |
292 } else if( type.equals("tag") ) { | |
293 buf.append( tagToString(luan,t) ); | |
294 } else if( type.equals("container") ) { | |
295 LuanTable tag = (LuanTable)t.get(luan,"tag"); | |
296 buf.append( tagToString(luan,tag) ); | |
297 buf.append( t.get(luan,"text") ); | |
298 buf.append( "</" ).append( tag.get(luan,"name") ).append( ">" ); | |
299 } else { | |
300 throw new LuanException(luan, "invalid element type for 'Html.to_string'" ); | |
301 } | |
302 } else | |
303 throw new LuanException(luan, "invalid value ("+Luan.type(o)+") in table for 'Html.to_string'" ); | |
304 } | |
305 return buf.toString(); | |
306 } | |
307 | |
308 private static String tagToString(Luan luan,LuanTable tbl) throws LuanException { | |
309 StringBuilder buf = new StringBuilder(); | |
310 buf.append('<'); | |
311 buf.append(tbl.get(luan,"name")); | |
312 LuanTable attributes = (LuanTable)tbl.get(luan,"attributes"); | |
313 for( Map.Entry<Object,Object> attr : attributes.iterable(luan) ) { | |
314 buf.append( ' ' ); | |
315 buf.append( attr.getKey() ); | |
316 Object val = attr.getValue(); | |
317 if( !val.equals(Boolean.TRUE) ) { | |
318 buf.append( '=' ); | |
319 buf.append( quote((String)val) ); | |
320 } | |
321 } | |
322 if( tbl.get(luan,"is_empty").equals(Boolean.TRUE) ) | |
323 buf.append('/'); | |
324 buf.append('>'); | |
325 return buf.toString(); | |
326 } | |
327 */ | |
328 public static String quote(String s) { | 65 public static String quote(String s) { |
329 StringBuilder buf = new StringBuilder(); | 66 StringBuilder buf = new StringBuilder(); |
330 buf.append('"'); | 67 buf.append('"'); |
331 int i = 0; | 68 int i = 0; |
332 while(true) { | 69 while(true) { |