Mercurial Hosting > luan
annotate core/src/luan/modules/parsers/Html.java @ 625:a3c1e11fb6aa
rewrite much of Html to be more understandable;
add Lucene html_highlighter();
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Tue, 12 Jan 2016 23:52:56 -0700 |
parents | |
children |
rev | line source |
---|---|
625
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
1 package luan.modules.parsers; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
2 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
3 import java.util.List; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
4 import java.util.ArrayList; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
5 import java.util.Set; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
6 import java.util.HashSet; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
7 import luan.LuanTable; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
8 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
9 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
10 public final class Html { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
11 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
12 public static LuanTable toList(String text,LuanTable containerTagsTbl) throws ParseException { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
13 return new Html(text,containerTagsTbl).parse(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
14 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
15 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
16 private final Parser parser; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
17 private final Set<String> containerTags = new HashSet<String>(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
18 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
19 private Html(String text,LuanTable containerTagsTbl) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
20 this.parser = new Parser(text); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
21 for( Object v : containerTagsTbl.asList() ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
22 containerTags.add((String)v); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
23 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
24 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
25 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
26 private LuanTable parse() throws ParseException { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
27 List list = new ArrayList(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
28 StringBuilder sb = new StringBuilder(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
29 while( !parser.endOfInput() ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
30 if( parser.test('<') ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
31 LuanTable tbl = parseTag(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
32 if( tbl != null ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
33 String tagName = (String)tbl.rawGet("name"); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
34 if( containerTags.contains(tagName) ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
35 LuanTable container = parseContainer(tbl); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
36 if( container != null ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
37 tbl = container; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
38 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
39 if( tbl != null |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
40 || (tbl = parseComment()) != null |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
41 || (tbl = parseCdata()) != null |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
42 ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
43 if( sb.length() > 0 ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
44 list.add(sb.toString()); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
45 sb.setLength(0); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
46 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
47 list.add(tbl); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
48 continue; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
49 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
50 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
51 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
52 sb.append( parser.currentChar() ); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
53 parser.anyChar(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
54 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
55 if( sb.length() > 0 ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
56 list.add(sb.toString()); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
57 return new LuanTable(list); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
58 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
59 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
60 private LuanTable parseComment() { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
61 parser.begin(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
62 if( !parser.match("<!--") ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
63 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
64 int start = parser.currentIndex(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
65 while( !parser.test("-->") ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
66 if( !parser.anyChar() ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
67 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
68 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
69 String text = parser.textFrom(start); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
70 LuanTable tbl = new LuanTable(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
71 tbl.rawPut("type","comment"); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
72 tbl.rawPut("text",text); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
73 return parser.success(tbl); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
74 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
75 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
76 private LuanTable parseCdata() { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
77 parser.begin(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
78 if( !parser.match("<![CDATA[") ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
79 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
80 int start = parser.currentIndex(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
81 while( !parser.test("]]>") ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
82 if( !parser.anyChar() ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
83 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
84 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
85 String text = parser.textFrom(start); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
86 LuanTable tbl = new LuanTable(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
87 tbl.rawPut("type","cdata"); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
88 tbl.rawPut("text",text); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
89 return parser.success(tbl); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
90 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
91 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
92 private LuanTable parseContainer(LuanTable tag) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
93 String endTagName = '/' + (String)tag.rawGet("name"); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
94 int start = parser.begin(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
95 int end; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
96 while(true) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
97 if( parser.test('<') ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
98 end = parser.currentIndex(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
99 LuanTable tag2 = parseTag(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
100 String s = (String)tag2.rawGet("name"); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
101 if( s.equals(endTagName) ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
102 break; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
103 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
104 if( !parser.anyChar() ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
105 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
106 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
107 String text = parser.text.substring(start,end); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
108 LuanTable tbl = new LuanTable(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
109 tbl.rawPut("type","container"); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
110 tbl.rawPut("tag",tag); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
111 tbl.rawPut("text",text); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
112 return parser.success(tbl); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
113 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
114 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
115 private LuanTable parseTag() { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
116 parser.begin(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
117 if( !parser.match('<') ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
118 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
119 int start = parser.currentIndex(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
120 parser.match('/'); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
121 if( !matchNameChar() ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
122 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
123 while( matchNameChar() ); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
124 String name = parser.textFrom(start).toLowerCase(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
125 LuanTable attributes = new LuanTable(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
126 String attrName; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
127 while( (attrName = parseAttrName()) != null ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
128 String attrValue = parseAttrValue(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
129 attributes.rawPut( attrName, attrValue!=null ? attrValue : true ); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
130 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
131 while( matchSpace() ); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
132 boolean isEmpty = parser.match('/'); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
133 if( !parser.match('>') ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
134 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
135 LuanTable tbl = new LuanTable(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
136 tbl.rawPut("type","tag"); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
137 tbl.rawPut("name",name); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
138 tbl.rawPut("attributes",attributes); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
139 tbl.rawPut("is_empty",isEmpty); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
140 return parser.success(tbl); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
141 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
142 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
143 private String parseAttrName() { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
144 parser.begin(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
145 if( !matchSpace() ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
146 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
147 while( matchSpace() ); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
148 int start = parser.currentIndex(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
149 if( !matchNameChar() ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
150 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
151 while( matchNameChar() ); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
152 String name = parser.textFrom(start); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
153 return parser.success(name); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
154 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
155 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
156 private String parseAttrValue() { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
157 parser.begin(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
158 while( matchSpace() ); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
159 if( !parser.match('=') ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
160 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
161 while( matchSpace() ); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
162 if( parser.anyOf("\"'") ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
163 char quote = parser.lastChar(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
164 int start = parser.currentIndex(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
165 while( !parser.test(quote) ) { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
166 if( !parser.anyChar() ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
167 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
168 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
169 String value = parser.textFrom(start); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
170 parser.match(quote); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
171 return parser.success(value); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
172 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
173 int start = parser.currentIndex(); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
174 if( !matchValueChar() ) |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
175 return parser.failure(null); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
176 while( matchValueChar() ); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
177 String value = parser.textFrom(start); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
178 return parser.success(value); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
179 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
180 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
181 private boolean matchNameChar() { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
182 return parser.inCharRange('a','z') |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
183 || parser.inCharRange('A','Z') |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
184 || parser.inCharRange('0','9') |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
185 || parser.anyOf("_.-:") |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
186 ; |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
187 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
188 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
189 private boolean matchValueChar() { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
190 return parser.noneOf(" \t\r\n\"'>/="); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
191 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
192 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
193 private boolean matchSpace() { |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
194 return parser.anyOf(" \t\r\n"); |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
195 } |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
196 |
a3c1e11fb6aa
rewrite much of Html to be more understandable;
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff
changeset
|
197 } |