Mercurial Hosting > luan
annotate src/goodjava/xml/XmlParser.java @ 1822:aeaf2b65c005
better parse tables
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Tue, 09 Jul 2024 12:46:27 -0600 |
parents | 9daae8cf4bef |
children |
rev | line source |
---|---|
1466 | 1 package goodjava.xml; |
2 | |
3 import java.util.Map; | |
4 import java.util.AbstractMap; | |
5 import java.util.LinkedHashMap; | |
6 import java.util.List; | |
7 import java.util.ArrayList; | |
1790 | 8 import java.util.regex.Pattern; |
9 import java.util.regex.Matcher; | |
1466 | 10 import goodjava.parser.Parser; |
11 import goodjava.parser.ParseException; | |
12 | |
13 | |
14 public final class XmlParser { | |
15 | |
1790 | 16 public static Xml parse(String text) throws ParseException { |
1466 | 17 return new XmlParser(text).parse(); |
18 } | |
19 | |
/** Low-level parser over the text being read; every matching method consumes input through it. */
private final Parser parser;

/**
 * Creates a parser over {@code text}. Instances are only created by the static {@code parse} method.
 *
 * @param text the XML source to parse
 */
private XmlParser(String text) {
	parser = new Parser(text);
}
25 | |
26 private ParseException exception(String msg) { | |
27 return new ParseException(parser,msg); | |
28 } | |
29 | |
1790 | 30 private Xml parse() throws ParseException { |
31 Xml xml = new Xml(); | |
1466 | 32 spaces(); |
1790 | 33 xml.declaration = declaration(); |
1466 | 34 spaces(); |
35 XmlElement element = element(); | |
36 spaces(); | |
37 if( !parser.endOfInput() ) | |
38 throw exception("unexpected text"); | |
1723
0246add03d11
dont allow empty xml
Franklin Schmidt <fschmidt@gmail.com>
parents:
1468
diff
changeset
|
39 if( element==null ) |
0246add03d11
dont allow empty xml
Franklin Schmidt <fschmidt@gmail.com>
parents:
1468
diff
changeset
|
40 throw exception("empty document not allowed"); |
1790 | 41 xml.setElement(element); |
42 return xml; | |
1466 | 43 } |
44 | |
1790 | 45 private String declaration() throws ParseException { |
46 int start = parser.begin(); | |
1466 | 47 if( !parser.match("<?xml") ) |
1790 | 48 return parser.failure(null); |
1466 | 49 while( attribute() != null ); |
50 spaces(); | |
51 required("?>"); | |
1790 | 52 String s = parser.textFrom(start); |
53 return parser.success(s); | |
1466 | 54 } |
55 | |
56 private XmlElement element() throws ParseException { | |
57 parser.begin(); | |
58 if( !parser.match('<') || parser.test('/') ) | |
59 return parser.failure(null); | |
60 //spaces(); | |
61 String name = name(); | |
62 if( name==null ) | |
63 throw exception("element name not found"); | |
64 Map<String,String> attributes = new LinkedHashMap<String,String>(); | |
65 Map.Entry<String,String> attribute; | |
66 while( (attribute=attribute()) != null ) { | |
67 attributes.put(attribute.getKey(),attribute.getValue()); | |
68 } | |
69 spaces(); | |
1468 | 70 if( parser.match("/>") ) { |
71 XmlElement element = new XmlElement(name,attributes); | |
72 return parser.success(element); | |
73 } | |
1466 | 74 required(">"); |
75 String s = string(name); | |
76 if( s != null ) { | |
1790 | 77 XmlElement element = new XmlElement(name,attributes); |
78 element.setContent(s); | |
1466 | 79 return parser.success(element); |
80 } | |
81 List<XmlElement> elements = elements(name); | |
82 if( elements != null ) { | |
1790 | 83 XmlElement element = new XmlElement(name,attributes); |
84 element.setContent( elements.toArray(new XmlElement[0]) ); | |
1466 | 85 return parser.success(element); |
86 } | |
87 throw exception("bad element"); | |
88 } | |
89 | |
90 private String string(String name) throws ParseException { | |
91 int start = parser.begin(); | |
1819 | 92 while( parser.noneOf("<") ); |
1466 | 93 String s = parser.textFrom(start); |
1468 | 94 s = decode(s); |
1466 | 95 if( !endTag(name) ) |
96 return parser.failure(null); | |
97 return parser.success(s); | |
98 } | |
99 | |
100 private List<XmlElement> elements(String name) throws ParseException { | |
101 parser.begin(); | |
102 List<XmlElement> elements = new ArrayList<XmlElement>(); | |
103 spaces(); | |
104 XmlElement element; | |
105 while( (element=element()) != null ) { | |
106 elements.add(element); | |
107 spaces(); | |
108 } | |
109 if( !endTag(name) ) | |
110 return parser.failure(null); | |
111 return parser.success(elements); | |
112 } | |
113 | |
114 private boolean endTag(String name) throws ParseException { | |
115 parser.begin(); | |
116 if( !parser.match("</") || !parser.match(name) ) | |
117 return parser.failure(); | |
118 spaces(); | |
119 if( !parser.match('>') ) | |
120 return parser.failure(); | |
121 return parser.success(); | |
122 } | |
123 | |
124 private Map.Entry<String,String> attribute() throws ParseException { | |
125 parser.begin(); | |
126 if( !matchSpace() ) | |
127 return parser.failure(null); | |
128 spaces(); | |
129 String name = name(); | |
130 if( name==null ) | |
131 return parser.failure(null); | |
132 spaces(); | |
133 required("="); | |
134 spaces(); | |
135 if( !parser.anyOf("\"'") ) | |
136 throw exception("quote expected"); | |
137 char quote = parser.lastChar(); | |
138 int start = parser.currentIndex(); | |
139 while( !parser.test(quote) ) { | |
140 if( !parser.anyChar() ) | |
141 throw exception("unclosed attribute value"); | |
142 } | |
143 String value = parser.textFrom(start); | |
1468 | 144 value = decode(value); |
1466 | 145 parser.match(quote); |
146 Map.Entry<String,String> attribute = new AbstractMap.SimpleImmutableEntry<String,String>(name,value); | |
147 return parser.success(attribute); | |
148 } | |
149 | |
150 private String name() { | |
151 int start = parser.currentIndex(); | |
152 if( !matchNameChar() ) | |
153 return null; | |
154 while( matchNameChar() ); | |
155 return parser.textFrom(start); | |
156 } | |
157 | |
158 private boolean matchNameChar() { | |
159 return parser.inCharRange('a','z') | |
160 || parser.inCharRange('A','Z') | |
161 || parser.inCharRange('0','9') | |
162 || parser.anyOf("_.-:") | |
163 ; | |
164 } | |
165 | |
166 private void required(String s) throws ParseException { | |
167 if( !parser.match(s) ) | |
168 exception("'"+s+"' expected"); | |
169 } | |
170 | |
171 private void spaces() throws ParseException { | |
172 while( matchSpace() || matchComment() ); | |
173 } | |
174 | |
175 private boolean matchComment() throws ParseException { | |
176 if( !parser.match("<!--") ) | |
177 return false; | |
178 while( !parser.match("-->") ) { | |
179 if( !parser.anyChar() ) | |
180 throw exception("unclosed comment"); | |
181 } | |
182 return true; | |
183 } | |
184 | |
185 private boolean matchSpace() { | |
186 return parser.anyOf(" \t\r\n"); | |
187 } | |
188 | |
1790 | 189 private static final Pattern dec = Pattern.compile("&#(\\d+);"); |
190 | |
1468 | 191 private static String decode(String s) { |
1790 | 192 Matcher m = dec.matcher(s); |
193 StringBuffer sb = new StringBuffer(); | |
194 while( m.find() ) { | |
195 String d = m.group(1); | |
196 int i = Integer.parseInt(d); | |
197 d = String.valueOf((char)i); | |
198 m.appendReplacement(sb, d); | |
199 } | |
200 m.appendTail(sb); | |
201 s = sb.toString(); | |
1468 | 202 s = s.replace("<","<"); |
203 s = s.replace(">",">"); | |
204 s = s.replace(""","\""); | |
205 s = s.replace("'","'"); | |
206 s = s.replace("&","&"); | |
207 return s; | |
208 } | |
209 | |
1466 | 210 } |