Mercurial Hosting > luan
annotate src/goodjava/xml/XmlParser.java @ 1773:b914a726061f
video multipart
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Tue, 18 Jul 2023 11:56:40 -0600 |
parents | 0246add03d11 |
children | a8c685a894b4 |
rev | line source |
---|---|
1466 | 1 package goodjava.xml; |
2 | |
3 import java.util.Map; | |
4 import java.util.AbstractMap; | |
5 import java.util.LinkedHashMap; | |
6 import java.util.List; | |
7 import java.util.ArrayList; | |
8 import goodjava.parser.Parser; | |
9 import goodjava.parser.ParseException; | |
10 | |
11 | |
12 public final class XmlParser { | |
13 | |
14 public static XmlElement parse(String text) throws ParseException { | |
15 return new XmlParser(text).parse(); | |
16 } | |
17 | |
18 private final Parser parser; | |
19 | |
20 private XmlParser(String text) { | |
21 this.parser = new Parser(text); | |
22 } | |
23 | |
24 private ParseException exception(String msg) { | |
25 return new ParseException(parser,msg); | |
26 } | |
27 | |
28 private XmlElement parse() throws ParseException { | |
29 spaces(); | |
30 prolog(); | |
31 spaces(); | |
32 XmlElement element = element(); | |
33 spaces(); | |
34 if( !parser.endOfInput() ) | |
35 throw exception("unexpected text"); | |
1723
0246add03d11
dont allow empty xml
Franklin Schmidt <fschmidt@gmail.com>
parents:
1468
diff
changeset
|
36 if( element==null ) |
0246add03d11
dont allow empty xml
Franklin Schmidt <fschmidt@gmail.com>
parents:
1468
diff
changeset
|
37 throw exception("empty document not allowed"); |
1466 | 38 return element; |
39 } | |
40 | |
41 private void prolog() throws ParseException { | |
42 if( !parser.match("<?xml") ) | |
43 return; | |
44 while( attribute() != null ); | |
45 spaces(); | |
46 required("?>"); | |
47 } | |
48 | |
49 private XmlElement element() throws ParseException { | |
50 parser.begin(); | |
51 if( !parser.match('<') || parser.test('/') ) | |
52 return parser.failure(null); | |
53 //spaces(); | |
54 String name = name(); | |
55 if( name==null ) | |
56 throw exception("element name not found"); | |
57 Map<String,String> attributes = new LinkedHashMap<String,String>(); | |
58 Map.Entry<String,String> attribute; | |
59 while( (attribute=attribute()) != null ) { | |
60 attributes.put(attribute.getKey(),attribute.getValue()); | |
61 } | |
62 spaces(); | |
1468 | 63 if( parser.match("/>") ) { |
64 XmlElement element = new XmlElement(name,attributes); | |
65 return parser.success(element); | |
66 } | |
1466 | 67 required(">"); |
68 String s = string(name); | |
69 if( s != null ) { | |
70 XmlElement element = new XmlElement(name,attributes,s); | |
71 return parser.success(element); | |
72 } | |
73 List<XmlElement> elements = elements(name); | |
74 if( elements != null ) { | |
75 XmlElement element = new XmlElement(name,attributes,elements.toArray(new XmlElement[0])); | |
76 return parser.success(element); | |
77 } | |
78 throw exception("bad element"); | |
79 } | |
80 | |
81 private String string(String name) throws ParseException { | |
82 int start = parser.begin(); | |
83 while( parser.noneOf("<") ); | |
84 String s = parser.textFrom(start); | |
1468 | 85 s = decode(s); |
1466 | 86 if( !endTag(name) ) |
87 return parser.failure(null); | |
88 return parser.success(s); | |
89 } | |
90 | |
91 private List<XmlElement> elements(String name) throws ParseException { | |
92 parser.begin(); | |
93 List<XmlElement> elements = new ArrayList<XmlElement>(); | |
94 spaces(); | |
95 XmlElement element; | |
96 while( (element=element()) != null ) { | |
97 elements.add(element); | |
98 spaces(); | |
99 } | |
100 if( !endTag(name) ) | |
101 return parser.failure(null); | |
102 return parser.success(elements); | |
103 } | |
104 | |
105 private boolean endTag(String name) throws ParseException { | |
106 parser.begin(); | |
107 if( !parser.match("</") || !parser.match(name) ) | |
108 return parser.failure(); | |
109 spaces(); | |
110 if( !parser.match('>') ) | |
111 return parser.failure(); | |
112 return parser.success(); | |
113 } | |
114 | |
115 private Map.Entry<String,String> attribute() throws ParseException { | |
116 parser.begin(); | |
117 if( !matchSpace() ) | |
118 return parser.failure(null); | |
119 spaces(); | |
120 String name = name(); | |
121 if( name==null ) | |
122 return parser.failure(null); | |
123 spaces(); | |
124 required("="); | |
125 spaces(); | |
126 if( !parser.anyOf("\"'") ) | |
127 throw exception("quote expected"); | |
128 char quote = parser.lastChar(); | |
129 int start = parser.currentIndex(); | |
130 while( !parser.test(quote) ) { | |
131 if( !parser.anyChar() ) | |
132 throw exception("unclosed attribute value"); | |
133 } | |
134 String value = parser.textFrom(start); | |
1468 | 135 value = decode(value); |
1466 | 136 parser.match(quote); |
137 Map.Entry<String,String> attribute = new AbstractMap.SimpleImmutableEntry<String,String>(name,value); | |
138 return parser.success(attribute); | |
139 } | |
140 | |
141 private String name() { | |
142 int start = parser.currentIndex(); | |
143 if( !matchNameChar() ) | |
144 return null; | |
145 while( matchNameChar() ); | |
146 return parser.textFrom(start); | |
147 } | |
148 | |
149 private boolean matchNameChar() { | |
150 return parser.inCharRange('a','z') | |
151 || parser.inCharRange('A','Z') | |
152 || parser.inCharRange('0','9') | |
153 || parser.anyOf("_.-:") | |
154 ; | |
155 } | |
156 | |
157 private void required(String s) throws ParseException { | |
158 if( !parser.match(s) ) | |
159 exception("'"+s+"' expected"); | |
160 } | |
161 | |
162 private void spaces() throws ParseException { | |
163 while( matchSpace() || matchComment() ); | |
164 } | |
165 | |
166 private boolean matchComment() throws ParseException { | |
167 if( !parser.match("<!--") ) | |
168 return false; | |
169 while( !parser.match("-->") ) { | |
170 if( !parser.anyChar() ) | |
171 throw exception("unclosed comment"); | |
172 } | |
173 return true; | |
174 } | |
175 | |
176 private boolean matchSpace() { | |
177 return parser.anyOf(" \t\r\n"); | |
178 } | |
179 | |
1468 | 180 private static String decode(String s) { |
181 s = s.replace("<","<"); | |
182 s = s.replace(">",">"); | |
183 s = s.replace(""","\""); | |
184 s = s.replace("'","'"); | |
185 s = s.replace("&","&"); | |
186 return s; | |
187 } | |
188 | |
1466 | 189 } |