annotate src/goodjava/xml/XmlParser.java @ 1467:509d49c493c0

minor
author Franklin Schmidt <fschmidt@gmail.com>
date Mon, 13 Apr 2020 22:16:59 -0600
parents 670b7d089699
children 35f3bfd4f51d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1466
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
1 package goodjava.xml;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
2
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
3 import java.util.Map;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
4 import java.util.AbstractMap;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
5 import java.util.LinkedHashMap;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
6 import java.util.List;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
7 import java.util.ArrayList;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
8 import goodjava.parser.Parser;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
9 import goodjava.parser.ParseException;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
10
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
11
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
12 public final class XmlParser {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
13
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
14 public static XmlElement parse(String text) throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
15 return new XmlParser(text).parse();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
16 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
17
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
18 private final Parser parser;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
19
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
20 private XmlParser(String text) {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
21 this.parser = new Parser(text);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
22 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
23
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
24 private ParseException exception(String msg) {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
25 return new ParseException(parser,msg);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
26 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
27
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
28 private XmlElement parse() throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
29 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
30 prolog();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
31 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
32 XmlElement element = element();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
33 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
34 if( !parser.endOfInput() )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
35 throw exception("unexpected text");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
36 return element;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
37 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
38
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
39 private void prolog() throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
40 if( !parser.match("<?xml") )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
41 return;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
42 while( attribute() != null );
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
43 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
44 required("?>");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
45 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
46
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
47 private XmlElement element() throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
48 parser.begin();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
49 if( !parser.match('<') || parser.test('/') )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
50 return parser.failure(null);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
51 //spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
52 String name = name();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
53 if( name==null )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
54 throw exception("element name not found");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
55 Map<String,String> attributes = new LinkedHashMap<String,String>();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
56 Map.Entry<String,String> attribute;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
57 while( (attribute=attribute()) != null ) {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
58 attributes.put(attribute.getKey(),attribute.getValue());
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
59 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
60 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
61 required(">");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
62 String s = string(name);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
63 if( s != null ) {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
64 XmlElement element = new XmlElement(name,attributes,s);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
65 return parser.success(element);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
66 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
67 List<XmlElement> elements = elements(name);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
68 if( elements != null ) {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
69 XmlElement element = new XmlElement(name,attributes,elements.toArray(new XmlElement[0]));
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
70 return parser.success(element);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
71 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
72 throw exception("bad element");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
73 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
74
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
75 private String string(String name) throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
76 int start = parser.begin();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
77 while( parser.noneOf("<") );
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
78 String s = parser.textFrom(start);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
79 if( !endTag(name) )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
80 return parser.failure(null);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
81 return parser.success(s);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
82 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
83
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
84 private List<XmlElement> elements(String name) throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
85 parser.begin();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
86 List<XmlElement> elements = new ArrayList<XmlElement>();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
87 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
88 XmlElement element;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
89 while( (element=element()) != null ) {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
90 elements.add(element);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
91 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
92 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
93 if( !endTag(name) )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
94 return parser.failure(null);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
95 return parser.success(elements);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
96 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
97
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
98 private boolean endTag(String name) throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
99 parser.begin();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
100 if( !parser.match("</") || !parser.match(name) )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
101 return parser.failure();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
102 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
103 if( !parser.match('>') )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
104 return parser.failure();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
105 return parser.success();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
106 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
107
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
108 private Map.Entry<String,String> attribute() throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
109 parser.begin();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
110 if( !matchSpace() )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
111 return parser.failure(null);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
112 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
113 String name = name();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
114 if( name==null )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
115 return parser.failure(null);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
116 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
117 required("=");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
118 spaces();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
119 if( !parser.anyOf("\"'") )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
120 throw exception("quote expected");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
121 char quote = parser.lastChar();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
122 int start = parser.currentIndex();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
123 while( !parser.test(quote) ) {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
124 if( !parser.anyChar() )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
125 throw exception("unclosed attribute value");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
126 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
127 String value = parser.textFrom(start);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
128 parser.match(quote);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
129 Map.Entry<String,String> attribute = new AbstractMap.SimpleImmutableEntry<String,String>(name,value);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
130 return parser.success(attribute);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
131 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
132
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
133 private String name() {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
134 int start = parser.currentIndex();
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
135 if( !matchNameChar() )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
136 return null;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
137 while( matchNameChar() );
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
138 return parser.textFrom(start);
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
139 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
140
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
141 private boolean matchNameChar() {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
142 return parser.inCharRange('a','z')
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
143 || parser.inCharRange('A','Z')
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
144 || parser.inCharRange('0','9')
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
145 || parser.anyOf("_.-:")
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
146 ;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
147 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
148
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
149 private void required(String s) throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
150 if( !parser.match(s) )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
151 exception("'"+s+"' expected");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
152 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
153
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
154 private void spaces() throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
155 while( matchSpace() || matchComment() );
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
156 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
157
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
158 private boolean matchComment() throws ParseException {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
159 if( !parser.match("<!--") )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
160 return false;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
161 while( !parser.match("-->") ) {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
162 if( !parser.anyChar() )
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
163 throw exception("unclosed comment");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
164 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
165 return true;
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
166 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
167
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
168 private boolean matchSpace() {
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
169 return parser.anyOf(" \t\r\n");
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
170 }
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
171
670b7d089699 xml support
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
172 }