comparison src/goodjava/xml/XmlParser.java @ 1790:a8c685a894b4

start xml work
author Franklin Schmidt <fschmidt@gmail.com>
date Mon, 25 Dec 2023 23:07:59 -0700
parents 0246add03d11
children
comparison
equal deleted inserted replaced
1789:bac27b119ff2 1790:a8c685a894b4
3 import java.util.Map; 3 import java.util.Map;
4 import java.util.AbstractMap; 4 import java.util.AbstractMap;
5 import java.util.LinkedHashMap; 5 import java.util.LinkedHashMap;
6 import java.util.List; 6 import java.util.List;
7 import java.util.ArrayList; 7 import java.util.ArrayList;
8 import java.util.regex.Pattern;
9 import java.util.regex.Matcher;
8 import goodjava.parser.Parser; 10 import goodjava.parser.Parser;
9 import goodjava.parser.ParseException; 11 import goodjava.parser.ParseException;
10 12
11 13
12 public final class XmlParser { 14 public final class XmlParser {
13 15
14 public static XmlElement parse(String text) throws ParseException { 16 public static Xml parse(String text) throws ParseException {
15 return new XmlParser(text).parse(); 17 return new XmlParser(text).parse();
16 } 18 }
17 19
18 private final Parser parser; 20 private final Parser parser;
19 21
23 25
24 private ParseException exception(String msg) { 26 private ParseException exception(String msg) {
25 return new ParseException(parser,msg); 27 return new ParseException(parser,msg);
26 } 28 }
27 29
28 private XmlElement parse() throws ParseException { 30 private Xml parse() throws ParseException {
29 spaces(); 31 Xml xml = new Xml();
30 prolog(); 32 spaces();
33 xml.declaration = declaration();
31 spaces(); 34 spaces();
32 XmlElement element = element(); 35 XmlElement element = element();
33 spaces(); 36 spaces();
34 if( !parser.endOfInput() ) 37 if( !parser.endOfInput() )
35 throw exception("unexpected text"); 38 throw exception("unexpected text");
36 if( element==null ) 39 if( element==null )
37 throw exception("empty document not allowed"); 40 throw exception("empty document not allowed");
38 return element; 41 xml.setElement(element);
39 } 42 return xml;
40 43 }
41 private void prolog() throws ParseException { 44
45 private String declaration() throws ParseException {
46 int start = parser.begin();
42 if( !parser.match("<?xml") ) 47 if( !parser.match("<?xml") )
43 return; 48 return parser.failure(null);
44 while( attribute() != null ); 49 while( attribute() != null );
45 spaces(); 50 spaces();
46 required("?>"); 51 required("?>");
52 String s = parser.textFrom(start);
53 return parser.success(s);
47 } 54 }
48 55
49 private XmlElement element() throws ParseException { 56 private XmlElement element() throws ParseException {
50 parser.begin(); 57 parser.begin();
51 if( !parser.match('<') || parser.test('/') ) 58 if( !parser.match('<') || parser.test('/') )
65 return parser.success(element); 72 return parser.success(element);
66 } 73 }
67 required(">"); 74 required(">");
68 String s = string(name); 75 String s = string(name);
69 if( s != null ) { 76 if( s != null ) {
70 XmlElement element = new XmlElement(name,attributes,s); 77 XmlElement element = new XmlElement(name,attributes);
78 element.setContent(s);
71 return parser.success(element); 79 return parser.success(element);
72 } 80 }
73 List<XmlElement> elements = elements(name); 81 List<XmlElement> elements = elements(name);
74 if( elements != null ) { 82 if( elements != null ) {
75 XmlElement element = new XmlElement(name,attributes,elements.toArray(new XmlElement[0])); 83 XmlElement element = new XmlElement(name,attributes);
84 element.setContent( elements.toArray(new XmlElement[0]) );
76 return parser.success(element); 85 return parser.success(element);
77 } 86 }
78 throw exception("bad element"); 87 throw exception("bad element");
79 } 88 }
80 89
175 184
176 private boolean matchSpace() { 185 private boolean matchSpace() {
177 return parser.anyOf(" \t\r\n"); 186 return parser.anyOf(" \t\r\n");
178 } 187 }
179 188
189 private static final Pattern dec = Pattern.compile("&#(\\d+);");
190
180 private static String decode(String s) { 191 private static String decode(String s) {
192 Matcher m = dec.matcher(s);
193 StringBuffer sb = new StringBuffer();
194 while( m.find() ) {
195 String d = m.group(1);
196 int i = Integer.parseInt(d);
197 d = String.valueOf((char)i);
198 m.appendReplacement(sb, d);
199 }
200 m.appendTail(sb);
201 s = sb.toString();
181 s = s.replace("&lt;","<"); 202 s = s.replace("&lt;","<");
182 s = s.replace("&gt;",">"); 203 s = s.replace("&gt;",">");
183 s = s.replace("&quot;","\""); 204 s = s.replace("&quot;","\"");
184 s = s.replace("&apos;","'"); 205 s = s.replace("&apos;","'");
185 s = s.replace("&amp;","&"); 206 s = s.replace("&amp;","&");