diff src/goodjava/xml/XmlParser.java @ 1790:a8c685a894b4

start xml work
author Franklin Schmidt <fschmidt@gmail.com>
date Mon, 25 Dec 2023 23:07:59 -0700
parents 0246add03d11
children 3dabc11da660
line wrap: on
line diff
--- a/src/goodjava/xml/XmlParser.java	Sun Dec 24 16:52:35 2023 -0700
+++ b/src/goodjava/xml/XmlParser.java	Mon Dec 25 23:07:59 2023 -0700
@@ -5,13 +5,15 @@
 import java.util.LinkedHashMap;
 import java.util.List;
 import java.util.ArrayList;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 import goodjava.parser.Parser;
 import goodjava.parser.ParseException;
 
 
 public final class XmlParser {
 
-	public static XmlElement parse(String text) throws ParseException {
+	public static Xml parse(String text) throws ParseException {  // entry point: runs a fresh XmlParser over text and returns the resulting Xml
 		return new XmlParser(text).parse();
 	}
 
@@ -25,9 +27,10 @@
 		return new ParseException(parser,msg);
 	}
 
-	private XmlElement parse() throws ParseException {
+	private Xml parse() throws ParseException {
+		Xml xml = new Xml();
 		spaces();
-		prolog();
+		xml.declaration = declaration();
 		spaces();
 		XmlElement element = element();
 		spaces();
@@ -35,15 +38,19 @@
 			throw exception("unexpected text");
 		if( element==null )
 			throw exception("empty document not allowed");
-		return element;
+		xml.setElement(element);
+		return xml;
 	}
 
-	private void prolog() throws ParseException {
+	private String declaration() throws ParseException {  // parses an optional "<?xml ... ?>" declaration and returns its captured text
+		int start = parser.begin();  // value from begin() is kept so textFrom(start) can capture the declaration below
 		if( !parser.match("<?xml") )
-			return;
+			return parser.failure(null);  // no "<?xml" at this position: report failure with a null result
 		while( attribute() != null );
 		spaces();
 		required("?>");
+		String s = parser.textFrom(start);  // presumably the full "<?xml ... ?>" span consumed so far; confirm against Parser.textFrom
+		return parser.success(s);
 	}
 
 	private XmlElement element() throws ParseException {
@@ -67,12 +74,14 @@
 		required(">");
 		String s = string(name);
 		if( s != null ) {
-			XmlElement element = new XmlElement(name,attributes,s);
+			XmlElement element = new XmlElement(name,attributes);
+			element.setContent(s);
 			return parser.success(element);
 		}
 		List<XmlElement> elements = elements(name);
 		if( elements != null ) {
-			XmlElement element = new XmlElement(name,attributes,elements.toArray(new XmlElement[0]));
+			XmlElement element = new XmlElement(name,attributes);
+			element.setContent( elements.toArray(new XmlElement[0]) );
 			return parser.success(element);
 		}
 		throw exception("bad element");
@@ -177,7 +186,19 @@
 		return parser.anyOf(" \t\r\n");
 	}
 
+	private static final Pattern dec = Pattern.compile("&#(\\d+);");  // decimal numeric character reference such as &#38; (group 1 = the digits); the hex form &#x26; is not matched
+
 	private static String decode(String s) {
+		Matcher m = dec.matcher(s);
+		StringBuffer sb = new StringBuffer();
+		while( m.find() ) {
+			String d = m.group(1);
+			int i = Integer.parseInt(d);
+			d = String.valueOf((char)i);
+			m.appendReplacement(sb, d);
+		}
+		m.appendTail(sb);
+		s = sb.toString();
 		s = s.replace("&lt;","<");
 		s = s.replace("&gt;",">");
 		s = s.replace("&quot;","\"");