Mercurial Hosting > luan
view src/goodjava/xml/XmlParser.java @ 1822:aeaf2b65c005
better parse tables
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Tue, 09 Jul 2024 12:46:27 -0600 |
parents | 9daae8cf4bef |
children |
line wrap: on
line source
package goodjava.xml;

import java.util.Map;
import java.util.AbstractMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.ArrayList;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import goodjava.parser.Parser;
import goodjava.parser.ParseException;


/**
 * Small recursive-descent parser that turns XML text into an {@link Xml} document.
 *
 * <p>Handled here: an optional {@code <?xml ... ?>} declaration, nested elements with
 * quoted attributes, self-closing elements, comments, decimal character references
 * ({@code &#NNN;}) and the five predefined named entities.  An element's content is
 * either plain text or a list of child elements; mixed content is rejected with
 * "bad element".</p>
 *
 * <p>The methods bracket each attempt with {@code parser.begin()} and
 * {@code parser.success(..)} / {@code parser.failure(..)}.
 * NOTE(review): presumably {@code failure} rewinds to the position saved by
 * {@code begin} - confirm against {@code goodjava.parser.Parser}.</p>
 */
public final class XmlParser {

	/**
	 * Parses a complete XML document.
	 *
	 * @param text the XML source text
	 * @return the parsed document
	 * @throws ParseException if the text is not accepted by this parser
	 */
	public static Xml parse(String text) throws ParseException {
		return new XmlParser(text).parse();
	}

	private final Parser parser;

	private XmlParser(String text) {
		this.parser = new Parser(text);
	}

	/** Builds (does not throw) a ParseException bound to this parser's state. */
	private ParseException exception(String msg) {
		return new ParseException(parser,msg);
	}

	/**
	 * Parses: [spaces] [declaration] [spaces] element [spaces] end-of-input.
	 *
	 * @throws ParseException "unexpected text" if input remains after the root element
	 *         (this check runs first), or "empty document not allowed" if there is no
	 *         root element
	 */
	private Xml parse() throws ParseException {
		Xml xml = new Xml();
		spaces();
		xml.declaration = declaration();
		spaces();
		XmlElement element = element();
		spaces();
		if( !parser.endOfInput() )
			throw exception("unexpected text");
		if( element==null )
			throw exception("empty document not allowed");
		xml.setElement(element);
		return xml;
	}

	/**
	 * Parses an optional {@code <?xml ... ?>} declaration.
	 *
	 * @return the full declaration text, or null if the input does not start with
	 *         {@code <?xml} at the current position
	 * @throws ParseException if the declaration is not closed by {@code ?>}
	 */
	private String declaration() throws ParseException {
		int start = parser.begin();
		if( !parser.match("<?xml") )
			return parser.failure(null);
		// The pseudo-attributes are consumed but not stored; the raw text is kept instead.
		while( attribute() != null ) {
			// keep consuming attributes
		}
		spaces();
		required("?>");
		String s = parser.textFrom(start);
		return parser.success(s);
	}

	/**
	 * Parses one element: start tag, attributes, then either {@code />}, text content
	 * followed by the end tag, or child elements followed by the end tag.
	 *
	 * @return the element, or null if the input here is not a start tag
	 *         (no {@code <}, or {@code </})
	 * @throws ParseException on a missing name, missing {@code >}, or content that is
	 *         neither pure text nor a list of elements
	 */
	private XmlElement element() throws ParseException {
		parser.begin();
		if( !parser.match('<') || parser.test('/') )
			return parser.failure(null);
		// The name must follow '<' immediately; no whitespace is skipped here.
		String name = name();
		if( name==null )
			throw exception("element name not found");
		// LinkedHashMap keeps attributes in document order.
		Map<String,String> attributes = new LinkedHashMap<String,String>();
		Map.Entry<String,String> attribute;
		while( (attribute=attribute()) != null ) {
			attributes.put(attribute.getKey(),attribute.getValue());
		}
		spaces();
		if( parser.match("/>") ) {
			XmlElement element = new XmlElement(name,attributes);
			return parser.success(element);
		}
		required(">");
		// Try text content first, then fall back to child elements.
		String s = string(name);
		if( s != null ) {
			XmlElement element = new XmlElement(name,attributes);
			element.setContent(s);
			return parser.success(element);
		}
		List<XmlElement> elements = elements(name);
		if( elements != null ) {
			XmlElement element = new XmlElement(name,attributes);
			element.setContent( elements.toArray(new XmlElement[0]) );
			return parser.success(element);
		}
		throw exception("bad element");
	}

	/**
	 * Tries to read text content up to the next {@code <}, which must begin the end
	 * tag for {@code name}.
	 *
	 * @return the entity-decoded text, or null if the end tag does not follow
	 */
	private String string(String name) throws ParseException {
		int start = parser.begin();
		while( parser.noneOf("<") ) {
			// consume everything up to the next '<'
		}
		String s = parser.textFrom(start);
		s = decode(s);
		if( !endTag(name) )
			return parser.failure(null);
		return parser.success(s);
	}

	/**
	 * Tries to read zero or more child elements (separated by spaces/comments)
	 * followed by the end tag for {@code name}.
	 *
	 * @return the child elements, or null if the end tag does not follow
	 */
	private List<XmlElement> elements(String name) throws ParseException {
		parser.begin();
		List<XmlElement> elements = new ArrayList<XmlElement>();
		spaces();
		XmlElement element;
		while( (element=element()) != null ) {
			elements.add(element);
			spaces();
		}
		if( !endTag(name) )
			return parser.failure(null);
		return parser.success(elements);
	}

	/** Matches {@code </name [spaces] >}; returns whether it matched. */
	private boolean endTag(String name) throws ParseException {
		parser.begin();
		if( !parser.match("</") || !parser.match(name) )
			return parser.failure();
		spaces();
		if( !parser.match('>') )
			return parser.failure();
		return parser.success();
	}

	/**
	 * Parses one attribute: at least one whitespace character, a name, {@code =},
	 * and a value in single or double quotes.
	 *
	 * @return the name and entity-decoded value, or null if no attribute starts here
	 * @throws ParseException if {@code =} or the opening quote is missing, or the
	 *         value is not closed
	 */
	private Map.Entry<String,String> attribute() throws ParseException {
		parser.begin();
		if( !matchSpace() )
			return parser.failure(null);
		spaces();
		String name = name();
		if( name==null )
			return parser.failure(null);
		spaces();
		required("=");
		spaces();
		if( !parser.anyOf("\"'") )
			throw exception("quote expected");
		// The value ends at the same quote character that opened it.
		char quote = parser.lastChar();
		int start = parser.currentIndex();
		while( !parser.test(quote) ) {
			if( !parser.anyChar() )
				throw exception("unclosed attribute value");
		}
		String value = parser.textFrom(start);
		value = decode(value);
		parser.match(quote);
		Map.Entry<String,String> attribute = new AbstractMap.SimpleImmutableEntry<String,String>(name,value);
		return parser.success(attribute);
	}

	/** Reads a run of one or more name characters; returns null if there is none. */
	private String name() {
		int start = parser.currentIndex();
		if( !matchNameChar() )
			return null;
		while( matchNameChar() ) {
			// consume the rest of the name
		}
		return parser.textFrom(start);
	}

	/** Matches one of: ASCII letter, ASCII digit, '_', '.', '-', ':'. */
	private boolean matchNameChar() {
		return parser.inCharRange('a','z')
			|| parser.inCharRange('A','Z')
			|| parser.inCharRange('0','9')
			|| parser.anyOf("_.-:")
		;
	}

	/**
	 * Consumes the literal {@code s} or fails the parse.
	 *
	 * @throws ParseException if {@code s} is not at the current position
	 */
	private void required(String s) throws ParseException {
		if( !parser.match(s) )
			// Must be thrown: merely constructing the exception silently accepts bad input.
			throw exception("'"+s+"' expected");
	}

	/** Skips any mix of whitespace characters and comments. */
	private void spaces() throws ParseException {
		while( matchSpace() || matchComment() ) {
			// keep skipping
		}
	}

	/**
	 * Matches a whole {@code <!-- ... -->} comment.
	 *
	 * @return false if no comment starts here
	 * @throws ParseException if the comment is never closed
	 */
	private boolean matchComment() throws ParseException {
		if( !parser.match("<!--") )
			return false;
		while( !parser.match("-->") ) {
			if( !parser.anyChar() )
				throw exception("unclosed comment");
		}
		return true;
	}

	/** Matches a single space, tab, carriage return or newline. */
	private boolean matchSpace() {
		return parser.anyOf(" \t\r\n");
	}

	/** Decimal character reference, e.g. {@code &#65;}. */
	private static final Pattern dec = Pattern.compile("&#(\\d+);");

	/**
	 * Replaces decimal character references and the five predefined XML entities
	 * with the characters they stand for.
	 *
	 * <p>NOTE(review): numeric references are expanded before named entities, so a
	 * sequence like {@code &#38;lt;} ends up as {@code <}; and the {@code (char)} cast
	 * truncates code points above U+FFFF.  Both match the pre-existing behavior.</p>
	 *
	 * @param s raw text or attribute value
	 * @return the decoded text
	 */
	private static String decode(String s) {
		Matcher m = dec.matcher(s);
		StringBuffer sb = new StringBuffer();
		while( m.find() ) {
			int i = Integer.parseInt(m.group(1));
			String d = String.valueOf((char)i);
			// Quote the replacement: a decoded '$' or '\' would otherwise be
			// interpreted as a group reference / escape by appendReplacement.
			m.appendReplacement(sb, Matcher.quoteReplacement(d));
		}
		m.appendTail(sb);
		s = sb.toString();
		s = s.replace("&lt;","<");
		s = s.replace("&gt;",">");
		s = s.replace("&quot;","\"");
		s = s.replace("&apos;","'");
		// "&amp;" goes last so that "&amp;lt;" decodes to "&lt;" rather than "<".
		s = s.replace("&amp;","&");
		return s;
	}
}