Mercurial Hosting > luan
annotate src/goodjava/xml/XmlParser.java @ 1732:8a6e48371c66
content type woff
| author | Franklin Schmidt <fschmidt@gmail.com> | 
|---|---|
| date | Mon, 12 Sep 2022 19:13:13 -0400 | 
| parents | 0246add03d11 | 
| children | a8c685a894b4 | 
| rev | line source | 
|---|---|
| 1466 | 1 package goodjava.xml; | 
| 2 | |
| 3 import java.util.Map; | |
| 4 import java.util.AbstractMap; | |
| 5 import java.util.LinkedHashMap; | |
| 6 import java.util.List; | |
| 7 import java.util.ArrayList; | |
| 8 import goodjava.parser.Parser; | |
| 9 import goodjava.parser.ParseException; | |
| 10 | |
| 11 | |
| 12 public final class XmlParser { | |
| 13 | |
| 14 public static XmlElement parse(String text) throws ParseException { | |
| 15 return new XmlParser(text).parse(); | |
| 16 } | |
| 17 | |
| 18 private final Parser parser; | |
| 19 | |
| 20 private XmlParser(String text) { | |
| 21 this.parser = new Parser(text); | |
| 22 } | |
| 23 | |
| 24 private ParseException exception(String msg) { | |
| 25 return new ParseException(parser,msg); | |
| 26 } | |
| 27 | |
| 28 private XmlElement parse() throws ParseException { | |
| 29 spaces(); | |
| 30 prolog(); | |
| 31 spaces(); | |
| 32 XmlElement element = element(); | |
| 33 spaces(); | |
| 34 if( !parser.endOfInput() ) | |
| 35 throw exception("unexpected text"); | |
| 
1723
 
0246add03d11
dont allow empty xml
 
Franklin Schmidt <fschmidt@gmail.com> 
parents: 
1468 
diff
changeset
 | 
36 if( element==null ) | 
| 
 
0246add03d11
dont allow empty xml
 
Franklin Schmidt <fschmidt@gmail.com> 
parents: 
1468 
diff
changeset
 | 
37 throw exception("empty document not allowed"); | 
| 1466 | 38 return element; | 
| 39 } | |
| 40 | |
| 41 private void prolog() throws ParseException { | |
| 42 if( !parser.match("<?xml") ) | |
| 43 return; | |
| 44 while( attribute() != null ); | |
| 45 spaces(); | |
| 46 required("?>"); | |
| 47 } | |
| 48 | |
| 49 private XmlElement element() throws ParseException { | |
| 50 parser.begin(); | |
| 51 if( !parser.match('<') || parser.test('/') ) | |
| 52 return parser.failure(null); | |
| 53 //spaces(); | |
| 54 String name = name(); | |
| 55 if( name==null ) | |
| 56 throw exception("element name not found"); | |
| 57 Map<String,String> attributes = new LinkedHashMap<String,String>(); | |
| 58 Map.Entry<String,String> attribute; | |
| 59 while( (attribute=attribute()) != null ) { | |
| 60 attributes.put(attribute.getKey(),attribute.getValue()); | |
| 61 } | |
| 62 spaces(); | |
| 1468 | 63 if( parser.match("/>") ) { | 
| 64 XmlElement element = new XmlElement(name,attributes); | |
| 65 return parser.success(element); | |
| 66 } | |
| 1466 | 67 required(">"); | 
| 68 String s = string(name); | |
| 69 if( s != null ) { | |
| 70 XmlElement element = new XmlElement(name,attributes,s); | |
| 71 return parser.success(element); | |
| 72 } | |
| 73 List<XmlElement> elements = elements(name); | |
| 74 if( elements != null ) { | |
| 75 XmlElement element = new XmlElement(name,attributes,elements.toArray(new XmlElement[0])); | |
| 76 return parser.success(element); | |
| 77 } | |
| 78 throw exception("bad element"); | |
| 79 } | |
| 80 | |
| 81 private String string(String name) throws ParseException { | |
| 82 int start = parser.begin(); | |
| 83 while( parser.noneOf("<") ); | |
| 84 String s = parser.textFrom(start); | |
| 1468 | 85 s = decode(s); | 
| 1466 | 86 if( !endTag(name) ) | 
| 87 return parser.failure(null); | |
| 88 return parser.success(s); | |
| 89 } | |
| 90 | |
| 91 private List<XmlElement> elements(String name) throws ParseException { | |
| 92 parser.begin(); | |
| 93 List<XmlElement> elements = new ArrayList<XmlElement>(); | |
| 94 spaces(); | |
| 95 XmlElement element; | |
| 96 while( (element=element()) != null ) { | |
| 97 elements.add(element); | |
| 98 spaces(); | |
| 99 } | |
| 100 if( !endTag(name) ) | |
| 101 return parser.failure(null); | |
| 102 return parser.success(elements); | |
| 103 } | |
| 104 | |
| 105 private boolean endTag(String name) throws ParseException { | |
| 106 parser.begin(); | |
| 107 if( !parser.match("</") || !parser.match(name) ) | |
| 108 return parser.failure(); | |
| 109 spaces(); | |
| 110 if( !parser.match('>') ) | |
| 111 return parser.failure(); | |
| 112 return parser.success(); | |
| 113 } | |
| 114 | |
| 115 private Map.Entry<String,String> attribute() throws ParseException { | |
| 116 parser.begin(); | |
| 117 if( !matchSpace() ) | |
| 118 return parser.failure(null); | |
| 119 spaces(); | |
| 120 String name = name(); | |
| 121 if( name==null ) | |
| 122 return parser.failure(null); | |
| 123 spaces(); | |
| 124 required("="); | |
| 125 spaces(); | |
| 126 if( !parser.anyOf("\"'") ) | |
| 127 throw exception("quote expected"); | |
| 128 char quote = parser.lastChar(); | |
| 129 int start = parser.currentIndex(); | |
| 130 while( !parser.test(quote) ) { | |
| 131 if( !parser.anyChar() ) | |
| 132 throw exception("unclosed attribute value"); | |
| 133 } | |
| 134 String value = parser.textFrom(start); | |
| 1468 | 135 value = decode(value); | 
| 1466 | 136 parser.match(quote); | 
| 137 Map.Entry<String,String> attribute = new AbstractMap.SimpleImmutableEntry<String,String>(name,value); | |
| 138 return parser.success(attribute); | |
| 139 } | |
| 140 | |
| 141 private String name() { | |
| 142 int start = parser.currentIndex(); | |
| 143 if( !matchNameChar() ) | |
| 144 return null; | |
| 145 while( matchNameChar() ); | |
| 146 return parser.textFrom(start); | |
| 147 } | |
| 148 | |
| 149 private boolean matchNameChar() { | |
| 150 return parser.inCharRange('a','z') | |
| 151 || parser.inCharRange('A','Z') | |
| 152 || parser.inCharRange('0','9') | |
| 153 || parser.anyOf("_.-:") | |
| 154 ; | |
| 155 } | |
| 156 | |
| 157 private void required(String s) throws ParseException { | |
| 158 if( !parser.match(s) ) | |
| 159 exception("'"+s+"' expected"); | |
| 160 } | |
| 161 | |
| 162 private void spaces() throws ParseException { | |
| 163 while( matchSpace() || matchComment() ); | |
| 164 } | |
| 165 | |
| 166 private boolean matchComment() throws ParseException { | |
| 167 if( !parser.match("<!--") ) | |
| 168 return false; | |
| 169 while( !parser.match("-->") ) { | |
| 170 if( !parser.anyChar() ) | |
| 171 throw exception("unclosed comment"); | |
| 172 } | |
| 173 return true; | |
| 174 } | |
| 175 | |
| 176 private boolean matchSpace() { | |
| 177 return parser.anyOf(" \t\r\n"); | |
| 178 } | |
| 179 | |
| 1468 | 180 private static String decode(String s) { | 
| 181 s = s.replace("<","<"); | |
| 182 s = s.replace(">",">"); | |
| 183 s = s.replace(""","\""); | |
| 184 s = s.replace("'","'"); | |
| 185 s = s.replace("&","&"); | |
| 186 return s; | |
| 187 } | |
| 188 | |
| 1466 | 189 } | 
