Mercurial Hosting > luan
view src/luan/modules/parsers/Html.java @ 1181:51d1342e25ad
luanhost password handling
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Tue, 20 Feb 2018 19:50:30 -0700 |
parents | 88b5b81cad4a |
children | 9fa8b8389578 |
line wrap: on
line source
package luan.modules.parsers;

import java.util.List;
import java.util.ArrayList;
import java.util.Locale;
import java.util.Set;
import java.util.HashSet;
import luan.LuanTable;
import luan.lib.parser.Parser;
import luan.lib.parser.ParseException;


/**
 * Lenient HTML tokenizer that splits a document into a flat list of text runs
 * and structured items.
 *
 * <p>The result list contains, in document order:
 * <ul>
 * <li>{@code String} - literal text between recognized items;</li>
 * <li>a table with {@code type="tag"} plus {@code name}, {@code attributes}
 *     and {@code is_empty};</li>
 * <li>a table with {@code type="container"} plus {@code tag} (the opening tag
 *     table) and {@code text} (the raw, unparsed content up to the end tag);</li>
 * <li>a table with {@code type="comment"} or {@code type="cdata"} plus
 *     {@code text} (the content without its delimiters).</li>
 * </ul>
 *
 * <p>Anything that cannot be recognized as one of the above is kept as text, so
 * malformed markup does not make parsing fail.
 *
 * <p>NOTE(review): the helper methods rely on {@code Parser.begin()},
 * {@code success(...)} and {@code failure(...)} behaving as a backtracking
 * stack (failure rewinds to the position at {@code begin()}, success keeps the
 * current position) and on {@code test(...)} not consuming input. That class
 * is not part of this file - confirm against {@code luan.lib.parser.Parser}.
 */
public final class Html {

    /**
     * Tokenizes {@code text} into a list table.
     *
     * @param text the HTML source to tokenize
     * @param containerTagsTbl list table of tag names whose content must be
     *        captured raw instead of being tokenized; every element is cast to
     *        {@code String}. Names are compared against lowercased tag names.
     * @return a table wrapping the list of text runs and item tables
     * @throws ParseException declared for callers; not raised directly by the
     *         code in this class
     */
    public static LuanTable toList(String text,LuanTable containerTagsTbl) throws ParseException {
        return new Html(text,containerTagsTbl).parse();
    }

    private final Parser parser;
    private final Set<String> containerTags = new HashSet<String>();

    private Html(String text,LuanTable containerTagsTbl) {
        this.parser = new Parser(text);
        for( Object v : containerTagsTbl.asList() ) {
            containerTags.add((String)v);
        }
    }

    /** Main loop: emits items as they are recognized and buffers everything else as text. */
    private LuanTable parse() throws ParseException {
        List<Object> list = new ArrayList<Object>();
        StringBuilder sb = new StringBuilder();
        while( !parser.endOfInput() ) {
            if( parser.test('<') ) {
                LuanTable tbl = parseTag();
                if( tbl != null ) {
                    String tagName = (String)tbl.rawGet("name");
                    if( containerTags.contains(tagName) ) {
                        // If no end tag is found, fall back to the plain opening tag.
                        LuanTable container = parseContainer(tbl);
                        if( container != null )
                            tbl = container;
                    }
                }
                // "<!--" and "<![CDATA[" never parse as tags, so these fallbacks
                // must run exactly when parseTag() failed.
                if( tbl == null )
                    tbl = parseComment();
                if( tbl == null )
                    tbl = parseCdata();
                if( tbl != null ) {
                    // Flush pending text first so document order is preserved.
                    if( sb.length() > 0 ) {
                        list.add(sb.toString());
                        sb.setLength(0);
                    }
                    list.add(tbl);
                    continue;
                }
            }
            // Not a recognizable item: keep the character as literal text.
            sb.append( parser.currentChar() );
            parser.anyChar();
        }
        if( sb.length() > 0 )
            list.add(sb.toString());
        return new LuanTable(list);
    }

    /**
     * Parses {@code <!-- ... -->}.
     *
     * @return a {@code type="comment"} table, or {@code null} if the input does
     *         not start a comment or the comment is unterminated
     */
    private LuanTable parseComment() {
        parser.begin();
        if( !parser.match("<!--") )
            return parser.failure(null);
        int start = parser.currentIndex();
        while( !parser.test("-->") ) {
            if( !parser.anyChar() )
                return parser.failure(null);
        }
        String text = parser.textFrom(start);
        // Consume the terminator so it is not emitted as text afterwards.
        parser.match("-->");
        LuanTable tbl = new LuanTable();
        tbl.rawPut("type","comment");
        tbl.rawPut("text",text);
        return parser.success(tbl);
    }

    /**
     * Parses {@code <![CDATA[ ... ]]>}.
     *
     * @return a {@code type="cdata"} table, or {@code null} if the input does
     *         not start a CDATA section or the section is unterminated
     */
    private LuanTable parseCdata() {
        parser.begin();
        if( !parser.match("<![CDATA[") )
            return parser.failure(null);
        int start = parser.currentIndex();
        while( !parser.test("]]>") ) {
            if( !parser.anyChar() )
                return parser.failure(null);
        }
        String text = parser.textFrom(start);
        // Consume the terminator so it is not emitted as text afterwards.
        parser.match("]]>");
        LuanTable tbl = new LuanTable();
        tbl.rawPut("type","cdata");
        tbl.rawPut("text",text);
        return parser.success(tbl);
    }

    /**
     * Captures the raw content following the opening tag {@code tag} up to the
     * matching end tag, and consumes that end tag.
     *
     * @param tag the already parsed opening tag table
     * @return a {@code type="container"} table, or {@code null} if no matching
     *         end tag exists before the end of input
     */
    private LuanTable parseContainer(LuanTable tag) {
        String endTagName = '/' + (String)tag.rawGet("name");
        int start = parser.begin();
        int end;
        while(true) {
            if( parser.test('<') ) {
                int tagStart = parser.currentIndex();
                LuanTable tag2 = parseTag();
                // A bare '<' (e.g. "a < b" inside a script) is not a tag; treat it as content.
                if( tag2 != null ) {
                    if( endTagName.equals(tag2.rawGet("name")) ) {
                        end = tagStart;
                        break;
                    }
                    // The inner tag is already consumed; do not skip an extra
                    // character, or an end tag directly after it would be missed.
                    continue;
                }
            }
            if( !parser.anyChar() )
                return parser.failure(null);
        }
        String text = parser.text.substring(start,end);
        LuanTable tbl = new LuanTable();
        tbl.rawPut("type","container");
        tbl.rawPut("tag",tag);
        tbl.rawPut("text",text);
        return parser.success(tbl);
    }

    /**
     * Parses a start, end or self-closing tag.
     *
     * <p>The stored {@code name} is lowercased and, for an end tag, includes the
     * leading {@code '/'}. Attributes without a value are stored as {@code true}.
     *
     * @return a {@code type="tag"} table, or {@code null} if the input at the
     *         current position is not a well-formed tag
     */
    private LuanTable parseTag() {
        parser.begin();
        if( !parser.match('<') )
            return parser.failure(null);
        int start = parser.currentIndex();
        parser.match('/');  // optional: marks an end tag and becomes part of the name
        if( !matchNameChar() )
            return parser.failure(null);
        while( matchNameChar() );
        // Locale.ROOT keeps the result independent of the JVM's default locale.
        String name = parser.textFrom(start).toLowerCase(Locale.ROOT);
        LuanTable attributes = new LuanTable();
        String attrName;
        while( (attrName = parseAttrName()) != null ) {
            String attrValue = parseAttrValue();
            attributes.rawPut( attrName, attrValue!=null ? attrValue : true );
        }
        while( matchSpace() );
        boolean isEmpty = parser.match('/');
        if( !parser.match('>') )
            return parser.failure(null);
        LuanTable tbl = new LuanTable();
        tbl.rawPut("type","tag");
        tbl.rawPut("name",name);
        tbl.rawPut("attributes",attributes);
        tbl.rawPut("is_empty",isEmpty);
        return parser.success(tbl);
    }

    /**
     * Parses an attribute name, which must be preceded by at least one
     * whitespace character.
     *
     * @return the name as written (case preserved), or {@code null} if none follows
     */
    private String parseAttrName() {
        parser.begin();
        if( !matchSpace() )
            return parser.failure(null);
        while( matchSpace() );
        int start = parser.currentIndex();
        if( !matchNameChar() )
            return parser.failure(null);
        while( matchNameChar() );
        String name = parser.textFrom(start);
        return parser.success(name);
    }

    /**
     * Parses {@code = value}, where the value is either quoted with {@code "} or
     * {@code '} (quotes are stripped) or an unquoted run of value characters.
     *
     * @return the value, or {@code null} if no {@code '='} follows, the quoted
     *         value is unterminated, or the unquoted value is empty
     */
    private String parseAttrValue() {
        parser.begin();
        while( matchSpace() );
        if( !parser.match('=') )
            return parser.failure(null);
        while( matchSpace() );
        if( parser.anyOf("\"'") ) {
            char quote = parser.lastChar();
            int start = parser.currentIndex();
            while( !parser.test(quote) ) {
                if( !parser.anyChar() )
                    return parser.failure(null);
            }
            String value = parser.textFrom(start);
            parser.match(quote);
            return parser.success(value);
        }
        int start = parser.currentIndex();
        if( !matchValueChar() )
            return parser.failure(null);
        while( matchValueChar() );
        String value = parser.textFrom(start);
        return parser.success(value);
    }

    /** Matches one character allowed in a tag or attribute name: ASCII letter, digit, or one of {@code _ . - :}. */
    private boolean matchNameChar() {
        return parser.inCharRange('a','z')
            || parser.inCharRange('A','Z')
            || parser.inCharRange('0','9')
            || parser.anyOf("_.-:")
        ;
    }

    /** Matches one character allowed in an unquoted attribute value. */
    private boolean matchValueChar() {
        return parser.noneOf(" \t\r\n\"'>/=");
    }

    /** Matches one whitespace character: space, tab, CR or LF. */
    private boolean matchSpace() {
        return parser.anyOf(" \t\r\n");
    }

}