Mercurial Hosting > luan
changeset 1800:a045f30fa67d
html fix
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 08 Mar 2024 10:24:00 -0700 |
parents | 1578324d2aac |
children | 32e77b071e09 |
files | src/goodjava/html/Html.java src/luan/modules/parsers/Css.java src/luan/modules/parsers/Html.java |
diffstat | 3 files changed, 10 insertions(+), 277 deletions(-) [+] |
line wrap: on
line diff
--- a/src/goodjava/html/Html.java Thu Feb 22 08:34:55 2024 -0700 +++ b/src/goodjava/html/Html.java Fri Mar 08 10:24:00 2024 -0700 @@ -132,10 +132,14 @@ if( container != null ) el = container; } - if( el != null - || (el = parseComment()) != null - || (el = parseCdata()) != null - ) { + add(list,sb); + list.add(el); + continue; + } else { + Object el = parseComment(); + if( el == null ) + el = parseCdata(); + if( el != null ) { add(list,sb); list.add(el); continue; @@ -166,6 +170,7 @@ return parser.failure(null); } String text = parser.textFrom(start); + parser.match("-->"); Comment comment = new Comment(text); return parser.success(comment); } @@ -180,6 +185,7 @@ return parser.failure(null); } String text = parser.textFrom(start); + parser.match("]]>"); CData cdata = new CData(text); return parser.success(cdata); }
--- a/src/luan/modules/parsers/Css.java Thu Feb 22 08:34:55 2024 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,62 +0,0 @@ -package luan.modules.parsers; - -import luan.Luan; -import luan.LuanTable; -import luan.LuanException; -import goodjava.parser.Parser; - - -public final class Css { - - public static LuanTable style(String text) { - try { - return new Css(text).parseStyle(); - } catch(LuanException e) { - throw new RuntimeException(e); - } - } - - private final Parser parser; - - private Css(String text) { - this.parser = new Parser(text); - } - - private LuanTable parseStyle() throws LuanException { - LuanTable tbl = new LuanTable(); - while( matchSpace() ); - while( !parser.endOfInput() ) { - int start = parser.currentIndex(); - if( !matchPropertyChar() ) - return null; - while( matchPropertyChar() ); - String prop = parser.textFrom(start).toLowerCase(); - - while( matchSpace() ); - if( !parser.match(':') ) - return null; - - start = parser.currentIndex(); - while( !parser.endOfInput() && parser.noneOf(";") ); - String val = parser.textFrom(start).trim(); - - tbl.rawPut(prop,val); - parser.match(';'); - while( matchSpace() ); - } - return tbl; - } - - private boolean matchPropertyChar() { - return parser.inCharRange('a','z') - || parser.inCharRange('A','Z') - || parser.inCharRange('0','9') - || parser.anyOf("_-") - ; - } - - private boolean matchSpace() { - return parser.anyOf(" \t\r\n"); - } - -}
--- a/src/luan/modules/parsers/Html.java Thu Feb 22 08:34:55 2024 -0700 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,211 +0,0 @@ -package luan.modules.parsers; - -import java.util.List; -import java.util.ArrayList; -import java.util.Set; -import java.util.HashSet; -import luan.Luan; -import luan.LuanTable; -import luan.LuanException; -import goodjava.parser.Parser; - - -public final class Html { - - public static LuanTable toList(String text,LuanTable containerTagsTbl) { - try { - return new Html(text,containerTagsTbl).parse(); - } catch(LuanException e) { - throw new RuntimeException(e); - } - } - - private final Parser parser; - private final Set<String> containerTags = new HashSet<String>(); - - private Html(String text,LuanTable containerTagsTbl) { - this.parser = new Parser(text); - for( Object v : containerTagsTbl.asList() ) { - containerTags.add((String)v); - } - } - - private LuanTable parse() throws LuanException { - List list = new ArrayList(); - StringBuilder sb = new StringBuilder(); - while( !parser.endOfInput() ) { - if( parser.test('<') ) { - LuanTable tbl = parseTag(); - if( tbl != null ) { - String tagName = (String)tbl.rawGet("name"); - if( containerTags.contains(tagName) ) { - LuanTable container = parseContainer(tbl); - if( container != null ) - tbl = container; - } - if( tbl != null - || (tbl = parseComment()) != null - || (tbl = parseCdata()) != null - ) { - if( sb.length() > 0 ) { - list.add(sb.toString()); - sb.setLength(0); - } - list.add(tbl); - continue; - } - } - } - sb.append( parser.currentChar() ); - parser.anyChar(); - } - if( sb.length() > 0 ) - list.add(sb.toString()); - return new LuanTable(list); - } - - private LuanTable parseComment() throws LuanException { - parser.begin(); - if( !parser.match("<!--") ) - return parser.failure(null); - int start = parser.currentIndex(); - while( !parser.test("-->") ) { - if( !parser.anyChar() ) - return parser.failure(null); - } - String text = parser.textFrom(start); - LuanTable tbl = new LuanTable(); - tbl.rawPut("type","comment"); - tbl.rawPut("text",text); - return parser.success(tbl); - } - - private LuanTable parseCdata() throws LuanException { - parser.begin(); - if( !parser.match("<![CDATA[") ) - return parser.failure(null); - int start = parser.currentIndex(); - while( !parser.test("]]>") ) { - if( !parser.anyChar() ) - return parser.failure(null); - } - String text = parser.textFrom(start); - LuanTable tbl = new LuanTable(); - tbl.rawPut("type","cdata"); - tbl.rawPut("text",text); - return parser.success(tbl); - } - - private LuanTable parseContainer(LuanTable tag) throws LuanException { - String endTagName = '/' + (String)tag.rawGet("name"); - int start = parser.begin(); - int end; - while(true) { - if( parser.test('<') ) { - end = parser.currentIndex(); - LuanTable tag2 = parseTag(); - String s = (String)tag2.rawGet("name"); - if( s.equals(endTagName) ) - break; - } - if( !parser.anyChar() ) - return parser.failure(null); - } - String text = parser.text.substring(start,end); - LuanTable tbl = new LuanTable(); - tbl.rawPut("type","container"); - tbl.rawPut("tag",tag); - tbl.rawPut("text",text); - return parser.success(tbl); - } - - private LuanTable parseTag() throws LuanException { - LuanTable tbl = new LuanTable(); - tbl.rawPut("type","tag"); - int tagStart = parser.begin(); - if( !parser.match('<') ) - return parser.failure(null); - int start = parser.currentIndex(); - parser.match('/'); - if( !matchNameChar() ) - return parser.failure(null); - while( matchNameChar() ); - String name = parser.textFrom(start).toLowerCase(); - tbl.rawPut("name",name); - LuanTable attributes = new LuanTable(); - tbl.rawPut("attributes",attributes); - String attrName; - while( (attrName = parseAttrName()) != null ) { - String attrValue = parseAttrValue(); - attributes.rawPut( attrName, attrValue!=null ? attrValue : true ); - if( attrName.equals("style") && attrValue!=null ) { - LuanTable style = Css.style(attrValue); - if( style!=null ) - tbl.rawPut("style",style); - } - } - while( matchSpace() ); - boolean isEmpty = parser.match('/'); - tbl.rawPut("is_empty",isEmpty); - if( !parser.match('>') ) - return parser.failure(null); - String raw = parser.textFrom(tagStart); - tbl.rawPut("raw",raw); - return parser.success(tbl); - } - - private String parseAttrName() { - parser.begin(); - if( !matchSpace() ) - return parser.failure(null); - while( matchSpace() ); - int start = parser.currentIndex(); - if( !matchNameChar() ) - return parser.failure(null); - while( matchNameChar() ); - String name = parser.textFrom(start).toLowerCase(); - return parser.success(name); - } - - private String parseAttrValue() { - parser.begin(); - while( matchSpace() ); - if( !parser.match('=') ) - return parser.failure(null); - while( matchSpace() ); - if( parser.anyOf("\"'") ) { - char quote = parser.lastChar(); - int start = parser.currentIndex(); - while( !parser.test(quote) ) { - if( !parser.anyChar() ) - return parser.failure(null); - } - String value = parser.textFrom(start); - parser.match(quote); - return parser.success(value); - } - int start = parser.currentIndex(); - if( !matchValueChar() ) - return parser.failure(null); - while( matchValueChar() ); - String value = parser.textFrom(start); - return parser.success(value); - } - - private boolean matchNameChar() { - return parser.inCharRange('a','z') - || parser.inCharRange('A','Z') - || parser.inCharRange('0','9') - || parser.anyOf("_.-:") - ; - } - - private boolean matchValueChar() { - return parser.noneOf(" \t\r\n\"'>/="); - } - - private boolean matchSpace() { - return parser.anyOf(" \t\r\n"); - } - -}