Mercurial Hosting > luan
diff src/goodjava/html/Html.java @ 1712:36c28be6d432
improve html and bbcode
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 14 Jul 2022 22:14:21 -0600 |
parents | |
children | 31a82b0d0a87 |
line wrap: on
line diff
// --- /dev/null Thu Jan 01 00:00:00 1970 +0000
// +++ b/src/goodjava/html/Html.java Thu Jul 14 22:14:21 2022 -0600
package goodjava.html;

import java.util.List;
import java.util.ArrayList;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.HashMap;
import java.util.Collections;
import java.util.Locale;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import goodjava.parser.Parser;


/**
 * Minimal HTML entity encoder/decoder and a tolerant HTML tokenizer.
 *
 * <p>{@link #parse(String)} splits markup into a flat list whose elements are
 * {@link String} (decoded text), {@link Tag}, {@link Container},
 * {@link Comment} or {@link CData}.  Anything that does not parse as one of
 * the markup forms is kept as text.
 *
 * <p>NOTE(review): the entity literals in this file ({@code &amp;amp;},
 * {@code &amp;lt;}, ...) were restored from a rendering that had turned them
 * into the characters they denote; confirm against the repository copy.
 */
public final class Html {

    /** An ampersand that begins something shaped like an entity, e.g. {@code &name;} or {@code &#123;}. */
    private static final Pattern entityPtn = Pattern.compile(
        "&(#?[0-9a-zA-Z]+;)"
    );

    /**
     * Escapes text for inclusion in HTML.
     *
     * <p>{@code <}, {@code >} and {@code "} are always escaped.  An ampersand
     * is escaped only when it starts an entity-like sequence; a bare
     * {@code &} is passed through unchanged.
     *
     * @param s the text to escape
     * @return the escaped text
     */
    public static String encode(String s) {
        // Deliberately not a blanket replace of every '&': only ampersands
        // that would otherwise be read as an entity are escaped.
        s = entityPtn.matcher(s).replaceAll("&amp;$1");
        s = s.replace("<","&lt;");
        s = s.replace(">","&gt;");
        s = s.replace("\"","&quot;");
        return s;
    }

    /** A decimal character reference (group 1) or one of the supported named entities (group 2). */
    private static final Pattern entityDecodePtn = Pattern.compile(
        "&(?:#(\\d+)|(nbsp|quot|gt|lt|amp));"
    );

    /** Replacement text for each named entity matched by {@link #entityDecodePtn}. */
    private static final Map<String,String> namedEntities = new HashMap<String,String>();
    static {
        namedEntities.put("nbsp"," ");
        namedEntities.put("quot","\"");
        namedEntities.put("gt",">");
        namedEntities.put("lt","<");
        namedEntities.put("amp","&");
    }

    /**
     * Decodes decimal character references and the named entities
     * {@code nbsp}, {@code quot}, {@code gt}, {@code lt} and {@code amp}.
     *
     * <p>Decoding is done in a single pass, so the output of one replacement
     * is never decoded again ({@code &#38;lt;} yields {@code &lt;}, not
     * {@code <}).  A numeric reference that is not a valid Unicode code point
     * is left in the text unchanged.
     *
     * @param s the text to decode
     * @return the decoded text
     */
    public static String decode(String s) {
        Matcher m = entityDecodePtn.matcher(s);
        if( !m.find() )
            return s;
        StringBuffer buf = new StringBuffer();
        do {
            // quoteReplacement: a decoded '$' or '\' must not be interpreted
            // by appendReplacement as a group reference or escape.
            m.appendReplacement( buf, Matcher.quoteReplacement(decodeEntity(m)) );
        } while( m.find() );
        m.appendTail(buf);
        return buf.toString();
    }

    /** Returns the text that the entity currently matched by {@code m} stands for. */
    private static String decodeEntity(Matcher m) {
        String digits = m.group(1);
        if( digits == null )
            return namedEntities.get(m.group(2));
        try {
            int codePoint = Integer.parseInt(digits);
            if( Character.isValidCodePoint(codePoint) )
                return new String(Character.toChars(codePoint));
        } catch(NumberFormatException ignored) {
            // too many digits to be a code point; fall through and keep the text
        }
        return m.group();
    }


    /** An HTML comment; {@link #text} is what lies between {@code <!--} and {@code -->}. */
    public static final class Comment {
        public final String text;

        private Comment(String text) {
            this.text = text;
        }
    }

    /** A CDATA section; {@link #text} is what lies between {@code <![CDATA[} and {@code ]]>}. */
    public static final class CData {
        public final String text;

        private CData(String text) {
            this.text = text;
        }
    }

    /**
     * A single start, end or empty tag.
     *
     * <p>{@link #name} is lower-cased and, for an end tag, includes the
     * leading {@code /}.  {@link #attributes} maps lower-cased attribute
     * names to the decoded {@link String} value, or to {@link Boolean#TRUE}
     * for an attribute written without a value.  {@link #isEmpty} is true
     * when the tag ends in {@code />}.  {@link #raw} is the tag's source
     * text.
     */
    public static final class Tag {
        public final String name;
        public final Map<String,Object> attributes;
        public final boolean isEmpty;
        public final String raw;

        private Tag(String name,Map<String,Object> attributes,boolean isEmpty,String raw) {
            this.name = name;
            this.attributes = attributes;
            this.isEmpty = isEmpty;
            this.raw = raw;
        }
    }

    /**
     * A start tag whose content is kept as raw, unparsed and undecoded text
     * up to the matching end tag.
     */
    public static final class Container {
        public final Tag tag;
        public final String text;

        private Container(Tag tag,String text) {
            this.tag = tag;
            this.text = text;
        }
    }

    /** Tags treated as containers by {@link #parse(String)}. */
    private static final Set<String> defaultContainerTags = new HashSet<String>();
    static {
        Collections.addAll( defaultContainerTags, "script", "style", "textarea" );
    }

    /**
     * Parses {@code text} treating {@code script}, {@code style} and
     * {@code textarea} as container tags.
     *
     * @param text the markup to parse
     * @return list of {@link String}, {@link Tag}, {@link Container},
     *         {@link Comment} and {@link CData} in document order
     */
    public static List parse(String text) {
        return parse(text,defaultContainerTags);
    }

    /**
     * Parses {@code text} into a flat list of elements.
     *
     * @param text the markup to parse
     * @param containerTags lower-case names of tags whose content is
     *        returned raw as a {@link Container}
     * @return list of {@link String}, {@link Tag}, {@link Container},
     *         {@link Comment} and {@link CData} in document order
     */
    public static List parse(String text,Set<String> containerTags) {
        return new Html(text,containerTags).parse();
    }

    private final Parser parser;
    private final Set<String> containerTags;

    private Html(String text,Set<String> containerTags) {
        this.parser = new Parser(text);
        this.containerTags = containerTags;
    }

    /** Tokenizes the whole input; unparseable characters accumulate as text. */
    private List parse() {
        List<Object> list = new ArrayList<Object>();
        StringBuilder sb = new StringBuilder();
        while( !parser.endOfInput() ) {
            if( parser.test('<') ) {
                Object el = parseElement();
                if( el != null ) {
                    add(list,sb);
                    list.add(el);
                    continue;
                }
            }
            sb.append( parser.currentChar() );
            parser.anyChar();
        }
        add(list,sb);
        return list;
    }

    /**
     * Tries, in order, a tag (upgraded to a container when applicable), a
     * comment and a CDATA section at the current position.
     *
     * @return the parsed element, or null if none of the forms matches
     */
    private Object parseElement() {
        Tag tag = parseTag();
        if( tag != null ) {
            if( containerTags.contains(tag.name) ) {
                Container container = parseContainer(tag);
                if( container != null )
                    return container;
            }
            // not a container, or no matching end tag was found
            return tag;
        }
        Comment comment = parseComment();
        if( comment != null )
            return comment;
        return parseCdata();
    }

    /** Flushes pending text, entity-decoded, into {@code list} and clears {@code sb}. */
    private static void add(List<Object> list,StringBuilder sb) {
        if( sb.length() > 0 ) {
            list.add(decode(sb.toString()));
            sb.setLength(0);
        }
    }

    /** Parses {@code <!-- ... -->}; returns null if absent or unterminated. */
    private Comment parseComment() {
        parser.begin();
        if( !parser.match("<!--") )
            return parser.failure(null);
        int start = parser.currentIndex();
        while( !parser.test("-->") ) {
            if( !parser.anyChar() )
                return parser.failure(null);
        }
        String text = parser.textFrom(start);
        // consume the terminator so it is not emitted as text afterwards
        parser.match("-->");
        return parser.success(new Comment(text));
    }

    /** Parses {@code <![CDATA[ ... ]]>}; returns null if absent or unterminated. */
    private CData parseCdata() {
        parser.begin();
        if( !parser.match("<![CDATA[") )
            return parser.failure(null);
        int start = parser.currentIndex();
        while( !parser.test("]]>") ) {
            if( !parser.anyChar() )
                return parser.failure(null);
        }
        String text = parser.textFrom(start);
        // consume the terminator so it is not emitted as text afterwards
        parser.match("]]>");
        return parser.success(new CData(text));
    }

    /**
     * Reads raw content following the start tag {@code tag} up to and
     * including its end tag.
     *
     * @return the container, or null if the end tag is never found
     */
    private Container parseContainer(Tag tag) {
        String endTagName = '/' + tag.name;
        int start = parser.begin();
        int end;
        while(true) {
            if( parser.test('<') ) {
                end = parser.currentIndex();
                if( matchEndTag(endTagName) )
                    break;
            }
            if( !parser.anyChar() )
                return parser.failure(null);
        }
        String text = parser.text.substring(start,end);
        return parser.success(new Container(tag,text));
    }

    /**
     * Consumes a tag at the current position only if its name is
     * {@code endTagName}.
     *
     * <p>A '<' that starts no tag (e.g. {@code a<b} in a script) or starts
     * some other tag is not consumed, so the caller advances one character
     * at a time and cannot step over the real end tag.
     * NOTE(review): relies on {@code Parser.failure} restoring the position
     * saved by {@code begin()}, as the other parse methods here do - confirm.
     */
    private boolean matchEndTag(String endTagName) {
        parser.begin();
        Tag candidate = parseTag();
        if( candidate != null && candidate.name.equals(endTagName) ) {
            parser.success(candidate);
            return true;
        }
        parser.failure(null);
        return false;
    }

    /**
     * Parses one tag: '<', an optional '/', a name, attributes, an optional
     * '/', and '>'.
     *
     * @return the tag, or null if the input here is not a well-formed tag
     */
    private Tag parseTag() {
        int tagStart = parser.begin();
        if( !parser.match('<') )
            return parser.failure(null);
        int start = parser.currentIndex();
        parser.match('/');  // the slash of an end tag becomes part of the name
        if( !matchNameChar() )
            return parser.failure(null);
        while( matchNameChar() );
        // Locale.ROOT keeps lower-casing independent of the default locale
        String name = parser.textFrom(start).toLowerCase(Locale.ROOT);
        Map<String,Object> attributes = new HashMap<String,Object>();
        String attrName;
        while( (attrName = parseAttrName()) != null ) {
            String attrValue = parseAttrValue();
            // an attribute without a value is recorded as TRUE
            attributes.put( attrName, attrValue!=null ? attrValue : Boolean.TRUE );
        }
        while( matchSpace() );
        boolean isEmpty = parser.match('/');
        if( !parser.match('>') )
            return parser.failure(null);
        String raw = parser.textFrom(tagStart);
        return parser.success(new Tag(name,attributes,isEmpty,raw));
    }

    /** Parses required whitespace followed by an attribute name; returns the lower-cased name or null. */
    private String parseAttrName() {
        parser.begin();
        if( !matchSpace() )
            return parser.failure(null);
        while( matchSpace() );
        int start = parser.currentIndex();
        if( !matchNameChar() )
            return parser.failure(null);
        while( matchNameChar() );
        String name = parser.textFrom(start).toLowerCase(Locale.ROOT);
        return parser.success(name);
    }

    /**
     * Parses {@code = value}, where the value is single-quoted,
     * double-quoted or unquoted.
     *
     * @return the entity-decoded value, or null if no value follows
     */
    private String parseAttrValue() {
        parser.begin();
        while( matchSpace() );
        if( !parser.match('=') )
            return parser.failure(null);
        while( matchSpace() );
        if( parser.anyOf("\"'") ) {
            char quote = parser.lastChar();
            int start = parser.currentIndex();
            while( !parser.test(quote) ) {
                if( !parser.anyChar() )
                    return parser.failure(null);
            }
            String value = parser.textFrom(start);
            parser.match(quote);
            return parser.success(decode(value));
        }
        int start = parser.currentIndex();
        if( !matchValueChar() )
            return parser.failure(null);
        while( matchValueChar() );
        String value = parser.textFrom(start);
        return parser.success(decode(value));
    }

    /** Matches one character allowed in a tag or attribute name. */
    private boolean matchNameChar() {
        return parser.inCharRange('a','z')
            || parser.inCharRange('A','Z')
            || parser.inCharRange('0','9')
            || parser.anyOf("_.-:")
        ;
    }

    /** Matches one character allowed in an unquoted attribute value. */
    private boolean matchValueChar() {
        return parser.noneOf(" \t\r\n\"'>/=");
    }

    /** Matches one whitespace character. */
    private boolean matchSpace() {
        return parser.anyOf(" \t\r\n");
    }

}