Mercurial Hosting > luan
changeset 1712:36c28be6d432
improve html and bbcode
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 14 Jul 2022 22:14:21 -0600 |
parents | 05d14db623b6 |
children | 4d70e9543ef2 |
files | src/goodjava/bbcode/BBCode.java src/goodjava/html/Html.java src/luan/modules/Html.luan src/luan/modules/HtmlLuan.java src/luan/modules/Parsers.luan |
diffstat | 5 files changed, 359 insertions(+), 71 deletions(-) [+] |
line wrap: on
line diff
--- a/src/goodjava/bbcode/BBCode.java Wed Jul 13 21:50:41 2022 -0600 +++ b/src/goodjava/bbcode/BBCode.java Thu Jul 14 22:14:21 2022 -0600 @@ -11,6 +11,15 @@ public final class BBCode { + private static final Pattern tagPtn = Pattern.compile( + "\\[(/?[a-zA-Z]+(=[^ \\n\\t\\[\\]]*)?)\\]" + ); + + public static String encode(String s) { + return tagPtn.matcher(s).replaceAll("[brackets]$1[/brackets]"); + } + + public final class Element { public final String name; public final String param; @@ -216,7 +225,7 @@ parser.matchIgnoreCase("s"); if( !parser.matchIgnoreCase("://") ) return parser.failure(null); - while( parser.noneOf(" []'") ); + while( parser.noneOf(" \n\t[]") ); String url = parser.textFrom(start); while( parser.match(' ') ); return parser.success(url); @@ -233,7 +242,7 @@ end = "[/code]"; } else if( parser.match('=') ) { int start = parser.currentIndex(); - while( parser.noneOf("[]") ); + while( parser.noneOf("[]\n") ); param = parser.textFrom(start); if( !parser.match(']') ) return parser.failure(null); @@ -347,8 +356,8 @@ name = null; } else if( parser.match('=') ) { int start = parser.currentIndex(); - while( parser.noneOf("[]") ); - name = parser.textFrom(start).trim(); + while( parser.noneOf("[]\n") ); + name = parser.textFrom(start); if( !parser.match(']') ) return parser.failure(null); } else
// src/goodjava/html/Html.java (new file in this changeset; reconstructed from the diff)
package goodjava.html;

import java.util.List;
import java.util.ArrayList;
import java.util.Set;
import java.util.HashSet;
import java.util.Map;
import java.util.HashMap;
import java.util.Collections;
import java.util.Locale;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import goodjava.parser.Parser;


/**
 * HTML entity encoding/decoding plus a small, forgiving HTML tokenizer.
 *
 * <p>{@link #parse(String)} splits a document into a flat list whose elements are
 * {@link String} (decoded text), {@link Tag}, {@link Comment}, {@link CData} and
 * {@link Container} (a tag such as {@code <script>} together with its raw body).
 * Anything starting with {@code '<'} that cannot be parsed as one of these is kept as text.
 */
public final class Html {

	/** An ampersand that starts something that looks like an entity, e.g. {@code &lt;} or {@code &#60;}. */
	private static final Pattern entityPtn = Pattern.compile(
		"&(#?[0-9a-zA-Z]+;)"
	);

	/**
	 * Escapes text for inclusion in HTML.
	 *
	 * <p>Only ampersands that begin an entity-like sequence are escaped (so that the
	 * sequence survives a later {@link #decode}); a bare {@code '&'} is left untouched.
	 * {@code '<'}, {@code '>'} and {@code '"'} are always escaped.
	 *
	 * @param s the raw text, must not be null
	 * @return the escaped text
	 */
	public static String encode(String s) {
		s = entityPtn.matcher(s).replaceAll("&amp;$1");
		s = s.replace("<","&lt;");
		s = s.replace(">","&gt;");
		s = s.replace("\"","&quot;");
		return s;
	}

	/** Decimal numeric entities (group 1) and the supported named entities (group 2). */
	private static final Pattern decodePtn = Pattern.compile(
		"&(?:#(\\d+)|(nbsp|quot|gt|lt|amp));"
	);

	/**
	 * Replaces decimal numeric entities and the named entities {@code &nbsp;},
	 * {@code &quot;}, {@code &gt;}, {@code &lt;} and {@code &amp;} with the characters
	 * they stand for.  {@code &nbsp;} becomes a plain space.
	 *
	 * <p>Decoding is done in a single pass, so the output of one replacement is never
	 * decoded again ({@code &#38;lt;} yields {@code &lt;}, not {@code <}).  Numeric
	 * entities that are not valid Unicode code points are left unchanged.
	 *
	 * @param s the HTML text, must not be null
	 * @return the decoded text
	 */
	public static String decode(String s) {
		Matcher m = decodePtn.matcher(s);
		if( !m.find() )
			return s;
		StringBuffer buf = new StringBuffer();
		do {
			String digits = m.group(1);
			String decoded = digits != null ? decodeNumeric(digits,m.group()) : decodeNamed(m.group(2));
			// quoteReplacement: a decoded '$' or '\' must not be read as a group reference
			m.appendReplacement(buf,Matcher.quoteReplacement(decoded));
		} while( m.find() );
		m.appendTail(buf);
		return buf.toString();
	}

	/** Decodes the digits of a numeric entity, or returns {@code entity} unchanged when out of range. */
	private static String decodeNumeric(String digits,String entity) {
		int codePoint;
		try {
			codePoint = Integer.parseInt(digits);
		} catch(NumberFormatException e) {
			return entity;  // more digits than fit in an int; cannot be a code point
		}
		if( !Character.isValidCodePoint(codePoint) )
			return entity;
		// toChars produces a surrogate pair for code points above U+FFFF
		return new String(Character.toChars(codePoint));
	}

	/** Maps one of the names accepted by {@link #decodePtn} to its character. */
	private static String decodeNamed(String name) {
		if( name.equals("nbsp") )
			return " ";
		if( name.equals("quot") )
			return "\"";
		if( name.equals("gt") )
			return ">";
		if( name.equals("lt") )
			return "<";
		return "&";  // "amp", the only remaining alternative of decodePtn
	}


	/** An HTML comment; {@link #text} is the content between {@code <!--} and {@code -->}. */
	public static final class Comment {
		public final String text;

		private Comment(String text) {
			this.text = text;
		}
	}

	/** A CDATA section; {@link #text} is the content between {@code <![CDATA[} and {@code ]]>}. */
	public static final class CData {
		public final String text;

		private CData(String text) {
			this.text = text;
		}
	}

	/**
	 * A start or end tag.  {@link #name} is lower-cased and, for an end tag, begins
	 * with {@code '/'}.  Attribute values are decoded strings, or {@code Boolean.TRUE}
	 * for attributes written without a value.  {@link #raw} is the tag's source text.
	 */
	public static final class Tag {
		public final String name;
		public final Map<String,Object> attributes;
		public final boolean isEmpty;
		public final String raw;

		private Tag(String name,Map<String,Object> attributes,boolean isEmpty,String raw) {
			this.name = name;
			this.attributes = attributes;
			this.isEmpty = isEmpty;
			this.raw = raw;
		}
	}

	/** A container tag together with the raw, undecoded text up to its end tag. */
	public static final class Container {
		public final Tag tag;
		public final String text;

		private Container(Tag tag,String text) {
			this.tag = tag;
			this.text = text;
		}
	}

	/** Tags whose body is treated as raw text when no explicit set is given. */
	private static final Set<String> defaultContainerTags;
	static {
		Set<String> tags = new HashSet<String>();
		Collections.addAll( tags, "script", "style", "textarea" );
		defaultContainerTags = Collections.unmodifiableSet(tags);
	}

	/**
	 * Parses {@code text} using the default container tags
	 * ({@code script}, {@code style}, {@code textarea}).
	 *
	 * @param text the HTML source
	 * @return the list of text strings and parsed elements, in document order
	 */
	public static List<Object> parse(String text) {
		return parse(text,defaultContainerTags);
	}

	/**
	 * Parses {@code text}.
	 *
	 * @param text the HTML source
	 * @param containerTags lower-case names of tags whose body is returned as raw text
	 * @return the list of text strings and parsed elements, in document order
	 */
	public static List<Object> parse(String text,Set<String> containerTags) {
		return new Html(text,containerTags).parse();
	}

	private final Parser parser;
	private final Set<String> containerTags;

	private Html(String text,Set<String> containerTags) {
		this.parser = new Parser(text);
		this.containerTags = containerTags;
	}

	/** Tokenizes the whole input; unparseable {@code '<'} characters are kept as text. */
	private List<Object> parse() {
		List<Object> list = new ArrayList<Object>();
		StringBuilder sb = new StringBuilder();
		while( !parser.endOfInput() ) {
			if( parser.test('<') ) {
				Object el = parseElement();
				if( el != null ) {
					add(list,sb);
					list.add(el);
					continue;
				}
			}
			sb.append( parser.currentChar() );
			parser.anyChar();
		}
		add(list,sb);
		return list;
	}

	/**
	 * Tries, in order, a tag (promoted to a container when applicable), a comment and
	 * a CDATA section at the current position.  Returns null if none of them matches.
	 */
	private Object parseElement() {
		Tag tag = parseTag();
		if( tag != null ) {
			if( containerTags.contains(tag.name) ) {
				Container container = parseContainer(tag);
				if( container != null )
					return container;
			}
			return tag;  // plain tag, or a container tag whose end tag is missing
		}
		Object el = parseComment();
		if( el == null )
			el = parseCdata();
		return el;
	}

	/** Flushes pending text (decoded) into {@code list} and clears the buffer. */
	private static void add(List<Object> list,StringBuilder sb) {
		if( sb.length() > 0 ) {
			list.add(decode(sb.toString()));
			sb.setLength(0);
		}
	}

	private Comment parseComment() {
		parser.begin();
		if( !parser.match("<!--") )
			return parser.failure(null);
		int start = parser.currentIndex();
		while( !parser.test("-->") ) {
			if( !parser.anyChar() )
				return parser.failure(null);
		}
		String text = parser.textFrom(start);
		parser.match("-->");  // consume the terminator so it is not emitted as text
		Comment comment = new Comment(text);
		return parser.success(comment);
	}

	private CData parseCdata() {
		parser.begin();
		if( !parser.match("<![CDATA[") )
			return parser.failure(null);
		int start = parser.currentIndex();
		while( !parser.test("]]>") ) {
			if( !parser.anyChar() )
				return parser.failure(null);
		}
		String text = parser.textFrom(start);
		parser.match("]]>");  // consume the terminator so it is not emitted as text
		CData cdata = new CData(text);
		return parser.success(cdata);
	}

	/**
	 * Called right after the start tag of a container; scans forward to the matching
	 * end tag and returns the raw text in between.  Returns null if the end tag is
	 * never found (the caller then falls back to the plain tag).
	 */
	private Container parseContainer(Tag tag) {
		String endTagName = '/' + tag.name;
		int start = parser.begin();
		int end;
		while(true) {
			if( parser.test('<') ) {
				end = parser.currentIndex();
				Tag tag2 = parseTag();
				if( tag2 != null ) {
					if( tag2.name.equals(endTagName) )
						break;
					// some other tag inside the body: already consumed, so do not
					// skip an extra character (it could be the '<' of the end tag)
					continue;
				}
				// not a tag (e.g. "a < b" in a script): fall through and skip the '<'
			}
			if( !parser.anyChar() )
				return parser.failure(null);
		}
		String text = parser.text.substring(start,end);
		Container container = new Container(tag,text);
		return parser.success(container);
	}

	private Tag parseTag() {
		int tagStart = parser.begin();
		if( !parser.match('<') )
			return parser.failure(null);
		int start = parser.currentIndex();
		parser.match('/');  // optional: the slash of an end tag becomes part of the name
		if( !matchNameChar() )
			return parser.failure(null);
		while( matchNameChar() );
		// Locale.ROOT: lower-casing must not depend on the default locale (e.g. Turkish 'I')
		String name = parser.textFrom(start).toLowerCase(Locale.ROOT);
		Map<String,Object> attributes = new HashMap<String,Object>();
		String attrName;
		while( (attrName = parseAttrName()) != null ) {
			String attrValue = parseAttrValue();
			if( attrValue != null ) {
				attributes.put( attrName, attrValue );
			} else {
				attributes.put( attrName, Boolean.TRUE );  // attribute without a value
			}
		}
		while( matchSpace() );
		boolean isEmpty = parser.match('/');
		if( !parser.match('>') )
			return parser.failure(null);
		String raw = parser.textFrom(tagStart);
		Tag tag = new Tag(name,attributes,isEmpty,raw);
		return parser.success(tag);
	}

	/** Matches mandatory whitespace followed by an attribute name; returns it lower-cased. */
	private String parseAttrName() {
		parser.begin();
		if( !matchSpace() )
			return parser.failure(null);
		while( matchSpace() );
		int start = parser.currentIndex();
		if( !matchNameChar() )
			return parser.failure(null);
		while( matchNameChar() );
		String name = parser.textFrom(start).toLowerCase(Locale.ROOT);
		return parser.success(name);
	}

	/** Matches {@code = value} (quoted or unquoted) and returns the decoded value, or null. */
	private String parseAttrValue() {
		parser.begin();
		while( matchSpace() );
		if( !parser.match('=') )
			return parser.failure(null);
		while( matchSpace() );
		if( parser.anyOf("\"'") ) {
			char quote = parser.lastChar();
			int start = parser.currentIndex();
			while( !parser.test(quote) ) {
				if( !parser.anyChar() )
					return parser.failure(null);
			}
			String value = parser.textFrom(start);
			parser.match(quote);
			value = decode(value);
			return parser.success(value);
		}
		int start = parser.currentIndex();
		if( !matchValueChar() )
			return parser.failure(null);
		while( matchValueChar() );
		String value = parser.textFrom(start);
		value = decode(value);
		return parser.success(value);
	}

	private boolean matchNameChar() {
		return parser.inCharRange('a','z')
			|| parser.inCharRange('A','Z')
			|| parser.inCharRange('0','9')
			|| parser.anyOf("_.-:")
		;
	}

	private boolean matchValueChar() {
		return parser.noneOf(" \t\r\n\"'>/=");
	}

	private boolean matchSpace() {
		return parser.anyOf(" \t\r\n");
	}

}
--- a/src/luan/modules/Html.luan Wed Jul 13 21:50:41 2022 -0600 +++ b/src/luan/modules/Html.luan Thu Jul 14 22:14:21 2022 -0600 @@ -1,6 +1,5 @@ require "java" local HtmlLuan = require "java:luan.modules.HtmlLuan" -local HtmlParser = require "java:luan.modules.parsers.Html" local URLEncoder = require "java:java.net.URLEncoder" local URLDecoder = require "java:java.net.URLDecoder" local JsonToString = require "java:goodjava.json.JsonToString" @@ -16,16 +15,14 @@ local Html = {} Html.decode = HtmlLuan.decode -Html.encode = HtmlLuan.encode +local encode = HtmlLuan.encode +Html.encode = encode Html.javascript_encode = JsonToString.javascriptEncode -local quote = HtmlLuan.quote -Html.quote = quote - function Html.parse(text,container_tags) text or error "text required" container_tags = container_tags or {"script","style","textarea"} - return HtmlParser.toList(text,container_tags) + return HtmlLuan.parse(text,container_tags) end function Html.url_encode(s) @@ -41,7 +38,7 @@ for name, value in pairs(tag.attributes) do %> <%= name %><% if value ~= true then - %>=<%= quote(value) %><% + %>="<%= encode(value) %>"<% end end if tag.is_empty then @@ -55,7 +52,7 @@ for _, obj in ipairs(list) do local tp = type(obj) if tp == "string" then - %><%= obj %><% + %><%= encode(obj) %><% elseif tp == "table" then tp = obj.type if tp == nil then
--- a/src/luan/modules/HtmlLuan.java Wed Jul 13 21:50:41 2022 -0600 +++ b/src/luan/modules/HtmlLuan.java Thu Jul 14 22:14:21 2022 -0600 @@ -2,13 +2,9 @@ import java.util.List; import java.util.ArrayList; -import java.util.Arrays; import java.util.Set; import java.util.HashSet; -import java.util.Map; -import java.util.regex.Pattern; -import java.util.regex.Matcher; -import luan.Luan; +import goodjava.html.Html; import luan.LuanTable; import luan.LuanException; @@ -17,67 +13,63 @@ public static String encode(String s) throws LuanException { Utils.checkNotNull(s); - final char[] a = s.toCharArray(); - StringBuilder buf = new StringBuilder(); - for( char c : a ) { - switch(c) { - case '&': - buf.append("&"); - break; - case '<': - buf.append("<"); - break; - case '>': - buf.append(">"); - break; - case '"': - buf.append("""); - break; - default: - buf.append(c); - } - } - return buf.toString(); + return Html.encode(s); } - private static final Pattern entityPtn = Pattern.compile( - "&#(\\d+);" - ); - public static String decode(String s) { - StringBuffer buf = new StringBuffer(); - Matcher m = entityPtn.matcher(s); - while( m.find() ) { - String entity = new String(new char[]{(char)Integer.parseInt(m.group(1))}); - m.appendReplacement(buf,entity); - } - m.appendTail(buf); - s = buf.toString(); - s = s.replace(" "," "); - s = s.replace(""","\""); - s = s.replace(">",">"); - s = s.replace("<","<"); - s = s.replace("&","&"); - return s; + return Html.decode(s); } - public static String quote(String s) { - StringBuilder buf = new StringBuilder(); - buf.append('"'); - int i = 0; - while(true) { - int i2 = s.indexOf('"',i); - if( i2 == -1 ) { - buf.append(s.substring(i)); - break; - } else { - buf.append(s.substring(i,i2)); - buf.append("""); - i = i2 + 1; + private static LuanTable tag(Html.Tag tag) throws LuanException { + LuanTable tbl = new LuanTable(); + tbl.rawPut("type","tag"); + tbl.rawPut("name",tag.name); + tbl.rawPut("attributes",new LuanTable(tag.attributes)); + 
tbl.rawPut("is_empty",tag.isEmpty); + tbl.rawPut("raw",tag.raw); + return tbl; + } + public static LuanTable parse(String text,LuanTable containerTagsTbl) { + try { + Set<String> containerTags = new HashSet(); + for( Object s : containerTagsTbl.asList() ) { + containerTags.add((String)s); } + List list = Html.parse(text,containerTags); + List rtn = new ArrayList(); + for( Object el : list ) { + if( el instanceof String ) { + rtn.add(el); + } else if( el instanceof Html.Tag ) { + Html.Tag tag = (Html.Tag)el; + rtn.add(tag(tag)); + } else if( el instanceof Html.Comment ) { + Html.Comment comment = (Html.Comment)el; + LuanTable tbl = new LuanTable(); + tbl.rawPut("type","comment"); + tbl.rawPut("text",comment.text); + rtn.add(tbl); + } else if( el instanceof Html.CData ) { + Html.CData cdata = (Html.CData)el; + LuanTable tbl = new LuanTable(); + tbl.rawPut("type","cdata"); + tbl.rawPut("text",cdata.text); + rtn.add(tbl); + } else if( el instanceof Html.Container ) { + Html.Container container = (Html.Container)el; + LuanTable tbl = new LuanTable(); + tbl.rawPut("type","container"); + tbl.rawPut("tag",tag(container.tag)); + tbl.rawPut("text",container.text); + rtn.add(tbl); + } else { + throw new RuntimeException("invalid el "+el); + } + } + return new LuanTable(rtn); + } catch(LuanException e) { + throw new RuntimeException(e); } - buf.append('"'); - return buf.toString(); } }
--- a/src/luan/modules/Parsers.luan Wed Jul 13 21:50:41 2022 -0600 +++ b/src/luan/modules/Parsers.luan Thu Jul 14 22:14:21 2022 -0600 @@ -1,4 +1,5 @@ require "java" +local BBCode = require "java:goodjava.bbcode.BBCode" local BBCodeLuan = require "java:luan.modules.parsers.BBCodeLuan" local Csv = require "java:luan.modules.parsers.Csv" local Theme = require "java:luan.modules.parsers.Theme" @@ -9,6 +10,7 @@ local Parsers = {} +Parsers.bbcode_encode = BBCode.encode Parsers.bbcode_parse = BBCodeLuan.parse Parsers.csv_to_list = Csv.toList Parsers.json_string = BasicLuan.json_string