Mercurial Hosting > luan
changeset 318:4fe6c9fed486
add html processing
git-svn-id: https://luan-java.googlecode.com/svn/trunk@319 21e917c8-12df-6dd8-5cb6-c86387c605b9
author | fschmidt@gmail.com <fschmidt@gmail.com@21e917c8-12df-6dd8-5cb6-c86387c605b9> |
---|---|
date | Thu, 05 Feb 2015 03:28:23 +0000 |
parents | c730ff1e4bae |
children | f6db49c294a7 |
files | core/src/luan/Luan.java core/src/luan/modules/Html.luan core/src/luan/modules/HtmlLuan.java |
diffstat | 3 files changed, 348 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/core/src/luan/Luan.java Tue Dec 30 15:33:46 2014 +0000 +++ b/core/src/luan/Luan.java Thu Feb 05 03:28:23 2015 +0000 @@ -1,5 +1,6 @@ package luan; +import java.util.List; import luan.modules.BasicLuan; @@ -152,6 +153,10 @@ return new LuanTableImpl(); } + public static LuanTable newTable(List<Object> list) { + return new LuanTableImpl(list); + } + public static LuanTable newPropertyTable() { return new LuanPropertyTable(); }
--- a/core/src/luan/modules/Html.luan Tue Dec 30 15:33:46 2014 +0000 +++ b/core/src/luan/modules/Html.luan Thu Feb 05 03:28:23 2015 +0000 @@ -2,3 +2,34 @@ import "java:luan.modules.HtmlLuan" encode = HtmlLuan.encode +parse = HtmlLuan.parse +_ENV.to_string = HtmlLuan.to_string + + + +-- extras + +function process_url_tags(html) + for i, v in ipairs(html) do + if type(v) == "table" and v.type == "tag" then + if v.name == "url" then + local url = v.attributes.url or html[i+1] + v.name = "a" + v.attributes.url = nil + v.attributes.href = url + elseif v.name == "/url" then + v.name = "/a" + end + end + end +end + +function add_nofollow(html) + for i, v in ipairs(html) do + if type(v) == "table" and v.type == "tag" then + if v.name == "a" then + v.attributes.rel = "nofollow" + end + end + end +end
--- a/core/src/luan/modules/HtmlLuan.java Tue Dec 30 15:33:46 2014 +0000 +++ b/core/src/luan/modules/HtmlLuan.java Thu Feb 05 03:28:23 2015 +0000 @@ -1,15 +1,21 @@ package luan.modules; +import java.util.List; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Set; +import java.util.HashSet; +import java.util.Map; import luan.Luan; import luan.LuanState; import luan.LuanTable; -import luan.LuanFunction; import luan.LuanException; public final class HtmlLuan { - public static String encode(String s) { + public static String encode(LuanState luan,String s) throws LuanException { + Utils.checkNotNull(luan,s); char[] a = s.toCharArray(); StringBuilder buf = new StringBuilder(); for( int i=0; i<a.length; i++ ) { @@ -34,4 +40,308 @@ return buf.toString(); } +/* +// public static final String TEXTAREA = "textarea"; + public static final String SCRIPT = "script"; + public static final String STYLE = "style"; + + public static Set<String> containerTags = new HashSet<String>(Arrays.asList(SCRIPT,STYLE)); +*/ + public static LuanTable parse(LuanState luan,String text,LuanTable containerTagsTbl) + throws LuanException + { + Utils.checkNotNull(luan,text); + Utils.checkNotNull(luan,containerTagsTbl); + Set<String> containerTags = new HashSet<String>(); + for( Object v : containerTagsTbl.asList() ) { + containerTags.add((String)v); + } + List<Object> html = new ArrayList<Object>(); + int len = text.length(); + int i = 0; +outer: + while( i < len ) { + int i2 = text.indexOf('<',i); + while( i2 != -1 && i2+1 < len ) { + char c = text.charAt(i2+1); + if( Character.isLetter(c) || c=='/' || c=='!' ) + break; + i2 = text.indexOf('<',i2+1); + } + if( i2 == -1 ) { + html.add( text.substring(i) ); + break; + } + if( i < i2 ) + html.add( text.substring(i,i2) ); + if( text.startsWith("<!--",i2) ) { + i = text.indexOf("-->",i2+4); + if( i == -1 ) { + html.add( text.substring(i2) ); + break; + } + html.add( comment( text.substring(i2+4,i) ) ); + i += 3; + } else if( text.startsWith("<![CDATA[",i2) ) { + i = text.indexOf("]]>",i2+9); + if( i == -1 ) { + html.add( text.substring(i2) ); + break; + } + html.add( cdata( text.substring(i2+9,i) ) ); + i += 3; + } else { + i = text.indexOf('>',i2); + if( i == -1 ) { + html.add( text.substring(i2) ); + break; + } + String tagText = text.substring(i2+1,i); + try { + LuanTable tag = parseTag(tagText); + String tagName = (String)tag.get("name"); + if( containerTags.contains(tagName) ) { + i2 = i; + String endTagName = '/' + tagName; + while(true) { + i2 = text.indexOf('<',i2+1); + if( i2 == -1 ) + break; + int i3 = text.indexOf('>',i2); + if( i3 == -1 ) + break; + int j = i2+1; + while( j<i3 && !Character.isWhitespace(text.charAt(j)) ) j++; + String s = text.substring(i2+1,j); + if( s.equalsIgnoreCase(endTagName) ) { + String text2 = text.substring(i+1,i2); + LuanTable textContainer = textContainer(tag,text2); + html.add( textContainer ); + i = i3 + 1; + continue outer; + } + } +// logger.warn("unclosed "+tagName); + } + i += 1; + html.add( tag ); + } catch(BadTag e) { +// logger.debug("bad tag",e); + i += 1; +// if( !removeBadTags ) { + html.add( "<" ); + html.add( encode(luan,tagText) ); + html.add( ">" ); +// } + } + } + } + return Luan.newTable(html); + } + + static LuanTable comment(String text) { + LuanTable tbl = Luan.newTable(); + tbl.put("type","comment"); + tbl.put("text",text); + return tbl; + } + + static LuanTable cdata(String text) { + LuanTable tbl = Luan.newTable(); + tbl.put("type","cdata"); + tbl.put("text",text); + return tbl; + } + + static LuanTable textContainer(LuanTable tag,String text) { + LuanTable tbl = Luan.newTable(); + tbl.put("type","container"); + tbl.put("tag",tag); + tbl.put("text",text); + return tbl; + } + + + + static final class BadTag extends RuntimeException { + private BadTag(String msg) { + super(msg); + } + } + + static LuanTable parseTag(String text) { + LuanTable tbl = Luan.newTable(); + tbl.put("type","tag"); + if( text.endsWith("/") ) { + text = text.substring(0,text.length()-1); + tbl.put("is_empty",true); + } else { + tbl.put("is_empty",false); + } + int len = text.length(); + int i = 0; + int i2 = i; + if( i2<len && text.charAt(i2)=='/' ) + i2++; + while( i2<len ) { + char c = text.charAt(i2); + if( Character.isWhitespace(c) ) + break; + if( !( Character.isLetterOrDigit(c) || c=='_' || c=='.' || c=='-' || c==':' ) ) + throw new BadTag("invalid tag name for <"+text+">"); + i2++; + } + String name = text.substring(i,i2).toLowerCase(); + tbl.put("name",name); + LuanTable attributes = Luan.newTable(); + tbl.put("attributes",attributes); + i = i2; + while( i<len && Character.isWhitespace(text.charAt(i)) ) i++; + while( i<len ) { + i2 = toEndName(text,i,len); + String attrName = unquote(text.substring(i,i2).toLowerCase()); + if( attributes.get(attrName) != null ) + throw new BadTag("duplicate attribute: "+attrName); + i = i2; + while( i<len && Character.isWhitespace(text.charAt(i)) ) i++; + if( i<len && text.charAt(i) == '=' ) { + i++; + i2 = i; + while( i<len && Character.isWhitespace(text.charAt(i)) ) i++; + i2 = toEndValue(text,i,len); + String attrValue = text.substring(i,i2); + if( attrValue.indexOf('<') != -1 || attrValue.indexOf('>') != -1 ) + throw new BadTag("invalid attribute value: "+attrValue); + attrValue = unquote(attrValue); + attributes.put(attrName,attrValue); + i = i2; + while( i<len && Character.isWhitespace(text.charAt(i)) ) i++; + } else { + attributes.put(attrName,true); + } + } + return tbl; + } + + private static int toEndName(String text,int i,int len) { + if( i==len ) + return i; + char c = text.charAt(i); + switch(c) { + case '"': + case '\'': + i = text.indexOf(c,i+1); + return i==-1 ? len : i+1; + default: + if( Character.isWhitespace(c) ) { + throw new RuntimeException("text="+text+" i="+i); + } + do { + i++; + } while( i<len && (c=text.charAt(i))!='=' && !Character.isWhitespace(c) ); + return i; + } + } + + private static int toEndValue(String text,int i,int len) { + if( i==len ) + return i; + char c = text.charAt(i); + switch(c) { + case '"': + case '\'': + i = text.indexOf(c,i+1); + return i==-1 ? len : i+1; + default: + if( Character.isWhitespace(c) ) { + throw new RuntimeException("text="+text+" i="+i); + } + do { + i++; + } while( i<len && !Character.isWhitespace(text.charAt(i)) ); + return i; + } + } + + public static String unquote(String s) { + if( s==null || s.length()<=1 ) + return s; + char c = s.charAt(0); + return (c=='"' || c=='\'') && s.charAt(s.length()-1)==c + ? s.substring(1,s.length()-1) : s; + } + + + + + public static String to_string(LuanState luan,LuanTable tbl) throws LuanException { + List<Object> html = tbl.asList(); + StringBuilder buf = new StringBuilder(); + for( Object o : html ) { + if( o instanceof String ) { + buf.append( o ); + } else if( o instanceof LuanTable ) { + LuanTable t = (LuanTable)o; + String type = (String)t.get("type"); + if( type==null ) + throw luan.exception( "no type in element of table for 'Html.to_string'" ); + if( type.equals("comment") ) { + buf.append( "<!--" ).append( t.get("text") ).append( "-->" ); + } else if( type.equals("cdata") ) { + buf.append( "<![CDATA[" ).append( t.get("text") ).append( "]]" ); + } else if( type.equals("tag") ) { + buf.append( tagToString(t) ); + } else if( type.equals("container") ) { + LuanTable tag = (LuanTable)t.get("tag"); + buf.append( tagToString(tag) ); + buf.append( t.get("text") ); + buf.append( "</" ).append( tag.get("name") ).append( ">" ); + } else { + throw luan.exception( "invalid element type for 'Html.to_string'" ); + } + } else + throw luan.exception( "invalid value ("+Luan.type(o)+") in table for 'Html.to_string'" ); + } + return buf.toString(); + } + + private static String tagToString(LuanTable tbl) { + StringBuilder buf = new StringBuilder(); + buf.append('<'); + buf.append(tbl.get("name")); + LuanTable attributes = (LuanTable)tbl.get("attributes"); + for( Map.Entry<Object,Object> attr : attributes ) { + buf.append( ' ' ); + buf.append( attr.getKey() ); + Object val = attr.getValue(); + if( !val.equals(Boolean.TRUE) ) { + buf.append( '=' ); + buf.append( quote((String)val) ); + } + } + if( tbl.get("is_empty").equals(Boolean.TRUE) ) + buf.append('/'); + buf.append('>'); + return buf.toString(); + } + + public static String quote(String s) { + StringBuilder buf = new StringBuilder(); + buf.append('"'); + int i = 0; + while(true) { + int i2 = s.indexOf('"',i); + if( i2 == -1 ) { + buf.append(s.substring(i)); + break; + } else { + buf.append(s.substring(i,i2)); + buf.append("""); + i = i2 + 1; + } + } + buf.append('"'); + return buf.toString(); + } + }