Mercurial Hosting > luan
changeset 585:bb3818249dfb
add Parsers
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Fri, 14 Aug 2015 06:35:20 -0600 |
parents | 0742ac78fa69 |
children | a140be489a72 |
files | core/src/luan/modules/Parsers.luan core/src/luan/modules/parsers/BBCode.java core/src/luan/modules/parsers/Csv.java core/src/luan/modules/parsers/ParseException.java core/src/luan/modules/parsers/Parser.java |
diffstat | 5 files changed, 577 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
-- core/src/luan/modules/Parsers.luan
--
-- Luan module that re-exports the static entry points of the Java parser
-- classes in luan.modules.parsers under Luan-style names.

-- NOTE(review): presumably enables "java:" requires for this file -- confirm.
java()
local BBCode = require "java:luan.modules.parsers.BBCode"
local Csv = require "java:luan.modules.parsers.Csv"

-- Table handed back to whoever requires this module.
local Parsers = {}

-- BBCode conversion (BBCode.toHtml / BBCode.toText).
Parsers.bbcode_to_html = BBCode.toHtml
Parsers.bbcode_to_text = BBCode.toText

-- CSV line parsing (Csv.toList).
Parsers.csv_to_list = Csv.toList

return Parsers
diff -r 0742ac78fa69 -r bb3818249dfb core/src/luan/modules/parsers/BBCode.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core/src/luan/modules/parsers/BBCode.java Fri Aug 14 06:35:20 2015 -0600 @@ -0,0 +1,298 @@ +package luan.modules.parsers; + + +public final class BBCode { + + public static String toHtml(String bbcode) { + return new BBCode(bbcode,true).parse(); + } + + public static String toText(String bbcode) { + return new BBCode(bbcode,false).parse(); + } + + private final Parser parser; + private final boolean toHtml; + + private BBCode(String text,boolean toHtml) { + this.parser = new Parser(text); + this.toHtml = toHtml; + } + + private String parse() { + StringBuilder sb = new StringBuilder(); + while( !parser.endOfInput() ) { + String block = parseBlock(); + if( block != null ) + sb.append(block); + else { + sb.append( parser.currentChar() ); + parser.anyChar(); + } + } + return sb.toString(); + } + + private String parseWellFormed() { + StringBuilder sb = new StringBuilder(); + while( !parser.endOfInput() ) { + String block = parseBlock(); + if( block != null ) { + sb.append(block); + continue; + } + if( couldBeTag() ) + break; + sb.append( parser.currentChar() ); + parser.anyChar(); + } + return sb.toString(); + } + + private boolean couldBeTag() { + if( parser.currentChar() != '[' ) + return false; + return parser.testIgnoreCase("[b]") + || parser.testIgnoreCase("[/b]") + || parser.testIgnoreCase("[i]") + || parser.testIgnoreCase("[/i]") + || parser.testIgnoreCase("[u]") + || parser.testIgnoreCase("[/u]") + || parser.testIgnoreCase("[url]") + || parser.testIgnoreCase("[url=") + || parser.testIgnoreCase("[/url]") + || parser.testIgnoreCase("[code]") + || parser.testIgnoreCase("[/code]") + || parser.testIgnoreCase("[img]") + || parser.testIgnoreCase("[/img]") + || parser.testIgnoreCase("[color=") + || parser.testIgnoreCase("[/color]") + || parser.testIgnoreCase("[size=") + || parser.testIgnoreCase("[/size]") + || 
parser.testIgnoreCase("[youtube]") + || parser.testIgnoreCase("[/youtube]") + || parser.testIgnoreCase("[quote]") + || parser.testIgnoreCase("[quote=") + || parser.testIgnoreCase("[/quote]") + ; + } + + private String parseBlock() { + if( parser.currentChar() != '[' ) + return null; + String s; + s = parseB(); if(s!=null) return s; + s = parseI(); if(s!=null) return s; + s = parseU(); if(s!=null) return s; + s = parseUrl1(); if(s!=null) return s; + s = parseUrl2(); if(s!=null) return s; + s = parseCode(); if(s!=null) return s; + s = parseImg(); if(s!=null) return s; + s = parseColor(); if(s!=null) return s; + s = parseSize(); if(s!=null) return s; + s = parseYouTube(); if(s!=null) return s; + s = parseQuote1(); if(s!=null) return s; + s = parseQuote2(); if(s!=null) return s; + return null; + } + + private String parseB() { + parser.begin(); + if( !parser.matchIgnoreCase("[b]") ) + return parser.failure(null); + String content = parseWellFormed(); + if( !parser.matchIgnoreCase("[/b]") ) + return parser.failure(null); + String rtn = toHtml ? "<b>"+content+"</b>" : content; + return parser.success(rtn); + } + + private String parseI() { + parser.begin(); + if( !parser.matchIgnoreCase("[i]") ) + return parser.failure(null); + String content = parseWellFormed(); + if( !parser.matchIgnoreCase("[/i]") ) + return parser.failure(null); + String rtn = toHtml ? "<i>"+content+"</i>" : content; + return parser.success(rtn); + } + + private String parseU() { + parser.begin(); + if( !parser.matchIgnoreCase("[u]") ) + return parser.failure(null); + String content = parseWellFormed(); + if( !parser.matchIgnoreCase("[/u]") ) + return parser.failure(null); + String rtn = toHtml ? 
"<u>"+content+"</u>" : content; + return parser.success(rtn); + } + + private String parseUrl1() { + parser.begin(); + if( !parser.matchIgnoreCase("[url]") ) + return parser.failure(null); + String url = parseRealUrl(); + if( !parser.matchIgnoreCase("[/url]") ) + return parser.failure(null); + String rtn = toHtml ? "<a href='"+url+"'>"+url+"</u>" : url; + return parser.success(rtn); + } + + private String parseUrl2() { + parser.begin(); + if( !parser.matchIgnoreCase("[url=") ) + return parser.failure(null); + String url = parseRealUrl(); + if( !parser.match(']') ) + return parser.failure(null); + String content = parseWellFormed(); + if( !parser.matchIgnoreCase("[/url]") ) + return parser.failure(null); + String rtn = toHtml ? "<a href='"+url+"'>"+content+"</u>" : content; + return parser.success(rtn); + } + + private String parseRealUrl() { + parser.begin(); + while( parser.match(' ') ); + int start = parser.currentIndex(); + if( !parser.matchIgnoreCase("http") ) + return parser.failure(null); + parser.matchIgnoreCase("s"); + if( !parser.matchIgnoreCase("://") ) + return parser.failure(null); + while( parser.noneOf(" []'") ); + String url = parser.textFrom(start); + while( parser.match(' ') ); + return parser.success(url); + } + + private String parseCode() { + parser.begin(); + if( !parser.matchIgnoreCase("[code]") ) + return parser.failure(null); + int start = parser.currentIndex(); + while( !parser.testIgnoreCase("[/code]") ) { + if( !parser.anyChar() ) + return parser.failure(null); + } + String content = parser.textFrom(start); + if( !parser.matchIgnoreCase("[/code]") ) throw new RuntimeException(); + String rtn = toHtml ? "<code>"+content+"</code>" : content; + return parser.success(rtn); + } + + private String parseImg() { + parser.begin(); + if( !parser.matchIgnoreCase("[img]") ) + return parser.failure(null); + String url = parseRealUrl(); + if( !parser.matchIgnoreCase("[/img]") ) + return parser.failure(null); + String rtn = toHtml ? 
"<img src='"+url+"'>" : ""; + return parser.success(rtn); + } + + private String parseColor() { + parser.begin(); + if( !parser.matchIgnoreCase("[color=") ) + return parser.failure(null); + int start = parser.currentIndex(); + parser.match('#'); + while( parser.inCharRange('0','9') + || parser.inCharRange('a','z') + || parser.inCharRange('A','Z') + ); + String color = parser.textFrom(start); + if( !parser.match(']') ) + return parser.failure(null); + String content = parseWellFormed(); + if( !parser.matchIgnoreCase("[/color]") ) + return parser.failure(null); + String rtn = toHtml ? "<span style='color: "+color+"'>"+content+"</span>" : content; + return parser.success(rtn); + } + + private String parseSize() { + parser.begin(); + if( !parser.matchIgnoreCase("[size=") ) + return parser.failure(null); + int start = parser.currentIndex(); + while( parser.match('.') || parser.inCharRange('0','9') ); + String size = parser.textFrom(start); + if( !parser.match(']') ) + return parser.failure(null); + String content = parseWellFormed(); + if( !parser.matchIgnoreCase("[/size]") ) + return parser.failure(null); + String rtn = toHtml ? "<span style='font-size: "+size+"em'>"+content+"</span>" : content; + return parser.success(rtn); + } + + private String parseYouTube() { + parser.begin(); + if( !parser.matchIgnoreCase("[youtube]") ) + return parser.failure(null); + int start = parser.currentIndex(); + while( parser.inCharRange('0','9') + || parser.inCharRange('a','z') + || parser.inCharRange('A','Z') + || parser.match('-') + || parser.match('_') + ); + String id = parser.textFrom(start); + if( id.length()==0 || !parser.matchIgnoreCase("[/youtube]") ) + return parser.failure(null); + String rtn = toHtml ? 
"<iframe width='420' height='315' src='https://www.youtube.com/embed/"+id+"' frameborder='0' allowfullscreen></iframe>" : ""; + return parser.success(rtn); + } + + private String parseQuote1() { + parser.begin(); + if( !parser.matchIgnoreCase("[quote]") ) + return parser.failure(null); + String content = parseWellFormed(); + if( !parser.matchIgnoreCase("[/quote]") ) + return parser.failure(null); + String rtn = toHtml ? "<blockquote>"+content+"</blockquote>" : ""; + return parser.success(rtn); + } + + private String parseQuote2() { + parser.begin(); + if( !parser.matchIgnoreCase("[quote=") ) + return parser.failure(null); + int start = parser.currentIndex(); + while( parser.noneOf("[];") ); + String name = parser.textFrom(start).trim(); + if( name.length() == 0 ) + return parser.failure(null); + String src = null; + if( parser.match(';') ) { + start = parser.currentIndex(); + while( parser.noneOf("[]'") ); + src = parser.textFrom(start).trim(); + if( src.length() == 0 ) + return parser.failure(null); + } + if( !parser.match(']') ) + return parser.failure(null); + String content = parseWellFormed(); + if( !parser.matchIgnoreCase("[/quote]") ) + return parser.failure(null); + if( !toHtml ) + return parser.success(""); + StringBuilder sb = new StringBuilder(); + sb.append( "<blockquote><div quoted>" ); + if( src != null ) + sb.append( "<a href='/thread?id=" ).append( src ).append( "'>" ); + sb.append( name ).append( " wrote" ); + if( src != null ) + sb.append( "</a>" ); + sb.append( ":</div>" ).append( content ).append( "</blockquote>" ); + return parser.success(sb.toString()); + } + +}
diff -r 0742ac78fa69 -r bb3818249dfb core/src/luan/modules/parsers/Csv.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core/src/luan/modules/parsers/Csv.java Fri Aug 14 06:35:20 2015 -0600 @@ -0,0 +1,61 @@ +package luan.modules.parsers; + +import luan.LuanTable; + + +public final class Csv { + + public static LuanTable toList(String line) throws ParseException { + return new Csv(line).parse(); + } + + private final Parser parser; + + private Csv(String line) { + this.parser = new Parser(line); + } + + private ParseException exception(String msg) { + return new ParseException(parser,msg); + } + + private LuanTable parse() throws ParseException { + LuanTable list = new LuanTable(); + while(true) { + Spaces(); + String field = parseField(); + list.rawPut(list.rawLength()+1,field); + Spaces(); + if( parser.endOfInput() ) + return list; + if( !parser.match(',') ) + throw exception("unexpected char"); + } + } + + private String parseField() throws ParseException { + parser.begin(); + String rtn; + if( parser.match('"') ) { + int start = parser.currentIndex(); + do { + if( parser.endOfInput() ) { + parser.failure(); + throw exception("unclosed quote"); + } + } while( parser.noneOf("\"") ); + rtn = parser.textFrom(start); + parser.match('"'); + } else { + int start = parser.currentIndex(); + while( !parser.endOfInput() && parser.noneOf(",") ); + rtn = parser.textFrom(start).trim(); + } + return parser.success(rtn); + } + + private void Spaces() { + while( parser.anyOf(" \t") ); + } + +}
diff -r 0742ac78fa69 -r bb3818249dfb core/src/luan/modules/parsers/ParseException.java --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/core/src/luan/modules/parsers/ParseException.java Fri Aug 14 06:35:20 2015 -0600 @@ -0,0 +1,51 @@ +package luan.modules.parsers; + + +public final class ParseException extends Exception { + public final String text; + public final int errorIndex; + public final int highIndex; + + ParseException(Parser parser,String msg) { + super(msg); + this.text = parser.text; + this.errorIndex = parser.currentIndex(); + this.highIndex = parser.highIndex(); + } + + private class Location { + final int line; + final int pos; + + Location(int index) { + int line = 0; + int i = -1; + while(true) { + int j = text.indexOf('\n',i+1); + if( j == -1 || j >= index ) + break; + i = j; + line++; + } + this.line = line; + this.pos = index - i - 1; + } + } + + private String[] lines() { + return text.split("\n",-1); + } + + @Override public String getMessage() { + Location loc = new Location(errorIndex); + String line = lines()[loc.line]; + String msg = super.getMessage() + " (line " + (loc.line+1) + ", pos " + (loc.pos+1) + ")\n"; + StringBuilder sb = new StringBuilder(msg); + sb.append( line + "\n" ); + for( int i=0; i<loc.pos; i++ ) { + sb.append( line.charAt(i)=='\t' ? '\t' : ' ' ); + } + sb.append("^\n"); + return sb.toString(); + } +}
// core/src/luan/modules/parsers/Parser.java
package luan.modules.parsers;


/**
 * Cursor over a string with a stack of saved positions for backtracking.
 *
 * <p>{@link #begin()} pushes a frame that starts at the current position.
 * {@link #success()} pops the frame and keeps the position it reached;
 * {@link #failure()} pops the frame and thereby returns to the position the
 * enclosing frame was at.  The {@code match}/{@code anyOf}/{@code noneOf}
 * family consume input when they return true; the {@code test} family never
 * consume.
 */
public class Parser {
	/** The text being parsed. */
	public final String text;
	private final int len;
	/** Position per frame; {@code stack[frame]} is the current position. */
	private int[] stack = new int[256];
	private int frame = 0;
	/** Highest position ever reached, including positions later backtracked from. */
	private int iHigh;

	Parser(String text) {
		this.text = text;
		this.len = text.length();
	}

	/** Current position of the innermost frame. */
	private int pos() {
		return stack[frame];
	}

	/** Moves the current position forward by {@code n} and updates the high-water mark. */
	private void advance(int n) {
		int moved = stack[frame] + n;
		stack[frame] = moved;
		if( moved > iHigh )
			iHigh = moved;
	}

	/** Advances by {@code n} when {@code matched} is true; returns {@code matched}. */
	private boolean advanceIf(boolean matched,int n) {
		if( matched )
			advance(n);
		return matched;
	}

	/**
	 * Pushes a new frame starting at the current position, doubling the stack when it is full.
	 *
	 * @return the current position
	 */
	public int begin() {
		int parent = frame;
		frame = parent + 1;
		if( frame == stack.length ) {
			int[] bigger = new int[2*frame];
			System.arraycopy(stack,0,bigger,0,frame);
			stack = bigger;
		}
		stack[frame] = stack[parent];
		return pos();
	}

	/** Resets the current frame to the position of the enclosing frame, without popping it. */
	public void rollback() {
		stack[frame] = stack[frame-1];
	}

	/** Same as {@link #success()}, but returns {@code t} for use in a return statement. */
	public <T> T success(T t) {
		success();
		return t;
	}

	/**
	 * Pops the current frame and carries its position over to the enclosing frame.
	 *
	 * @return always true
	 */
	public boolean success() {
		int reached = stack[frame];
		frame--;
		stack[frame] = reached;
		return true;
	}

	/** Same as {@link #failure()}, but returns {@code t} for use in a return statement. */
	public <T> T failure(T t) {
		failure();
		return t;
	}

	/**
	 * Pops the current frame and discards its position.
	 *
	 * @return always false
	 */
	public boolean failure() {
		frame--;
		return false;
	}

	/** Returns the current position. */
	public int currentIndex() {
		return pos();
	}

	/** Returns the highest position reached so far. */
	public int highIndex() {
		return iHigh;
	}

	/** Returns the character just before the current position. */
	public char lastChar() {
		return text.charAt(pos()-1);
	}

	/** Returns the character at the current position. */
	public char currentChar() {
		return text.charAt(pos());
	}

	/** Returns whether the current position is at or past the end of the text. */
	public boolean endOfInput() {
		return pos() >= len;
	}

	/** Consumes {@code c} if it is the next character. */
	public boolean match(char c) {
		return advanceIf( !endOfInput() && text.charAt(pos()) == c, 1 );
	}

	/** Consumes {@code s} if the text continues with it (case-sensitive). */
	public boolean match(String s) {
		int n = s.length();
		return advanceIf( text.regionMatches(pos(),s,0,n), n );
	}

	/** Consumes {@code s} if the text continues with it, ignoring case. */
	public boolean matchIgnoreCase(String s) {
		int n = s.length();
		return advanceIf( text.regionMatches(true,pos(),s,0,n), n );
	}

	/** Consumes the next character if it occurs in {@code s}. */
	public boolean anyOf(String s) {
		return advanceIf( !endOfInput() && s.indexOf(text.charAt(pos())) != -1, 1 );
	}

	/** Consumes the next character if there is one and it does not occur in {@code s}. */
	public boolean noneOf(String s) {
		return advanceIf( !endOfInput() && s.indexOf(text.charAt(pos())) == -1, 1 );
	}

	/** Consumes the next character if it lies in the inclusive range {@code cLow..cHigh}. */
	public boolean inCharRange(char cLow, char cHigh) {
		if( endOfInput() )
			return false;
		char next = text.charAt(pos());
		return advanceIf( cLow <= next && next <= cHigh, 1 );
	}

	/** Consumes the next character, whatever it is; false only at end of input. */
	public boolean anyChar() {
		return advanceIf( !endOfInput(), 1 );
	}

	/** Returns whether the next character is {@code c}, without consuming it. */
	public boolean test(char c) {
		if( endOfInput() )
			return false;
		return text.charAt(pos()) == c;
	}

	/** Returns whether the text continues with {@code s} (case-sensitive), without consuming it. */
	public boolean test(String s) {
		return text.regionMatches(pos(),s,0,s.length());
	}

	/** Returns whether the text continues with {@code s}, ignoring case, without consuming it. */
	public boolean testIgnoreCase(String s) {
		return text.regionMatches(true,pos(),s,0,s.length());
	}

	/** Returns the text from {@code start} up to (not including) the current position. */
	public String textFrom(int start) {
		return text.substring(start,pos());
	}

}