changeset 1341:a015a0b5c388

add Html.decode(), Lucene.count_tokens(), lucene boosts, Sql.database.set()
author Franklin Schmidt <fschmidt@gmail.com>
date Tue, 19 Feb 2019 08:14:40 -0700 (2019-02-19)
parents b3c4fcf29a53
children 60599adc27b8
files src/luan/modules/Html.luan src/luan/modules/HtmlLuan.java src/luan/modules/lucene/Lucene.luan src/luan/modules/lucene/LuceneIndex.java src/luan/modules/sql/Database.java src/luan/modules/sql/Sql.luan
diffstat 6 files changed, 105 insertions(+), 292 deletions(-) [+]
line wrap: on
line diff
--- a/src/luan/modules/Html.luan	Mon Feb 18 05:11:50 2019 -0700
+++ b/src/luan/modules/Html.luan	Tue Feb 19 08:14:40 2019 -0700
@@ -14,6 +14,7 @@
 
 local Html = {}
 
+Html.decode = HtmlLuan.decode
 Html.encode = HtmlLuan.encode
 Html.javascript_encode = JsonToString.javascriptEncode
 
--- a/src/luan/modules/HtmlLuan.java	Mon Feb 18 05:11:50 2019 -0700
+++ b/src/luan/modules/HtmlLuan.java	Tue Feb 19 08:14:40 2019 -0700
@@ -6,6 +6,8 @@
 import java.util.Set;
 import java.util.HashSet;
 import java.util.Map;
+import java.util.regex.Pattern;
+import java.util.regex.Matcher;
 import luan.Luan;
 import luan.LuanTable;
 import luan.LuanException;
@@ -39,292 +41,27 @@
 		return buf.toString();
 	}
 
-/*
-//	public static final String TEXTAREA = "textarea";
-	public static final String SCRIPT = "script";
-	public static final String STYLE = "style";
+	private static final Pattern entityPtn = Pattern.compile(  // compiled once and shared by every decode() call
+		"&#(\\d+);"  // decimal numeric character reference such as &#38; - group 1 captures the digits
+	);
 
-	public static Set<String> containerTags = new HashSet<String>(Arrays.asList(SCRIPT,STYLE));
-*/
-/*
-	public static LuanTable parse(Luan luan,String text,LuanTable containerTagsTbl)
-		throws LuanException
-	{
-		Utils.checkNotNull(luan,text);
-		Utils.checkNotNull(luan,containerTagsTbl);
-		Set<String> containerTags = new HashSet<String>();
-		for( Object v : containerTagsTbl.asList() ) {
-			containerTags.add((String)v);
+	public static String decode(String s) {  // replaces decimal references like &#38; with the character they denote, then a few named entities
+		StringBuffer buf = new StringBuffer();  // Matcher.appendReplacement() only accepts a StringBuffer before Java 9
+		Matcher m = entityPtn.matcher(s);
+		while( m.find() ) {
+			String entity = new String(new char[]{(char)Integer.parseInt(m.group(1))});  // NOTE: values above 0xFFFF are truncated by the char cast
+			m.appendReplacement(buf,Matcher.quoteReplacement(entity));  // quote it: a decoded '$' or '\' must be inserted literally, not parsed as a group reference or escape
 		}
-		List<Object> html = new ArrayList<Object>();
-		int len = text.length();
-		int i = 0;
-outer:
-		while( i < len ) {
-			int i2 = text.indexOf('<',i);
-			while( i2 != -1 && i2+1 < len ) {
-				char c = text.charAt(i2+1);
-				if( Character.isLetter(c) || c=='/' || c=='!' )
-					break;
-				i2 = text.indexOf('<',i2+1);
-			}
-			if( i2 == -1 ) {
-				html.add( text.substring(i) );
-				break;
-			}
-			if( i < i2 )
-				html.add( text.substring(i,i2) );
-			if( text.startsWith("<!--",i2) ) {
-				i = text.indexOf("-->",i2+4);
-				if( i == -1 ) {
-					html.add( text.substring(i2) );
-					break;
-				}
-				html.add( comment( text.substring(i2+4,i) ) );
-				i += 3;
-			} else if( text.startsWith("<![CDATA[",i2) ) {
-				i = text.indexOf("]]>",i2+9);
-				if( i == -1 ) {
-					html.add( text.substring(i2) );
-					break;
-				}
-				html.add( cdata( text.substring(i2+9,i) ) );
-				i += 3;
-			} else {
-				i = text.indexOf('>',i2);
-				if( i == -1 ) {
-					html.add( text.substring(i2) );
-					break;
-				}
-				String tagText = text.substring(i2+1,i);
-				try {
-					LuanTable tag = parseTag(tagText);
-					String tagName = (String)tag.rawGet("name");
-					if( containerTags.contains(tagName) ) {
-						i2 = i;
-						String endTagName = '/' + tagName;
-						while(true) {
-							i2 = text.indexOf('<',i2+1);
-							if( i2 == -1 )
-								break;
-							int i3 = text.indexOf('>',i2);
-							if( i3 == -1 )
-								break;
-							int j = i2+1;
-							while( j<i3 && !Character.isWhitespace(text.charAt(j)) )  j++;
-							String s = text.substring(i2+1,j);
-							if( s.equalsIgnoreCase(endTagName) ) {
-								String text2 = text.substring(i+1,i2);
-								LuanTable textContainer = textContainer(tag,text2);
-								html.add( textContainer );
-								i = i3 + 1;
-								continue outer;
-							}
-						}
-//						logger.warn("unclosed "+tagName);
-					}
-					i += 1;
-					html.add( tag );
-				} catch(BadTag e) {
-//					logger.debug("bad tag",e);
-					i += 1;
-//					if( !removeBadTags ) {
-						html.add( "&lt;" );
-						html.add( encode(luan,tagText) );
-						html.add( "&gt;" );
-//					}
-				}
-			}
-		}
-		return new LuanTable(html);
-	}
-
-	static LuanTable comment(String text) {
-		LuanTable tbl = new LuanTable();
-		tbl.rawPut("type","comment");
-		tbl.rawPut("text",text);
-		return tbl;
-	}
-
-	static LuanTable cdata(String text) {
-		LuanTable tbl = new LuanTable();
-		tbl.rawPut("type","cdata");
-		tbl.rawPut("text",text);
-		return tbl;
-	}
-
-	static LuanTable textContainer(LuanTable tag,String text) {
-		LuanTable tbl = new LuanTable();
-		tbl.rawPut("type","container");
-		tbl.rawPut("tag",tag);
-		tbl.rawPut("text",text);
-		return tbl;
-	}
-
-
-
-	static final class BadTag extends RuntimeException {
-		private BadTag(String msg) {
-			super(msg);
-		}
+		m.appendTail(buf);
+		s = buf.toString();
+		s = s.replace("&nbsp;"," ");
+		s = s.replace("&quot;","\"");
+		s = s.replace("&gt;",">");
+		s = s.replace("&lt;","<");
+		s = s.replace("&amp;","&");
+		return s;
 	}
 
-	static LuanTable parseTag(String text) {
-		LuanTable tbl = new LuanTable();
-		tbl.rawPut("type","tag");
-		if( text.endsWith("/") ) {
-			text = text.substring(0,text.length()-1);
-			tbl.rawPut("is_empty",true);
-		} else {
-			tbl.rawPut("is_empty",false);
-		}
-		int len = text.length();
-		int i = 0;
-		int i2 = i;
-		if( i2<len && text.charAt(i2)=='/' )
-			i2++;
-		while( i2<len ) {
-			char c = text.charAt(i2);
-			if( Character.isWhitespace(c) )
-				break;
-			if( !( Character.isLetterOrDigit(c) || c=='_' || c=='.' || c=='-' || c==':' ) )
-				throw new BadTag("invalid tag name for <"+text+">");
-			i2++;
-		}
-		String name = text.substring(i,i2).toLowerCase();
-		tbl.rawPut("name",name);
-		LuanTable attributes = new LuanTable();
-		tbl.rawPut("attributes",attributes);
-		i = i2;
-		while( i<len && Character.isWhitespace(text.charAt(i)) )  i++;
-		while( i<len ) {
-			i2 = toEndName(text,i,len);
-			String attrName = unquote(text.substring(i,i2).toLowerCase());
-			if( attributes.rawGet(attrName) != null )
-				throw new BadTag("duplicate attribute: "+attrName);
-			i = i2;
-			while( i<len && Character.isWhitespace(text.charAt(i)) )  i++;
-			if( i<len && text.charAt(i) == '=' ) {
-				i++;
-				i2 = i;
-				while( i<len && Character.isWhitespace(text.charAt(i)) )  i++;
-				i2 = toEndValue(text,i,len);
-				String attrValue = text.substring(i,i2);
-				if( attrValue.indexOf('<') != -1 || attrValue.indexOf('>') != -1 )
-					throw new BadTag("invalid attribute value: "+attrValue);
-				attrValue = unquote(attrValue);
-				attributes.rawPut(attrName,attrValue);
-				i = i2;
-				while( i<len && Character.isWhitespace(text.charAt(i)) )  i++;
-			} else {
-				attributes.rawPut(attrName,true);
-			}
-		}
-		return tbl;
-	}
-
-	private static int toEndName(String text,int i,int len) {
-		if( i==len )
-			return i;
-		char c = text.charAt(i);
-		switch(c) {
-		case '"':
-		case '\'':
-			i = text.indexOf(c,i+1);
-			return i==-1 ? len : i+1;
-		default:
-			if( Character.isWhitespace(c) ) {
-				throw new RuntimeException("text="+text+" i="+i);
-			}
-			do {
-				i++;
-			} while( i<len && (c=text.charAt(i))!='=' && !Character.isWhitespace(c) );
-			return i;
-		}
-	}
-
-	private static int toEndValue(String text,int i,int len) {
-		if( i==len )
-			return i;
-		char c = text.charAt(i);
-		switch(c) {
-		case '"':
-		case '\'':
-			i = text.indexOf(c,i+1);
-			return i==-1 ? len : i+1;
-		default:
-			if( Character.isWhitespace(c) ) {
-				throw new RuntimeException("text="+text+" i="+i);
-			}
-			do {
-				i++;
-			} while( i<len && !Character.isWhitespace(text.charAt(i)) );
-			return i;
-		}
-	}
-
-	public static String unquote(String s) {
-		if( s==null || s.length()<=1 )
-			return s;
-		char c = s.charAt(0);
-		return (c=='"' || c=='\'') && s.charAt(s.length()-1)==c
-			? s.substring(1,s.length()-1) : s;
-	}
-*/
-
-
-/*
-	public static String to_string(Luan luan,LuanTable tbl) throws LuanException {
-		List<Object> html = tbl.asList();
-		StringBuilder buf = new StringBuilder();
-		for( Object o : html ) {
-			if( o instanceof String ) {
-				buf.append( o );
-			} else if( o instanceof LuanTable ) {
-				LuanTable t = (LuanTable)o;
-				String type = (String)t.get(luan,"type");
-				if( type==null )
-					throw new LuanException(luan, "no type in element of table for 'Html.to_string'" );
-				if( type.equals("comment") ) {
-					buf.append( "<!--" ).append( t.get(luan,"text") ).append( "-->" );
-				} else if( type.equals("cdata") ) {
-					buf.append( "<![CDATA[" ).append( t.get(luan,"text") ).append( "]]" );
-				} else if( type.equals("tag") ) {
-					buf.append( tagToString(luan,t) );
-				} else if( type.equals("container") ) {
-					LuanTable tag  = (LuanTable)t.get(luan,"tag");
-					buf.append( tagToString(luan,tag) );
-					buf.append( t.get(luan,"text") );
-					buf.append( "</" ).append( tag.get(luan,"name") ).append( ">" );
-				} else {
-					throw new LuanException(luan, "invalid element type for 'Html.to_string'" );
-				}
-			} else 
-				throw new LuanException(luan, "invalid value ("+Luan.type(o)+") in table for 'Html.to_string'" );
-		}
-		return buf.toString();
-	}
-
-	private static String tagToString(Luan luan,LuanTable tbl) throws LuanException {
-		StringBuilder buf = new StringBuilder();
-		buf.append('<');
-		buf.append(tbl.get(luan,"name"));
-		LuanTable attributes = (LuanTable)tbl.get(luan,"attributes");
-		for( Map.Entry<Object,Object> attr : attributes.iterable(luan) ) {
-			buf.append( ' ' );
-			buf.append( attr.getKey() );
-			Object val = attr.getValue();
-			if( !val.equals(Boolean.TRUE) ) {
-				buf.append( '=' );
-				buf.append( quote((String)val) );
-			}
-		}
-		if( tbl.get(luan,"is_empty").equals(Boolean.TRUE) )
-			buf.append('/');
-		buf.append('>');
-		return buf.toString();
-	}
-*/
 	public static String quote(String s) {
 		StringBuilder buf = new StringBuilder();
 		buf.append('"');
--- a/src/luan/modules/lucene/Lucene.luan	Mon Feb 18 05:11:50 2019 -0700
+++ b/src/luan/modules/lucene/Lucene.luan	Tue Feb 19 08:14:40 2019 -0700
@@ -64,6 +64,7 @@
 	index.next_id = java_index.nextId
 	index.highlighter = java_index.highlighter
 	index.indexed_only_field = java_index.indexed_only_field
+	index.count_tokens = java_index.count_tokens
 
 	Lucene.instances[index] = true
 
--- a/src/luan/modules/lucene/LuceneIndex.java	Mon Feb 18 05:11:50 2019 -0700
+++ b/src/luan/modules/lucene/LuceneIndex.java	Tue Feb 19 08:14:40 2019 -0700
@@ -20,6 +20,7 @@
 import java.util.zip.ZipOutputStream;
 import java.util.zip.ZipEntry;
 import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.KeywordAnalyzer;
 import org.apache.lucene.document.Document;
 import org.apache.lucene.document.Field;
@@ -166,7 +167,9 @@
 		return new Term(key,br);
 	}
 
-	public void delete(Luan luan,String queryStr) throws LuanException, IOException, ParseException {
+	public void delete(Luan luan,String queryStr)
+		throws LuanException, IOException, ParseException
+	{
 		Query query = SaneQueryParser.parseQuery(mfp,queryStr);
 
 		boolean commit = !writeLock.isHeldByCurrentThread();
@@ -187,7 +190,9 @@
 		map.put(field,fn);
 	}
 
-	public void save(Luan luan,LuanTable doc) throws LuanException, IOException {
+	public void save(Luan luan,LuanTable doc,LuanTable boosts)
+		throws LuanException, IOException
+	{
 		Set indexedOnlySet = new HashSet();
 		Object typeObj = doc.get("type");
 		if( typeObj==null )
@@ -219,9 +224,9 @@
 			if( id == null ) {
 				id = nextId(luan);
 				doc.put("id",id);
-				writer.addDocument(toLucene(doc,indexedOnlySet));
+				writer.addDocument(toLucene(doc,indexedOnlySet,boosts));
 			} else {
-				writer.updateDocument( term("id",id), toLucene(doc,indexedOnlySet) );
+				writer.updateDocument( term("id",id), toLucene(doc,indexedOnlySet,boosts) );
 			}
 			if(commit) writer.commit();
 		} finally {
@@ -283,7 +288,7 @@
 			LuanTable doc = new LuanTable(luan);
 			doc.rawPut( "type", "next_id" );
 			doc.rawPut( FLD_NEXT_ID, idLim );
-			writer.updateDocument(new Term("type","next_id"),toLucene(doc,Collections.EMPTY_SET));
+			writer.updateDocument(new Term("type","next_id"),toLucene(doc,Collections.EMPTY_SET,null));
 			wrote();
 		}
 		return id;
@@ -403,7 +408,9 @@
 		close(openSearcher());
 	}
 
-	public int advanced_search( final Luan luan, String queryStr, LuanFunction fn, Integer n, String sortStr ) throws LuanException, IOException, ParseException {
+	public int advanced_search( final Luan luan, String queryStr, LuanFunction fn, Integer n, String sortStr )
+		throws LuanException, IOException, ParseException
+	{
 		Utils.checkNotNull(queryStr);
 		Query query = SaneQueryParser.parseQuery(mfp,queryStr);
 		IndexSearcher searcher = threadLocalSearcher.get();
@@ -479,7 +486,16 @@
 	}
 
 
-	private IndexableField newField(String name,Object value,Field.Store store,Set<String> indexed)
+	private IndexableField newField(String name,Object value,Field.Store store,Set<String> indexed,Float boost)  // builds the field with newField2(), then applies the optional boost
+		throws LuanException
+	{
+		IndexableField fld = newField2(name,value,store,indexed);
+		if( boost != null )  // null means no boost was supplied for this field
+			((Field)fld).setBoost(boost);  // NOTE(review): assumes newField2() always returns a Field that accepts a boost; Lucene rejects boosts on unindexed or norm-omitting fields - confirm
+		return fld;
+	}
+
+	private IndexableField newField2(String name,Object value,Field.Store store,Set<String> indexed)
 		throws LuanException
 	{
 		if( value instanceof String ) {
@@ -522,7 +538,7 @@
 			throw new LuanException("invalid value type "+value.getClass()+"' for '"+name+"'");
 	}
 
-	private Document toLucene(LuanTable table,Set indexOnly) throws LuanException {
+	private Document toLucene(LuanTable table,Set indexOnly,LuanTable boosts) throws LuanException {
 		Set<String> indexed = mfp.fields.keySet();
 		Document doc = new Document();
 		for( Map.Entry<Object,Object> entry : table.iterable() ) {
@@ -532,12 +548,21 @@
 			String name = (String)key;
 			Object value = entry.getValue();
 			Field.Store store = indexOnly.contains(name) ? Field.Store.NO : Field.Store.YES;
+			Float boost = null;
+			if( boosts != null ) {
+				Object obj = boosts.get(name);
+				if( obj != null ) {
+					if( !(obj instanceof Number) )
+						throw new LuanException("boost '"+name+"' must be number");
+					boost = ((Number)obj).floatValue();
+				}
+			}
 			if( !(value instanceof LuanTable) ) {
-				doc.add(newField(name, value, store, indexed));
+				doc.add(newField( name, value, store, indexed, boost ));
 			} else { // list
 				LuanTable list = (LuanTable)value;
 				for( Object el : list.asList() ) {
-					doc.add(newField(name, el, store, indexed));
+					doc.add(newField( name, el, store, indexed, boost ));
 				}
 			}
 		}
@@ -642,4 +667,18 @@
 			}
 		};
 	}
+
+	public int count_tokens(String text)  // returns the number of tokens the analyzer produces for text
+		throws IOException
+	{
+		int n = 0;
+		TokenStream ts = analyzer.tokenStream(null,text);
+		try {  // TokenStream workflow: reset, consume, end - and close even when consuming throws
+			ts.reset();
+			while( ts.incrementToken() )  n++;
+			ts.end();
+		} finally {  ts.close();  }
+		return n;
+	}
+
 }
--- a/src/luan/modules/sql/Database.java	Mon Feb 18 05:11:50 2019 -0700
+++ b/src/luan/modules/sql/Database.java	Tue Feb 19 08:14:40 2019 -0700
@@ -11,6 +11,7 @@
 import java.util.Properties;
 import luan.lib.logging.Logger;
 import luan.lib.logging.LoggerFactory;
+import luan.Luan;
 import luan.LuanTable;
 import luan.LuanException;
 
@@ -20,6 +21,7 @@
 
 	public final Connection con;
 	private final Map<String,PreparedStatement> pstmts = new HashMap<String,PreparedStatement>();
+	private int fetchSize = 0;
 
 	public Database(Connection con) {
 		this.con = con;
@@ -35,6 +37,30 @@
 		Properties props = new Properties();
 		props.putAll(spec);
 		this.con = DriverManager.getConnection(url,props);
+		set(spec);
+	}
+
+	public void set(LuanTable options) throws LuanException, SQLException {  // applies the options in a Luan table by delegating to set(Map)
+		set(options.asMap());  // NOTE(review): set(Map) removes keys from this map; whether asMap() is a copy or a live view is not visible here - confirm
+	}
+
+	private void set(Map<Object,Object> options) throws LuanException, SQLException {  // consumes the recognized keys from options (mutating it) and rejects anything left over
+		Object obj;
+		obj = options.remove("auto_commit");  // remove() rather than get() so only unrecognized keys remain for the final check
+		if( obj != null ) {
+			if( !(obj instanceof Boolean) )
+				throw new LuanException( "parameter 'auto_commit' must be a boolean" );
+			con.setAutoCommit((Boolean)obj);
+		}
+		obj = options.remove("fetch_size");
+		if( obj != null ) {
+			Integer n = Luan.asInteger(obj);
+			if( n == null )  // presumably asInteger() returns null when obj is not an integer value - confirm
+				throw new LuanException( "parameter 'fetch_size' must be an integer" );
+			fetchSize = n;  // only values > 0 take effect, see fix(); statements already cached in pstmts keep their previous fetch size
+		}
+		if( !options.isEmpty() )  // whatever is still in the map was not a known option
+			throw new LuanException( "unrecognized parameters: "+options );
 	}
 
 	private static String getString(Map spec,String key) throws LuanException {
@@ -46,10 +72,16 @@
 		return (String)val;
 	}
 
+	private void fix(Statement stmt) throws SQLException {  // applies the configured fetch size to a newly created statement
+		if( fetchSize > 0 )  // 0 is the field's initial value; in that case (or for negatives) the statement is left untouched
+			stmt.setFetchSize(fetchSize);
+	}
+
 	private PreparedStatement prepareStatement(String sql,Object[] args) throws SQLException {
 		PreparedStatement pstmt = pstmts.get(sql);
 		if( pstmt==null ) {
 			pstmt = con.prepareStatement(sql);
+			fix(pstmt);
 			pstmts.put(sql,pstmt);
 		}
 		for( int i=0; i<args.length; i++ ) {
@@ -61,6 +93,7 @@
 	public ResultSet query(String sql,Object... args) throws SQLException {
 		if( args.length == 0 ) {
 			Statement stmt = con.createStatement();
+			fix(stmt);
 			return stmt.executeQuery(sql);
 		} else {
 			PreparedStatement pstmt = prepareStatement(sql,args);
@@ -71,6 +104,7 @@
 	public int update(String sql,Object... args) throws SQLException {
 		if( args.length == 0 ) {
 			Statement stmt = con.createStatement();
+			fix(stmt);
 			int n = stmt.executeUpdate(sql);
 			stmt.close();
 			return n;
--- a/src/luan/modules/sql/Sql.luan	Mon Feb 18 05:11:50 2019 -0700
+++ b/src/luan/modules/sql/Sql.luan	Tue Feb 19 08:14:40 2019 -0700
@@ -28,6 +28,7 @@
 	database.close = java_database.con.close
 	database.is_closed = java_database.con.isClosed
 	database.update = java_database.update
+	database.set = java_database.set
 
 	function database.query(sql,...)
 		local rs = java_database.query(sql,...)