changeset 1422:e48290f3d9fb

better quoting
author Franklin Schmidt <fschmidt@gmail.com>
date Sun, 17 Nov 2019 16:28:51 -0700
parents 8ab2f0fc3829
children 2c06a7ff4173
files conv.txt src/goodjava/queryparser/SaneQueryParser.java src/luan/host/main.luan src/luan/modules/String.luan src/luan/modules/lucene/Lucene.luan src/luan/modules/lucene/Versioning.luan website/src/manual.html
diffstat 7 files changed, 87 insertions(+), 30 deletions(-) [+]
line wrap: on
line diff
--- a/conv.txt	Sat Oct 26 22:28:46 2019 -0600
+++ b/conv.txt	Sun Nov 17 16:28:51 2019 -0700
@@ -1,3 +1,5 @@
+literal
+
 JsonToString
 
 Hosting.send_mail
--- a/src/goodjava/queryparser/SaneQueryParser.java	Sat Oct 26 22:28:46 2019 -0600
+++ b/src/goodjava/queryparser/SaneQueryParser.java	Sun Nov 17 16:28:51 2019 -0700
@@ -19,10 +19,15 @@
 		return new SaneQueryParser(fieldParser,query).parseQuery();
 	}
 
-	private static Pattern specialChar = Pattern.compile("[ \\t\\r\\n\":\\[\\]{}^+\\-(),?*\\\\]");
-
-	public static String literal(String s) {
-		return specialChar.matcher(s).replaceAll("\\\\$0");
+	public static String quote(String s) {  // Returns s as a double-quoted literal, backslash-escaping backslash, \b, \f, \n, \r, \t and the double quote (the same escapes Quoted() decodes).
+		s = s.replace("\\","\\\\");  // must run first: later steps insert backslashes that must not be doubled
+		s = s.replace("\b","\\b");  // control characters become two-character escapes
+		s = s.replace("\f","\\f");
+		s = s.replace("\n","\\n");
+		s = s.replace("\r","\\r");
+		s = s.replace("\t","\\t");
+		s = s.replace("\"","\\\"");
+		return "\""+s+"\"";
+	}
 
 	public static Sort parseSort(FieldParser fieldParser,String sort) throws ParseException {
@@ -167,20 +172,9 @@
 
 	private String SimpleTerm(String exclude) throws ParseException {
 		parser.begin();
-		String match;
-		if( parser.match('"') ) {
-			int start = parser.currentIndex() - 1;
-			while( !parser.match('"') ) {
-				if( parser.endOfInput() )
-					throw exception("unclosed quotes");
-				parser.anyChar();
-				checkEscape();
-			}
-			match = parser.textFrom(start);
-			Spaces();
-		} else {
+		String match = Quoted();
+		if( match==null )
 			match = Unquoted(exclude);
-		}
 		if( match.length() == 0 )
 			throw exception("invalid input");
 		return parser.success(match);
@@ -204,6 +198,67 @@
 		return parser.success(match);
 	}
 
+	private String Quoted() throws ParseException {  // Parses a double-quoted term and returns its contents with escapes decoded; throws ParseException on a bad escape or a missing closing quote.
+		parser.begin();
+		if( !parser.match('"') )
+			return parser.failure(null);  // no opening quote: not a quoted term (SimpleTerm tries Unquoted() when it gets null)
+		StringBuilder sb = new StringBuilder();  // accumulates the decoded characters
+		while( parser.anyChar() ) {
+			char c = parser.lastChar();
+			switch(c) {
+			case '"':
+				return parser.success(sb.toString());  // closing quote reached. NOTE(review): the removed inline code called Spaces() after the closing quote; this path does not - confirm trailing whitespace is consumed elsewhere
+			case '\\':  // backslash introduces an escape sequence
+				if( parser.anyChar() ) {
+					c = parser.lastChar();
+					switch(c) {
+					case '"':
+					case '\\':
+						sb.append(c);  // escaped quote or backslash stands for itself
+						continue;
+					case 'b':
+						sb.append('\b');
+						continue;
+					case 'f':
+						sb.append('\f');
+						continue;
+					case 'n':
+						sb.append('\n');
+						continue;
+					case 'r':
+						sb.append('\r');
+						continue;
+					case 't':
+						sb.append('\t');
+						continue;
+					case 'u':  // backslash-u escape: four hex digits follow
+						int n = 0;  // numeric value built from the hex digits
+						for( int i=0; i<4; i++ ) {  // exactly four digits are required
+							int d;
+							if( parser.inCharRange('0','9') ) {
+								d = parser.lastChar() - '0';
+							} else if( parser.inCharRange('a','f') ) {
+								d = parser.lastChar() - 'a' + 10;
+							} else if( parser.inCharRange('A','F') ) {
+								d = parser.lastChar() - 'A' + 10;
+							} else {
+								throw exception("invalid hex digit");
+							}
+							n = 16*n + d;  // shift in the next hex digit
+						}
+						sb.append((char)n);  // appended as a single Java char
+						continue;
+					}
+				}
+				throw exception("invalid escape char");  // reached for an unrecognized escape letter, or when input ends right after the backslash
+			default:
+				sb.append(c);  // any other character is taken literally
+			}
+		}
+		parser.failure();  // input ended before a closing quote was seen
+		throw exception("unclosed string");
+	}
+
 	private String Unquoted(String exclude) throws ParseException {
 		int start = parser.begin();
 		while( parser.noneOf(exclude) ) {
--- a/src/luan/host/main.luan	Sat Oct 26 22:28:46 2019 -0600
+++ b/src/luan/host/main.luan	Sun Nov 17 16:28:51 2019 -0700
@@ -8,7 +8,7 @@
 local Rpc = require "luan:Rpc.luan"
 local Thread = require "luan:Thread.luan"
 local String = require "luan:String.luan"
-local literal = String.literal or error()
+local regex_quote = String.regex_quote or error()
 local lower = String.lower or error()
 local matches = String.matches or error()
 local Hosted = require "luan:host/Hosted.luan"
@@ -114,7 +114,7 @@
 end
 
 local function security(site_dir,file)
-	matches( file.to_string(), "^"..literal(site_dir.to_string()) ) or error "security violation"
+	matches( file.to_string(), "^"..regex_quote(site_dir.to_string()) ) or error "security violation"  -- errors unless file's path matches site_dir's path, regex-quoted and anchored at the start; NOTE(review): nothing requires a path separator after the prefix - confirm a sibling directory sharing the prefix cannot pass
 end
 
 function fns.copy_file(domain,password,dir,name,content)
--- a/src/luan/modules/String.luan	Sat Oct 26 22:28:46 2019 -0600
+++ b/src/luan/modules/String.luan	Sun Nov 17 16:28:51 2019 -0700
@@ -10,10 +10,11 @@
 String.format = StringLuan.format
 String.gmatch = StringLuan.gmatch
 String.gsub = StringLuan.gsub
-String.literal = Pattern.quote
 String.lower = StringLuan.lower
 String.match = StringLuan.match
 String.matches = StringLuan.matches
+String.regex_quote = Pattern.quote
+String.literal = String.regex_quote  -- tmp: String.literal is the old name, kept as an alias of regex_quote
 String.rep = StringLuan.rep
 String.reverse = StringLuan.reverse
 String.split = StringLuan.split
--- a/src/luan/modules/lucene/Lucene.luan	Sat Oct 26 22:28:46 2019 -0600
+++ b/src/luan/modules/lucene/Lucene.luan	Sun Nov 17 16:28:51 2019 -0700
@@ -28,7 +28,8 @@
 	double = NumberFieldParser.DOUBLE
 }
 
-Lucene.literal = SaneQueryParser.literal
+Lucene.quote = SaneQueryParser.quote
+Lucene.literal = Lucene.quote  -- tmp: old name kept as an alias; note it now returns a double-quoted string, whereas the removed SaneQueryParser.literal backslash-escaped special characters
 
 function Lucene.index(index_dir,options)
 	type(index_dir)=="table" or error "index_dir must be table"
--- a/src/luan/modules/lucene/Versioning.luan	Sat Oct 26 22:28:46 2019 -0600
+++ b/src/luan/modules/lucene/Versioning.luan	Sun Nov 17 16:28:51 2019 -0700
@@ -35,7 +35,7 @@
 -- hack to deal with latest changes
 function Versioning.a_big_step(db)
 	db.indexed_fields["id index"] = Lucene.type.string
-	db.advanced_search( Lucene.literal"id index" .. ":*", function(_,doc_fn)
+	db.advanced_search( Lucene.quote"id index" .. ":*", function(_,doc_fn)
 		local doc = doc_fn()
 		for field, value in pairs(copy(doc)) do
 			if matches(field," index$") then
@@ -49,7 +49,7 @@
 		db.save(doc)
 	end )
 	db.indexed_fields["type index"] = Lucene.type.string
-	db.delete( Lucene.literal"type index" .. ":*" )
+	db.delete( Lucene.quote"type index" .. ":*" )
 end
 
 return Versioning
--- a/website/src/manual.html	Sat Oct 26 22:28:46 2019 -0600
+++ b/website/src/manual.html	Sun Nov 17 16:28:51 2019 -0700
@@ -2416,12 +2416,6 @@
 </pre>
 
 
-
-<h4 heading><a name="String.literal" href="#String.literal"><code>String.literal (s)</code></a></h4>
-<p>
-Returns a string which matches the literal string <code>s</code> in a regular expression.  This function is simply the Java method <a href="http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#quote(java.lang.String)"><code>Pattern.quote</code></a>.
-
-
 <h4 heading><a name="String.lower" href="#String.lower"><code>String.lower (s)</code></a></h4>
 <p>
 Receives a string and returns a copy of this string with all
@@ -2430,7 +2424,6 @@
 
 
 
-
 <h4 heading><a name="String.match" href="#String.match"><code>String.match (s, pattern [, init])</code></a></h4>
 
 <p>
@@ -2456,6 +2449,11 @@
 </pre>
 
 
+<h4 heading><a name="String.regex_quote" href="#String.regex_quote"><code>String.regex_quote (s)</code></a></h4>
+<p>
+Returns a string which matches the literal string <code>s</code> in a regular expression.  This function is simply the Java method <a href="http://docs.oracle.com/javase/7/docs/api/java/util/regex/Pattern.html#quote(java.lang.String)"><code>Pattern.quote</code></a>.
+
+
 <h4 heading><a name="String.rep" href="#String.rep"><code>String.rep (s, n [, sep])</code></a></h4>
 <p>
 Returns a string that is the concatenation of <code>n</code> copies of