changeset 555:e25ba7a2e816

some String documentation and fixes
author Franklin Schmidt <fschmidt@gmail.com>
date Fri, 19 Jun 2015 04:29:06 -0600
parents 18504c41b0be
children d02f43598ba3
files core/src/luan/cmd_line.luan core/src/luan/modules/String.luan core/src/luan/modules/StringLuan.java website/src/manual.html.luan
diffstat 4 files changed, 129 insertions(+), 197 deletions(-) [+]
line wrap: on
line diff
--- a/core/src/luan/cmd_line.luan	Thu Jun 18 03:30:18 2015 -0600
+++ b/core/src/luan/cmd_line.luan	Fri Jun 19 04:29:06 2015 -0600
@@ -24,7 +24,7 @@
 			print( main_file( Table.unpack(Luan.arg) ) )
 		end;
 		catch = function(e)
---			java(); e.java.printStackTrace(); return
+--			java(); e.java.printStackTrace();
 			Io.print_to(Io.stderr, e )
 		end;
 	}
--- a/core/src/luan/modules/String.luan	Thu Jun 18 03:30:18 2015 -0600
+++ b/core/src/luan/modules/String.luan	Fri Jun 19 04:29:06 2015 -0600
@@ -4,7 +4,6 @@
 
 local M = {}
 
-M.unicode = StringLuan.unicode
 M.char = StringLuan.char_
 M.concat = StringLuan.concat
 M.encode = StringLuan.encode
@@ -12,7 +11,6 @@
 M.format = StringLuan.format
 M.gmatch = StringLuan.gmatch
 M.gsub = StringLuan.gsub
-M.len = StringLuan.len
 M.literal = Pattern.quote
 M.lower = StringLuan.lower
 M.match = StringLuan.match
@@ -23,6 +21,7 @@
 M.to_binary = StringLuan.to_binary
 M.to_number = StringLuan.to_number
 M.trim = StringLuan.trim
+M.unicode = StringLuan.unicode
 M.upper = StringLuan.upper
 
 return M
--- a/core/src/luan/modules/StringLuan.java	Thu Jun 18 03:30:18 2015 -0600
+++ b/core/src/luan/modules/StringLuan.java	Fri Jun 19 04:29:06 2015 -0600
@@ -56,11 +56,6 @@
 		return s.getBytes();
 	}
 
-	public static int len(LuanState luan,String s) throws LuanException {
-		Utils.checkNotNull(luan,s);
-		return s.length();
-	}
-
 	public static String lower(LuanState luan,String s) throws LuanException {
 		Utils.checkNotNull(luan,s);
 		return s.toLowerCase();
@@ -100,14 +95,23 @@
 		return s.substring(start,end);
 	}
 
-	public static int[] find(String s,String pattern,Integer init,Boolean plain) {
+	@LuanMethod public static Object[] find(String s,String pattern,Integer init,Boolean plain) {
 		int start = start(s,init,0);
 		if( Boolean.TRUE.equals(plain) ) {
 			int i = s.indexOf(pattern,start);
-			return i == -1 ? null : new int[]{i+1,i+pattern.length()};
+			return i == -1 ? null : new Integer[]{i+1,i+pattern.length()};
 		}
 		Matcher m = Pattern.compile(pattern).matcher(s);
-		return m.find(start) ? new int[]{m.start()+1,m.end()} : null;
+		if( !m.find(start) )
+			return null;
+		int n = m.groupCount();
+		Object[] rtn = new Object[2+n];
+		rtn[0] = m.start() + 1;
+		rtn[1] = m.end();
+		for( int i=0; i<n; i++ ) {
+			rtn[2+i] = m.group(i+1);
+		}
+		return rtn;
 	}
 
 	@LuanMethod public static String[] match(String s,String pattern,Integer init) {
@@ -115,7 +119,7 @@
 		Matcher m = Pattern.compile(pattern).matcher(s);
 		if( !m.find(start) )
 			return null;
-		final int n = m.groupCount();
+		int n = m.groupCount();
 		if( n == 0 )
 			return new String[]{m.group()};
 		String[] rtn = new String[n];
@@ -166,9 +170,7 @@
 				String match = m.groupCount()==0 ? m.group() : m.group(1);
 				Object val = t.get(luan,match);
 				if( val != null ) {
-					String replacement = luan.checkString(val);
-					if( replacement==null )
-						throw luan.exception( "invalid replacement value (a "+Luan.type(val)+")" );
+					String replacement = luan.toString(val);
 					m.appendReplacement(sb,replacement);
 				}
 				i++;
@@ -184,18 +186,16 @@
 				Object[] args;
 				final int count = m.groupCount();
 				if( count == 0 ) {
-					args = new Object[]{m.group()};
+					args = new String[]{m.group()};
 				} else {
-					args = new Object[count];
+					args = new String[count];
 					for( int j=0; j<count; j++ ) {
-						args[j] = m.group(j);
+						args[j] = m.group(j+1);
 					}
 				}
 				Object val = Luan.first( luan.call(fn,"repl-arg",args) );
 				if( val != null ) {
-					String replacement = luan.checkString(val);
-					if( replacement==null )
-						throw luan.exception( "invalid replacement value (a "+Luan.type(val)+")" );
+					String replacement = luan.toString(val);
 					m.appendReplacement(sb,replacement);
 				}
 				i++;
--- a/website/src/manual.html.luan	Thu Jun 18 03:30:18 2015 -0600
+++ b/website/src/manual.html.luan	Fri Jun 19 04:29:06 2015 -0600
@@ -91,6 +91,7 @@
 		<li><a href="#default_lib">Default Environment</a></li>
 		<li><a href="#luan_lib">Basic Functions</a></li>
 		<li><a href="#package_lib">Modules</a></li>
+		<li><a href="#string_lib">String Manipulation</a></li>
 	</ul>
 </div>
 
@@ -2346,30 +2347,25 @@
 
 
 
-<h2>6.4 &ndash; <a name="6.4">String Manipulation</a></h2>
+<h3 <%=heading_options%> ><a name="string_lib">String Manipulation</a></h3>
+
+<p>
+Include this library with:
+
+<p><tt><pre>
+	local String = require "luan:String"
+</pre></tt></p>
 
 <p>
 This library provides generic functions for string manipulation,
 such as finding and extracting substrings, and pattern matching.
-When indexing a string in Lua, the first character is at position&nbsp;1
-(not at&nbsp;0, as in C).
+When indexing a string in Luan, the first character is at position&nbsp;1
+(not at&nbsp;0, as in Java).
 Indices are allowed to be negative and are interpreted as indexing backwards,
 from the end of the string.
 Thus, the last character is at position -1, and so on.
 
 
-<p>
-The string library provides all its functions inside the table
-<a name="pdf-string"><code>string</code></a>.
-It also sets a metatable for strings
-where the <code>__index</code> field points to the <code>string</code> table.
-Therefore, you can use the string functions in object-oriented style.
-For instance, <code>string.byte(s,i)</code>
-can be written as <code>s:byte(i)</code>.
-
-
-<p>
-The string library assumes one-byte character encodings.
 
 
 <p>
@@ -2387,67 +2383,46 @@
 
 
 
-
-<p>
-<hr><h3><a name="pdf-string.char"><code>string.char (&middot;&middot;&middot;)</code></a></h3>
+<h4 <%=heading_options%> ><a name="String.char"><tt>String.char (&middot;&middot;&middot;)</tt></a></h4>
+
+<p>
 Receives zero or more integers.
 Returns a string with length equal to the number of arguments,
 in which each character has the internal numerical code equal
 to its corresponding argument.
 
 
-<p>
-Numerical codes are not necessarily portable across platforms.
-
-
-
-
-<p>
-<hr><h3><a name="pdf-string.dump"><code>string.dump (function [, strip])</code></a></h3>
-
-
-<p>
-Returns a string containing a binary representation
-(a <em>binary chunk</em>)
-of the given function,
-so that a later <a href="#pdf-load"><code>load</code></a> on this string returns
-a copy of the function (but with new upvalues).
-If <code>strip</code> is a true value,
-the binary representation is created without debug information
-about the function
-(local variable names, lines, etc.).
-
-
-<p>
-Functions with upvalues have only their number of upvalues saved.
-When (re)loaded,
-those upvalues receive fresh instances containing <b>nil</b>.
-(You can use the debug library to serialize
-and reload the upvalues of a function
-in a way adequate to your needs.)
-
-
-
-
-<p>
-<hr><h3><a name="pdf-string.find"><code>string.find (s, pattern [, init [, plain]])</code></a></h3>
-
+<h4 <%=heading_options%> ><a name="String.concat"><tt>String.concat (&middot;&middot;&middot;)</tt></a></h4>
+
+<p>
+Concatenates the <a href="#Luan.to_string"><tt>to_string</tt></a> value of all arguments.
+
+
+
+<h4 <%=heading_options%> ><a name="String.encode"><tt>String.encode (s)</tt></a></h4>
+
+<p>
+Encodes argument <tt>s</tt> into a string that can be placed in quotes so as to return the original value of the string.
+
+
+
+
+<h4 <%=heading_options%> ><a name="String.find"><tt>String.find (s, pattern [, init [, plain]])</tt></a></h4>
 
 <p>
 Looks for the first match of
-<code>pattern</code> (see <a href="#6.4.1">&sect;6.4.1</a>) in the string <code>s</code>.
-If it finds a match, then <code>find</code> returns the indices of&nbsp;<code>s</code>
+<tt>pattern</tt> (see <a href="#6.4.1">&sect;6.4.1</a>) in the string <tt>s</tt>.
+If it finds a match, then <tt>find</tt> returns the indices of&nbsp;<tt>s</tt>
 where this occurrence starts and ends;
 otherwise, it returns <b>nil</b>.
-A third, optional numerical argument <code>init</code> specifies
+A third, optional numerical argument <tt>init</tt> specifies
 where to start the search;
 its default value is&nbsp;1 and can be negative.
-A value of <b>true</b> as a fourth, optional argument <code>plain</code>
+A value of <b>true</b> as a fourth, optional argument <tt>plain</tt>
 turns off the pattern matching facilities,
 so the function does a plain "find substring" operation,
-with no characters in <code>pattern</code> being considered magic.
-Note that if <code>plain</code> is given, then <code>init</code> must be given as well.
-
+with no characters in <tt>pattern</tt> being considered magic.
+Note that if <tt>plain</tt> is given, then <tt>init</tt> must be given as well.
 
 <p>
 If the pattern has captures,
@@ -2458,117 +2433,89 @@
 
 
 
-<p>
-<hr><h3><a name="pdf-string.format"><code>string.format (formatstring, &middot;&middot;&middot;)</code></a></h3>
+<h4 <%=heading_options%> ><a name="String.format"><tt>String.format (formatstring, &middot;&middot;&middot;)</tt></a></h4>
 
 
 <p>
 Returns a formatted version of its variable number of arguments
 following the description given in its first argument (which must be a string).
-The format string follows the same rules as the ISO&nbsp;C function <code>sprintf</code>.
-The only differences are that the options/modifiers
-<code>*</code>, <code>h</code>, <code>L</code>, <code>l</code>, <code>n</code>,
-and <code>p</code> are not supported
-and that there is an extra option, <code>q</code>.
-The <code>q</code> option formats a string between double quotes,
-using escape sequences when necessary to ensure that
-it can safely be read back by the Lua interpreter.
-For instance, the call
-
-<pre>
-     string.format('%q', 'a string with "quotes" and \n new line')
-</pre><p>
-may produce the string:
-
-<pre>
-     "a string with \"quotes\" and \
-      new line"
-</pre>
-
-<p>
-Options
-<code>A</code> and <code>a</code> (when available),
-<code>E</code>, <code>e</code>, <code>f</code>,
-<code>G</code>, and <code>g</code> all expect a number as argument.
-Options <code>c</code>, <code>d</code>,
-<code>i</code>, <code>o</code>, <code>u</code>, <code>X</code>, and <code>x</code>
-expect an integer.
-Option <code>q</code> expects a string;
-option <code>s</code> expects a string without embedded zeros.
-If the argument to option <code>s</code> is not a string,
-it is converted to one following the same rules of <a href="#pdf-tostring"><code>tostring</code></a>.
-
-
-
-
-<p>
-<hr><h3><a name="pdf-string.gmatch"><code>string.gmatch (s, pattern)</code></a></h3>
+The format string follows the same rules as the Java method <a href="http://docs.oracle.com/javase/7/docs/api/java/lang/String.html#format(java.lang.String,%20java.lang.Object...)"><tt>String.format</tt></a> because Luan calls it internally.
+
+<p>
+Note that Java's <tt>String.format</tt> does not convert between integers and floating-point numbers, so you must provide the kind of number that each format specifier expects.
+
+
+
+<h4 <%=heading_options%> ><a name="String.gmatch"><tt>String.gmatch (s, pattern)</tt></a></h4>
+
+<p>
 Returns an iterator function that,
 each time it is called,
-returns the next captures from <code>pattern</code> (see <a href="#6.4.1">&sect;6.4.1</a>)
-over the string <code>s</code>.
-If <code>pattern</code> specifies no captures,
+returns the next captures from <tt>pattern</tt> (see <a href="#6.4.1">&sect;6.4.1</a>)
+over the string <tt>s</tt>.
+If <tt>pattern</tt> specifies no captures,
 then the whole match is produced in each call.
 
 
 <p>
 As an example, the following loop
-will iterate over all the words from string <code>s</code>,
+will iterate over all the words from string <tt>s</tt>,
 printing one per line:
 
-<pre>
-     s = "hello world from Lua"
-     for w in string.gmatch(s, "%a+") do
-       print(w)
-     end
-</pre><p>
-The next example collects all pairs <code>key=value</code> from the
+<p><tt><pre>
+	local s = "hello world from Lua"
+	for w in String.gmatch(s, [[\w+]]) do
+		print(w)
+	end
+</pre></tt></p>
+
+<p>
+The next example collects all pairs <tt>key=value</tt> from the
 given string into a table:
 
-<pre>
-     t = {}
-     s = "from=world, to=Lua"
-     for k, v in string.gmatch(s, "(%w+)=(%w+)") do
-       t[k] = v
-     end
-</pre>
-
-<p>
-For this function, a caret '<code>^</code>' at the start of a pattern does not
+<p><tt><pre>
+	local t = {}
+	local s = "from=world, to=Lua"
+	for k, v in String.gmatch(s, [[(\w+)=(\w+)]]) do
+		t[k] = v
+	end
+</pre></tt></p>
+
+<p>
+For this function, a caret '<tt>^</tt>' at the start of a pattern does not
 work as an anchor, as this would prevent the iteration.
 
 
 
-
-<p>
-<hr><h3><a name="pdf-string.gsub"><code>string.gsub (s, pattern, repl [, n])</code></a></h3>
-Returns a copy of <code>s</code>
-in which all (or the first <code>n</code>, if given)
-occurrences of the <code>pattern</code> (see <a href="#6.4.1">&sect;6.4.1</a>) have been
-replaced by a replacement string specified by <code>repl</code>,
+<h4 <%=heading_options%> ><a name="String.gsub"><tt>String.gsub (s, pattern, repl [, n])</tt></a></h4>
+
+<p>
+Returns a copy of <tt>s</tt>
+in which all (or the first <tt>n</tt>, if given)
+occurrences of the <tt>pattern</tt> (see <a href="#6.4.1">&sect;6.4.1</a>) have been
+replaced by a replacement string specified by <tt>repl</tt>,
 which can be a string, a table, or a function.
-<code>gsub</code> also returns, as its second value,
+<tt>gsub</tt> also returns, as its second value,
 the total number of matches that occurred.
-The name <code>gsub</code> comes from <em>Global SUBstitution</em>.
-
-
-<p>
-If <code>repl</code> is a string, then its value is used for replacement.
-The character&nbsp;<code>%</code> works as an escape character:
-any sequence in <code>repl</code> of the form <code>%<em>d</em></code>,
-with <em>d</em> between 1 and 9,
-stands for the value of the <em>d</em>-th captured substring.
-The sequence <code>%0</code> stands for the whole match.
-The sequence <code>%%</code> stands for a single&nbsp;<code>%</code>.
-
-
-<p>
-If <code>repl</code> is a table, then the table is queried for every match,
+The name <tt>gsub</tt> comes from <i>Global SUBstitution</i>.
+
+
+<p>
+If <tt>repl</tt> is a string, then its value is used for replacement.
+The character&nbsp;<tt>\</tt> works as an escape character.
+Any sequence in <tt>repl</tt> of the form <tt>$<i>d</i></tt>,
+with <i>d</i> between 1 and 9,
+stands for the value of the <i>d</i>-th captured substring.
+The sequence <tt>$0</tt> stands for the whole match.
+
+
+<p>
+If <tt>repl</tt> is a table, then the table is queried for every match,
 using the first capture as the key.
 
 
 <p>
-If <code>repl</code> is a function, then this function is called every time a
+If <tt>repl</tt> is a function, then this function is called every time a
 match occurs, with all captured substrings passed as arguments,
 in order.
 
@@ -2581,9 +2528,9 @@
 
 <p>
 If the value returned by the table query or by the function call
-is a string or a number,
+is not <b>nil</b>,
 then it is used as the replacement string;
-otherwise, if it is <b>false</b> or <b>nil</b>,
+otherwise, if it is <b>nil</b>,
 then there is no replacement
 (that is, the original match is kept in the string).
 
@@ -2591,47 +2538,33 @@
 <p>
 Here are some examples:
 
-<pre>
-     x = string.gsub("hello world", "(%w+)", "%1 %1")
+<p><tt><pre>
+     x = String.gsub("hello world", [[(\w+)]], "$1 $1")
      --&gt; x="hello hello world world"
      
-     x = string.gsub("hello world", "%w+", "%0 %0", 1)
+     x = String.gsub("hello world", [[\w+]], "$0 $0", 1)
      --&gt; x="hello hello world"
      
-     x = string.gsub("hello world from Lua", "(%w+)%s*(%w+)", "%2 %1")
-     --&gt; x="world hello Lua from"
-     
-     x = string.gsub("home = $HOME, user = $USER", "%$(%w+)", os.getenv)
-     --&gt; x="home = /home/roberto, user = roberto"
-     
-     x = string.gsub("4+5 = $return 4+5$", "%$(.-)%$", function (s)
+     x = String.gsub("hello world from Luan", [[(\w+)\s*(\w+)]], "$2 $1")
+     --&gt; x="world hello Luan from"
+          
+     x = String.gsub("4+5 = $return 4+5$", [[\$(.*?)\$]], function (s)
            return load(s)()
          end)
      --&gt; x="4+5 = 9"
      
      local t = {name="lua", version="5.3"}
-     x = string.gsub("$name-$version.tar.gz", "%$(%w+)", t)
+     x = String.gsub("$name-$version.tar.gz", [[\$(\w+)]], t)
      --&gt; x="lua-5.3.tar.gz"
-</pre>
-
-
-
-<p>
-<hr><h3><a name="pdf-string.len"><code>string.len (s)</code></a></h3>
-Receives a string and returns its length.
-The empty string <code>""</code> has length 0.
-Embedded zeros are counted,
-so <code>"a\000bc\000"</code> has length 5.
-
-
-
-
-<p>
-<hr><h3><a name="pdf-string.lower"><code>string.lower (s)</code></a></h3>
+</pre></tt></p>
+
+
+
+<h4 <%=heading_options%> ><a name="String.lower"><tt>String.lower (s)</tt></a></h4>
+<p>
 Receives a string and returns a copy of this string with all
 uppercase letters changed to lowercase.
 All other characters are left unchanged.
-The definition of what an uppercase letter is depends on the current locale.