Mercurial Hosting > nabble

diff src/nabble/view/web/util/codemirror/js/parsejavascript.js @ 0:7ecd1a4ef557
add content
author: Franklin Schmidt <fschmidt@gmail.com>
date: Thu, 21 Mar 2019 19:15:52 -0600
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/nabble/view/web/util/codemirror/js/parsejavascript.js	Thu Mar 21 19:15:52 2019 -0600
@@ -0,0 +1,359 @@
+/* Parse function for JavaScript. Makes use of the tokenizer from
+ * tokenizejavascript.js. Note that your parsers do not have to be
+ * this complicated -- if you don't want to recognize local variables,
+ * in many languages it is enough to just look for braces, semicolons,
+ * parentheses, etc, and know when you are inside a string or comment.
+ *
+ * See manual.html for more info about the parser interface.
+ */
+
+var JSParser = Editor.Parser = (function() {
+  // Token types that can be considered to be atoms.
+  var atomicTypes = {"atom": true, "number": true, "variable": true, "string": true, "regexp": true};
+  // Setting that can be used to have JSON data indent properly.
+  var json = false;
+  // Constructor for the lexical context objects.
+  function JSLexical(indented, column, type, align, prev, info) {
+    // indentation at start of this line
+    this.indented = indented;
+    // column at which this scope was opened
+    this.column = column;
+    // type of scope ('vardef', 'stat' (statement), 'form' (special form), '[', '{', or '(')
+    this.type = type;
+    // '[', '{', or '(' blocks that have any text after their opening
+    // character are said to be 'aligned' -- any lines below are
+    // indented all the way to the opening character.
+    if (align != null)
+      this.align = align;
+    // Parent scope, if any.
+    this.prev = prev;
+    this.info = info;
+  }
+
+  // My favourite JavaScript indentation rules.
+  function indentJS(lexical) {
+    return function(firstChars) {
+      var firstChar = firstChars && firstChars.charAt(0), type = lexical.type;
+      var closing = firstChar == type;
+      if (type == "vardef")
+        return lexical.indented + 4;
+      else if (type == "form" && firstChar == "{")
+        return lexical.indented;
+      else if (type == "stat" || type == "form")
+        return lexical.indented + indentUnit;
+      else if (lexical.info == "switch" && !closing)
+        return lexical.indented + (/^(?:case|default)\b/.test(firstChars) ? indentUnit : 2 * indentUnit);
+      else if (lexical.align)
+        return lexical.column - (closing ? 1 : 0);
+      else
+        return lexical.indented + (closing ? 0 : indentUnit);
+    };
+  }
+
+  // The parser-iterator-producing function itself.
+  function parseJS(input, basecolumn) {
+    // Wrap the input in a token stream
+    var tokens = tokenizeJavaScript(input);
+    // The parser state. cc is a stack of actions that have to be
+    // performed to finish the current statement. For example we might
+    // know that we still need to find a closing parenthesis and a
+    // semicolon. Actions at the end of the stack go first. It is
+    // initialized with an infinitely looping action that consumes
+    // whole statements.
+    var cc = [json ? expressions : statements];
+    // Context contains information about the current local scope, the
+    // variables defined in that, and the scopes above it.
+    var context = null;
+    // The lexical scope, used mostly for indentation.
+    var lexical = new JSLexical((basecolumn || 0) - indentUnit, 0, "block", false);
+    // Current column, and the indentation at the start of the current
+    // line. Used to create lexical scope objects.
+    var column = 0;
+    var indented = 0;
+    // Variables which are used by the mark, cont, and pass functions
+    // below to communicate with the driver loop in the 'next'
+    // function.
+    var consume, marked;
+  
+    // The iterator object.
+    var parser = {next: next, copy: copy};
+
+    function next(){
+      // Start by performing any 'lexical' actions (adjusting the
+      // lexical variable), or the operations below will be working
+      // with the wrong lexical state.
+      while(cc[cc.length - 1].lex)
+        cc.pop()();
+
+      // Fetch a token.
+      var token = tokens.next();
+
+      // Adjust column and indented.
+      if (token.type == "whitespace" && column == 0)
+        indented = token.value.length;
+      column += token.value.length;
+      if (token.content == "\n"){
+        indented = column = 0;
+        // If the lexical scope's align property is still undefined at
+        // the end of the line, it is an un-aligned scope.
+        if (!("align" in lexical))
+          lexical.align = false;
+        // Newline tokens get an indentation function associated with
+        // them.
+        token.indentation = indentJS(lexical);
+      }
+      // No more processing for meaningless tokens.
+      if (token.type == "whitespace" || token.type == "comment")
+        return token;
+      // When a meaningful token is found and the lexical scope's
+      // align is undefined, it is an aligned scope.
+      if (!("align" in lexical))
+        lexical.align = true;
+
+      // Execute actions until one 'consumes' the token and we can
+      // return it.
+      while(true) {
+        consume = marked = false;
+        // Take and execute the topmost action.
+        cc.pop()(token.type, token.content);
+        if (consume){
+          // Marked is used to change the style of the current token.
+          if (marked)
+            token.style = marked;
+          // Here we differentiate between local and global variables.
+          else if (token.type == "variable" && inScope(token.content))
+            token.style = "js-localvariable";
+          return token;
+        }
+      }
+    }
+
+    // This makes a copy of the parser state. It stores all the
+    // stateful variables in a closure, and returns a function that
+    // will restore them when called with a new input stream. Note
+    // that the cc array has to be copied, because it is contantly
+    // being modified. Lexical objects are not mutated, and context
+    // objects are not mutated in a harmful way, so they can be shared
+    // between runs of the parser.
+    function copy(){
+      var _context = context, _lexical = lexical, _cc = cc.concat([]), _tokenState = tokens.state;
+  
+      return function copyParser(input){
+        context = _context;
+        lexical = _lexical;
+        cc = _cc.concat([]); // copies the array
+        column = indented = 0;
+        tokens = tokenizeJavaScript(input, _tokenState);
+        return parser;
+      };
+    }
+
+    // Helper function for pushing a number of actions onto the cc
+    // stack in reverse order.
+    function push(fs){
+      for (var i = fs.length - 1; i >= 0; i--)
+        cc.push(fs[i]);
+    }
+    // cont and pass are used by the action functions to add other
+    // actions to the stack. cont will cause the current token to be
+    // consumed, pass will leave it for the next action.
+    function cont(){
+      push(arguments);
+      consume = true;
+    }
+    function pass(){
+      push(arguments);
+      consume = false;
+    }
+    // Used to change the style of the current token.
+    function mark(style){
+      marked = style;
+    }
+
+    // Push a new scope. Will automatically link the current scope.
+    function pushcontext(){
+      context = {prev: context, vars: {"this": true, "arguments": true}};
+    }
+    // Pop off the current scope.
+    function popcontext(){
+      context = context.prev;
+    }
+    // Register a variable in the current scope.
+    function register(varname){
+      if (context){
+        mark("js-variabledef");
+        context.vars[varname] = true;
+      }
+    }
+    // Check whether a variable is defined in the current scope.
+    function inScope(varname){
+      var cursor = context;
+      while (cursor) {
+        if (cursor.vars[varname])
+          return true;
+        cursor = cursor.prev;
+      }
+      return false;
+    }
+  
+    // Push a new lexical context of the given type.
+    function pushlex(type, info) {
+      var result = function(){
+        lexical = new JSLexical(indented, column, type, null, lexical, info)
+      };
+      result.lex = true;
+      return result;
+    }
+    // Pop off the current lexical context.
+    function poplex(){
+      if (lexical.type == ")")
+        indented = lexical.indented;
+      lexical = lexical.prev;
+    }
+    poplex.lex = true;
+    // The 'lex' flag on these actions is used by the 'next' function
+    // to know they can (and have to) be ran before moving on to the
+    // next token.
+  
+    // Creates an action that discards tokens until it finds one of
+    // the given type.
+    function expect(wanted){
+      return function expecting(type){
+        if (type == wanted) cont();
+        else if (wanted == ";") pass();
+        else cont(arguments.callee);
+      };
+    }
+
+    // Looks for a statement, and then calls itself.
+    function statements(type){
+      return pass(statement, statements);
+    }
+    function expressions(type){
+      return pass(expression, expressions);
+    }
+    // Dispatches various types of statements based on the type of the
+    // current token.
+    function statement(type){
+      if (type == "var") cont(pushlex("vardef"), vardef1, expect(";"), poplex);
+      else if (type == "keyword a") cont(pushlex("form"), expression, statement, poplex);
+      else if (type == "keyword b") cont(pushlex("form"), statement, poplex);
+      else if (type == "{") cont(pushlex("}"), block, poplex);
+      else if (type == ";") cont();
+      else if (type == "function") cont(functiondef);
+      else if (type == "for") cont(pushlex("form"), expect("("), pushlex(")"), forspec1, expect(")"), poplex, statement, poplex);
+      else if (type == "variable") cont(pushlex("stat"), maybelabel);
+      else if (type == "switch") cont(pushlex("form"), expression, pushlex("}", "switch"), expect("{"), block, poplex, poplex);
+      else if (type == "case") cont(expression, expect(":"));
+      else if (type == "default") cont(expect(":"));
+      else if (type == "catch") cont(pushlex("form"), pushcontext, expect("("), funarg, expect(")"), statement, poplex, popcontext);
+      else pass(pushlex("stat"), expression, expect(";"), poplex);
+    }
+    // Dispatch expression types.
+    function expression(type){
+      if (atomicTypes.hasOwnProperty(type)) cont(maybeoperator);
+      else if (type == "function") cont(functiondef);
+      else if (type == "keyword c") cont(expression);
+      else if (type == "(") cont(pushlex(")"), expression, expect(")"), poplex, maybeoperator);
+      else if (type == "operator") cont(expression);
+      else if (type == "[") cont(pushlex("]"), commasep(expression, "]"), poplex, maybeoperator);
+      else if (type == "{") cont(pushlex("}"), commasep(objprop, "}"), poplex, maybeoperator);
+      else cont();
+    }
+    // Called for places where operators, function calls, or
+    // subscripts are valid. Will skip on to the next action if none
+    // is found.
+    function maybeoperator(type, value){
+      if (type == "operator" && /\+\+|--/.test(value)) cont(maybeoperator);
+      else if (type == "operator") cont(expression);
+      else if (type == ";") pass();
+      else if (type == "(") cont(pushlex(")"), commasep(expression, ")"), poplex, maybeoperator);
+      else if (type == ".") cont(property, maybeoperator);
+      else if (type == "[") cont(pushlex("]"), expression, expect("]"), poplex, maybeoperator);
+    }
+    // When a statement starts with a variable name, it might be a
+    // label. If no colon follows, it's a regular statement.
+    function maybelabel(type){
+      if (type == ":") cont(poplex, statement);
+      else pass(maybeoperator, expect(";"), poplex);
+    }
+    // Property names need to have their style adjusted -- the
+    // tokenizer thinks they are variables.
+    function property(type){
+      if (type == "variable") {mark("js-property"); cont();}
+    }
+    // This parses a property and its value in an object literal.
+    function objprop(type){
+      if (type == "variable") mark("js-property");
+      if (atomicTypes.hasOwnProperty(type)) cont(expect(":"), expression);
+    }
+    // Parses a comma-separated list of the things that are recognized
+    // by the 'what' argument.
+    function commasep(what, end){
+      function proceed(type) {
+        if (type == ",") cont(what, proceed);
+        else if (type == end) cont();
+        else cont(expect(end));
+      }
+      return function commaSeparated(type) {
+        if (type == end) cont();
+        else pass(what, proceed);
+      };
+    }
+    // Look for statements until a closing brace is found.
+    function block(type){
+      if (type == "}") cont();
+      else pass(statement, block);
+    }
+    // Variable definitions are split into two actions -- 1 looks for
+    // a name or the end of the definition, 2 looks for an '=' sign or
+    // a comma.
+    function vardef1(type, value){
+      if (type == "variable"){register(value); cont(vardef2);}
+      else cont();
+    }
+    function vardef2(type, value){
+      if (value == "=") cont(expression, vardef2);
+      else if (type == ",") cont(vardef1);
+    }
+    // For loops.
+    function forspec1(type){
+      if (type == "var") cont(vardef1, forspec2);
+      else if (type == ";") pass(forspec2);
+      else if (type == "variable") cont(formaybein);
+      else pass(forspec2);
+    }
+    function formaybein(type, value){
+      if (value == "in") cont(expression);
+      else cont(maybeoperator, forspec2);
+    }
+    function forspec2(type, value){
+      if (type == ";") cont(forspec3);
+      else if (value == "in") cont(expression);
+      else cont(expression, expect(";"), forspec3);
+    }
+    function forspec3(type) {
+      if (type == ")") pass();
+      else cont(expression);
+    }
+    // A function definition creates a new context, and the variables
+    // in its argument list have to be added to this context.
+    function functiondef(type, value){
+      if (type == "variable"){register(value); cont(functiondef);}
+      else if (type == "(") cont(pushlex(")"), pushcontext, commasep(funarg, ")"), poplex, statement, popcontext);
+    }
+    function funarg(type, value){
+      if (type == "variable"){register(value); cont();}
+    }
+  
+    return parser;
+  }
+
+  return {
+    make: parseJS,
+    electricChars: "{}:",
+    configure: function(obj) {
+      if (obj.json != null) json = obj.json;
+    }
+  };
+})();
author	Franklin Schmidt <fschmidt@gmail.com>
date	Thu, 21 Mar 2019 19:15:52 -0600
parents
children