repo/nabble: src/nabble/view/web/util/codemirror/js/tokenizejavascript.js comparison

comparison src/nabble/view/web/util/codemirror/js/tokenizejavascript.js @ 0:7ecd1a4ef557

add content

author	Franklin Schmidt <fschmidt@gmail.com>
date	Thu, 21 Mar 2019 19:15:52 -0600
parents
children

comparison

equal deleted inserted replaced

--1:000000000000
+:7ecd1a4ef557
+/* Tokenizer for JavaScript code */
+var tokenizeJavaScript = (function() {
+  // Consume characters from the stream up to and including the first
+  // occurrence of `end` that is not preceded by a backslash, giving up
+  // when the end of the line is reached first.
+  //
+  // Returns false when the terminator was found. Otherwise returns
+  // whether the last character consumed was an unescaped backslash
+  // (false for a line with nothing left to read).
+  function nextUntilUnescaped(source, end) {
+    var afterBackslash = false;
+    for (;;) {
+      if (source.endOfLine()) {
+        return afterBackslash;
+      }
+      var ch = source.next();
+      if (!afterBackslash && ch == end) {
+        return false;
+      }
+      // A backslash only escapes when it is not itself escaped.
+      afterBackslash = afterBackslash ? false : ch == "\\";
+    }
+  }
+  // Lookup table from JavaScript keyword to a {type, style} descriptor.
+  // The a/b/c keyword classes are a coarse grouping: they give the
+  // parser just enough information to handle correct code (how
+  // incorrect code gets parsed is not a concern). The style field is
+  // what the highlighter uses to choose a CSS class for the token.
+  var keywords = function(){
+    function describe(type, style){
+      return {type: type, style: "js-" + style};
+    }
+    var table = {};
+    // Map every word in the list to the same descriptor object.
+    function register(words, descriptor){
+      for (var i = 0; i < words.length; i++)
+        table[words[i]] = descriptor;
+    }
+    // Keywords whose token type is simply the keyword itself; each one
+    // gets its own descriptor.
+    function registerSelfTyped(words){
+      for (var i = 0; i < words.length; i++)
+        table[words[i]] = describe(words[i], "keyword");
+    }
+
+    // keywords that take a parenthesised expression, and then a
+    // statement (if)
+    register(["if", "while", "with"], describe("keyword a", "keyword"));
+    // keywords that take just a statement (else)
+    register(["else", "do", "try", "finally"], describe("keyword b", "keyword"));
+    // keywords that optionally take an expression, and form a
+    // statement (return)
+    register(["return", "break", "continue", "new", "delete", "throw"],
+             describe("keyword c", "keyword"));
+    // word-shaped operators
+    register(["in", "typeof", "instanceof"], describe("operator", "keyword"));
+    registerSelfTyped(["var", "function", "catch", "for", "switch", "case", "default"]);
+    // literal-like words
+    register(["true", "false", "null", "undefined", "NaN", "Infinity"],
+             describe("atom", "atom"));
+    return table;
+  }();
+  // Some helper regexps
+  // Characters that may appear in an operator token. "/" is left out on
+  // purpose: the token reader dispatches on it separately (comment,
+  // regular expression or division). "~" and "^" are included so that
+  // the bitwise operators are read as operators instead of falling
+  // through to the word reader and being glued to the identifier that
+  // follows them.
+  var isOperatorChar = /[+\-*&%=<>!?|~^]/;
+  var isHexDigit = /[0-9A-Fa-f]/;
+  // Characters allowed inside identifiers (\w already covers "_").
+  var isWordChar = /[\w$]/;
+  // Wrapper around jsToken that helps maintain parser state (whether
+  // we are inside of a multi-line comment or continued string, and
+  // whether the next token could be a regular expression).
+  //
+  //   inside - the `inside` value handed to jsToken for the next token
+  //   regexp - truthy when a "/" may start a regular expression
+  //
+  // Returns a function(source, setState) that reads one token, calls
+  // setState with a fresh state only when one of the two flags actually
+  // changed, and returns the token.
+  function jsTokenState(inside, regexp) {
+    return function(source, setState) {
+      var newInside = inside;
+      var type = jsToken(inside, regexp, source, function(c) {newInside = c;});
+      // A regular expression may follow an operator, a "keyword c"
+      // keyword, or opening/separating punctuation. RegExp.test keeps
+      // the flag a real boolean; String.match would yield an array or
+      // null, which never compares equal to the previous flag and would
+      // force a new state after every punctuation token.
+      var newRegexp = type.type == "operator" || type.type == "keyword c" ||
+        /^[\[{}\(,;:]$/.test(type.type);
+      if (newRegexp != !!regexp || newInside != inside)
+        setState(jsTokenState(newInside, newRegexp));
+      return type;
+    };
+  }
+  // The token reader, intended to be used by the tokenizer from
+  // tokenize.js (through jsTokenState). Advances the source stream
+  // over a token, and returns an object containing the type and style
+  // of that token (word tokens also carry their text as `content`).
+  //
+  //   inside    - "/*" when a block comment is being continued, a quote
+  //               character when a string is being continued after a
+  //               line that ended in a backslash, otherwise a falsy
+  //               value
+  //   regexp    - truthy when a "/" at this position starts a regular
+  //               expression rather than an operator
+  //   source    - the character stream to read from
+  //   setInside - callback invoked by the string and block-comment
+  //               readers with the `inside` value for the next token
+  function jsToken(inside, regexp, source, setInside) {
+    // Hexadecimal literal; the leading "0" has already been consumed.
+    function readHexNumber(){
+      source.next(); // skip the 'x'
+      source.nextWhileMatches(isHexDigit);
+      return {type: "number", style: "js-atom"};
+    }
+    // Decimal literal with optional fraction and exponent; the first
+    // digit has already been consumed.
+    function readNumber() {
+      source.nextWhileMatches(/[0-9]/);
+      if (source.equals(".")){
+        source.next();
+        source.nextWhileMatches(/[0-9]/);
+      }
+      if (source.equals("e") || source.equals("E")){
+        source.next();
+        // The exponent may carry an explicit sign of either polarity
+        // (1e-5 as well as 1e+5).
+        if (source.equals("-") || source.equals("+"))
+          source.next();
+        source.nextWhileMatches(/[0-9]/);
+      }
+      return {type: "number", style: "js-atom"};
+    }
+    // Read a word, look it up in keywords. If not found, it is a
+    // variable, otherwise it is a keyword of the type found.
+    function readWord() {
+      source.nextWhileMatches(isWordChar);
+      var word = source.get();
+      // Only own, enumerable entries count, so inherited names such as
+      // "toString" are never mistaken for keywords.
+      var known = keywords.hasOwnProperty(word) && keywords.propertyIsEnumerable(word) && keywords[word];
+      return known ? {type: known.type, style: known.style, content: word} :
+        {type: "variable", style: "js-variable", content: word};
+    }
+    // Regular expression literal; the opening "/" has already been
+    // consumed. The body ends at the first unescaped "/", so a slash
+    // inside a character class is also taken as the terminator.
+    function readRegexp() {
+      nextUntilUnescaped(source, "/");
+      source.nextWhileMatches(/[gimy]/); // 'y' is "sticky" option in Mozilla
+      return {type: "regexp", style: "js-string"};
+    }
+    // Multi-line comments are tricky. We want to return the newlines
+    // embedded in them as regular newline tokens, and then continue
+    // returning a comment token for every line of the comment. So
+    // some state has to be saved (inside) to indicate whether we are
+    // inside a /* */ sequence.
+    //
+    // `start` is the character consumed just before this call; the
+    // comment can only end immediately if that character was a "*".
+    function readMultilineComment(start){
+      var newInside = "/*";
+      var maybeEnd = (start == "*");
+      while (true) {
+        if (source.endOfLine())
+          break;
+        var next = source.next();
+        if (next == "/" && maybeEnd){
+          newInside = null;
+          break;
+        }
+        maybeEnd = (next == "*");
+      }
+      setInside(newInside);
+      return {type: "comment", style: "js-comment"};
+    }
+    // Operator made of one or more operator characters; the first
+    // character has already been consumed.
+    function readOperator() {
+      source.nextWhileMatches(isOperatorChar);
+      return {type: "operator", style: "js-operator"};
+    }
+    // String literal delimited by `quote`. When the line ends in an
+    // unescaped backslash the string carries on, so the quote is
+    // recorded as the new `inside` value.
+    function readString(quote) {
+      var endBackSlash = nextUntilUnescaped(source, quote);
+      setInside(endBackSlash ? quote : null);
+      return {type: "string", style: "js-string"};
+    }
+
+    // Fetch the next token. Dispatches on first character in the
+    // stream, or first two characters when the first is a slash.
+    if (inside == "\"" || inside == "'")
+      return readString(inside);
+    var ch = source.next();
+    if (inside == "/*")
+      return readMultilineComment(ch);
+    else if (ch == "\"" || ch == "'")
+      return readString(ch);
+    // with punctuation, the type of the token is the symbol itself
+    else if (/[\[\]{}\(\),;\:\.]/.test(ch))
+      return {type: ch, style: "js-punctuation"};
+    else if (ch == "0" && (source.equals("x") || source.equals("X")))
+      return readHexNumber();
+    else if (/[0-9]/.test(ch))
+      return readNumber();
+    else if (ch == "/"){
+      if (source.equals("*"))
+      { source.next(); return readMultilineComment(ch); }
+      else if (source.equals("/"))
+      { nextUntilUnescaped(source, null); return {type: "comment", style: "js-comment"};}
+      else if (regexp)
+        return readRegexp();
+      else
+        return readOperator();
+    }
+    else if (isOperatorChar.test(ch))
+      return readOperator();
+    else
+      return readWord();
+  }
+  // The external interface to the tokenizer: hands the source to the
+  // generic tokenizer together with the caller's start state, or with
+  // a fresh default state (not inside anything, regular expression
+  // allowed) when no start state is supplied.
+  return function(source, startState) {
+    var initialState = startState ? startState : jsTokenState(false, true);
+    return tokenizer(source, initialState);
+  };
+})();