Mercurial Hosting > nabble
diff src/nabble/view/web/util/codemirror/js/tokenizejavascript.js @ 0:7ecd1a4ef557
add content
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 21 Mar 2019 19:15:52 -0600 |
parents | |
children |
line wrap: on
line diff
/* Tokenizer for JavaScript code */

// Builds the JavaScript tokenizer. The returned function takes a
// character stream and an optional start state and hands both to the
// generic `tokenizer` function, which is not defined in this file and
// must be in scope when the returned function is called.
//
// The stream object (`source`) is only used through these methods:
// endOfLine(), next(), equals(ch), nextWhileMatches(regexp) and get().
var tokenizeJavaScript = (function() {
  // Advance the stream until the given character (not preceded by a
  // backslash) is encountered, or the end of the line is reached.
  // Returns true only when the line ran out directly after an
  // unescaped backslash, false in every other case.
  function nextUntilUnescaped(source, end) {
    var escaped = false;
    while (!source.endOfLine()) {
      var next = source.next();
      if (next === end && !escaped)
        return false;
      escaped = !escaped && next === "\\";
    }
    return escaped;
  }

  // A map of JavaScript's keywords. The a/b/c keyword distinction is
  // very rough, but it gives the parser enough information to parse
  // correct code correctly (we don't care that much how we parse
  // incorrect code). The style information included in these objects
  // is used by the highlighter to pick the correct CSS style for a
  // token.
  var keywords = function(){
    function result(type, style){
      return {type: type, style: "js-" + style};
    }
    // keywords that take a parenthesised expression, and then a
    // statement (if)
    var keywordA = result("keyword a", "keyword");
    // keywords that take just a statement (else)
    var keywordB = result("keyword b", "keyword");
    // keywords that optionally take an expression, and form a
    // statement (return)
    var keywordC = result("keyword c", "keyword");
    var operator = result("operator", "keyword");
    var atom = result("atom", "atom");
    return {
      "if": keywordA, "while": keywordA, "with": keywordA,
      "else": keywordB, "do": keywordB, "try": keywordB, "finally": keywordB,
      "return": keywordC, "break": keywordC, "continue": keywordC, "new": keywordC, "delete": keywordC, "throw": keywordC,
      "in": operator, "typeof": operator, "instanceof": operator,
      "var": result("var", "keyword"), "function": result("function", "keyword"), "catch": result("catch", "keyword"),
      "for": result("for", "keyword"), "switch": result("switch", "keyword"),
      "case": result("case", "keyword"), "default": result("default", "keyword"),
      "true": atom, "false": atom, "null": atom, "undefined": atom, "NaN": atom, "Infinity": atom
    };
  }();

  // Some helper regexps
  var isOperatorChar = /[+\-*&%=<>!?|]/;
  var isDigit = /[0-9]/;
  var isHexDigit = /[0-9A-Fa-f]/;
  var isWordChar = /[\w$]/;
  var isRegexpFlag = /[dgimsuvy]/;
  // Token types after which a slash starts a regular expression
  // rather than a division operator.
  var allowsRegexpAfter = /^[\[{}\(,;:]$/;

  // Wrapper around jsToken that helps maintain parser state (whether
  // we are inside of a multi-line comment or string and whether the
  // next token could be a regular expression). A new state function is
  // only installed through setState when one of the two values
  // actually changed.
  function jsTokenState(inside, regexp) {
    return function(source, setState) {
      var newInside = inside;
      var type = jsToken(inside, regexp, source, function(c) {newInside = c;});
      // Normalised to a real boolean so the comparison below is
      // meaningful; comparing a match array or null against a boolean
      // reported a change on almost every token.
      var newRegexp = type.type === "operator" || type.type === "keyword c" ||
        allowsRegexpAfter.test(type.type);
      if (newRegexp !== regexp || newInside !== inside)
        setState(jsTokenState(newInside, newRegexp));
      return type;
    };
  }

  // The token reader, intended to be used by the tokenizer from
  // tokenize.js (through jsTokenState). Advances the source stream
  // over a token, and returns an object containing the type and style
  // of that token.
  //   inside    - "/*", a quote character, or a falsy value: the
  //               construct left open by the previous line, if any
  //   regexp    - whether a slash at this point starts a regexp
  //   source    - the character stream to read from
  //   setInside - callback used to report the new `inside` value
  function jsToken(inside, regexp, source, setInside) {
    function readHexNumber(){
      source.next(); // skip the 'x'
      source.nextWhileMatches(isHexDigit);
      return {type: "number", style: "js-atom"};
    }

    // Reads the rest of a decimal literal; the first digit has
    // already been consumed by the caller.
    function readNumber() {
      source.nextWhileMatches(isDigit);
      if (source.equals(".")){
        source.next();
        source.nextWhileMatches(isDigit);
      }
      if (source.equals("e") || source.equals("E")){
        source.next();
        // The exponent may carry either sign (1e-5, 1e+5).
        if (source.equals("-") || source.equals("+"))
          source.next();
        source.nextWhileMatches(isDigit);
      }
      return {type: "number", style: "js-atom"};
    }

    // Read a word, look it up in keywords. If not found, it is a
    // variable, otherwise it is a keyword of the type found.
    function readWord() {
      source.nextWhileMatches(isWordChar);
      var word = source.get();
      var known = keywords.hasOwnProperty(word) && keywords.propertyIsEnumerable(word) && keywords[word];
      return known ? {type: known.type, style: known.style, content: word} :
        {type: "variable", style: "js-variable", content: word};
    }

    // Reads the body and flags of a regexp literal; the opening slash
    // has already been consumed. A slash inside a character class
    // ([...]) or after a backslash does not end the literal.
    function readRegexp() {
      var escaped = false;
      var inClass = false;
      while (!source.endOfLine()) {
        var next = source.next();
        if (!escaped) {
          if (next === "/" && !inClass)
            break;
          if (next === "[")
            inClass = true;
          else if (next === "]")
            inClass = false;
        }
        escaped = !escaped && next === "\\";
      }
      source.nextWhileMatches(isRegexpFlag);
      return {type: "regexp", style: "js-string"};
    }

    // Multi-line comments are tricky. We want to return the newlines
    // embedded in them as regular newline tokens, and then continue
    // returning a comment token for every line of the comment. So
    // some state has to be saved (inside) to indicate whether we are
    // inside a /* */ sequence.
    // `start` is the character consumed just before this call; it only
    // matters when it is "*", because a following "/" then closes the
    // comment.
    function readMultilineComment(start){
      var newInside = "/*";
      var maybeEnd = (start === "*");
      while (!source.endOfLine()) {
        var next = source.next();
        if (next === "/" && maybeEnd){
          newInside = null;
          break;
        }
        maybeEnd = (next === "*");
      }
      setInside(newInside);
      return {type: "comment", style: "js-comment"};
    }

    function readOperator() {
      source.nextWhileMatches(isOperatorChar);
      return {type: "operator", style: "js-operator"};
    }

    // Reads up to the closing quote. When the line ends on a backslash
    // the string is reported as still open, so the next line is read
    // as its continuation.
    function readString(quote) {
      var endBackSlash = nextUntilUnescaped(source, quote);
      setInside(endBackSlash ? quote : null);
      return {type: "string", style: "js-string"};
    }

    // Fetch the next token. Dispatches on first character in the
    // stream, or first two characters when the first is a slash.
    if (inside === "\"" || inside === "'")
      return readString(inside);
    var ch = source.next();
    if (inside === "/*")
      return readMultilineComment(ch);
    else if (ch === "\"" || ch === "'")
      return readString(ch);
    // with punctuation, the type of the token is the symbol itself
    else if (/[\[\]{}\(\),;\:\.]/.test(ch))
      return {type: ch, style: "js-punctuation"};
    else if (ch === "0" && (source.equals("x") || source.equals("X")))
      return readHexNumber();
    else if (isDigit.test(ch))
      return readNumber();
    else if (ch === "/"){
      if (source.equals("*")) {
        source.next();
        return readMultilineComment(ch);
      }
      else if (source.equals("/")) {
        // Line comment: no stream character equals null, so this
        // simply consumes the rest of the line.
        nextUntilUnescaped(source, null);
        return {type: "comment", style: "js-comment"};
      }
      else if (regexp)
        return readRegexp();
      else
        return readOperator();
    }
    else if (isOperatorChar.test(ch))
      return readOperator();
    else
      return readWord();
  }

  // The external interface to the tokenizer. When no start state is
  // given, tokenizing begins outside any comment or string, at a
  // position where a regular expression is allowed.
  return function(source, startState) {
    return tokenizer(source, startState || jsTokenState(false, true));
  };
})();