comparison src/nabble/view/web/util/codemirror/js/tokenizejavascript.js @ 0:7ecd1a4ef557

add content
author Franklin Schmidt <fschmidt@gmail.com>
date Thu, 21 Mar 2019 19:15:52 -0600
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:7ecd1a4ef557
1 /* Tokenizer for JavaScript code */
2
3 var tokenizeJavaScript = (function() {
// Consume characters from `source` up to and including the first
// occurrence of `end` that is not escaped by a preceding backslash,
// stopping early if the end of the line is reached.
//
// Returns false when the terminator was found. When the line ran out
// first, returns true only if the last character consumed was an
// unescaped backslash (i.e. the line ends in a continuation).
function nextUntilUnescaped(source, end) {
  var afterBackslash = false;
  var ch;
  for (;;) {
    if (source.endOfLine())
      return afterBackslash;
    ch = source.next();
    if (afterBackslash) {
      // This character is escaped: it can neither terminate the scan
      // nor start a new escape sequence.
      afterBackslash = false;
      continue;
    }
    if (ch == end)
      return false;
    afterBackslash = (ch == "\\");
  }
}
16
// Lookup table from JavaScript keyword to a {type, style} descriptor.
// The "keyword a"/"keyword b"/"keyword c" split is deliberately rough:
// it only has to give the parser enough information to parse correct
// code correctly. The style field is the CSS class the highlighter
// applies to the token.
var keywords = (function() {
  // Build a descriptor; every style gets the "js-" prefix.
  function describe(type, style) {
    return {type: type, style: "js-" + style};
  }

  var table = {};

  // Point every word in the list at one shared descriptor object.
  function share(words, descriptor) {
    for (var i = 0; i < words.length; i++)
      table[words[i]] = descriptor;
  }

  // Give every word in the list its own descriptor whose type is the
  // word itself.
  function own(words) {
    for (var i = 0; i < words.length; i++)
      table[words[i]] = describe(words[i], "keyword");
  }

  // "keyword a": takes a parenthesized expression, then a statement (if)
  share(["if", "while", "with"], describe("keyword a", "keyword"));
  // "keyword b": takes just a statement (else)
  share(["else", "do", "try", "finally"], describe("keyword b", "keyword"));
  // "keyword c": optionally takes an expression and forms a statement (return)
  share(["return", "break", "continue", "new", "delete", "throw"],
        describe("keyword c", "keyword"));
  // word-shaped operators, styled like keywords
  share(["in", "typeof", "instanceof"], describe("operator", "keyword"));
  own(["var", "function", "catch"]);
  own(["for", "switch"]);
  own(["case", "default"]);
  share(["true", "false", "null", "undefined", "NaN", "Infinity"],
        describe("atom", "atom"));

  return table;
})();
48
// Single-character classifiers shared by the token readers.

// A character that may appear in an identifier: word characters plus "$".
var isWordChar = /[\w\$_]/;
// A digit of a hexadecimal literal, either case.
var isHexDigit = /[0-9A-Fa-f]/;
// A character that may be part of an operator token. "/" is left out on
// purpose: a slash needs to be told apart from comments and regexps first.
var isOperatorChar = /[+\-*&%=<>!?|]/;
53
// Wrapper around jsToken that helps maintain parser state (whether
// we are inside of a multi-line comment or continued string, and
// whether the next token could be a regular expression).
//
// inside - "/*" or a quote character while inside such a construct,
//          otherwise false.
// regexp - true when a "/" at the next position may start a regular
//          expression literal.
//
// Returns a state function (source, setState). It reads one token via
// jsToken, calls setState with a fresh state function only when either
// flag actually changed, and returns the token.
function jsTokenState(inside, regexp) {
  return function(source, setState) {
    var newInside = inside;
    // jsToken reports "not inside anything" as null; normalize it to
    // false so it compares equal to the initial state and does not
    // force a needless state swap.
    var token = jsToken(inside, regexp, source, function(c) {newInside = c || false;});
    var newRegexp;
    if (token.type == "comment") {
      // A comment is invisible to the grammar: whether a regexp may
      // follow is still decided by the token before the comment.
      newRegexp = regexp;
    }
    else {
      // RegExp.test yields a real boolean (String.match would give an
      // array or null, which never compares equal to true/false).
      newRegexp = token.type == "operator" || token.type == "keyword c" ||
        /^[\[{}\(,;:]$/.test(token.type);
    }
    if (newRegexp != regexp || newInside != inside)
      setState(jsTokenState(newInside, newRegexp));
    return token;
  };
}
67
// The token reader, intended to be used by the tokenizer from
// tokenize.js (through jsTokenState). Advances the source stream
// over a token, and returns an object containing the type and style
// of that token.
//
// inside    - "/*" while inside a multi-line comment, a quote character
//             while inside a string continued from the previous line,
//             otherwise a falsy value.
// regexp    - truthy when a "/" at this position may start a regular
//             expression literal rather than an operator.
// source    - the character stream to read from.
// setInside - callback used to report the new value for `inside`
//             (called by the string and multi-line comment readers).
function jsToken(inside, regexp, source, setInside) {
  // Read the rest of a hexadecimal literal; the leading "0" has
  // already been consumed by the dispatcher.
  function readHexNumber(){
    source.next(); // skip the 'x'
    source.nextWhileMatches(isHexDigit);
    return {type: "number", style: "js-atom"};
  }

  // Read the rest of a decimal literal (its first digit is already
  // consumed): integer digits, optional fraction, optional exponent.
  function readNumber() {
    source.nextWhileMatches(/[0-9]/);
    if (source.equals(".")){
      source.next();
      source.nextWhileMatches(/[0-9]/);
    }
    if (source.equals("e") || source.equals("E")){
      source.next();
      // The exponent may carry an explicit sign of either kind:
      // both 1e-5 and 1e+5 are valid numeric literals.
      if (source.equals("-") || source.equals("+"))
        source.next();
      source.nextWhileMatches(/[0-9]/);
    }
    return {type: "number", style: "js-atom"};
  }

  // Read a word, look it up in keywords. If not found, it is a
  // variable, otherwise it is a keyword of the type found.
  function readWord() {
    source.nextWhileMatches(isWordChar);
    var word = source.get();
    // Own, enumerable properties only, so that words such as
    // "toString" or "constructor" are not mistaken for keywords.
    var known = keywords.hasOwnProperty(word) && keywords.propertyIsEnumerable(word) && keywords[word];
    return known ? {type: known.type, style: known.style, content: word} :
    {type: "variable", style: "js-variable", content: word};
  }

  // Read a regular expression literal (opening slash already consumed)
  // up to the closing unescaped slash, followed by any flags.
  function readRegexp() {
    nextUntilUnescaped(source, "/");
    source.nextWhileMatches(/[gimy]/); // 'y' is "sticky" option in Mozilla
    return {type: "regexp", style: "js-string"};
  }

  // Multi-line comments are tricky. We want to return the newlines
  // embedded in them as regular newline tokens, and then continue
  // returning a comment token for every line of the comment. So
  // some state has to be saved (inside) to indicate whether we are
  // inside a /* */ sequence.
  //
  // `start` is the character consumed just before this call; when it
  // is "*", a "/" immediately following closes the comment.
  function readMultilineComment(start){
    var newInside = "/*";
    var maybeEnd = (start == "*");
    while (true) {
      if (source.endOfLine())
        break;
      var next = source.next();
      if (next == "/" && maybeEnd){
        newInside = null;
        break;
      }
      maybeEnd = (next == "*");
    }
    setInside(newInside);
    return {type: "comment", style: "js-comment"};
  }

  // Read a run of operator characters as a single operator token.
  function readOperator() {
    source.nextWhileMatches(isOperatorChar);
    return {type: "operator", style: "js-operator"};
  }

  // Read a string up to its closing quote or the end of the line. When
  // the line ends in a backslash the string continues on the next
  // line, which is recorded by passing the quote to setInside.
  function readString(quote) {
    var endBackSlash = nextUntilUnescaped(source, quote);
    setInside(endBackSlash ? quote : null);
    return {type: "string", style: "js-string"};
  }

  // Fetch the next token. Dispatches on first character in the
  // stream, or first two characters when the first is a slash.
  if (inside == "\"" || inside == "'")
    return readString(inside);
  var ch = source.next();
  if (inside == "/*")
    return readMultilineComment(ch);
  else if (ch == "\"" || ch == "'")
    return readString(ch);
  // with punctuation, the type of the token is the symbol itself
  else if (/[\[\]{}\(\),;\:\.]/.test(ch))
    return {type: ch, style: "js-punctuation"};
  else if (ch == "0" && (source.equals("x") || source.equals("X")))
    return readHexNumber();
  else if (/[0-9]/.test(ch))
    return readNumber();
  else if (ch == "/"){
    if (source.equals("*"))
    { source.next(); return readMultilineComment(ch); }
    else if (source.equals("/"))
    // a null terminator never matches, so this runs to the end of the line
    { nextUntilUnescaped(source, null); return {type: "comment", style: "js-comment"};}
    else if (regexp)
      return readRegexp();
    else
      return readOperator();
  }
  else if (isOperatorChar.test(ch))
    return readOperator();
  else
    return readWord();
}
169
170 // The external interface to the tokenizer.
171 return function(source, startState) {
172 return tokenizer(source, startState || jsTokenState(false, true));
173 };
174 })();