Mercurial Hosting > nabble
comparison src/nabble/view/web/util/codemirror/js/tokenizejavascript.js @ 0:7ecd1a4ef557
add content
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 21 Mar 2019 19:15:52 -0600 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7ecd1a4ef557 |
---|---|
/* Tokenizer for JavaScript code */

// tokenizeJavaScript(source, startState) builds a token stream for
// JavaScript text. `source` is a character stream that must provide the
// methods used below: endOfLine(), next(), equals(ch),
// nextWhileMatches(regexp) and get(). The result is whatever the
// external `tokenizer` function (from tokenize.js, not defined in this
// file) returns for that stream and the initial state.
var tokenizeJavaScript = (function() {
  // Advance the stream until the given character (not preceded by a
  // backslash) is encountered, or the end of the line is reached.
  // Returns true only when the line ended directly after an unescaped
  // backslash (i.e. the construct continues on the next line); returns
  // false when the terminator was found or the line simply ran out.
  // Passing null as `end` consumes the rest of the line.
  function nextUntilUnescaped(source, end) {
    var escaped = false;
    while (!source.endOfLine()) {
      var next = source.next();
      if (next === end && !escaped)
        return false;
      escaped = !escaped && next === "\\";
    }
    return escaped;
  }

  // A map of JavaScript's keywords. The a/b/c keyword distinction is
  // very rough, but it gives the parser enough information to parse
  // correct code correctly (we don't care that much how we parse
  // incorrect code). The style information included in these objects
  // is used by the highlighter to pick the correct CSS style for a
  // token.
  var keywords = function(){
    function result(type, style){
      return {type: type, style: "js-" + style};
    }
    // keywords that take a parenthesised expression, and then a
    // statement (if)
    var keywordA = result("keyword a", "keyword");
    // keywords that take just a statement (else)
    var keywordB = result("keyword b", "keyword");
    // keywords that optionally take an expression, and form a
    // statement (return)
    var keywordC = result("keyword c", "keyword");
    var operator = result("operator", "keyword");
    var atom = result("atom", "atom");
    return {
      "if": keywordA, "while": keywordA, "with": keywordA,
      "else": keywordB, "do": keywordB, "try": keywordB, "finally": keywordB,
      "return": keywordC, "break": keywordC, "continue": keywordC, "new": keywordC, "delete": keywordC, "throw": keywordC,
      "in": operator, "typeof": operator, "instanceof": operator,
      "var": result("var", "keyword"), "function": result("function", "keyword"), "catch": result("catch", "keyword"),
      "for": result("for", "keyword"), "switch": result("switch", "keyword"),
      "case": result("case", "keyword"), "default": result("default", "keyword"),
      "true": atom, "false": atom, "null": atom, "undefined": atom, "NaN": atom, "Infinity": atom
    };
  }();

  // Some helper regexps. "~" and "^" are included so that bitwise
  // NOT/XOR are read as operators instead of falling through to
  // readWord and being reported as variables.
  var isOperatorChar = /[+\-*&%=<>!?|~^]/;
  var isHexDigit = /[0-9A-Fa-f]/;
  var isWordChar = /[\w\$_]/;

  // Wrapper around jsToken that helps maintain parser state (whether
  // we are inside of a multi-line comment or string, and whether the
  // next token could be a regular expression). Returns a state
  // function taking (source, setState); it reads one token, calls
  // setState with a fresh state function only when the state actually
  // changed, and returns the token object.
  function jsTokenState(inside, regexp) {
    return function(source, setState) {
      var newInside = inside;
      var type = jsToken(inside, regexp, source, function(c) {newInside = c;});
      var newRegexp;
      if (type.type === "comment") {
        // A comment does not change whether a regexp may follow:
        // in "x = /* c */ /re/" the slash still starts a regexp.
        newRegexp = regexp;
      }
      else {
        // Always a real boolean (RegExp.test, not String.match), so the
        // comparison below detects genuine changes only.
        newRegexp = type.type === "operator" || type.type === "keyword c" ||
          /^[\[{}\(,;:]$/.test(type.type);
      }
      if (newRegexp !== regexp || newInside !== inside)
        setState(jsTokenState(newInside, newRegexp));
      return type;
    };
  }

  // The token reader, intended to be used by the tokenizer from
  // tokenize.js (through jsTokenState). Advances the source stream
  // over a token, and returns an object containing the type and style
  // of that token.
  //   inside    - "/*", "\"" or "'" when the line starts inside an
  //               unfinished comment or string; a falsy value otherwise
  //   regexp    - truthy when a "/" at this point starts a regexp
  //   source    - the character stream to read from
  //   setInside - callback receiving the new `inside` value
  function jsToken(inside, regexp, source, setInside) {
    function readHexNumber(){
      source.next(); // skip the 'x'
      source.nextWhileMatches(isHexDigit);
      return {type: "number", style: "js-atom"};
    }

    function readNumber() {
      source.nextWhileMatches(/[0-9]/);
      if (source.equals(".")){
        source.next();
        source.nextWhileMatches(/[0-9]/);
      }
      if (source.equals("e") || source.equals("E")){
        source.next();
        // The exponent may carry either sign ("1e-5", "1e+5").
        if (source.equals("-") || source.equals("+"))
          source.next();
        source.nextWhileMatches(/[0-9]/);
      }
      return {type: "number", style: "js-atom"};
    }
    // Read a word, look it up in keywords. If not found, it is a
    // variable, otherwise it is a keyword of the type found.
    function readWord() {
      source.nextWhileMatches(isWordChar);
      var word = source.get();
      // Own, enumerable properties only, so that names inherited from
      // Object.prototype are not mistaken for keywords.
      var known = keywords.hasOwnProperty(word) && keywords.propertyIsEnumerable(word) && keywords[word];
      return known ? {type: known.type, style: known.style, content: word} :
        {type: "variable", style: "js-variable", content: word};
    }
    function readRegexp() {
      nextUntilUnescaped(source, "/");
      source.nextWhileMatches(/[gimy]/); // 'y' is "sticky" option in Mozilla
      return {type: "regexp", style: "js-string"};
    }
    // Multi-line comments are tricky. We want to return the newlines
    // embedded in them as regular newline tokens, and then continue
    // returning a comment token for every line of the comment. So
    // some state has to be saved (inside) to indicate whether we are
    // inside a /* */ sequence. `start` is the character consumed just
    // before this call; it matters when a continued line begins
    // with "*/".
    function readMultilineComment(start){
      var newInside = "/*";
      var maybeEnd = (start === "*");
      while (true) {
        if (source.endOfLine())
          break;
        var next = source.next();
        if (next === "/" && maybeEnd){
          newInside = null;
          break;
        }
        maybeEnd = (next === "*");
      }
      setInside(newInside);
      return {type: "comment", style: "js-comment"};
    }
    function readOperator() {
      source.nextWhileMatches(isOperatorChar);
      return {type: "operator", style: "js-operator"};
    }
    // Read (the rest of) a string literal. When the line ends in a
    // backslash the string continues on the next line, so the quote
    // character is stored as the `inside` state.
    function readString(quote) {
      var endBackSlash = nextUntilUnescaped(source, quote);
      setInside(endBackSlash ? quote : null);
      return {type: "string", style: "js-string"};
    }

    // Fetch the next token. Dispatches on first character in the
    // stream, or first two characters when the first is a slash.
    // A continued string must not consume a character first: its very
    // first character may already be the closing quote.
    if (inside === "\"" || inside === "'")
      return readString(inside);
    var ch = source.next();
    if (inside === "/*")
      return readMultilineComment(ch);
    else if (ch === "\"" || ch === "'")
      return readString(ch);
    // with punctuation, the type of the token is the symbol itself
    else if (/[\[\]{}\(\),;\:\.]/.test(ch))
      return {type: ch, style: "js-punctuation"};
    else if (ch === "0" && (source.equals("x") || source.equals("X")))
      return readHexNumber();
    else if (/[0-9]/.test(ch))
      return readNumber();
    else if (ch === "/"){
      if (source.equals("*"))
      { source.next(); return readMultilineComment(ch); }
      else if (source.equals("/"))
      { nextUntilUnescaped(source, null); return {type: "comment", style: "js-comment"};}
      else if (regexp)
        return readRegexp();
      else
        return readOperator();
    }
    else if (isOperatorChar.test(ch))
      return readOperator();
    else
      return readWord();
  }

  // The external interface to the tokenizer. When no start state is
  // given, tokenizing starts outside any comment or string, at a
  // position where a regexp literal is allowed.
  return function(source, startState) {
    return tokenizer(source, startState || jsTokenState(false, true));
  };
})();