annotate src/nabble/view/web/util/codemirror/js/tokenize.js @ 0:7ecd1a4ef557

add content
author Franklin Schmidt <fschmidt@gmail.com>
date Thu, 21 Mar 2019 19:15:52 -0600
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
1 // A framework for simple tokenizers. Takes care of newlines and
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
2 // white-space, and of getting the text from the source stream into
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
3 // the token object. A state is a function of two arguments -- a
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
4 // string stream and a setState function. The second can be used to
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
5 // change the tokenizer's state, and can be ignored for stateless
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
6 // tokenizers. This function should advance the stream over a token
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
7 // and return a string or object containing information about the next
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
8 // token, or null to pass and have the (new) state be called to finish
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
9 // the token. When a string is given, it is wrapped in a {style, type}
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
10 // object. In the resulting object, the characters consumed are stored
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
11 // under the content property. Any whitespace following them is also
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
12 // automatically consumed, and added to the value property. (Thus,
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
13 // content is the actual meaningful part of the token, while value
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
14 // contains all the text it spans.)
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
15
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
16 function tokenizer(source, state) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
17 // Newlines are always a separate token.
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
18 function isWhiteSpace(ch) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
19 // The messy regexp is because IE's regexp matcher is of the
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
20 // opinion that non-breaking spaces are no whitespace.
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
21 return ch != "\n" && /^[\s\u00a0]*$/.test(ch);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
22 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
23
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
24 var tokenizer = {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
25 state: state,
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
26
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
27 take: function(type) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
28 if (typeof(type) == "string")
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
29 type = {style: type, type: type};
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
30
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
31 type.content = (type.content || "") + source.get();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
32 if (!/\n$/.test(type.content))
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
33 source.nextWhile(isWhiteSpace);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
34 type.value = type.content + source.get();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
35 return type;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
36 },
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
37
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
38 next: function () {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
39 if (!source.more()) throw StopIteration;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
40
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
41 var type;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
42 if (source.equals("\n")) {
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
43 source.next();
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
44 return this.take("whitespace");
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
45 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
46
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
47 if (source.applies(isWhiteSpace))
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
48 type = "whitespace";
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
49 else
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
50 while (!type)
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
51 type = this.state(source, function(s) {tokenizer.state = s;});
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
52
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
53 return this.take(type);
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
54 }
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
55 };
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
56 return tokenizer;
7ecd1a4ef557 add content
Franklin Schmidt <fschmidt@gmail.com>
parents:
diff changeset
57 }