Mercurial Hosting > nabble
diff src/nabble/view/web/util/codemirror/js/tokenize.js @ 0:7ecd1a4ef557
add content
author | Franklin Schmidt <fschmidt@gmail.com> |
---|---|
date | Thu, 21 Mar 2019 19:15:52 -0600 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/nabble/view/web/util/codemirror/js/tokenize.js Thu Mar 21 19:15:52 2019 -0600 @@ -0,0 +1,57 @@ +// A framework for simple tokenizers. Takes care of newlines and +// white-space, and of getting the text from the source stream into +// the token object. A state is a function of two arguments -- a +// string stream and a setState function. The second can be used to +// change the tokenizer's state, and can be ignored for stateless +// tokenizers. This function should advance the stream over a token +// and return a string or object containing information about the next +// token, or null to pass and have the (new) state be called to finish +// the token. When a string is given, it is wrapped in a {style, type} +// object. In the resulting object, the characters consumed are stored +// under the content property. Any whitespace following them is also +// automatically consumed, and added to the value property. (Thus, +// content is the actual meaningful part of the token, while value +// contains all the text it spans.) + +function tokenizer(source, state) { + // Newlines are always a separate token. + function isWhiteSpace(ch) { + // The messy regexp is because IE's regexp matcher is of the + // opinion that non-breaking spaces are no whitespace. + return ch != "\n" && /^[\s\u00a0]*$/.test(ch); + } + + var tokenizer = { + state: state, + + take: function(type) { + if (typeof(type) == "string") + type = {style: type, type: type}; + + type.content = (type.content || "") + source.get(); + if (!/\n$/.test(type.content)) + source.nextWhile(isWhiteSpace); + type.value = type.content + source.get(); + return type; + }, + + next: function () { + if (!source.more()) throw StopIteration; + + var type; + if (source.equals("\n")) { + source.next(); + return this.take("whitespace"); + } + + if (source.applies(isWhiteSpace)) + type = "whitespace"; + else + while (!type) + type = this.state(source, function(s) {tokenizer.state = s;}); + + return this.take(type); + } + }; + return tokenizer; +}