From c0ba9e29861ab08ad8a5131705f421e96d634f4b Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Wed, 15 Aug 2012 13:32:37 +0300 Subject: WIP --- lib/test.js | 345 +----------------------------------------------------------- 1 file changed, 2 insertions(+), 343 deletions(-) (limited to 'lib/test.js') diff --git a/lib/test.js b/lib/test.js index b765132f..7972d346 100644 --- a/lib/test.js +++ b/lib/test.js @@ -1,346 +1,5 @@ -var func = function tokenizer($TEXT) { - - var S = { - text : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''), - pos : 0, - tokpos : 0, - line : 0, - tokline : 0, - col : 0, - tokcol : 0, - newline_before : false, - regex_allowed : false, - comments_before : [] - }; - - function peek() { return S.text.charAt(S.pos); }; - - function next(signal_eof, in_string) { - var ch = S.text.charAt(S.pos++); - if (signal_eof && !ch) - throw EX_EOF; - if (ch == "\n") { - S.newline_before = S.newline_before || !in_string; - ++S.line; - S.col = 0; - } else { - ++S.col; - } - return ch; - }; - - function eof() { - return !S.peek(); - }; - - function find(what, signal_eof) { - var pos = S.text.indexOf(what, S.pos); - if (signal_eof && pos == -1) throw EX_EOF; - return pos; - }; - - function start_token() { - S.tokline = S.line; - S.tokcol = S.col; - S.tokpos = S.pos; - }; - - function token(type, value, is_comment) { - S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) || - (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) || - (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value))); - var ret = { - type : type, - value : value, - line : S.tokline, - col : S.tokcol, - pos : S.tokpos, - endpos : S.pos, - nlb : S.newline_before - }; - if (!is_comment) { - ret.comments_before = S.comments_before; - S.comments_before = []; - // make note of any newlines in the comments that came before - for (var i = 0, len = ret.comments_before.length; i < len; i++) { - ret.nlb = ret.nlb || ret.comments_before[i].nlb; - } - } - S.newline_before = false; - return new AST_Token(ret); - }; - - function skip_whitespace() { - while (HOP(WHITESPACE_CHARS, peek())) - next(); - }; - - function read_while(pred) { - var ret = "", ch = peek(), i = 0; - while (ch && pred(ch, i++)) { - ret += next(); - ch = peek(); - } - return ret; - }; - - function parse_error(err) { - js_error(err, S.tokline, S.tokcol, S.tokpos); - }; - - function read_num(prefix) { - var has_e = false, after_e = false, has_x = false, has_dot = prefix == "."; - var num = read_while(function(ch, i){ - if (ch == "x" || ch == "X") { - if (has_x) return false; - return has_x = true; - } - if (!has_x && (ch == "E" || ch == "e")) { - if (has_e) return false; - return has_e = after_e = true; - } - if (ch == "-") { - if (after_e || (i == 0 && !prefix)) return true; - return false; - } - if (ch == "+") return after_e; - after_e = false; - if (ch == ".") { - if (!has_dot && !has_x && !has_e) - return has_dot = true; - return false; - } - return is_alphanumeric_char(ch); - }); - if (prefix) - num = prefix + num; - var valid = parse_js_number(num); - if (!isNaN(valid)) { - return token("num", valid); - } else { - parse_error("Invalid syntax: " + num); - } - }; - - function read_escaped_char(in_string) { - var ch = next(true, in_string); - switch (ch) { - case "n" : return "\n"; - case "r" : return "\r"; - case "t" : return "\t"; - case "b" : return "\b"; - case "v" : return "\u000b"; - case "f" : return "\f"; - case "0" : return "\0"; - case "x" : return String.fromCharCode(hex_bytes(2)); - case "u" : return String.fromCharCode(hex_bytes(4)); - case "\n": return ""; - default : return ch; - } - }; - - function hex_bytes(n) { - var num = 0; - for (; n > 0; --n) { - var digit = parseInt(next(true), 16); - if (isNaN(digit)) - parse_error("Invalid hex-character pattern in string"); - num = (num << 4) | digit; - } - return num; - }; - - function read_string() { - return with_eof_error("Unterminated string constant", function(){ - var quote = next(), ret = ""; - for (;;) { - var ch = next(true); - if (ch == "\\") { - // read OctalEscapeSequence (XXX: deprecated if "strict mode") - // https://github.com/mishoo/UglifyJS/issues/178 - var octal_len = 0, first = null; - ch = read_while(function(ch){ - if (ch >= "0" && ch <= "7") { - if (!first) { - first = ch; - return ++octal_len; - } - else if (first <= "3" && octal_len <= 2) return ++octal_len; - else if (first >= "4" && octal_len <= 1) return ++octal_len; - } - return false; - }); - if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8)); - else ch = read_escaped_char(true); - } - else if (ch == quote) break; - ret += ch; - } - return token("string", ret); - }); - }; - - function read_line_comment() { - next(); - var i = find("\n"), ret; - if (i == -1) { - ret = S.text.substr(S.pos); - S.pos = S.text.length; - } else { - ret = S.text.substring(S.pos, i); - S.pos = i; - } - return token("comment1", ret, true); - }; - - function read_multiline_comment() { - next(); - return with_eof_error("Unterminated multiline comment", function(){ - var i = find("*/", true), - text = S.text.substring(S.pos, i); - S.pos = i + 2; - S.line += text.split("\n").length - 1; - S.newline_before = S.newline_before || text.indexOf("\n") >= 0; - - // https://github.com/mishoo/UglifyJS/issues/#issue/100 - if (/^@cc_on/i.test(text)) { - warn("WARNING: at line " + S.line); - warn("*** Found \"conditional comment\": " + text); - warn("*** UglifyJS DISCARDS ALL COMMENTS. This means your code might no longer work properly in Internet Explorer."); - } - - return token("comment2", text, true); - }); - }; - - function read_name() { - var backslash = false, name = "", ch, escaped = false, hex; - while ((ch = peek()) != null) { - if (!backslash) { - if (ch == "\\") escaped = backslash = true, next(); - else if (is_identifier_char(ch)) name += next(); - else break; - } - else { - if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX"); - ch = read_escaped_char(); - if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier"); - name += ch; - backslash = false; - } - } - if (HOP(KEYWORDS, name) && escaped) { - hex = name.charCodeAt(0).toString(16).toUpperCase(); - name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1); - } - return name; - }; - - function read_regexp(regexp) { - return with_eof_error("Unterminated regular expression", function(){ - var prev_backslash = false, ch, in_class = false; - while ((ch = next(true))) if (prev_backslash) { - regexp += "\\" + ch; - prev_backslash = false; - } else if (ch == "[") { - in_class = true; - regexp += ch; - } else if (ch == "]" && in_class) { - in_class = false; - regexp += ch; - } else if (ch == "/" && !in_class) { - break; - } else if (ch == "\\") { - prev_backslash = true; - } else { - regexp += ch; - } - var mods = read_name(); - return token("regexp", [ regexp, mods ]); - }); - }; - - function read_operator(prefix) { - function grow(op) { - if (!peek()) return op; - var bigger = op + peek(); - if (HOP(OPERATORS, bigger)) { - next(); - return grow(bigger); - } else { - return op; - } - }; - return token("operator", grow(prefix || next())); - }; - - function handle_slash() { - next(); - var regex_allowed = S.regex_allowed; - switch (peek()) { - case "/": - S.comments_before.push(read_line_comment()); - S.regex_allowed = regex_allowed; - return next_token(); - case "*": - S.comments_before.push(read_multiline_comment()); - S.regex_allowed = regex_allowed; - return next_token(); - } - return S.regex_allowed ? read_regexp("") : read_operator("/"); - }; - - function handle_dot() { - next(); - return is_digit(peek()) - ? read_num(".") - : token("punc", "."); - }; - - function read_word() { - var word = read_name(); - return HOP(KEYWORDS_ATOM, word) - ? token("atom", word) - : !HOP(KEYWORDS, word) - ? token("name", word) - : HOP(OPERATORS, word) - ? token("operator", word) - : token("keyword", word); - }; - - function with_eof_error(eof_error, cont) { - try { - return cont(); - } catch(ex) { - if (ex === EX_EOF) parse_error(eof_error); - else throw ex; - } - }; - - function next_token(force_regexp) { - if (force_regexp != null) - return read_regexp(force_regexp); - skip_whitespace(); - start_token(); - var ch = peek(); - if (!ch) return token("eof"); - if (is_digit(ch)) return read_num(); - if (ch == '"' || ch == "'") return read_string(); - if (HOP(PUNC_CHARS, ch)) return token("punc", next()); - if (ch == ".") return handle_dot(); - if (ch == "/") return handle_slash(); - if (HOP(OPERATOR_CHARS, ch)) return read_operator(); - if (ch == "\\" || is_identifier_start(ch)) return read_word(); - parse_error("Unexpected character '" + ch + "'"); - }; - - next_token.context = function(nc) { - if (nc) S = nc; - return S; - }; - - return next_token; - +var func = function TEST () { + }; console.time("parse"); -- cgit v1.2.3