diff options
author | Mihai Bazon <mihai@bazon.net> | 2012-10-11 11:52:05 +0300 |
---|---|---|
committer | Mihai Bazon <mihai@bazon.net> | 2012-10-11 11:52:05 +0300 |
commit | f4584af42c8baa0a8f221adf1ff9a85765163e4b (patch) | |
tree | 94bc9505cebf8fcf4143715b733266976cb2069a /lib | |
parent | 172aa7a93ccd39feceefa058ad008e19eec0a073 (diff) | |
download | tracifyjs-f4584af42c8baa0a8f221adf1ff9a85765163e4b.tar.gz tracifyjs-f4584af42c8baa0a8f221adf1ff9a85765163e4b.zip |
Use makePredicate (taken from acorn) to generate functions that test whether a
string is a keyword, a reserved word, etc.
This speeds up the parser a bit, though not spectacularly; it is still far from acorn.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/parse.js | 123 | ||||
-rw-r--r-- | lib/utils.js | 37 |
2 files changed, 63 insertions, 97 deletions
diff --git a/lib/parse.js b/lib/parse.js index 65bde146..82fc2fd5 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -44,89 +44,24 @@ "use strict"; -var KEYWORDS = array_to_hash([ - "break", - "case", - "catch", - "const", - "continue", - "debugger", - "default", - "delete", - "do", - "else", - "finally", - "for", - "function", - "if", - "in", - "instanceof", - "new", - "return", - "switch", - "throw", - "try", - "typeof", - "var", - "void", - "while", - "with" -]); +var KEYWORDS = 'break case catch const continue debugger default delete do else finally for function if in instanceof new return switch throw try typeof var void while with'; +var KEYWORDS_ATOM = 'false null true'; +var RESERVED_WORDS = 'abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized this throws transient volatile' + + " " + KEYWORDS_ATOM + " " + KEYWORDS; +var KEYWORDS_BEFORE_EXPRESSION = 'return new delete throw else case'; -var RESERVED_WORDS = array_to_hash([ - "abstract", - "boolean", - "byte", - "char", - "class", - "double", - "enum", - "export", - "extends", - "final", - "float", - "goto", - "implements", - "import", - "int", - "interface", - "long", - "native", - "package", - "private", - "protected", - "public", - "short", - "static", - "super", - "synchronized", - "throws", - "transient", - "volatile" -]); - -var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([ - "return", - "new", - "delete", - "throw", - "else", - "case" -]); - -var KEYWORDS_ATOM = array_to_hash([ - "false", - "null", - "true" -]); +KEYWORDS = makePredicate(KEYWORDS); +RESERVED_WORDS = makePredicate(RESERVED_WORDS); +KEYWORDS_BEFORE_EXPRESSION = makePredicate(KEYWORDS_BEFORE_EXPRESSION); +KEYWORDS_ATOM = makePredicate(KEYWORDS_ATOM); -var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^")); +var OPERATOR_CHARS = makePredicate(characters("+-*&%=<>!?|~^")); var RE_HEX_NUMBER = 
/^0x[0-9a-f]+$/i; var RE_OCT_NUMBER = /^0[0-7]+$/; var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i; -var OPERATORS = array_to_hash([ +var OPERATORS = makePredicate([ "in", "instanceof", "typeof", @@ -173,13 +108,13 @@ var OPERATORS = array_to_hash([ "||" ]); -var WHITESPACE_CHARS = array_to_hash(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000")); +var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000")); -var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{(,.;:")); +var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:")); -var PUNC_CHARS = array_to_hash(characters("[]{}(),;:")); +var PUNC_CHARS = makePredicate(characters("[]{}(),;:")); -var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy")); +var REGEXP_MODIFIERS = makePredicate(characters("gmsiy")); /* -----[ Tokenizer ]----- */ @@ -214,10 +149,7 @@ function is_unicode_connector_punctuation(ch) { function is_identifier(name) { return /^[a-z_$][a-z0-9_$]*$/i.test(name) - && name != "this" - && !KEYWORDS_ATOM[name] - && !RESERVED_WORDS[name] - && !KEYWORDS[name]; + && !RESERVED_WORDS(name) }; function is_identifier_start(ch) { @@ -318,8 +250,8 @@ function tokenizer($TEXT, filename) { function token(type, value, is_comment) { S.regex_allowed = ((type == "operator" && !UNARY_POSTFIX[value]) || - (type == "keyword" && KEYWORDS_BEFORE_EXPRESSION[value]) || - (type == "punc" && PUNC_BEFORE_EXPRESSION[value])); + (type == "keyword" && KEYWORDS_BEFORE_EXPRESSION(value)) || + (type == "punc" && PUNC_BEFORE_EXPRESSION(value))); var ret = { type : type, value : value, @@ -343,7 +275,7 @@ function tokenizer($TEXT, filename) { }; function skip_whitespace() { - while (WHITESPACE_CHARS[peek()]) + while (WHITESPACE_CHARS(peek())) next(); }; @@ -493,7 +425,7 @@ function tokenizer($TEXT, filename) { 
backslash = false; } } - if (KEYWORDS[name] && escaped) { + if (KEYWORDS(name) && escaped) { hex = name.charCodeAt(0).toString(16).toUpperCase(); name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1); } @@ -528,7 +460,7 @@ function tokenizer($TEXT, filename) { function grow(op) { if (!peek()) return op; var bigger = op + peek(); - if (OPERATORS[bigger]) { + if (OPERATORS(bigger)) { next(); return grow(bigger); } else { @@ -563,12 +495,9 @@ function tokenizer($TEXT, filename) { function read_word() { var word = read_name(); - return KEYWORDS_ATOM[word] - ? token("atom", word) - : !KEYWORDS[word] - ? token("name", word) - : OPERATORS[word] - ? token("operator", word) + return KEYWORDS_ATOM(word) ? token("atom", word) + : !KEYWORDS(word) ? token("name", word) + : OPERATORS(word) ? token("operator", word) : token("keyword", word); }; @@ -590,10 +519,10 @@ function tokenizer($TEXT, filename) { if (!ch) return token("eof"); if (is_digit(ch)) return read_num(); if (ch == '"' || ch == "'") return read_string(); - if (PUNC_CHARS[ch]) return token("punc", next()); + if (PUNC_CHARS(ch)) return token("punc", next()); if (ch == ".") return handle_dot(); if (ch == "/") return handle_slash(); - if (OPERATOR_CHARS[ch]) return read_operator(); + if (OPERATOR_CHARS(ch)) return read_operator(); if (ch == "\\" || is_identifier_start(ch)) return read_word(); parse_error("Unexpected character '" + ch + "'"); }; diff --git a/lib/utils.js b/lib/utils.js index 33dc8ff2..d18e62ee 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -214,3 +214,40 @@ function set_intersection(a, b) { return b.indexOf(el) >= 0; }); }; + +// this function is taken from Acorn [1], written by Marijn Haverbeke +// [1] https://github.com/marijnh/acorn +function makePredicate(words) { + if (!(words instanceof Array)) words = words.split(" "); + var f = "", cats = []; + out: for (var i = 0; i < words.length; ++i) { + for (var j = 0; j < cats.length; ++j) + if (cats[j][0].length == words[i].length) { + 
cats[j].push(words[i]); + continue out; + } + cats.push([words[i]]); + } + function compareTo(arr) { + if (arr.length == 1) return f += "return str === " + JSON.stringify(arr[0]) + ";"; + f += "switch(str){"; + for (var i = 0; i < arr.length; ++i) f += "case " + JSON.stringify(arr[i]) + ":"; + f += "return true}return false;"; + } + // When there are more than three length categories, an outer + // switch first dispatches on the lengths, to save on comparisons. + if (cats.length > 3) { + cats.sort(function(a, b) {return b.length - a.length;}); + f += "switch(str.length){"; + for (var i = 0; i < cats.length; ++i) { + var cat = cats[i]; + f += "case " + cat[0].length + ":"; + compareTo(cat); + } + f += "}"; + // Otherwise, simply generate a flat `switch` statement. + } else { + compareTo(words); + } + return new Function("str", f); +}; |