aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorMihai Bazon <mihai@bazon.net>2012-10-11 11:52:05 +0300
committerMihai Bazon <mihai@bazon.net>2012-10-11 11:52:05 +0300
commitf4584af42c8baa0a8f221adf1ff9a85765163e4b (patch)
tree94bc9505cebf8fcf4143715b733266976cb2069a /lib
parent172aa7a93ccd39feceefa058ad008e19eec0a073 (diff)
downloadtracifyjs-f4584af42c8baa0a8f221adf1ff9a85765163e4b.tar.gz
tracifyjs-f4584af42c8baa0a8f221adf1ff9a85765163e4b.zip
Use makePredicate from acorn to generate functions that test whether a
string is a keyword, a reserved word, etc. This speeds up the parser a bit, though not spectacularly; it is still far from acorn.
Diffstat (limited to 'lib')
-rw-r--r--lib/parse.js123
-rw-r--r--lib/utils.js37
2 files changed, 63 insertions, 97 deletions
diff --git a/lib/parse.js b/lib/parse.js
index 65bde146..82fc2fd5 100644
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -44,89 +44,24 @@
"use strict";
-var KEYWORDS = array_to_hash([
- "break",
- "case",
- "catch",
- "const",
- "continue",
- "debugger",
- "default",
- "delete",
- "do",
- "else",
- "finally",
- "for",
- "function",
- "if",
- "in",
- "instanceof",
- "new",
- "return",
- "switch",
- "throw",
- "try",
- "typeof",
- "var",
- "void",
- "while",
- "with"
-]);
+var KEYWORDS = 'break case catch const continue debugger default delete do else finally for function if in instanceof new return switch throw try typeof var void while with';
+var KEYWORDS_ATOM = 'false null true';
+var RESERVED_WORDS = 'abstract boolean byte char class double enum export extends final float goto implements import int interface long native package private protected public short static super synchronized this throws transient volatile'
+ + " " + KEYWORDS_ATOM + " " + KEYWORDS;
+var KEYWORDS_BEFORE_EXPRESSION = 'return new delete throw else case';
-var RESERVED_WORDS = array_to_hash([
- "abstract",
- "boolean",
- "byte",
- "char",
- "class",
- "double",
- "enum",
- "export",
- "extends",
- "final",
- "float",
- "goto",
- "implements",
- "import",
- "int",
- "interface",
- "long",
- "native",
- "package",
- "private",
- "protected",
- "public",
- "short",
- "static",
- "super",
- "synchronized",
- "throws",
- "transient",
- "volatile"
-]);
-
-var KEYWORDS_BEFORE_EXPRESSION = array_to_hash([
- "return",
- "new",
- "delete",
- "throw",
- "else",
- "case"
-]);
-
-var KEYWORDS_ATOM = array_to_hash([
- "false",
- "null",
- "true"
-]);
+KEYWORDS = makePredicate(KEYWORDS);
+RESERVED_WORDS = makePredicate(RESERVED_WORDS);
+KEYWORDS_BEFORE_EXPRESSION = makePredicate(KEYWORDS_BEFORE_EXPRESSION);
+KEYWORDS_ATOM = makePredicate(KEYWORDS_ATOM);
-var OPERATOR_CHARS = array_to_hash(characters("+-*&%=<>!?|~^"));
+var OPERATOR_CHARS = makePredicate(characters("+-*&%=<>!?|~^"));
var RE_HEX_NUMBER = /^0x[0-9a-f]+$/i;
var RE_OCT_NUMBER = /^0[0-7]+$/;
var RE_DEC_NUMBER = /^\d*\.?\d*(?:e[+-]?\d*(?:\d\.?|\.?\d)\d*)?$/i;
-var OPERATORS = array_to_hash([
+var OPERATORS = makePredicate([
"in",
"instanceof",
"typeof",
@@ -173,13 +108,13 @@ var OPERATORS = array_to_hash([
"||"
]);
-var WHITESPACE_CHARS = array_to_hash(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"));
+var WHITESPACE_CHARS = makePredicate(characters(" \u00a0\n\r\t\f\u000b\u200b\u180e\u2000\u2001\u2002\u2003\u2004\u2005\u2006\u2007\u2008\u2009\u200a\u202f\u205f\u3000"));
-var PUNC_BEFORE_EXPRESSION = array_to_hash(characters("[{(,.;:"));
+var PUNC_BEFORE_EXPRESSION = makePredicate(characters("[{(,.;:"));
-var PUNC_CHARS = array_to_hash(characters("[]{}(),;:"));
+var PUNC_CHARS = makePredicate(characters("[]{}(),;:"));
-var REGEXP_MODIFIERS = array_to_hash(characters("gmsiy"));
+var REGEXP_MODIFIERS = makePredicate(characters("gmsiy"));
/* -----[ Tokenizer ]----- */
@@ -214,10 +149,7 @@ function is_unicode_connector_punctuation(ch) {
function is_identifier(name) {
return /^[a-z_$][a-z0-9_$]*$/i.test(name)
- && name != "this"
- && !KEYWORDS_ATOM[name]
- && !RESERVED_WORDS[name]
- && !KEYWORDS[name];
+ && !RESERVED_WORDS(name)
};
function is_identifier_start(ch) {
@@ -318,8 +250,8 @@ function tokenizer($TEXT, filename) {
function token(type, value, is_comment) {
S.regex_allowed = ((type == "operator" && !UNARY_POSTFIX[value]) ||
- (type == "keyword" && KEYWORDS_BEFORE_EXPRESSION[value]) ||
- (type == "punc" && PUNC_BEFORE_EXPRESSION[value]));
+ (type == "keyword" && KEYWORDS_BEFORE_EXPRESSION(value)) ||
+ (type == "punc" && PUNC_BEFORE_EXPRESSION(value)));
var ret = {
type : type,
value : value,
@@ -343,7 +275,7 @@ function tokenizer($TEXT, filename) {
};
function skip_whitespace() {
- while (WHITESPACE_CHARS[peek()])
+ while (WHITESPACE_CHARS(peek()))
next();
};
@@ -493,7 +425,7 @@ function tokenizer($TEXT, filename) {
backslash = false;
}
}
- if (KEYWORDS[name] && escaped) {
+ if (KEYWORDS(name) && escaped) {
hex = name.charCodeAt(0).toString(16).toUpperCase();
name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1);
}
@@ -528,7 +460,7 @@ function tokenizer($TEXT, filename) {
function grow(op) {
if (!peek()) return op;
var bigger = op + peek();
- if (OPERATORS[bigger]) {
+ if (OPERATORS(bigger)) {
next();
return grow(bigger);
} else {
@@ -563,12 +495,9 @@ function tokenizer($TEXT, filename) {
function read_word() {
var word = read_name();
- return KEYWORDS_ATOM[word]
- ? token("atom", word)
- : !KEYWORDS[word]
- ? token("name", word)
- : OPERATORS[word]
- ? token("operator", word)
+ return KEYWORDS_ATOM(word) ? token("atom", word)
+ : !KEYWORDS(word) ? token("name", word)
+ : OPERATORS(word) ? token("operator", word)
: token("keyword", word);
};
@@ -590,10 +519,10 @@ function tokenizer($TEXT, filename) {
if (!ch) return token("eof");
if (is_digit(ch)) return read_num();
if (ch == '"' || ch == "'") return read_string();
- if (PUNC_CHARS[ch]) return token("punc", next());
+ if (PUNC_CHARS(ch)) return token("punc", next());
if (ch == ".") return handle_dot();
if (ch == "/") return handle_slash();
- if (OPERATOR_CHARS[ch]) return read_operator();
+ if (OPERATOR_CHARS(ch)) return read_operator();
if (ch == "\\" || is_identifier_start(ch)) return read_word();
parse_error("Unexpected character '" + ch + "'");
};
diff --git a/lib/utils.js b/lib/utils.js
index 33dc8ff2..d18e62ee 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -214,3 +214,40 @@ function set_intersection(a, b) {
return b.indexOf(el) >= 0;
});
};
+
+// this function is taken from Acorn [1], written by Marijn Haverbeke
+// [1] https://github.com/marijnh/acorn
+function makePredicate(words) { // builds a function(str) that tests whether str is one of the given words
+ if (!(words instanceof Array)) words = words.split(" "); // a non-Array argument is treated as a space-separated word list
+ var f = "", cats = []; // f: generated function body; cats: the words grouped by length
+ out: for (var i = 0; i < words.length; ++i) {
+ for (var j = 0; j < cats.length; ++j)
+ if (cats[j][0].length == words[i].length) { // a category for this word length already exists
+ cats[j].push(words[i]);
+ continue out;
+ }
+ cats.push([words[i]]); // first word of this length: start a new category
+ }
+ function compareTo(arr) { // appends to f the code that tests str against the words in arr
+ if (arr.length == 1) return f += "return str === " + JSON.stringify(arr[0]) + ";"; // single word: direct comparison
+ f += "switch(str){";
+ for (var i = 0; i < arr.length; ++i) f += "case " + JSON.stringify(arr[i]) + ":"; // one case label per word, all falling through to "return true"
+ f += "return true}return false;";
+ }
+ // When there are more than three length categories, an outer
+ // switch first dispatches on the lengths, to save on comparisons.
+ if (cats.length > 3) {
+ cats.sort(function(a, b) {return b.length - a.length;}); // categories holding the most words come first
+ f += "switch(str.length){";
+ for (var i = 0; i < cats.length; ++i) {
+ var cat = cats[i];
+ f += "case " + cat[0].length + ":";
+ compareTo(cat);
+ }
+ f += "}"; // a str whose length matches no case leaves the switch, so the predicate returns undefined
+ // Otherwise, simply generate a flat `switch` statement.
+ } else {
+ compareTo(words);
+ }
+ return new Function("str", f); // compile the generated source into the predicate
+};