about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rw-r--r-- lib/parse.js 331
-rw-r--r-- lib/utils.js 13
2 files changed, 158 insertions, 186 deletions
diff --git a/lib/parse.js b/lib/parse.js
index 82fc2fd5..074e118a 100644
--- a/lib/parse.js
+++ b/lib/parse.js
@@ -126,17 +126,18 @@ var UNICODE = {
connector_punctuation: new RegExp("[\\u005F\\u203F\\u2040\\u2054\\uFE33\\uFE34\\uFE4D-\\uFE4F\\uFF3F]")
};
-function is_letter(ch) {
- return UNICODE.letter.test(ch);
+function is_letter(code) {
+ return (code >= 97 && code <= 122)
+ || (code >= 65 && code <= 90)
+ || (code >= 0xaa && UNICODE.letter.test(String.fromCharCode(code)));
};
-function is_digit(ch) {
- ch = ch.charCodeAt(0);
- return ch >= 48 && ch <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
+function is_digit(code) {
+ return code >= 48 && code <= 57; //XXX: find out if "UnicodeDigit" means something else than 0..9
};
-function is_alphanumeric_char(ch) {
- return is_digit(ch) || is_letter(ch);
+function is_alphanumeric_char(code) {
+ return is_digit(code) || is_letter(code);
};
function is_unicode_combining_mark(ch) {
@@ -148,21 +149,21 @@ function is_unicode_connector_punctuation(ch) {
};
function is_identifier(name) {
- return /^[a-z_$][a-z0-9_$]*$/i.test(name)
- && !RESERVED_WORDS(name)
+ return /^[a-z_$][a-z0-9_$]*$/i.test(name) && !RESERVED_WORDS(name);
};
-function is_identifier_start(ch) {
- return ch == "$" || ch == "_" || is_letter(ch);
+function is_identifier_start(code) {
+ return code == 36 || code == 95 || is_letter(code);
};
function is_identifier_char(ch) {
- return is_identifier_start(ch)
+ var code = ch.charCodeAt(0);
+ return is_identifier_start(code)
+ || is_digit(code)
+ || code == 8204 // \u200c: zero-width non-joiner <ZWNJ>
+ || code == 8205 // \u200d: zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
|| is_unicode_combining_mark(ch)
- || is_digit(ch)
|| is_unicode_connector_punctuation(ch)
- || ch == "\u200c" // zero-width non-joiner <ZWNJ>
- || ch == "\u200d" // zero-width joiner <ZWJ> (in my ECMA-262 PDF, this is also 200c)
;
};
@@ -280,11 +281,9 @@ function tokenizer($TEXT, filename) {
};
function read_while(pred) {
- var ret = "", ch = peek(), i = 0;
- while (ch && pred(ch, i++)) {
+ var ret = "", ch, i = 0;
+ while ((ch = peek()) && pred(ch, i++))
ret += next();
- ch = peek();
- }
return ret;
};
@@ -295,29 +294,22 @@ function tokenizer($TEXT, filename) {
function read_num(prefix) {
var has_e = false, after_e = false, has_x = false, has_dot = prefix == ".";
var num = read_while(function(ch, i){
- if (ch == "x" || ch == "X") {
- if (has_x) return false;
- return has_x = true;
- }
- if (!has_x && (ch == "E" || ch == "e")) {
- if (has_e) return false;
- return has_e = after_e = true;
- }
- if (ch == "-") {
- if (after_e || (i == 0 && !prefix)) return true;
- return false;
+ var code = ch.charCodeAt(0);
+ switch (code) {
+ case 120: case 88: // xX
+ return has_x ? false : (has_x = true);
+ case 101: case 69: // eE
+ return has_x ? true : has_e ? false : (has_e = after_e = true);
+ case 45: // -
+ return after_e || (i == 0 && !prefix);
+ case 43: // +
+ return after_e;
+ case (after_e = false, 46): // .
+ return (!has_dot && !has_x && !has_e) ? (has_dot = true) : false;
}
- if (ch == "+") return after_e;
- after_e = false;
- if (ch == ".") {
- if (!has_dot && !has_x && !has_e)
- return has_dot = true;
- return false;
- }
- return is_alphanumeric_char(ch);
+ return is_alphanumeric_char(code);
});
- if (prefix)
- num = prefix + num;
+ if (prefix) num = prefix + num;
var valid = parse_js_number(num);
if (!isNaN(valid)) {
return token("num", valid);
@@ -328,17 +320,17 @@ function tokenizer($TEXT, filename) {
function read_escaped_char(in_string) {
var ch = next(true, in_string);
- switch (ch) {
- case "n" : return "\n";
- case "r" : return "\r";
- case "t" : return "\t";
- case "b" : return "\b";
- case "v" : return "\u000b";
- case "f" : return "\f";
- case "0" : return "\0";
- case "x" : return String.fromCharCode(hex_bytes(2));
- case "u" : return String.fromCharCode(hex_bytes(4));
- case "\n": return "";
+ switch (ch.charCodeAt(0)) {
+ case 110 : return "\n";
+ case 114 : return "\r";
+ case 116 : return "\t";
+ case 98 : return "\b";
+ case 118 : return "\u000b"; // \v
+ case 102 : return "\f";
+ case 48 : return "\0";
+ case 120 : return String.fromCharCode(hex_bytes(2)); // \x
+ case 117 : return String.fromCharCode(hex_bytes(4)); // \u
+ case 10 : return ""; // newline
default : return ch;
}
};
@@ -354,35 +346,33 @@ function tokenizer($TEXT, filename) {
return num;
};
- function read_string() {
- return with_eof_error("Unterminated string constant", function(){
- var quote = next(), ret = "";
- for (;;) {
- var ch = next(true);
- if (ch == "\\") {
- // read OctalEscapeSequence (XXX: deprecated if "strict mode")
- // https://github.com/mishoo/UglifyJS/issues/178
- var octal_len = 0, first = null;
- ch = read_while(function(ch){
- if (ch >= "0" && ch <= "7") {
- if (!first) {
- first = ch;
- return ++octal_len;
- }
- else if (first <= "3" && octal_len <= 2) return ++octal_len;
- else if (first >= "4" && octal_len <= 1) return ++octal_len;
+ var read_string = with_eof_error("Unterminated string constant", function(){
+ var quote = next(), ret = "";
+ for (;;) {
+ var ch = next(true);
+ if (ch == "\\") {
+ // read OctalEscapeSequence (XXX: deprecated if "strict mode")
+ // https://github.com/mishoo/UglifyJS/issues/178
+ var octal_len = 0, first = null;
+ ch = read_while(function(ch){
+ if (ch >= "0" && ch <= "7") {
+ if (!first) {
+ first = ch;
+ return ++octal_len;
}
- return false;
- });
- if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
- else ch = read_escaped_char(true);
- }
- else if (ch == quote) break;
- ret += ch;
+ else if (first <= "3" && octal_len <= 2) return ++octal_len;
+ else if (first >= "4" && octal_len <= 1) return ++octal_len;
+ }
+ return false;
+ });
+ if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8));
+ else ch = read_escaped_char(true);
}
- return token("string", ret);
- });
- };
+ else if (ch == quote) break;
+ ret += ch;
+ }
+ return token("string", ret);
+ });
function read_line_comment() {
next();
@@ -397,17 +387,20 @@ function tokenizer($TEXT, filename) {
return token("comment1", ret, true);
};
- function read_multiline_comment() {
+ var read_multiline_comment = with_eof_error("Unterminated multiline comment", function(){
next();
- return with_eof_error("Unterminated multiline comment", function(){
- var i = find("*/", true),
- text = S.text.substring(S.pos, i);
- S.pos = i + 2;
- S.line += text.split("\n").length - 1;
- S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
- return token("comment2", text, true);
- });
- };
+ var i = find("*/", true);
+ var text = S.text.substring(S.pos, i);
+ var a = text.split("\n"), n = a.length;
+ // update stream position
+ S.pos = i + 2;
+ S.line += n - 1;
+ if (n > 1) S.col = a[n - 1].length;
+ else S.col += a[n - 1].length;
+ S.col += 2;
+ S.newline_before = S.newline_before || text.indexOf("\n") >= 0;
+ return token("comment2", text, true);
+ });
function read_name() {
var backslash = false, name = "", ch, escaped = false, hex;
@@ -432,29 +425,27 @@ function tokenizer($TEXT, filename) {
return name;
};
- function read_regexp(regexp) {
- return with_eof_error("Unterminated regular expression", function(){
- var prev_backslash = false, ch, in_class = false;
- while ((ch = next(true))) if (prev_backslash) {
- regexp += "\\" + ch;
- prev_backslash = false;
- } else if (ch == "[") {
- in_class = true;
- regexp += ch;
- } else if (ch == "]" && in_class) {
- in_class = false;
- regexp += ch;
- } else if (ch == "/" && !in_class) {
- break;
- } else if (ch == "\\") {
- prev_backslash = true;
- } else {
- regexp += ch;
- }
- var mods = read_name();
- return token("regexp", new RegExp(regexp, mods));
- });
- };
+ var read_regexp = with_eof_error("Unterminated regular expression", function(regexp){
+ var prev_backslash = false, ch, in_class = false;
+ while ((ch = next(true))) if (prev_backslash) {
+ regexp += "\\" + ch;
+ prev_backslash = false;
+ } else if (ch == "[") {
+ in_class = true;
+ regexp += ch;
+ } else if (ch == "]" && in_class) {
+ in_class = false;
+ regexp += ch;
+ } else if (ch == "/" && !in_class) {
+ break;
+ } else if (ch == "\\") {
+ prev_backslash = true;
+ } else {
+ regexp += ch;
+ }
+ var mods = read_name();
+ return token("regexp", new RegExp(regexp, mods));
+ });
function read_operator(prefix) {
function grow(op) {
@@ -488,7 +479,7 @@ function tokenizer($TEXT, filename) {
function handle_dot() {
next();
- return is_digit(peek())
+ return is_digit(peek().charCodeAt(0))
? read_num(".")
: token("punc", ".");
};
@@ -502,12 +493,14 @@ function tokenizer($TEXT, filename) {
};
function with_eof_error(eof_error, cont) {
- try {
- return cont();
- } catch(ex) {
- if (ex === EX_EOF) parse_error(eof_error);
- else throw ex;
- }
+ return function(x) {
+ try {
+ return cont(x);
+ } catch(ex) {
+ if (ex === EX_EOF) parse_error(eof_error);
+ else throw ex;
+ }
+ };
};
function next_token(force_regexp) {
@@ -517,13 +510,16 @@ function tokenizer($TEXT, filename) {
start_token();
var ch = peek();
if (!ch) return token("eof");
- if (is_digit(ch)) return read_num();
- if (ch == '"' || ch == "'") return read_string();
+ var code = ch.charCodeAt(0);
+ switch (code) {
+ case 34: case 39: return read_string();
+ case 46: return handle_dot();
+ case 47: return handle_slash();
+ }
+ if (is_digit(code)) return read_num();
if (PUNC_CHARS(ch)) return token("punc", next());
- if (ch == ".") return handle_dot();
- if (ch == "/") return handle_slash();
if (OPERATOR_CHARS(ch)) return read_operator();
- if (ch == "\\" || is_identifier_start(ch)) return read_word();
+ if (code == 92 || is_identifier_start(code)) return read_word();
parse_error("Unexpected character '" + ch + "'");
};
@@ -538,7 +534,7 @@ function tokenizer($TEXT, filename) {
/* -----[ Parser (constants) ]----- */
-var UNARY_PREFIX = array_to_hash([
+var UNARY_PREFIX = makePredicate([
"typeof",
"void",
"delete",
@@ -550,19 +546,9 @@ var UNARY_PREFIX = array_to_hash([
"+"
]);
-var UNARY_POSTFIX = array_to_hash([ "--", "++" ]);
+var UNARY_POSTFIX = makePredicate([ "--", "++" ]);
-var ASSIGNMENT = (function(a, ret, i){
- while (i < a.length) {
- ret[a[i]] = a[i];
- i++;
- }
- return ret;
-})(
- [ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ],
- {},
- 0
-);
+var ASSIGNMENT = makePredicate([ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ]);
var PRECEDENCE = (function(a, ret){
for (var i = 0, n = 1; i < a.length; ++i, ++n) {
@@ -680,7 +666,7 @@ function parse($TEXT, options) {
function parenthesised() {
expect("(");
- var exp = expression();
+ var exp = expression(true);
expect(")");
return exp;
};
@@ -688,7 +674,7 @@ function parse($TEXT, options) {
function embed_tokens(parser) {
return function() {
var start = S.token;
- var expr = parser.apply(this, arguments);
+ var expr = parser();
var end = prev();
expr.start = start;
expr.end = end;
@@ -697,6 +683,7 @@ function parse($TEXT, options) {
};
var statement = embed_tokens(function() {
+ var tmp;
if (is("operator", "/") || is("operator", "/=")) {
S.peeked = null;
S.token = S.input(S.token.value.substr(1)); // force regexp
@@ -738,7 +725,7 @@ function parse($TEXT, options) {
}
case "keyword":
- switch (prog1(S.token.value, next)) {
+ switch (tmp = S.token.value, next(), tmp) {
case "break":
return break_cont(AST_Break);
@@ -752,7 +739,7 @@ function parse($TEXT, options) {
case "do":
return new AST_Do({
body : in_loop(statement),
- condition : (expect_token("keyword", "while"), prog1(parenthesised, semicolon))
+ condition : (expect_token("keyword", "while"), tmp = parenthesised(), semicolon(), tmp)
});
case "while":
@@ -778,30 +765,30 @@ function parse($TEXT, options) {
? (next(), null)
: can_insert_semicolon()
? null
- : prog1(expression, semicolon) )
+ : (tmp = expression(true), semicolon(), tmp) )
});
case "switch":
return new AST_Switch({
expression : parenthesised(),
- body : switch_body_()
+ body : in_loop(switch_body_)
});
case "throw":
if (S.token.nlb)
croak("Illegal newline after 'throw'");
return new AST_Throw({
- value: prog1(expression, semicolon)
+ value: (tmp = expression(true), semicolon(), tmp)
});
case "try":
return try_();
case "var":
- return prog1(var_, semicolon);
+ return tmp = var_(), semicolon(), tmp;
case "const":
- return prog1(const_, semicolon);
+ return tmp = const_(), semicolon(), tmp;
case "with":
return new AST_With({
@@ -831,8 +818,8 @@ function parse($TEXT, options) {
return new AST_LabeledStatement({ body: stat, label: label });
};
- function simple_statement() {
- return new AST_SimpleStatement({ body: prog1(expression, semicolon) });
+ function simple_statement(tmp) {
+ return new AST_SimpleStatement({ body: (tmp = expression(true), semicolon(), tmp) });
};
function break_cont(type) {
@@ -869,9 +856,9 @@ function parse($TEXT, options) {
function regular_for(init) {
expect(";");
- var test = is("punc", ";") ? null : expression();
+ var test = is("punc", ";") ? null : expression(true);
expect(";");
- var step = is("punc", ")") ? null : expression();
+ var step = is("punc", ")") ? null : expression(true);
expect(")");
return new AST_For({
init : init,
@@ -883,7 +870,7 @@ function parse($TEXT, options) {
function for_in(init) {
var lhs = init instanceof AST_Var ? init.definitions[0].name : null;
- var obj = expression();
+ var obj = expression(true);
expect(")");
return new AST_ForIn({
init : init,
@@ -911,10 +898,8 @@ function parse($TEXT, options) {
next();
return a;
})(true, []),
- body: embed_tokens(function(){
+ body: (function(loop, labels){
++S.in_function;
- var loop = S.in_loop;
- var labels = S.labels;
S.in_directives = true;
S.in_loop = 0;
S.labels = [];
@@ -923,7 +908,7 @@ function parse($TEXT, options) {
S.in_loop = loop;
S.labels = labels;
return a;
- })()
+ })(S.in_loop, S.labels)
});
};
@@ -951,17 +936,17 @@ function parse($TEXT, options) {
return a;
};
- var switch_body_ = curry(in_loop, function(){
+ function switch_body_() {
expect("{");
- var a = [], cur = null, branch = null;
+ var a = [], cur = null, branch = null, tmp;
while (!is("punc", "}")) {
if (is("eof")) unexpected();
if (is("keyword", "case")) {
if (branch) branch.end = prev();
cur = [];
branch = new AST_Case({
- start : prog1(S.token, next),
- expression : expression(),
+ start : (tmp = S.token, next(), tmp),
+ expression : expression(true),
body : cur
});
a.push(branch);
@@ -971,9 +956,9 @@ function parse($TEXT, options) {
if (branch) branch.end = prev();
cur = [];
branch = new AST_Default({
- start : prog1(S.token, next, curry(expect, ":")),
+ start : (tmp = S.token, next(), expect(":"), tmp),
body : cur
- })
+ });
a.push(branch);
}
else {
@@ -984,7 +969,7 @@ function parse($TEXT, options) {
if (branch) branch.end = prev();
next();
return a;
- });
+ };
function try_() {
var body = block_(), bcatch = null, bfinally = null;
@@ -1110,7 +1095,7 @@ function parse($TEXT, options) {
switch (start.value) {
case "(":
next();
- var ex = expression();
+ var ex = expression(true);
ex.start = start;
ex.end = S.token;
expect(")");
@@ -1201,6 +1186,7 @@ function parse($TEXT, options) {
});
function as_property_name() {
+ var tmp;
switch (S.token.type) {
case "num":
case "string":
@@ -1208,19 +1194,20 @@ function parse($TEXT, options) {
case "operator":
case "keyword":
case "atom":
- return prog1(S.token.value, next);
+ return (tmp = S.token.value, next(), tmp);
default:
unexpected();
}
};
function as_name() {
+ var tmp;
switch (S.token.type) {
case "name":
case "operator":
case "keyword":
case "atom":
- return prog1(S.token.value, next);
+ return (tmp = S.token.value, next(), tmp);
default:
unexpected();
}
@@ -1254,7 +1241,7 @@ function parse($TEXT, options) {
}
if (is("punc", "[")) {
next();
- var prop = expression();
+ var prop = expression(true);
expect("]");
return subscripts(new AST_Sub({
start : start,
@@ -1276,17 +1263,17 @@ function parse($TEXT, options) {
};
var maybe_unary = function(allow_calls) {
- var start = S.token;
- if (is("operator") && UNARY_PREFIX[S.token.value]) {
+ var start = S.token, tmp;
+ if (is("operator") && UNARY_PREFIX(S.token.value)) {
var ex = make_unary(AST_UnaryPrefix,
- prog1(S.token.value, next),
+ (tmp = S.token.value, next(), tmp),
maybe_unary(allow_calls));
ex.start = start;
ex.end = prev();
return ex;
}
var val = expr_atom(allow_calls);
- while (is("operator") && UNARY_POSTFIX[S.token.value] && !S.token.nlb) {
+ while (is("operator") && UNARY_POSTFIX(S.token.value) && !S.token.nlb) {
val = make_unary(AST_UnaryPostfix, S.token.value, val);
val.start = start;
val.end = S.token;
@@ -1357,13 +1344,13 @@ function parse($TEXT, options) {
var maybe_assign = function(no_in) {
var start = S.token;
var left = maybe_conditional(no_in), val = S.token.value;
- if (is("operator") && ASSIGNMENT[val]) {
+ if (is("operator") && ASSIGNMENT(val)) {
if (is_assignable(left)) {
next();
return new AST_Assign({
start : start,
left : left,
- operator : ASSIGNMENT[val],
+ operator : val,
right : maybe_assign(no_in),
end : peek()
});
@@ -1374,8 +1361,6 @@ function parse($TEXT, options) {
};
var expression = function(commas, no_in) {
- if (arguments.length == 0)
- commas = true;
var start = S.token;
var expr = maybe_assign(no_in);
if (commas && is("punc", ",")) {
diff --git a/lib/utils.js b/lib/utils.js
index d18e62ee..79039665 100644
--- a/lib/utils.js
+++ b/lib/utils.js
@@ -43,19 +43,6 @@
"use strict";
-function curry(f) {
- var args = slice(arguments, 1);
- return function() { return f.apply(this, args.concat(slice(arguments))); };
-};
-
-function prog1(ret) {
- if (ret instanceof Function)
- ret = ret();
- for (var i = 1, n = arguments.length; --n > 0; ++i)
- arguments[i]();
- return ret;
-};
-
function array_to_hash(a) {
var ret = Object.create(null);
for (var i = 0; i < a.length; ++i)