diff options
author | Mihai Bazon <mihai@bazon.net> | 2012-05-27 17:25:31 +0300 |
---|---|---|
committer | Mihai Bazon <mihai@bazon.net> | 2012-06-03 23:10:31 +0300 |
commit | 861e26a66639ca61eab2af53de45760370c4d534 (patch) | |
tree | c35c94fe6978dfdff6887a9e5ea03703f2e4bed4 | |
parent | 22bb5e8306687fb6324f094d208b564c9e874f77 (diff) | |
download | tracifyjs-861e26a66639ca61eab2af53de45760370c4d534.tar.gz tracifyjs-861e26a66639ca61eab2af53de45760370c4d534.zip |
WIP
-rw-r--r-- | lib/ast.js | 343 | ||||
-rwxr-xr-x | lib/node.js | 41 | ||||
-rw-r--r-- | lib/output.js | 134 | ||||
-rw-r--r-- | lib/parse.js | 223 | ||||
-rw-r--r-- | lib/test.js | 962 | ||||
-rw-r--r-- | lib/utils.js | 26 |
6 files changed, 898 insertions, 831 deletions
@@ -18,42 +18,75 @@ function DEFNODE(type, props, methods, base) { if (type) { ctor.prototype.TYPE = ctor.TYPE = type; } - if (methods) for (var i in methods) if (HOP(methods, i)) { + if (methods) for (i in methods) if (HOP(methods, i)) { ctor.prototype[i] = methods[i]; } return ctor; }; -var AST_Token = DEFNODE("Token", "type value line col pos endpos nlb", { +var AST_Token = DEFNODE("Token", "type value line col pos endpos nlb comments_before", { }, null); var AST_Node = DEFNODE("Node", "start end", { - + renew: function(args) { + var ctor = this.CTOR, props = ctor.props; + for (var i in props) if (!HOP(args, i)) args[i] = this[i]; + return new ctor(args); + }, + walk: function(w) { + w._visit(this); + } }, null); var AST_Directive = DEFNODE("Directive", "value", { - + print: function(output) { + output.string(this.value); + } }); var AST_Debugger = DEFNODE("Debugger", null, { - + print: function(output) { + output.print("debugger"); + } }); var AST_Parenthesized = DEFNODE("Parenthesized", "expression", { - documentation: "Represents an expression which is always parenthesized. Used for the \ -conditions in IF/WHILE." + $documentation: "Represents an expression which is always parenthesized. Used for the \ +conditions in IF/WHILE/DO and expression in SWITCH/WITH.", + walk: function(w) { + w._visit(this, function(){ + this.expression.walk(w); + }); + } }); var AST_Bracketed = DEFNODE("Bracketed", "body", { - documentation: "Represents a block of statements that are always included in brackets. \ -Used for bodies of FUNCTION/TRY/CATCH/THROW/SWITCH." + $documentation: "Represents a block of statements that are always included in brackets. \ +Used for bodies of FUNCTION/TRY/CATCH/THROW/SWITCH.", + walk: function(w) { + w._visit(this, function(){ + this.body.forEach(function(stat){ + stat.walk(w); + }); + }); + } }); /* -----[ loops ]----- */ var AST_LabeledStatement = DEFNODE("LabeledStatement", "label body", { - + walk: function(w) { + w._visit(this, function(){ + if (this.label) this.label.walk(w); + if (this.body) { + if (this.body instanceof Array) + AST_Bracketed.prototype.walk.call(this, w); + else + this.body.walk(w); + } + }); + } }); var AST_Statement = DEFNODE("Statement", null, { @@ -61,39 +94,64 @@ var AST_Statement = DEFNODE("Statement", null, { }, AST_LabeledStatement); var AST_Do = DEFNODE("Do", "condition", { - + walk: function(w) { + w._visit(this, function(){ + this.condition.walk(w); + AST_LabeledStatement.prototype.walk.call(this, w); + }); + } }, AST_LabeledStatement); var AST_While = DEFNODE("While", "condition", { - + walk: function(w) { + w._visit(this, function(){ + this.condition.walk(w); + AST_LabeledStatement.prototype.walk.call(this, w); + }); + } }, AST_LabeledStatement); var AST_For = DEFNODE("For", "init condition step", { - + walk: function(w) { + w._visit(this, function(){ + if (this.init) this.init.walk(w); + if (this.condition) this.condition.walk(w); + if (this.step) this.step.walk(w); + AST_LabeledStatement.prototype.walk.call(this, w); + }); + } }, AST_LabeledStatement); var AST_ForIn = DEFNODE("ForIn", "init name object", { - + walk: function(w) { + w._visit(this, function(){ + if (this.init) this.init.walk(w); + this.object.walk(w); + AST_LabeledStatement.prototype.walk.call(this, w); + }); + } }, AST_LabeledStatement); var AST_With = DEFNODE("With", "expression body", { - -}); - -var AST_LoopControl = DEFNODE("LoopControl", "label", { - + walk: function(w) { + w._visit(this, function(){ + this.expression.walk(w); + AST_LabeledStatement.prototype.walk.call(this, w); + }); + } }); -var AST_Break = DEFNODE("Break", null, { - -}, AST_LoopControl); -var AST_Continue = DEFNODE("Continue", null, { - -}, AST_LoopControl); /* -----[ functions ]----- */ var AST_Scope = DEFNODE("Scope", "identifiers body", { - + walk: function(w) { + w._visit(this, function(){ + if (this.identifiers) this.identifiers.forEach(function(el){ + el.walk(w); + }); + AST_LabeledStatement.prototype.walk.call(this, w); + }); + } }); var AST_Toplevel = DEFNODE("Toplevel", null, { @@ -101,37 +159,84 @@ var AST_Toplevel = DEFNODE("Toplevel", null, { }, AST_Scope); var AST_Lambda = DEFNODE("Lambda", "name argnames", { - + walk: function(w) { + w._visit(this, function(){ + if (this.name) this.name.walk(w); + this.argnames.forEach(function(el){ + el.walk(w); + }); + AST_Scope.prototype.walk.call(this, w); + }); + } }, AST_Scope); + var AST_Function = DEFNODE("Function", null, { }, AST_Lambda); + var AST_Defun = DEFNODE("Defun", null, { }, AST_Function); /* -----[ JUMPS ]----- */ -var AST_Jump = DEFNODE("Jump", "value"); +var AST_Jump = DEFNODE("Jump", null, { -var AST_Return = DEFNODE("Return", null, { +}); +var AST_Exit = DEFNODE("Exit", "value", { + walk: function(w) { + w._visit(this, function(){ + if (this.value) this.value.walk(w); + }); + } }, AST_Jump); +var AST_Return = DEFNODE("Return", null, { + +}, AST_Exit); + var AST_Throw = DEFNODE("Throw", null, { +}, AST_Exit); + +var AST_LoopControl = DEFNODE("LoopControl", "label", { + walk: function(w) { + w._visit(this, function(){ + if (this.label) this.label.walk(w); + }); + } }, AST_Jump); +var AST_Break = DEFNODE("Break", null, { + +}, AST_LoopControl); + +var AST_Continue = DEFNODE("Continue", null, { + +}, AST_LoopControl); + /* -----[ IF ]----- */ var AST_If = DEFNODE("If", "condition consequent alternative", { - + walk: function(w) { + w._visit(this, function(){ + this.condition.walk(w); + this.consequent.walk(w); + if (this.alternative) this.alternative.walk(w); + }); + } }); /* -----[ SWITCH ]----- */ var AST_Switch = DEFNODE("Switch", "expression", { - + walk: function(w) { + w._visit(this, function(){ + this.expression.walk(w); + AST_LabeledStatement.prototype.walk.call(this, w); + }); + } }, AST_LabeledStatement); var AST_SwitchBlock = DEFNODE("SwitchBlock", null, { @@ -143,21 +248,41 @@ var AST_SwitchBranch = DEFNODE("SwitchBranch", "body", { }); var AST_Default = DEFNODE("Default", null, { - + walk: function(w) { + w._visit(this, function(){ + AST_Statement.prototype.walk.call(this, w); + }); + } }, AST_SwitchBranch); var AST_Case = DEFNODE("Case", "expression", { - + walk: function(w) { + w._visit(this, function(){ + this.expression.walk(w); + AST_Statement.prototype.walk.call(this, w); + }); + } }, AST_SwitchBranch); /* -----[ EXCEPTIONS ]----- */ var AST_Try = DEFNODE("Try", "btry bcatch bfinally", { - + walk: function(w) { + w._visit(this, function(){ + this.btry.walk(w); + if (this.bcatch) this.bcatch.walk(w); + if (this.bfinally) this.bfinally.walk(w); + }); + } }); var AST_Catch = DEFNODE("Catch", "argname body", { - + walk: function(w) { + w._visit(this, function(){ + this.argname.walk(w); + this.body.walk(w); + }); + } }); var AST_Finally = DEFNODE("Finally", null, { @@ -167,7 +292,13 @@ var AST_Finally = DEFNODE("Finally", null, { /* -----[ VAR/CONST ]----- */ var AST_Definitions = DEFNODE("Definitions", "definitions", { - + walk: function(w) { + w._visit(this, function(){ + this.definitions.forEach(function(el){ + el.walk(w); + }); + }); + } }); var AST_Var = DEFNODE("Var", null, { @@ -179,13 +310,25 @@ var AST_Const = DEFNODE("Const", null, { }, AST_Definitions); var AST_VarDef = DEFNODE("VarDef", "name value", { - + walk: function(w) { + w._visit(this, function(){ + this.name.walk(w); + if (this.value) this.value.walk(w); + }); + } }); /* -----[ OTHER ]----- */ var AST_Call = DEFNODE("Call", "expression args", { - + walk: function(w) { + w._visit(this, function(){ + this.expression.walk(w); + this.args.forEach(function(el){ + el.walk(w); + }); + }); + } }); var AST_New = DEFNODE("New", null, { @@ -193,7 +336,12 @@ var AST_New = DEFNODE("New", null, { }, AST_Call); var AST_Seq = DEFNODE("Seq", "first second", { - + walk: function(w) { + w._visit(this, function(){ + this.first.walk(w); + this.second.walk(w); + }); + } }); var AST_PropAccess = DEFNODE("PropAccess", "expression property", { @@ -201,15 +349,28 @@ var AST_PropAccess = DEFNODE("PropAccess", "expression property", { }); var AST_Dot = DEFNODE("Dot", null, { - + walk: function(w) { + w._visit(this, function(){ + this.expression.walk(w); + }); + } }, AST_PropAccess); var AST_Sub = DEFNODE("Sub", null, { - + walk: function(w) { + w._visit(this, function(){ + this.expression.walk(w); + this.property.walk(w); + }); + } }, AST_PropAccess); var AST_Unary = DEFNODE("Unary", "operator expression", { - + walk: function(w) { + w._visit(this, function(){ + this.expression.walk(w); + }); + } }); var AST_UnaryPrefix = DEFNODE("UnaryPrefix", null, { @@ -221,77 +382,129 @@ var AST_UnaryPostfix = DEFNODE("UnaryPostfix", null, { }, AST_Unary); var AST_Binary = DEFNODE("Binary", "left operator right", { - + walk: function(w) { + w._visit(this, function(){ + this.left.walk(w); + this.right.walk(w); + }); + } }); var AST_Conditional = DEFNODE("Conditional", "condition consequent alternative", { - + walk: function(w) { + w._visit(this, function(){ + this.condition.walk(w); + this.consequent.walk(w); + this.alternative.walk(w); + }); + } }); -var AST_Assign = DEFNODE("Assign", "left operator right", { +var AST_Assign = DEFNODE("Assign", null, { -}); +}, AST_Binary); /* -----[ LITERALS ]----- */ -var AST_RegExp = DEFNODE("Regexp", "pattern mods", { - -}); - var AST_Array = DEFNODE("Array", "elements", { - + walk: function(w) { + w._visit(this, function(){ + this.elements.forEach(function(el){ + el.walk(w); + }); + }); + } }); var AST_Object = DEFNODE("Object", "properties", { - + walk: function(w) { + w._visit(this, function(){ + this.properties.forEach(function(prop){ + prop.walk(w); + }); + }); + } }); var AST_ObjectProperty = DEFNODE("ObjectProperty"); var AST_ObjectKeyVal = DEFNODE("ObjectKeyval", "key value", { - + walk: function(w) { + w._visit(this, function(){ + this.value.walk(w); + }); + } }, AST_ObjectProperty); var AST_ObjectSetter = DEFNODE("ObjectSetter", "name func", { - + walk: function(w) { + w._visit(this, function(){ + this.func.walk(w); + }); + } }, AST_ObjectProperty); var AST_ObjectGetter = DEFNODE("ObjectGetter", "name func", { - + walk: function(w) { + w._visit(this, function(){ + this.func.walk(w); + }); + } }, AST_ObjectProperty); var AST_Symbol = DEFNODE("Symbol", "name", { +}); + +var AST_This = DEFNODE("This", null, { + +}, AST_Symbol); + +var AST_SymbolRef = DEFNODE("SymbolRef", "scope symbol", { + +}, AST_Symbol); +var AST_Label = DEFNODE("Label", null, { + +}, AST_SymbolRef); + +var AST_Constant = DEFNODE("Constant", null, { + getValue: function() { + return this.value; + } }); var AST_String = DEFNODE("String", "value", { -}); +}, AST_Constant); var AST_Number = DEFNODE("Number", "value", { -}); - -var AST_Boolean = DEFNODE("Boolean", "value", { +}, AST_Constant); -}); +var AST_RegExp = DEFNODE("Regexp", "pattern mods", { + getValue: function() { + return this._regexp || ( + this._regexp = new RegExp(this.pattern, this.mods) + ); + } +}, AST_Constant); var AST_Atom = DEFNODE("Atom", null, { -}); +}, AST_Constant); var AST_Null = DEFNODE("Null", null, { - + getValue: function() { return null } }, AST_Atom); var AST_Undefined = DEFNODE("Undefined", null, { - + getValue: function() { return (function(){}()) } }, AST_Atom); var AST_False = DEFNODE("False", null, { - + getValue: function() { return false } }, AST_Atom); var AST_True = DEFNODE("True", null, { - + getValue: function() { return true } }, AST_Atom); diff --git a/lib/node.js b/lib/node.js index 36bc18a0..9089a5fe 100755 --- a/lib/node.js +++ b/lib/node.js @@ -1,20 +1,35 @@ #! /usr/bin/env node -var fs = require("fs"); +(function(){ -function load_global(file) { - var code = fs.readFileSync(file, "utf8"); - return global.eval(code); -}; + var fs = require("fs"); + var vm = require("vm"); + var sys = require("util"); -load_global("./utils.js"); -load_global("./ast.js"); -load_global("./parse.js"); + function load_global(file) { + var code = fs.readFileSync(file, "utf8"); + return vm.runInThisContext(code, file); + }; -/// + load_global("./utils.js"); + load_global("./output.js"); + load_global("./ast.js"); + load_global("./parse.js"); -var filename = process.argv[2]; -console.time("parse"); -var ast = parse(fs.readFileSync(filename, "utf8")); -console.timeEnd("parse"); + /// + var filename = process.argv[2]; + console.time("parse"); + var ast = parse(fs.readFileSync(filename, "utf8")); + console.timeEnd("parse"); + + console.time("walk"); + ast.walk({ + _visit: function(node, descend) { + //console.log(node); + if (descend) descend.call(node); + } + }); + console.timeEnd("walk"); + +})(); diff --git a/lib/output.js b/lib/output.js new file mode 100644 index 00000000..2c4c6fdc --- /dev/null +++ b/lib/output.js @@ -0,0 +1,134 @@ +function OutputStream(options) { + options = defaults(options, { + indent_start : 0, + indent_level : 4, + quote_keys : false, + space_colon : false, + beautify : true, + ascii_only : false, + inline_script : false, + width : 80 + }); + + var indentation = 0; + var current_col = 0; + var OUTPUT = ""; + + function to_ascii(str) { + return str.replace(/[\u0080-\uffff]/g, function(ch) { + var code = ch.charCodeAt(0).toString(16); + while (code.length < 4) code = "0" + code; + return "\\u" + code; + }); + }; + + function make_string(str) { + var dq = 0, sq = 0; + str = str.replace(/[\\\b\f\n\r\t\x22\x27\u2028\u2029\0]/g, function(s){ + switch (s) { + case "\\": return "\\\\"; + case "\b": return "\\b"; + case "\f": return "\\f"; + case "\n": return "\\n"; + case "\r": return "\\r"; + case "\u2028": return "\\u2028"; + case "\u2029": return "\\u2029"; + case '"': ++dq; return '"'; + case "'": ++sq; return "'"; + case "\0": return "\\0"; + } + return s; + }); + if (options.ascii_only) str = to_ascii(str); + if (dq > sq) return "'" + str.replace(/\x27/g, "\\'") + "'"; + else return '"' + str.replace(/\x22/g, '\\"') + '"'; + }; + + function print(str) { + var nl = str.lastIndexOf("\n"); + if (nl >= 0) { + current_col = nl; + } else { + current_col += str.length; + } + OUTPUT += str; + }; + + function encode_string(str) { + var ret = make_string(str); + if (options.inline_script) + ret = ret.replace(/<\x2fscript([>\/\t\n\f\r ])/gi, "<\\/script$1"); + return ret; + }; + + function make_name(name) { + name = name.toString(); + if (options.ascii_only) + name = to_ascii(name); + return name; + }; + + function make_indent(line) { + if (line == null) + line = ""; + if (beautify) + line = repeat_string(" ", options.indent_start + indentation) + line; + return line; + }; + + function with_indent(col, cont) { + var save_indentation = indentation; + indentation = col; + var ret = cont(); + indentation = save_indentation; + return ret; + }; + + function indent() { + if (options.beautify) print(make_indent()); + }; + + function newline() { + if (options.beautify) { + print("\n"); + print(make_indent()); + } + }; + + function next_indent() { + return indentation + options.indent_level; + }; + + function with_block(cont) { + var ret; + print("{"); + with_indent(next_indent(), function(){ + newline(); + ret = cont(); + newline(); + }); + indent(); + print("}"); + return ret; + }; + + function with_parens(cont) { + print("("); + var ret = with_indent(current_col, cont); + print(")"); + return ret; + }; + + return { + get : function() { return OUTPUT }, + indent : indent, + newline : newline, + print : print, + string : function(str) { print(encode_string(str)) }, + with_indent : with_indent, + with_block : with_block, + with_parens : with_parens, + options : function() { return options } + }; + +}; diff --git a/lib/parse.js b/lib/parse.js index 7164f3ee..9dcfa635 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -577,13 +577,13 @@ var UNARY_POSTFIX = array_to_hash([ "--", "++" ]); var ASSIGNMENT = (function(a, ret, i){ while (i < a.length) { - ret[a[i]] = a[i].substr(0, a[i].length - 1); + ret[a[i]] = a[i]; i++; } return ret; })( - ["+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&="], - { "=": true }, + [ "=", "+=", "-=", "/=", "*=", "%=", ">>=", "<<=", ">>>=", "|=", "^=", "&=" ], + {}, 0 ); @@ -695,10 +695,11 @@ function parse($TEXT, exigent_mode) { }; function parenthesised() { - expect("("); - var ex = expression(); - expect(")"); - return ex; + return new AST_Parenthesized({ + start : expect("("), + expression : expression(), + end : expect(")") + }); }; function embed_tokens(parser) { @@ -828,8 +829,7 @@ function parse($TEXT, exigent_mode) { }); function labeled_statement() { - var label = S.token.value; - next(); + var label = as_symbol(true); expect(":"); S.labels.push(label); var start = S.token, stat = statement(); @@ -845,19 +845,21 @@ function parse($TEXT, exigent_mode) { }; function break_cont(type) { - var name = null; + var name = null, label = null; if (!can_insert_semicolon()) { name = is("name") ? S.token.value : null; } if (name != null) { next(); - if (!member(name, S.labels)) + label = find_if(function(l){ return l.name == name }, S.labels); + if (!label) croak("Label " + name + " without matching loop or statement"); + label = new AST_Label({ name: name, symbol: label }); } else if (S.in_loop == 0) croak(type.TYPE + " not inside a loop or switch"); semicolon(); - return new type({ label: name }); + return new type({ label: label }); }; function for_() { @@ -892,19 +894,19 @@ function parse($TEXT, exigent_mode) { }; function for_in(init) { - var lhs = init instanceof AST_Var ? init.definitions[0].name : init; + var lhs = init instanceof AST_Var ? init.definitions[0].name : null; var obj = expression(); expect(")"); return new AST_ForIn({ init : init, - lhs : lhs, + name : lhs, object : obj, body : in_loop(statement) }); }; var function_ = function(in_statement) { - var name = is("name") ? as_symbol() : null; + var name = is("name") ? as_symbol(true) : null; if (in_statement && !name) unexpected(); expect("("); @@ -914,7 +916,7 @@ function parse($TEXT, exigent_mode) { argnames: (function(first, a){ while (!is("punc", ")")) { if (first) first = false; else expect(","); - a.push(as_symbol()); + a.push(as_symbol(true)); } next(); return a; @@ -922,11 +924,14 @@ function parse($TEXT, exigent_mode) { body: embed_tokens(function(){ ++S.in_function; var loop = S.in_loop; + var labels = S.labels; S.in_directives = true; S.in_loop = 0; + S.labels = []; var a = block_(); --S.in_function; S.in_loop = loop; + S.labels = labels; return new AST_Bracketed({ body: a }); })() }); @@ -958,47 +963,70 @@ function parse($TEXT, exigent_mode) { var switch_block_ = embed_tokens(curry(in_loop, function(){ expect("{"); - var a = [], cur = null; + var a = [], cur = null, branch = null; while (!is("punc", "}")) { if (is("eof")) unexpected(); if (is("keyword", "case")) { - next(); + if (branch) branch.end = prev(); cur = []; - a.push(new AST_Case({ expression: expression(), body: cur })); + branch = new AST_Case({ + start : prog1(S.token, next), + expression : expression(), + body : cur + }); + a.push(branch); expect(":"); } else if (is("keyword", "default")) { - next(); - expect(":"); + if (branch) branch.end = prev(); cur = []; - a.push(new AST_Default({ body: cur })); + branch = new AST_Default({ + start : prog1(S.token, next, curry(expect, ":")), + body : cur + }) + a.push(branch); } else { if (!cur) unexpected(); cur.push(statement()); } } + if (branch) branch.end = prev(); next(); return new AST_SwitchBlock({ body: a }); })); function try_() { var body = new AST_Bracketed({ - body: block_() + start : S.token, + body : block_(), + end : prev() }), bcatch = null, bfinally = null; if (is("keyword", "catch")) { + var start = S.token; next(); expect("("); - var name = as_symbol(); + var name = as_symbol(true); expect(")"); bcatch = new AST_Catch({ + start : start, argname : name, - body : new AST_Bracketed({ body: block_() }) + body : new AST_Bracketed({ + start : S.token, + body : block_(), + end : prev() + }), + end : prev() }); } if (is("keyword", "finally")) { + var start = S.token; next(); - bfinally = new AST_Finally({ body: block_() }); + bfinally = new AST_Finally({ + start : start, + body : block_(), + end : prev() + }); } if (!bcatch && !bfinally) croak("Missing catch/finally blocks"); @@ -1014,7 +1042,7 @@ function parse($TEXT, exigent_mode) { for (;;) { a.push(new AST_VarDef({ start : S.token, - name : as_symbol(), + name : as_symbol(true), value : is("operator", "=") ? (next(), expression(false, no_in)) : null, end : prev() })); @@ -1025,19 +1053,25 @@ function parse($TEXT, exigent_mode) { return a; }; - var var_ = embed_tokens(function(no_in) { + var var_ = function(no_in) { return new AST_Var({ - definitions: vardefs(no_in) + start : prev(), + definitions : vardefs(no_in), + end : prev() }); - }); + }; - var const_ = embed_tokens(function() { + var const_ = function() { return new AST_Const({ - definitions: vardefs() + start : prev(), + definitions : vardefs(), + end : prev() }); - }); + }; - var new_ = embed_tokens(function() { + var new_ = function() { + var start = S.token; + expect_token("operator", "new"); var newexp = expr_atom(false), args; if (is("punc", "(")) { next(); @@ -1046,10 +1080,12 @@ function parse($TEXT, exigent_mode) { args = []; } return subscripts(new AST_New({ + start : start, expression : newexp, - args : args + args : args, + end : prev() }), true); - }); + }; function as_atom_node() { var tok = S.token, ret; @@ -1085,25 +1121,26 @@ function parse($TEXT, exigent_mode) { var expr_atom = function(allow_calls) { if (is("operator", "new")) { - next(); return new_(); } + var start = S.token; if (is("punc")) { - switch (S.token.value) { + switch (start.value) { case "(": next(); - return subscripts(prog1(expression, curry(expect, ")")), allow_calls); + var ex = expression(); + ex.start = start; + ex.end = S.token; + expect(")"); + return subscripts(ex, allow_calls); case "[": - next(); return subscripts(array_(), allow_calls); case "{": - next(); return subscripts(object_(), allow_calls); } unexpected(); } if (is("keyword", "function")) { - var start = S.token; next(); var func = function_(false); func.start = start; @@ -1131,13 +1168,15 @@ function parse($TEXT, exigent_mode) { return a; }; - function array_() { + var array_ = embed_tokens(function() { + expect("["); return new AST_Array({ elements: expr_list("]", !exigent_mode, true) }); - }; + }); var object_ = embed_tokens(function() { + expect("{"); var first = true, a = []; while (!is("punc", "}")) { if (first) first = false; else expect(","); @@ -1183,9 +1222,14 @@ function parse($TEXT, exigent_mode) { switch (S.token.type) { case "num": case "string": - return as_symbol(true); + case "name": + case "operator": + case "keyword": + case "atom": + return prog1(S.token.value, next); + default: + unexpected(); } - return as_name(); }; function as_name() { @@ -1194,15 +1238,16 @@ function parse($TEXT, exigent_mode) { case "operator": case "keyword": case "atom": - return as_symbol(true); + return prog1(S.token.value, next); default: unexpected(); } }; - function as_symbol(noerror) { - if (!noerror && !is("name")) croak("Name expected"); - var sym = new AST_Symbol({ + function as_symbol(def) { + if (!is("name")) croak("Name expected"); + var name = S.token.value; + var sym = new (name == "this" ? AST_This : def ? AST_Symbol : AST_SymbolRef)({ name : String(S.token.value), start : S.token, end : S.token @@ -1211,44 +1256,59 @@ function parse($TEXT, exigent_mode) { return sym; }; - var subscripts = embed_tokens(function(expr, allow_calls) { + var subscripts = function(expr, allow_calls) { + var start = expr.start; if (is("punc", ".")) { next(); return subscripts(new AST_Dot({ + start : start, expression : expr, - property : as_name() + property : as_name(), + end : prev() }), allow_calls); } if (is("punc", "[")) { next(); + var prop = expression(); + expect("]"); return subscripts(new AST_Sub({ + start : start, expression : expr, - property : prog1(expression, curry(expect, "]")) + property : prop, + end : prev() }), allow_calls); } if (allow_calls && is("punc", "(")) { next(); return subscripts(new AST_Call({ + start : start, expression : expr, - args : expr_list(")") + args : expr_list(")"), + end : prev() }), true); } return expr; - }); + }; - var maybe_unary = embed_tokens(function(allow_calls) { + var maybe_unary = function(allow_calls) { + var start = S.token; if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) { - return make_unary(AST_UnaryPrefix, - prog1(S.token.value, next), - maybe_unary(allow_calls)); + var ex = make_unary(AST_UnaryPrefix, + prog1(S.token.value, next), + maybe_unary(allow_calls)); + ex.start = start; + ex.end = prev(); + return ex; } var val = expr_atom(allow_calls); while (is("operator") && HOP(UNARY_POSTFIX, S.token.value) && !S.token.nlb) { val = make_unary(AST_UnaryPostfix, S.token.value, val); + val.start = start; + val.end = S.token; next(); } return val; - }); + }; function make_unary(ctor, op, expr) { if ((op == "++" || op == "--") && !is_assignable(expr)) @@ -1256,7 +1316,7 @@ function parse($TEXT, exigent_mode) { return new ctor({ operator: op, expression: expr }); }; - var expr_op = embed_tokens(function(left, min_prec, no_in) { + var expr_op = function(left, min_prec, no_in) { var op = is("operator") ? S.token.value : null; if (op == "in" && no_in) op = null; var prec = op != null ? PRECEDENCE[op] : null; @@ -1264,32 +1324,37 @@ function parse($TEXT, exigent_mode) { next(); var right = expr_op(maybe_unary(true), prec, no_in); return expr_op(new AST_Binary({ + start : left.start, left : left, operator : op, - right : right + right : right, + end : right.end }), min_prec, no_in); } return left; - }); + }; function expr_ops(no_in) { return expr_op(maybe_unary(true), 0, no_in); }; - var maybe_conditional = embed_tokens(function(no_in) { + var maybe_conditional = function(no_in) { + var start = S.token; var expr = expr_ops(no_in); if (is("operator", "?")) { next(); var yes = expression(false); expect(":"); return new AST_Conditional({ - condition: expr, - consequent: yes, - alternative: expression(false, no_in) + start : start, + condition : expr, + consequent : yes, + alternative : expression(false, no_in), + end : peek() }); } return expr; - }); + }; function is_assignable(expr) { if (!exigent_mode) return true; @@ -1304,35 +1369,41 @@ function parse($TEXT, exigent_mode) { } }; - var maybe_assign = embed_tokens(function(no_in) { + var maybe_assign = function(no_in) { + var start = S.token; var left = maybe_conditional(no_in), val = S.token.value; if (is("operator") && HOP(ASSIGNMENT, val)) { if (is_assignable(left)) { next(); return new AST_Assign({ + start : start, left : left, operator : ASSIGNMENT[val], - right : maybe_assign(no_in) + right : maybe_assign(no_in), + end : peek() }); } croak("Invalid assignment"); } return left; - }); + }; - var expression = embed_tokens(function(commas, no_in) { + var expression = function(commas, no_in) { if (arguments.length == 0) commas = true; + var start = S.token; var expr = maybe_assign(no_in); if (commas && is("punc", ",")) { next(); return new AST_Seq({ + start : start, first : expr, - second : expression(true, no_in) + second : expression(true, no_in), + end : peek() }); } return expr; - }); + }; function in_loop(cont) { ++S.in_loop; @@ -1342,11 +1413,13 @@ function parse($TEXT, exigent_mode) { }; return new AST_Toplevel({ + start: S.token, body: (function(a){ while (!is("eof")) a.push(statement()); return a; - })([]) + })([]), + end: prev() }); }; diff --git a/lib/test.js b/lib/test.js index f594ccd2..b765132f 100644 --- a/lib/test.js +++ b/lib/test.js @@ -1,735 +1,345 @@ -var func = function parse($TEXT, exigent_mode) { +var func = function tokenizer($TEXT) { var S = { - input : typeof $TEXT == "string" ? tokenizer($TEXT, true) : $TEXT, - token : null, - prev : null, - peeked : null, - in_function : 0, - in_directives : true, - in_loop : 0, - labels : [] - }; - - S.token = next(); - - function is(type, value) { - return is_token(S.token, type, value); - }; - - function peek() { return S.peeked || (S.peeked = S.input()); }; - - function next() { - S.prev = S.token; - if (S.peeked) { - S.token = S.peeked; - S.peeked = null; + text : $TEXT.replace(/\r\n?|[\n\u2028\u2029]/g, "\n").replace(/^\uFEFF/, ''), + pos : 0, + tokpos : 0, + line : 0, + tokline : 0, + col : 0, + tokcol : 0, + newline_before : false, + regex_allowed : false, + comments_before : [] + }; + + function peek() { return S.text.charAt(S.pos); }; + + function next(signal_eof, in_string) { + var ch = S.text.charAt(S.pos++); + if (signal_eof && !ch) + throw EX_EOF; + if (ch == "\n") { + S.newline_before = S.newline_before || !in_string; + ++S.line; + S.col = 0; } else { - S.token = S.input(); + ++S.col; } - S.in_directives = S.in_directives && ( - S.token.type == "string" || is("punc", ";") - ); - return S.token; - }; - - function prev() { - return S.prev; + return ch; }; - function croak(msg, line, col, pos) { - var ctx = S.input.context(); - js_error(msg, - line != null ? line : ctx.tokline, - col != null ? col : ctx.tokcol, - pos != null ? pos : ctx.tokpos); + function eof() { + return !S.peek(); }; - function token_error(token, msg) { - croak(msg, token.line, token.col); + function find(what, signal_eof) { + var pos = S.text.indexOf(what, S.pos); + if (signal_eof && pos == -1) throw EX_EOF; + return pos; }; - function unexpected(token) { - if (token == null) - token = S.token; - token_error(token, "Unexpected token: " + token.type + " (" + token.value + ")"); + function start_token() { + S.tokline = S.line; + S.tokcol = S.col; + S.tokpos = S.pos; }; - function expect_token(type, val) { - if (is(type, val)) { - return next(); - } - token_error(S.token, "Unexpected token " + S.token.type + ", expected " + type); - }; - - function expect(punc) { return expect_token("punc", punc); }; - - function can_insert_semicolon() { - return !exigent_mode && ( - S.token.nlb || is("eof") || is("punc", "}") - ); - }; - - function semicolon() { - if (is("punc", ";")) next(); - else if (!can_insert_semicolon()) unexpected(); - }; - - function parenthesised() { - expect("("); - var ex = expression(); - expect(")"); - return ex; - }; - - function embed_tokens(parser) { - return function() { - var start = S.token; - var expr = parser.apply(this, arguments); - var end = prev(); - expr.start = start; - expr.end = end; - return expr; + function token(type, value, is_comment) { + S.regex_allowed = ((type == "operator" && !HOP(UNARY_POSTFIX, value)) || + (type == "keyword" && HOP(KEYWORDS_BEFORE_EXPRESSION, value)) || + (type == "punc" && HOP(PUNC_BEFORE_EXPRESSION, value))); + var ret = { + type : type, + value : value, + line : S.tokline, + col : S.tokcol, + pos : S.tokpos, + endpos : S.pos, + nlb : S.newline_before }; - }; - - var statement = embed_tokens(function() { - if (is("operator", "/") || is("operator", "/=")) { - S.peeked = null; - S.token = S.input(S.token.value.substr(1)); // force regexp - } - switch (S.token.type) { - case "string": - var dir = S.in_directives, stat = simple_statement(); - // XXXv2: decide how to fix directives - // if (dir && stat instanceof AST_String && !is("punc", ",")) - // return new AST_Directive({ value: stat.value }); - return stat; - case "num": - case "regexp": - case "operator": - case "atom": - return simple_statement(); - - case "name": - return is_token(peek(), "punc", ":") - ? labeled_statement() - : simple_statement(); - - case "punc": - switch (S.token.value) { - case "{": - return new AST_Statement({ body: block_() }); - case "[": - case "(": - return simple_statement(); - case ";": - next(); - return new AST_Statement(); - default: - unexpected(); - } - - case "keyword": - switch (prog1(S.token.value, next)) { - case "break": - return break_cont(AST_Break); - - case "continue": - return break_cont(AST_Continue); - - case "debugger": - semicolon(); - return new AST_Debugger(); - - case "do": - return new AST_Do({ - body : in_loop(statement), - condition : (expect_token("while"), prog1(parenthesised, semicolon)) - }); - - case "while": - return new AST_While({ - condition : parenthesised(), - body : in_loop(statement) - }); - - case "for": - return for_(); - - case "function": - return function_(true); - - case "if": - return if_(); - - case "return": - if (S.in_function == 0) - croak("'return' outside of function"); - return new AST_Return({ - value: ( is("punc", ";") - ? (next(), null) - : can_insert_semicolon() - ? null - : prog1(expression, semicolon) ) - }); - - case "switch": - return new AST_Switch({ - expression : parenthesised(), - body : switch_block_() - }); - - case "throw": - if (S.token.nlb) - croak("Illegal newline after 'throw'"); - return new AST_Throw({ - value: prog1(expression, semicolon) - }); - - case "try": - return try_(); - - case "var": - return prog1(var_, semicolon); - - case "const": - return prog1(const_, semicolon); - - case "with": - return new AST_With({ - expression : parenthesised(), - body : statement() - }); - - default: - unexpected(); + if (!is_comment) { + ret.comments_before = S.comments_before; + S.comments_before = []; + // make note of any newlines in the comments that came before + for (var i = 0, len = ret.comments_before.length; i < len; i++) { + ret.nlb = ret.nlb || ret.comments_before[i].nlb; } } - }); - - function labeled_statement() { - var label = S.token.value; - next(); - expect(":"); - S.labels.push(label); - var start = S.token, stat = statement(); - if (exigent_mode && !(stat instanceof AST_LabeledStatement)) - unexpected(start); - S.labels.pop(); - stat.label = label; - return stat; - }; - - function simple_statement() { - return new AST_Statement({ body: prog1(expression, semicolon) }); + S.newline_before = false; + return new AST_Token(ret); }; - function break_cont(type) { - var name = null; - if (!can_insert_semicolon()) { - name = is("name") ? S.token.value : null; - } - if (name != null) { + function skip_whitespace() { + while (HOP(WHITESPACE_CHARS, peek())) next(); - if (!member(name, S.labels)) - croak("Label " + name + " without matching loop or statement"); - } - else if (S.in_loop == 0) - croak(type.TYPE + " not inside a loop or switch"); - semicolon(); - return new type({ label: name }); }; - function for_() { - expect("("); - var init = null; - if (!is("punc", ";")) { - init = is("keyword", "var") - ? (next(), var_(true)) - : expression(true, true); - if (is("operator", "in")) { - if (init instanceof AST_Var && init.definitions.length > 1) - croak("Only one variable declaration allowed in for..in loop"); - next(); - return for_in(init); - } + function read_while(pred) { + var ret = "", ch = peek(), i = 0; + while (ch && pred(ch, i++)) { + ret += next(); + ch = peek(); } - return regular_for(init); + return ret; }; - function regular_for(init) { - expect(";"); - var test = is("punc", ";") ? null : expression(); - expect(";"); - var step = is("punc", ")") ? null : expression(); - expect(")"); - return new AST_For({ - init : init, - condition : test, - step : step, - body : in_loop(statement) - }); + function parse_error(err) { + js_error(err, S.tokline, S.tokcol, S.tokpos); }; - function for_in(init) { - var lhs = init instanceof AST_Var ? init.definitions[0].name : init; - var obj = expression(); - expect(")"); - return new AST_ForIn({ - init : init, - lhs : lhs, - object : obj, - body : in_loop(statement) + function read_num(prefix) { + var has_e = false, after_e = false, has_x = false, has_dot = prefix == "."; + var num = read_while(function(ch, i){ + if (ch == "x" || ch == "X") { + if (has_x) return false; + return has_x = true; + } + if (!has_x && (ch == "E" || ch == "e")) { + if (has_e) return false; + return has_e = after_e = true; + } + if (ch == "-") { + if (after_e || (i == 0 && !prefix)) return true; + return false; + } + if (ch == "+") return after_e; + after_e = false; + if (ch == ".") { + if (!has_dot && !has_x && !has_e) + return has_dot = true; + return false; + } + return is_alphanumeric_char(ch); }); - }; - - var function_ = function(in_statement) { - var name = is("name") ? as_symbol() : null; - if (in_statement && !name) - unexpected(); - expect("("); - var ctor = in_statement ? AST_Defun : AST_Function; - return new ctor({ - name: name, - argnames: (function(first, a){ - while (!is("punc", ")")) { - if (first) first = false; else expect(","); - a.push(as_symbol()); + if (prefix) + num = prefix + num; + var valid = parse_js_number(num); + if (!isNaN(valid)) { + return token("num", valid); + } else { + parse_error("Invalid syntax: " + num); + } + }; + + function read_escaped_char(in_string) { + var ch = next(true, in_string); + switch (ch) { + case "n" : return "\n"; + case "r" : return "\r"; + case "t" : return "\t"; + case "b" : return "\b"; + case "v" : return "\u000b"; + case "f" : return "\f"; + case "0" : return "\0"; + case "x" : return String.fromCharCode(hex_bytes(2)); + case "u" : return String.fromCharCode(hex_bytes(4)); + case "\n": return ""; + default : return ch; + } + }; + + function hex_bytes(n) { + var num = 0; + for (; n > 0; --n) { + var digit = parseInt(next(true), 16); + if (isNaN(digit)) + parse_error("Invalid hex-character pattern in string"); + num = (num << 4) | digit; + } + return num; + }; + + function read_string() { + return with_eof_error("Unterminated string constant", function(){ + var quote = next(), ret = ""; + for (;;) { + var ch = next(true); + if (ch == "\\") { + // read OctalEscapeSequence (XXX: deprecated if "strict mode") + // https://github.com/mishoo/UglifyJS/issues/178 + var octal_len = 0, first = null; + ch = read_while(function(ch){ + if (ch >= "0" && ch <= "7") { + if (!first) { + first = ch; + return ++octal_len; + } + else if (first <= "3" && octal_len <= 2) return ++octal_len; + else if (first >= "4" && octal_len <= 1) return ++octal_len; + } + return false; + }); + if (octal_len > 0) ch = String.fromCharCode(parseInt(ch, 8)); + else ch = read_escaped_char(true); } - next(); - return a; - })(true, []), - body: embed_tokens(function(){ - ++S.in_function; - var loop = S.in_loop; - S.in_directives = true; - S.in_loop = 0; - var a = block_(); - --S.in_function; - S.in_loop = loop; - return new AST_Bracketed({ body: a }); - })() + else if (ch == quote) break; + ret += ch; + } + return token("string", ret); }); }; - function if_() { - var cond = parenthesised(), body = statement(), belse = null; - if (is("keyword", "else")) { - next(); - belse = statement(); + function read_line_comment() { + next(); + var i = find("\n"), ret; + if (i == -1) { + ret = S.text.substr(S.pos); + S.pos = S.text.length; + } else { + ret = S.text.substring(S.pos, i); + S.pos = i; } - return new AST_If({ - condition : cond, - consequent : body, - alternative : belse - }); + return token("comment1", ret, true); }; - function block_() { - expect("{"); - var a = []; - while (!is("punc", "}")) { - if (is("eof")) unexpected(); - a.push(statement()); - } + function read_multiline_comment() { next(); - return a; + return with_eof_error("Unterminated multiline comment", function(){ + var i = find("*/", true), + text = S.text.substring(S.pos, i); + S.pos = i + 2; + S.line += text.split("\n").length - 1; + S.newline_before = S.newline_before || text.indexOf("\n") >= 0; + + // https://github.com/mishoo/UglifyJS/issues/#issue/100 + if (/^@cc_on/i.test(text)) { + warn("WARNING: at line " + S.line); + warn("*** Found \"conditional comment\": " + text); + warn("*** UglifyJS DISCARDS ALL COMMENTS. This means your code might no longer work properly in Internet Explorer."); + } + + return token("comment2", text, true); + }); }; - var switch_block_ = embed_tokens(curry(in_loop, function(){ - expect("{"); - var a = [], cur = null; - while (!is("punc", "}")) { - if (is("eof")) unexpected(); - if (is("keyword", "case")) { - next(); - cur = []; - a.push(new AST_Case({ expression: expression(), body: cur })); - expect(":"); - } - else if (is("keyword", "default")) { - next(); - expect(":"); - cur = []; - a.push(new AST_Default({ body: cur })); + function read_name() { + var backslash = false, name = "", ch, escaped = false, hex; + while ((ch = peek()) != null) { + if (!backslash) { + if (ch == "\\") escaped = backslash = true, next(); + else if (is_identifier_char(ch)) name += next(); + else break; } else { - if (!cur) unexpected(); - cur.push(statement()); + if (ch != "u") parse_error("Expecting UnicodeEscapeSequence -- uXXXX"); + ch = read_escaped_char(); + if (!is_identifier_char(ch)) parse_error("Unicode char: " + ch.charCodeAt(0) + " is not valid in identifier"); + name += ch; + backslash = false; } } - next(); - return new AST_SwitchBlock({ body: a }); - })); - - function try_() { - var body = new AST_Bracketed({ - body: block_() - }), bcatch = null, bfinally = null; - if (is("keyword", "catch")) { - next(); - expect("("); - var name = as_symbol(); - next(); - expect(")"); - bcatch = new AST_Catch({ - argname : name, - body : new AST_Bracketed({ body: block_() }) - }); - } - if (is("keyword", "finally")) { - next(); - bfinally = new AST_Finally({ body: block_() }); - } - if (!bcatch && !bfinally) - croak("Missing catch/finally blocks"); - return new AST_Try({ - btry : body, - bcatch : bcatch, - bfinally : bfinally - }); - }; - - function vardefs(no_in) { - var a = []; - for (;;) { - a.push(new AST_VarDef({ - start : S.token, - name : as_symbol(), - value : is("operator", "=") ? (next(), expression(false, no_in)) : null, - end : prev() - })); - if (!is("punc", ",")) - break; - next(); - } - return a; - }; - - var var_ = embed_tokens(function(no_in) { - return new AST_Var({ - definitions: vardefs(no_in) - }); - }); - - var const_ = embed_tokens(function() { - return new AST_Const({ - definitions: vardefs() - }); - }); - - var new_ = embed_tokens(function() { - var newexp = expr_atom(false), args; - if (is("punc", "(")) { - next(); - args = expr_list(")"); - } else { - args = []; - } - return subscripts(new AST_New({ - expression : newexp, - args : args - }), true); - }); - - function as_atom_node() { - var tok = S.token, ret; - switch (tok.type) { - case "name": - return as_symbol(); - case "num": - ret = new AST_Number({ start: tok, end: tok, value: tok.value }); - break; - case "string": - ret = new AST_String({ start: tok, end: tok, value: tok.value }); - break; - case "regexp": - ret = new AST_RegExp({ start: tok, end: tok, pattern: tok.value[0], mods: tok.value[1] }); - break; - case "atom": - switch (tok.value) { - case "false": - ret = new AST_False({ start: tok, end: tok }); - break; - case "true": - ret = new AST_True({ start: tok, end: tok }); - break; - case "null": - ret = new AST_Null({ start: tok, end: tok }); + if (HOP(KEYWORDS, name) && escaped) { + hex = name.charCodeAt(0).toString(16).toUpperCase(); + name = "\\u" + "0000".substr(hex.length) + hex + name.slice(1); + } + return name; + }; + + function read_regexp(regexp) { + return with_eof_error("Unterminated regular expression", function(){ + var prev_backslash = false, ch, in_class = false; + while ((ch = next(true))) if (prev_backslash) { + regexp += "\\" + ch; + prev_backslash = false; + } else if (ch == "[") { + in_class = true; + regexp += ch; + } else if (ch == "]" && in_class) { + in_class = false; + regexp += ch; + } else if (ch == "/" && !in_class) { break; + } else if (ch == "\\") { + prev_backslash = true; + } else { + regexp += ch; } - break; - } - next(); - return ret; + var mods = read_name(); + return token("regexp", [ regexp, mods ]); + }); }; - var expr_atom = function(allow_calls) { - if (is("operator", "new")) { - next(); - return new_(); - } - if (is("punc")) { - switch (S.token.value) { - case "(": - next(); - return subscripts(prog1(expression, curry(expect, ")")), allow_calls); - case "[": + function read_operator(prefix) { + function grow(op) { + if (!peek()) return op; + var bigger = op + peek(); + if (HOP(OPERATORS, bigger)) { next(); - return subscripts(array_(), allow_calls); - case "{": - next(); - return subscripts(object_(), allow_calls); - } - unexpected(); - } - if (is("keyword", "function")) { - var start = S.token; - next(); - var func = function_(false); - func.start = start; - func.end = prev(); - return subscripts(func, allow_calls); - } - if (HOP(ATOMIC_START_TOKEN, S.token.type)) { - return subscripts(as_atom_node(), allow_calls); - } - unexpected(); - }; - - function expr_list(closing, allow_trailing_comma, allow_empty) { - var first = true, a = []; - while (!is("punc", closing)) { - if (first) first = false; else expect(","); - if (allow_trailing_comma && is("punc", closing)) break; - if (is("punc", ",") && allow_empty) { - a.push(new AST_Undefined({ start: S.token, end: S.token })); + return grow(bigger); } else { - a.push(expression(false)); + return op; } - } - next(); - return a; - }; - - function array_() { - return new AST_Array({ - elements: expr_list("]", !exigent_mode, true) - }); + }; + return token("operator", grow(prefix || next())); }; - var object_ = embed_tokens(function() { - var first = true, a = []; - while (!is("punc", "}")) { - if (first) first = false; else expect(","); - if (!exigent_mode && is("punc", "}")) - // allow trailing comma - break; - var start = S.token; - var type = start.type; - var name = as_property_name(); - if (type == "name" && !is("punc", ":")) { - if (name.name == "get") { - a.push(new AST_ObjectGetter({ - start : start, - name : name, - func : function_(false), - end : prev() - })); - continue; - } - if (name.name == "set") { - a.push(new AST_ObjectSetter({ - start : start, - name : name, - func : function_(false), - end : prev() - })); - continue; - } - } - expect(":"); - a.push(new AST_ObjectKeyVal({ - start : start, - key : name, - value : expression(false), - end : prev() - })); - } + function handle_slash() { next(); - return new AST_Object({ properties: a }); - }); - - function as_property_name() { - switch (S.token.type) { - case "num": - case "string": - return as_symbol(true); - } - return as_name(); - }; - - function as_name() { - switch (S.token.type) { - case "name": - case "operator": - case "keyword": - case "atom": - return as_symbol(true); - default: - unexpected(); + var regex_allowed = S.regex_allowed; + switch (peek()) { + case "/": + S.comments_before.push(read_line_comment()); + S.regex_allowed = regex_allowed; + return next_token(); + case "*": + S.comments_before.push(read_multiline_comment()); + S.regex_allowed = regex_allowed; + return next_token(); } + return S.regex_allowed ? read_regexp("") : read_operator("/"); }; - function as_symbol(noerror) { - if (!noerror && !is("name")) croak("Name expected"); - var sym = new AST_Symbol({ - name : String(S.token.value), - start : S.token, - end : S.token - }); + function handle_dot() { next(); - return sym; + return is_digit(peek()) + ? read_num(".") + : token("punc", "."); }; - var subscripts = embed_tokens(function(expr, allow_calls) { - if (is("punc", ".")) { - next(); - return subscripts(new AST_Dot({ - expression : expr, - property : as_name() - }), allow_calls); - } - if (is("punc", "[")) { - next(); - return subscripts(new AST_Sub({ - expression : expr, - property : prog1(expression, curry(expect, "]")) - }), allow_calls); - } - if (allow_calls && is("punc", "(")) { - next(); - return subscripts(new AST_Call({ - expression : expr, - args : expr_list(")") - }), true); - } - return expr; - }); - - var maybe_unary = embed_tokens(function(allow_calls) { - if (is("operator") && HOP(UNARY_PREFIX, S.token.value)) { - return make_unary(AST_UnaryPrefix, - prog1(S.token.value, next), - maybe_unary(allow_calls)); - } - var val = expr_atom(allow_calls); - while (is("operator") && HOP(UNARY_POSTFIX, S.token.value) && !S.token.nlb) { - val = make_unary(AST_UnaryPostfix, S.token.value, val); - next(); - } - return val; - }); - - function make_unary(ctor, op, expr) { - if ((op == "++" || op == "--") && !is_assignable(expr)) - croak("Invalid use of " + op + " operator"); - return new ctor({ operator: op, expression: expr }); + function read_word() { + var word = read_name(); + return HOP(KEYWORDS_ATOM, word) + ? token("atom", word) + : !HOP(KEYWORDS, word) + ? token("name", word) + : HOP(OPERATORS, word) + ? token("operator", word) + : token("keyword", word); }; - var expr_op = embed_tokens(function(left, min_prec, no_in) { - var op = is("operator") ? S.token.value : null; - if (op == "in" && no_in) op = null; - var prec = op != null ? PRECEDENCE[op] : null; - if (prec != null && prec > min_prec) { - next(); - var right = expr_op(maybe_unary(true), prec, no_in); - return expr_op(new AST_Binary({ - left : left, - operator : op, - right : right - }), min_prec, no_in); + function with_eof_error(eof_error, cont) { + try { + return cont(); + } catch(ex) { + if (ex === EX_EOF) parse_error(eof_error); + else throw ex; } - return left; - }); - - function expr_ops(no_in) { - return expr_op(maybe_unary(true), 0, no_in); }; - var maybe_conditional = embed_tokens(function(no_in) { - var expr = expr_ops(no_in); - if (is("operator", "?")) { - next(); - var yes = expression(false); - expect(":"); - return new AST_Conditional({ - condition: expr, - consequent: yes, - alternative: expression(false, no_in) - }); - } - return expr; - }); - - function is_assignable(expr) { - if (!exigent_mode) return true; - switch (expr[0]+"") { - case "dot": - case "sub": - case "new": - case "call": - return true; - case "name": - return expr[1] != "this"; - } + function next_token(force_regexp) { + if (force_regexp != null) + return read_regexp(force_regexp); + skip_whitespace(); + start_token(); + var ch = peek(); + if (!ch) return token("eof"); + if (is_digit(ch)) return read_num(); + if (ch == '"' || ch == "'") return read_string(); + if (HOP(PUNC_CHARS, ch)) return token("punc", next()); + if (ch == ".") return handle_dot(); + if (ch == "/") return handle_slash(); + if (HOP(OPERATOR_CHARS, ch)) return read_operator(); + if (ch == "\\" || is_identifier_start(ch)) return read_word(); + parse_error("Unexpected character '" + ch + "'"); }; - var maybe_assign = embed_tokens(function(no_in) { - var left = maybe_conditional(no_in), val = S.token.value; - if (is("operator") && HOP(ASSIGNMENT, val)) { - if (is_assignable(left)) { - next(); - return new AST_Assign({ - left : left, - operator : ASSIGNMENT[val], - right : maybe_assign(no_in) - }); - } - croak("Invalid assignment"); - } - return left; - }); - - var expression = embed_tokens(function(commas, no_in) { - if (arguments.length == 0) - commas = true; - var expr = maybe_assign(no_in); - if (commas && is("punc", ",")) { - next(); - return new AST_Seq({ - first : expr, - second : expression(true, no_in) - }); - } - return expr; - }); - - function in_loop(cont) { - ++S.in_loop; - var ret = cont(); - --S.in_loop; - return ret; + next_token.context = function(nc) { + if (nc) S = nc; + return S; }; - return new AST_Toplevel({ - body: (function(a){ - while (!is("eof")) - a.push(statement()); - return a; - })([]) - }); + return next_token; }; @@ -737,16 +347,12 @@ console.time("parse"); var ast = parse(func.toString()); console.timeEnd("parse"); -console.log(ast); - - // var moo = 1, i, man = moo + bar; - // try { - // loop: while (/foobar/.test(bar)) { - // alert(bar); - // continue loop; - // } - // } finally { - // return crap; - // } +ast.walk({ + _visit: function(node, descend) { + console.log(node); + console.log(node.TYPE, ":", node.start.pos); + if (descend) descend.call(node); + } +}); diff --git a/lib/utils.js b/lib/utils.js index 01d477ef..4e4f58fc 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -33,6 +33,32 @@ function member(name, array) { return false; }; +function find_if(func, array) { + for (var i = 0, n = array.length; i < n; ++i) { + if (func(array[i])) + return array[i]; + } +}; + function HOP(obj, prop) { return Object.prototype.hasOwnProperty.call(obj, prop); }; + +function repeat_string(str, i) { + if (i <= 0) return ""; + if (i == 1) return str; + var d = repeat_string(str, i >> 1); + d += d; + if (i & 1) d += str; + return d; +}; + +function defaults(args, defs) { + var ret = {}; + if (args === true) + args = {}; + for (var i in defs) if (HOP(defs, i)) { + ret[i] = (args && HOP(args, i)) ? args[i] : defs[i]; + } + return ret; +}; |