From 458e251d7ed6e9ac97849237264caa40a40c76b3 Mon Sep 17 00:00:00 2001 From: Mihai Bazon Date: Mon, 20 Aug 2012 17:19:30 +0300 Subject: added mangler and other stuff --- lib/ast.js | 34 +++++++-------- lib/output.js | 14 +++++-- lib/scope.js | 123 +++++++++++++++++++++++++++++++++++++++++++++++++++---- lib/utils.js | 19 ++++++++- tmp/test-node.js | 77 ++++++++++++++++++++-------------- 5 files changed, 206 insertions(+), 61 deletions(-) diff --git a/lib/ast.js b/lib/ast.js index c170b629..ac5010bb 100644 --- a/lib/ast.js +++ b/lib/ast.js @@ -44,6 +44,11 @@ var AST_Node = DEFNODE("Node", "start end", { } }, null); +AST_Node.warn_function = noop; +AST_Node.warn = function(txt, props) { + AST_Node.warn_function(string_template(txt, props)); +}; + var AST_Debugger = DEFNODE("Debugger", null, { $documentation: "Represents a debugger statement" }); @@ -121,6 +126,7 @@ var AST_For = DEFNODE("For", "init condition step", { if (this.init) this.init._walk(visitor); if (this.condition) this.condition._walk(visitor); if (this.step) this.step._walk(visitor); + this.body._walk(visitor); }); } }, AST_Statement); @@ -132,6 +138,7 @@ var AST_ForIn = DEFNODE("ForIn", "init name object", { if (this.init) this.init._walk(visitor); if (this.name) this.name._walk(visitor); if (this.object) this.object._walk(visitor); + this.body._walk(visitor); }); } }, AST_Statement); @@ -151,12 +158,15 @@ var AST_With = DEFNODE("With", "expression", { var AST_Scope = DEFNODE("Scope", null, { $documentation: "Base class for all statements introducing a lexical scope", initialize: function() { - this.labels = {}; - this.variables = {}; - this.functions = {}; - this.uses_with = false; - this.uses_eval = false; - this.parent_scope = null; + this.labels = {}; // map name to AST_Label (labels defined in this scope) + this.variables = {}; // map name to AST_SymbolVar (variables defined in this scope; includes functions) + this.functions = {}; // map name to AST_SymbolDefun (functions defined in this scope) + this.uses_with = false; // will be set to true if this or some nested scope uses the `with` statement + this.uses_eval = false; // will be set to true if this or nested scope uses the global `eval` + this.parent_scope = null; // the parent scope + this.enclosed = []; // a list of variables this or from outer scope(s) that are accessed from this or inner scopes + this.cname = -1; // the current index for mangling functions/variables + this.lname = -1; // the current index for mangling labels } }, AST_BlockStatement); @@ -496,17 +506,7 @@ var AST_Label = DEFNODE("Label", null, { }, AST_SymbolDeclaration); var AST_SymbolRef = DEFNODE("SymbolRef", "symbol", { - $documentation: "Reference to some symbol (not definition/declaration)", - reference: function(symbol) { - if (symbol) { - this.symbol = symbol; - symbol.references.push(this); - this.global = symbol.scope.parent_scope == null; - } else { - this.undeclared = true; - this.global = true; - } - } + $documentation: "Reference to some symbol (not definition/declaration)" }, AST_Symbol); var AST_LabelRef = DEFNODE("LabelRef", null, { diff --git a/lib/output.js b/lib/output.js index e14c649f..748921c7 100644 --- a/lib/output.js +++ b/lib/output.js @@ -804,12 +804,20 @@ function OutputStream(options) { DEFPRINT(AST_Symbol, function(self, output){ output.print_name(self.name); }); + DEFPRINT(AST_SymbolDeclaration, function(self, output){ + output.print_name(self.mangled_name || self.name); + }); + DEFPRINT(AST_SymbolRef, function(self, output){ + var def = self.symbol; + if (def) { + def.print(output); + } else { + output.print_name(self.name); + } + }); DEFPRINT(AST_This, function(self, output){ output.print("this"); }); - DEFPRINT(AST_Label, function(self, output){ - output.print_name(self.name); - }); DEFPRINT(AST_Constant, function(self, output){ output.print(self.getValue()); }); diff --git a/lib/scope.js b/lib/scope.js index 4c0aa105..d16ac8a0 100644 --- a/lib/scope.js +++ b/lib/scope.js @@ -21,18 +21,20 @@ AST_Scope.DEFMETHOD("figure_out_scope", function(){ s.uses_with = true; return; } - if (node instanceof AST_SymbolDeclaration && !scope.parent_scope) { - node.global = true; - } - if (node instanceof AST_SymbolVar) { - scope.def_variable(node); - } - else if (node instanceof AST_SymbolLambda) { + if (node instanceof AST_SymbolLambda) { scope.def_function(node); } else if (node instanceof AST_SymbolDefun) { + // Careful here, the scope where this should be defined is + // the parent scope. The reason is that we enter a new + // scope when we encounter the AST_Defun node (which is + // instanceof AST_Scope) but we get to the symbol a bit + // later. scope.parent_scope.def_function(node); } + else if (node instanceof AST_SymbolVar) { + scope.def_variable(node); + } else if (node instanceof AST_Label) { scope.def_label(node); } @@ -58,19 +60,72 @@ AST_Scope.DEFMETHOD("figure_out_scope", function(){ } else if (node instanceof AST_SymbolRef) { var sym = node.scope.find_variable(node); + node.reference(sym); if (!sym) { if (node.name == "eval") { for (var s = scope; s; s = s.parent_scope) s.uses_eval = true; } - } else { - node.reference(sym); } } }); this.walk(tw); }); +AST_Scope.DEFMETHOD("scope_warnings", function(options){ + options = defaults(options, { + undeclared : false, + assign_to_global : true + }); + var tw = new TreeWalker(function(node){ + if (options.undeclared + && node instanceof AST_SymbolRef + && node.undeclared) + { + // XXX: this also warns about JS standard names, + // i.e. Object, Array, parseInt etc. Should add a list of + // exceptions. + AST_Node.warn("Undeclared symbol: {name} [{line},{col}]", { + name: node.name, + line: node.start.line, + col: node.start.col + }); + } + if (options.assign_to_global + && node instanceof AST_Assign + && node.left instanceof AST_SymbolRef + && (node.left.undeclared + || (node.left.symbol.global + && node.left.scope !== node.left.symbol.scope))) + { + AST_Node.warn("{msg}: {name} [{line},{col}]", { + msg: node.left.undeclared ? "Accidental global?" : "Assignment to global", + name: node.left.name, + line: node.start.line, + col: node.start.col + }); + } + }); + this.walk(tw); +}); + +AST_SymbolRef.DEFMETHOD("reference", function(symbol) { + if (symbol) { + this.symbol = symbol; + var origin = symbol.scope; + symbol.references.push(this); + for (var s = this.scope; s; s = s.parent_scope) { + push_uniq(s.enclosed, symbol); + if (s === origin) break; + } + } else { + this.undeclared = true; + for (var s = this.scope; s; s = s.parent_scope) { + push_uniq(s.enclosed, this); + } + } +}); + AST_Scope.DEFMETHOD("find_variable", function(name){ if (name instanceof AST_Symbol) name = name.name; return this.variables[name] || @@ -90,6 +145,7 @@ AST_Scope.DEFMETHOD("def_function", function(symbol){ }); AST_Scope.DEFMETHOD("def_variable", function(symbol){ + symbol.global = !this.parent_scope; this.variables[symbol.name] = symbol; delete this.functions[symbol.name]; symbol.scope = this; @@ -99,3 +155,52 @@ AST_Scope.DEFMETHOD("def_label", function(symbol){ this.labels[symbol.name] = symbol; symbol.scope = this; }); + +AST_Scope.DEFMETHOD("next_mangled", function(for_label){ + var ext = this.enclosed, n = ext.length; + out: for (;;) { + var m = base54(for_label + ? (++this.lname) + : (++this.cname)); + + if (!is_identifier(m)) continue; // skip over "do" + + // labels are easy, since they can't be referenced from nested + // scopes. XXX: not sure that will be the case when the `let` + // keyword is to be supported. + if (for_label) return m; + + // if it's for functions or variables, we must ensure that the + // mangled name does not shadow a name from some parent scope + // that is referenced in this or in inner scopes. + for (var i = n; --i >= 0;) { + var sym = ext[i]; + var name = sym.mangled_name || sym.name; + if (m == name) continue out; + } + + return m; + } +}); + +AST_SymbolDeclaration.DEFMETHOD("mangle", function(){ + if (!this.global) + this.mangled_name = this.scope.next_mangled(false); +}); + +AST_Label.DEFMETHOD("mangle", function(){ + this.mangled_name = this.scope.next_mangled(true); +}); + +AST_Scope.DEFMETHOD("mangle_names", function(){ + var tw = new TreeWalker(function(node){ + // We only need to mangle declarations. Special logic wired + // into the code generator will display the mangled name if + // it's present (and for AST_SymbolRef-s it'll use the mangled + // name of the AST_SymbolDeclaration that it points to). + if (node instanceof AST_SymbolDeclaration) { + node.mangle(); + } + }); + this.walk(tw); +}); diff --git a/lib/utils.js b/lib/utils.js index b0020ac8..ab3bc3d6 100644 --- a/lib/utils.js +++ b/lib/utils.js @@ -98,7 +98,13 @@ var MAP = (function(){ return MAP; })(); -var BASE54_DIGITS = "etnrisouaflchpdvmgybwESxTNCkLAOM_DPHBjFIqRUzWXV$JKQGYZ0516372984"; +// XXX: currently this is optimized for jQuery, though I have the +// feeling it works well in general for many scripts (well, better +// than alphabetical order). It would be nice if we could adapt it to +// the currently running script. +// var BASE54_DIGITS = "etnrisouaflchpdvmgybwESxTNCkLAOM_DPHBjFIqRUzWXV$JKQGYZ0516372984"; + +var BASE54_DIGITS = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ$_0123456789"; function base54(num) { var ret = "", base = 54; @@ -109,3 +115,14 @@ function base54(num) { } while (num > 0); return ret; }; + +function push_uniq(array, el) { + if (array.indexOf(el) < 0) + array.push(el); +}; + +function string_template(text, props) { + return text.replace(/\{(.+?)\}/g, function(str, p){ + return props[p]; + }); +}; diff --git a/tmp/test-node.js b/tmp/test-node.js index 3c3d7b56..a80d0935 100755 --- a/tmp/test-node.js +++ b/tmp/test-node.js @@ -22,43 +22,58 @@ load_global("../lib/scope.js"); load_global("../lib/output.js"); + AST_Node.warn_function = function(txt) { + sys.debug(txt); + }; + /// var filename = process.argv[2]; - console.time("parse"); - var ast = parse(fs.readFileSync(filename, "utf8")); - console.timeEnd("parse"); + var code = fs.readFileSync(filename, "utf8"); + var ast = time_it("parse", function() { + return parse(code); + }); var stream = OutputStream({ beautify: true }); - console.time("figure_out_scope"); - ast.figure_out_scope(); - console.timeEnd("figure_out_scope"); - console.time("generate"); - ast.print(stream); - console.timeEnd("generate"); - //sys.puts(stream.get()); + time_it("scope", function(){ + ast.figure_out_scope(); + }); + time_it("mangle", function(){ + ast.mangle_names(); + }); + time_it("generate", function(){ + ast.print(stream); + }); + sys.puts(stream.get()); + // var w = new TreeWalker(function(node, descend){ + // if (node.start) { + // console.log(node.TYPE + " [" + node.start.line + ":" + node.start.col + "]"); + // } else { + // console.log(node.TYPE + " [NO START]"); + // } + // if (node instanceof AST_Scope) { + // if (node.uses_eval) console.log("!!! uses eval"); + // if (node.uses_with) console.log("!!! uses with"); + // } + // if (node instanceof AST_SymbolDeclaration) { + // console.log("--- declaration " + node.name + (node.global ? " [global]" : "")); + // } + // else if (node instanceof AST_SymbolRef) { + // console.log("--- reference " + node.name + " to " + (node.symbol ? node.symbol.name : "global")); + // if (node.symbol) { + // console.log(" declaration at: " + node.symbol.start.line + ":" + node.symbol.start.col); + // } + // } + // }); + // ast._walk(w); - var w = new TreeWalker(function(node, descend){ - if (node.start) { - console.log(node.TYPE + " [" + node.start.line + ":" + node.start.col + "]"); - } else { - console.log(node.TYPE + " [NO START]"); - } - if (node instanceof AST_Scope) { - if (node.uses_eval) console.log("!!! uses eval"); - if (node.uses_with) console.log("!!! uses with"); - } - if (node instanceof AST_SymbolDeclaration) { - console.log("--- declaration " + node.name + (node.global ? " [global]" : "")); - } - else if (node instanceof AST_SymbolRef) { - console.log("--- reference " + node.name + " to " + (node.symbol ? node.symbol.name : "global")); - if (node.symbol) { - console.log(" declaration at: " + node.symbol.start.line + ":" + node.symbol.start.col); - } - } - }); - ast._walk(w); + ast.scope_warnings(); + + function time_it(name, cont) { + var t1 = new Date().getTime(); + try { return cont(); } + finally { sys.debug("// " + name + ": " + ((new Date().getTime() - t1) / 1000).toFixed(3) + " sec."); } + }; })(); -- cgit v1.2.3