aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2024-02-14 23:13:36 +0100
committerWojtek Kosior <koszko@koszko.org>2024-02-14 23:19:19 +0100
commitd761f509f2e0ea2503f7f75014e396cb6bb26802 (patch)
treeb36d65029f5305781e73ce629517103a2cbd179d
downloadtracifyjs-apprentice.tar.gz
tracifyjs-apprentice.zip
Initial commit.HEADapprentice
-rw-r--r--README.md219
-rw-r--r--trace-preamble.js146
-rw-r--r--trace-template-for-BINARY.js8
-rw-r--r--trace-template-for-CALL.js7
-rw-r--r--trace-template-for-CALL.old.js10
-rw-r--r--trace-template-for-LAZY_BINARY.js8
-rw-r--r--trace-template-for-PROPERTY_CALL.js9
7 files changed, 407 insertions, 0 deletions
diff --git a/README.md b/README.md
new file mode 100644
index 00000000..32cc99da
--- /dev/null
+++ b/README.md
@@ -0,0 +1,219 @@
+# TracifyJS
+
+This is a provisional tool for tracing the flow of data in JS programs. Made
+for personal reverse-engineering needs.
+
+This tool consists of 2 parts:
+
+- a modified variant of UglifyJS (currently based on version 3.14.2) that allows
+ certain expressions to be printed with user-specified templates; kept in the
+ `templatifyjs` branch
+- a set of JS code snippets/templates to use with the above; kept in the
+ counterintuitively-named `apprentice` branch together with this README
+
+As a provisional tool, TracifyJS can at any time be rearranged, moved entirely
+to another repo or replaced with something better. Please don't rely on
+anything being where it is now. Better make your own clone if you need it.
+
+# Working with templates
+
+The modified UglifyJS by itself knows nothing about tracing values during
+execution of a script. It merely allows one to replace certain expressions
+(e.g. additions, function calls) with something else. For example, consider
+this sample script
+
+```javascript
+function fib(n, prev=1, prev_prev=0) {
+ if (n === 0)
+ return prev_prev;
+
+ if (n === 1)
+ return prev;
+
+ return fib(n - 1, prev + prev_prev, prev);
+}
+
+console.log(fib(15));
+```
+
+Assuming it's in `sample-script.js`, we can do
+
+```shell-script
+uglifyjs sample-script.js \
+ --beautify \
+ "template_for_CALL='(console.log(\"call at line \" + /*line*/), /*expression*//*parented_args*/)'"
+```
+
+it should print
+
+```javascript
+function fib(n, prev = 1, prev_prev = 0) {
+ if (n === 0) return prev_prev;
+ if (n === 1) return prev;
+ return (console.log("call at line " + 8), fib(n - 1, prev + prev_prev, prev));
+}
+
+console.log((console.log("call at line " + 11), fib(15)));
+```
+
+As you can see, we used a template to dictate the way UglifyJS outputs function
+calls. All occurrences of `/*line*/`, `/*expression*/` and `/*parented_args*/`
+in a call template get replaced with their respective pieces of code.
+Template text outside `/*` and `*/` delimiters gets printed as is (although
+changes to the amount of whitespace might occur).
+
+Templates should be specified as options to `--beautify` (or to
+`--output-opts`). They should be given in the form of a JavaScript sequence
+of assignments,
+e.g. `template_for_CALL='something',template_for_PROPERTY_CALL="something-else"`
+(this syntax is also used for other options in the upstream UglifyJS).
+
+There are a few more details. Firstly, each kind of template has its own set of
+permitted substitutions which includes at least `/*line*/`, `/*col*/` and `/**/`
+(empty substitution). With the above `CALL` template example we omitted (for
+brevity) the `/*optional*/`, `/*col*/` and `/**/` substitutions. Additionally,
+the `*/` delimiter can be replaced with `**/` to cause the text immediately
+after substitution to be ignored until either whitespace or slash `/` is
+encountered. This can be used as a hack to write templates that are still
+syntactically correct JavaScript so that your IDE highlights and indents them
+correctly. See the included templates for examples.
+
+Also, please keep in mind that the template engine isn't very smart when it
+comes to strings. If your template includes a string literal with braces or
+whitespace and you use an output option like `max_line_len`, things might break.
+This shouldn't be a problem most of the time, though.
+
+# Tracifying code
+
+The templates system allows one to dictate different types of code modifications
+without having to modify (and possibly repackage, depending on one's workflow)
+our modified UglifyJS. That's cool but if you're still reading, you probably
+expect to get some ready-to-use tracing tool, not just an (incomplete) JS
+expression templating system, right?
+
+The `trace-*.js` snippets in this repository are what you're looking for. They
+allow function calls, binary expressions and values used/produced by them to be
+logged in a variable called simply `tracing`.
+
+Here are some shell functions useful for passing the snippets to UglifyJS. Note
+that besides the templates we also specify a **preamble** — a static piece of
+code to be included at the beginning of the output. Preamble is a feature of
+upstream UglifyJS.
+
+```shell-script
+TRACIFY_DIR="$(pwd)"
+
+function file_as_js_string {
+ printf "'%s'" \
+ "$(tr '\n' '\034' < "$1" |
+ sed 's/\\/\\\\/g;s/\o034/\\n/g;'"s/'/\\\\'/g;")"
+}
+
+function preamble_as_js_string {
+ file_as_js_string "$TRACIFY_DIR/trace-preamble.js"
+}
+
+function tracify_options {
+ printf 'preamble='
+ if [ "x" = "${NO_PREAMBLE:+x}" ]; then
+ printf "''"
+ else
+ preamble_as_js_string
+ fi
+
+ for TYPE in BINARY LAZY_BINARY CALL PROPERTY_CALL; do
+ printf ",template_for_%s=%s" \
+ "$TYPE" \
+ "$(file_as_js_string \
+ "$TRACIFY_DIR/trace-template-for-$TYPE.js")"
+ done
+}
+
+function tracify {
+ uglifyjs --beautify "$(tracify_options)" "$@"
+}
+```
+
+After defining these in your shell, you can do e.g.
+
+```shell-script
+tracify sample-script.js > sample-script-tracified.js
+```
+
+If you're evaluating multiple tracified scripts in the same scope, you'll want
+to only include the preamble in the first one. Using functions above, the rest
+could be tracified like this
+
+```shell-script
+NO_PREAMBLE=omit tracify another-script.js > another-script-tracified.js
+```
+
+# Evaluating and inspecting traces
+
+When reverse-engineering some website's logic, you'll most likely run the
+tracified code in the browser. How you do it is up to you. Pasting it
+manually, "serving" with Mitmproxy, substituting scripts using some quick and
+dirty browser extension… Either way, don't forget to update the integrity
+checksums if they are used :)
+
+Once the code has run, open JavaScript console in the context of that page. You
+can get the entire trace with
+
+```javascript
+tracing.get_log()
+```
+
+This will be a list of log entry objects, each looking like this
+
+```javascript
+{
+ op_name: "+"
+ line: 8
+ column: 22
+ id: 71
+ parent_call: Object { op_name: "call", line: 8, column: 11, … }
+ left: 377
+ right: 233
+ result: 610
+}
+```
+
+The `left` and `right` properties are of course specific to binary operations.
+Log entries of function calls will not have these but they will instead have
+e.g. a `function_object` property. You get the point.
+
+Feel free to use JavaScript as an aid when inspecting traces
+
+```javascript
+tracing.get_log().filter(op => op.function_object?.name === "jA")
+```
+
+You also get a map of objects (operands, function arguments, results, etc.) to
+lists of log entries they appear in. You can use it like this
+
+```javascript
+tracing.get_objects().get(610) // How did 610 get produced?
+```
+
+Finally, your particular use case might require changes to the templates. Maybe
+the script you're RE'ing causes the page to get reloaded and you have no access
+to the `tracing` object? You might then want to modify the preamble to send the
+logs to your server, for example with the beacon API. Maybe the overhead of
+tracing is too big? Find out if you can limit the tracing to only a subset of
+expressions and still achieve the goal. Finally, avoiding name clashes with
+traced code and guarding against redefinitions of well-known
+properties/functions (think `Map.prototype.get = "trololo";`) are beyond the
+scope of this prototype. These should be easy to work around, though, if you're
+able to replay the browser session somehow.
+
+# Copying
+
+Code on this git branch is Copyright 2024 Wojtek Kosior
+<[koszko@koszko.org](mailto:koszko@koszko.org)>, released under the terms of
+Creative Commons Zero v1.0.
+
+This is public domain software made and released as a gift to the public. You
+can legally use it any way you want. However, I, the author, kindly request
+(without legal requirement) that you don't integrate it into any proprietary or
+otherwise harmful product. Please, make your derivative work free/libre
+software and a gift to the public as well!
diff --git a/trace-preamble.js b/trace-preamble.js
new file mode 100644
index 00000000..75c899d0
--- /dev/null
+++ b/trace-preamble.js
@@ -0,0 +1,146 @@
+/**
+ * Runtime support object for tracified scripts.
+ *
+ * The record_*() methods are meant to be invoked by the trace templates.
+ * Each invocation appends one entry to an in-memory log and indexes the
+ * values involved, so that they can later be inspected through get_log()
+ * and get_objects().
+ */
+const tracing = (() => {
+    // All log entries created so far, in order of creation.
+    const log = [];
+    // Maps a recorded value to the array of log entries it was recorded in.
+    const objects = new Map();
+
+    // Log entry of the innermost traced operation currently being
+    // evaluated, or null when no traced operation is in progress.
+    let call_stack_top = null;
+
+    function is_nullish(value) {
+        return value === null || value === undefined;
+    }
+
+    /**
+     * Store `value` as `log_entry[name]` and index it in `objects`.
+     *
+     * With `as_array` set, `value` is iterated and every element is indexed
+     * separately; otherwise `value` itself is indexed.  null, undefined and
+     * booleans are stored in the entry but never indexed.
+     *
+     * @returns `value`, unchanged, so that calls can be used inline.
+     */
+    function record_value(log_entry, name, value, as_array=false) {
+        log_entry[name] = value;
+
+        for (const _value of as_array ? value : [value]) {
+            if (is_nullish(_value) || _value === false || _value === true)
+                continue;
+
+            const relevant_log_entries = objects.get(_value) || [];
+
+            relevant_log_entries.push(log_entry);
+            objects.set(_value, relevant_log_entries);
+        }
+
+        return value;
+    }
+
+    /**
+     * Create a log entry, make it the current `call_stack_top` while
+     * `cb(log_entry)` runs and return whatever `cb` returns.
+     *
+     * An exception thrown by `cb` gets recorded under the "error" key of
+     * the entry and is then rethrown.  The previous `call_stack_top` is
+     * restored in either case.
+     */
+    function with_log_entry(op_name, line, column, cb) {
+        const log_entry = {
+            op_name, line, column,
+            id: log.length,
+            parent_call: call_stack_top
+        };
+        const saved_stack_top = call_stack_top;
+
+        log.push(log_entry);
+
+        call_stack_top = log_entry;
+
+        try {
+            return cb(log_entry);
+        } catch(ex) {
+            record_value(log_entry, "error", ex);
+
+            throw ex;
+        } finally {
+            call_stack_top = saved_stack_top;
+        }
+    }
+
+    return {
+        // Map of recorded values to arrays of log entries they appear in.
+        get_objects: () => objects,
+
+        // Array of all log entries.
+        get_log: () => log,
+
+        /**
+         * Trace a binary operation.  Both operands are produced (left one
+         * first) and recorded as "left" and "right" before
+         * `operation(left, right)` is computed and recorded as "result".
+         */
+        record_binary: function(line, column, operation_name, operation,
+                                left_producer, right_producer) {
+            function go(log_entry) {
+                const left = record_value(log_entry, "left", left_producer());
+
+                const right = record_value(log_entry,
+                                           "right",
+                                           right_producer());
+
+                const result = operation(left, right);
+
+                return record_value(log_entry, "result", result);
+            }
+
+            return with_log_entry(operation_name, line, column, go);
+        },
+
+        /**
+         * Trace a short-circuiting binary operation.  Only the left operand
+         * is produced here; `operation` receives `right_producer` itself and
+         * decides whether to call it.  The right operand is not recorded.
+         */
+        record_lazy_binary: function(line, column, operation_name, operation,
+                                     left_producer, right_producer) {
+            function go(log_entry) {
+                const left = record_value(log_entry, "left", left_producer());
+
+                const result = operation(left, right_producer);
+
+                return record_value(log_entry, "result", result);
+            }
+
+            return with_log_entry(operation_name, line, column, go);
+        },
+
+        /**
+         * Trace a plain call `f(args)` or, when `optional` is truthy,
+         * `f?.(args)`.  `args_producer` must return an array of arguments;
+         * it only gets invoked if the call actually takes place.
+         */
+        record_call: function(line, column, function_producer, optional,
+                              args_producer) {
+            function go(log_entry) {
+                const function_object = record_value(log_entry,
+                                                     "function_object",
+                                                     function_producer());
+
+                const record_args = () => record_value(log_entry,
+                                                       "args",
+                                                       args_producer(),
+                                                       true);
+
+                const result = optional ?
+                      function_object?.(...record_args()) :
+                      function_object(...record_args());
+
+                return record_value(log_entry, "result", result);
+            }
+
+            return with_log_entry("call", line, column, go);
+        },
+
+        /**
+         * Trace a method call `obj[prop](args)`.
+         *
+         * `property_optional` and `optional` are strings supplied by the
+         * template (presumably "?." or empty; they are only tested for
+         * truthiness and interpolated into the entry name).  A truthy
+         * `property_optional` stands for `obj?.[prop]`, a truthy `optional`
+         * for `obj[prop]?.(args)`.
+         */
+        record_prop_call: function(line, column, this_producer,
+                                   property_optional, property_producer,
+                                   optional, args_producer) {
+            function go(log_entry) {
+                const bound_this = record_value(log_entry,
+                                                "bound_this",
+                                                this_producer());
+
+                // An optional chain short-circuits as a whole: when the
+                // object is nullish, neither the property expression nor
+                // the arguments are evaluated and the result is undefined,
+                // regardless of whether the call itself is optional.
+                if (property_optional && is_nullish(bound_this)) {
+                    record_value(log_entry, "function_object", undefined);
+
+                    return undefined;
+                }
+
+                const property = record_value(log_entry,
+                                              "property",
+                                              property_producer());
+
+                const function_object = record_value(log_entry,
+                                                     "function_object",
+                                                     bound_this[property]);
+
+                if (optional && is_nullish(function_object))
+                    return undefined;
+
+                const args = record_value(log_entry,
+                                          "args",
+                                          args_producer(),
+                                          true);
+
+                const result = Reflect.apply(function_object,
+                                             bound_this,
+                                             args);
+
+                return record_value(log_entry, "result", result);
+            }
+
+            return with_log_entry(
+                `obj${property_optional}[prop]${optional}() call`,
+                line, column,
+                go
+            );
+        }
+    };
+})();
diff --git a/trace-template-for-BINARY.js b/trace-template-for-BINARY.js
new file mode 100644
index 00000000..bb423124
--- /dev/null
+++ b/trace-template-for-BINARY.js
@@ -0,0 +1,8 @@
+tracing.record_binary(
+ /*line**/LINE_NUMBER/**/,
+ /*col**/COL_NUMBER/**/,
+ "/*operator*/",
+ (a, b) => a /*operator**/+ b,
+ () => (/*left**/LEFT_OPERAND/**/),
+ () => (/*right**/RIGHT_OPERAND/**/)
+)
diff --git a/trace-template-for-CALL.js b/trace-template-for-CALL.js
new file mode 100644
index 00000000..f1592d3b
--- /dev/null
+++ b/trace-template-for-CALL.js
@@ -0,0 +1,7 @@
+tracing.record_call(
+ /*line**/LINE_NUMBER/**/,
+ /*col**/COL_NUMBER/**/,
+ () => (/*expression**/FUN_EXPR/**/),
+ "/*optional*/",
+ () => ((...args) => args)/*parented_args**/(...COMMA_SEPARATED_ARGS)
+)
diff --git a/trace-template-for-CALL.old.js b/trace-template-for-CALL.old.js
new file mode 100644
index 00000000..34d1be25
--- /dev/null
+++ b/trace-template-for-CALL.old.js
@@ -0,0 +1,10 @@
+(functionObject =>
+ ((...traced_args) => tracing.record_call_end(
+ tracing.record_call_start(
+ /*line**/LINE_NUMBER/**/,
+ /*col**/COL_NUMBER/**/,
+ ...traced_args),
+ () => (functionObject/*optional*/(...traced_args))
+ ))
+ /*parented_args**/(...COMMA_SEPARATED_ARGS)/**/)
+(/*expression**/FUN_EXPR/**/)
diff --git a/trace-template-for-LAZY_BINARY.js b/trace-template-for-LAZY_BINARY.js
new file mode 100644
index 00000000..256b5dcb
--- /dev/null
+++ b/trace-template-for-LAZY_BINARY.js
@@ -0,0 +1,8 @@
+tracing.record_lazy_binary(
+ /*line**/LINE_NUMBER/**/,
+ /*col**/COL_NUMBER/**/,
+ "/*operator*/",
+ (a, b) => a /*operator**/|| b(),
+ () => (/*left**/LEFT_OPERAND/**/),
+ () => (/*right**/RIGHT_OPERAND/**/)
+)
diff --git a/trace-template-for-PROPERTY_CALL.js b/trace-template-for-PROPERTY_CALL.js
new file mode 100644
index 00000000..a4471215
--- /dev/null
+++ b/trace-template-for-PROPERTY_CALL.js
@@ -0,0 +1,9 @@
+tracing.record_prop_call(
+ /*line**/LINE_NUMBER/**/,
+ /*col**/COL_NUMBER/**/,
+ () => (/*this_expression**/OBJ_EXPR/**/),
+ "/*property_optional*/",
+ () => (/*property_expression**/"PROP_NAME"/**/),
+ "/*optional*/",
+ () => ((...args) => args)/*parented_args**/(...COMMA_SEPARATED_ARGS)
+)