diff options
-rw-r--r-- | README.md | 219 | ||||
-rw-r--r-- | trace-preamble.js | 146 | ||||
-rw-r--r-- | trace-template-for-BINARY.js | 8 | ||||
-rw-r--r-- | trace-template-for-CALL.js | 7 | ||||
-rw-r--r-- | trace-template-for-CALL.old.js | 10 | ||||
-rw-r--r-- | trace-template-for-LAZY_BINARY.js | 8 | ||||
-rw-r--r-- | trace-template-for-PROPERTY_CALL.js | 9 |
7 files changed, 407 insertions, 0 deletions
diff --git a/README.md b/README.md new file mode 100644 index 00000000..32cc99da --- /dev/null +++ b/README.md @@ -0,0 +1,219 @@ +# TracifyJS + +This is a provisional tool for tracing the flow of data in JS programs. Made +for personal reverse-engineering needs. + +This tool consists of 2 parts: + +- a modified variant of UglifyJS (currently based on version 3.14.2) that allows + certain expressions to be printed with user-specified templates; kept in the + `templatifyjs` branch +- a set of JS code snippets/templates to use with the above; kept in the + counterintuitively-named `apprentice` branch together with this README + +As a provisional tool, TracifyJS can at any time be rearranged, moved entirely +to another repo or replaced with something better. Please don't rely on +anything being where it is now. Better make your own clone if you need it. + +# Working with templates + +The modified UglifyJS by itself knows nothing about tracing values during +execution of a script. It merely allows one to replace certain expressions +(e.g. additions, function calls) with something else. For example, consider +this sample script + +```javascript +function fib(n, prev=1, prev_prev=0) { + if (n === 0) + return prev_prev; + + if (n === 1) + return prev; + + return fib(n - 1, prev + prev_prev, prev); +} + +console.log(fib(15)); +``` + +Assuming it's in `sample-script.js`, we can do + +```shell-script +uglifyjs sample-script.js \ + --beautify \ + "template_for_CALL='(console.log(\"call at line \" + /*line*/), /*expression*//*parented_args*/)'" +``` + +it should print + +```javascript +function fib(n, prev = 1, prev_prev = 0) { + if (n === 0) return prev_prev; + if (n === 1) return prev; + return (console.log("call at line " + 8), fib(n - 1, prev + prev_prev, prev)); +} + +console.log((console.log("call at line " + 11), fib(15))); +``` + +As you can see, we used a template to dictate the way UglifyJS outputs function +calls. 
All occurrences of `/*line*/`, `/*expression*/`, and `/*parented_args*/` +in a call template get replaced with their respective pieces of code. +Template text outside `/*` and `*/` delimiters gets printed as is (although +changes to the amount of whitespace might occur). + +Templates should be specified as options to `--beautify` (or to +`--output-opts`). They should be given in the form of a JavaScript sequence of +assignments, +e.g. `template_for_CALL='something',template_for_PROPERTY_CALL="something-else"` +(this syntax is also used for other options in the upstream UglifyJS). + +There are a few more details. Firstly, each kind of template has its own set of +permitted substitutions which includes at least `/*line*/`, `/*col*/` and `/**/` +(empty substitution). With the above `CALL` template example we omitted (for +brevity) the `/*optional*/`, `/*col*/` and `/**/` substitutions. Additionally, +the `*/` delimiter can be replaced with `**/` to cause the text immediately +after substitution to be ignored until either whitespace or slash `/` is +encountered. This can be used as a hack to write templates that are still +syntactically correct JavaScript so that your IDE highlights and indents them +correctly. See the included templates for examples. + +Also, please keep in mind that the template engine isn't very smart when it +comes to strings. If your template includes a string literal with braces or +whitespace and you use an output option like `max_line_len`, things might break. +This shouldn't be a problem most of the time, though. + +# Tracifying code + +The templates system allows one to dictate different types of code modifications +without having to modify (and possibly repackage, depending on one's workflow) +our modified UglifyJS. That's cool but if you're still reading, you probably +expect to get some ready-to-use tracing tool, not just an (incomplete) JS +expression templating system, right? 
+ +The `trace-*.js` snippets in this repository are what you're looking for. They +allow function calls, binary expressions and values used/produced by them to be +logged in a variable called simply `tracing`. + +Here are some shell functions useful for passing the snippets to UglifyJS. Note +that besides the templates we also specify a **preamble** — static piece of code +to be included at the beginning of the output. Preamble is a feature of +upstream UglifyJS. + +```shell-script +TRACIFY_DIR="$(pwd)" + +function file_as_js_string { + printf "'%s'" \ + "$(tr '\n' '\034' < "$1" | + sed 's/\\/\\\\/g;s/\o034/\\n/g;'"s/'/\\\\'/g;")" +} + +function preamble_as_js_string { + file_as_js_string "$TRACIFY_DIR/trace-preamble.js" +} + +function tracify_options { + printf 'preamble=' + if [ "x" = "${NO_PREAMBLE:+x}" ]; then + printf "''" + else + preamble_as_js_string + fi + + for TYPE in BINARY LAZY_BINARY CALL PROPERTY_CALL; do + printf ",template_for_%s=%s" \ + "$TYPE" \ + "$(file_as_js_string \ + "$TRACIFY_DIR/trace-template-for-$TYPE.js")" + done +} + +function tracify { + uglifyjs --beautify "$(tracify_options)" "$@" +} +``` + +After defining these in your shell, you can do e.g. + +```shell-script +tracify sample-script.js > sample-script-tracified.js +``` + +If you're evaluating multiple tracified scripts in the same scope, you'll want +to only include the preamble in the first one. Using functions above, the rest +could be tracified like this + +```shell-script +NO_PREAMBLE=omit tracify another-script.js > another-script-tracified.js +``` + +# Evaluating and inspecting traces + +When reverse-engineering some website's logic, you'll most likely run the +tracified code in the browser. How you do it is up to you. 
Pasting it +manually, "serving" with Mitmproxy, substituting scripts using some quick and +dirty browser extension… Either way, don't forget to update the integrity +checksum if they are used :) + +Once the code has run, open JavaScript console in the context of that page. You +can get the entire trace with + +```javascript +tracing.get_log() +``` + +This will be a list of log entry objects, each looking like this + +```javascript +{ + op_name: "+" + line: 8 + column: 22 + id: 71 + parent_call: Object { op_name: "call", line: 8, column: 11, … } + left: 377 + right: 233 + result: 610 +} +``` + +The `left` and `right` properties are of course specific to binary operations. +Log entries of function calls will not have these but they will instead have +e.g. a `function_object` property. You get the point. + +Feel free to use JavaScript as an aid when inspecting traces + +```javascript +tracing.get_log().filter(op => op.function_object?.name === "jA") +``` + +You also get a map of objects (operands, function arguments, results, etc.) to +lists of log entries they appear in. You can use it like this + +```javascript +tracing.get_objects().get(610) // How did 610 get produced? +``` + +Finally, your particular use case might require changes to the templates. Maybe +the script you're RE'ing causes the page to get reloaded and you have no access +to the `tracing` object? You might then want to modify the preamble to send the +logs to your server, for example with the beacon API. Maybe the overhead of +tracing is too big? Find out if you can limit the tracing to only a subset of +expressions and still achieve the goal. Finally, avoiding name clashes with +traced code and guarding against redefinitions of well-known +properties/functions (think `Map.prototype.get = "trololo";`) are beyond the +scope of this prototype. These should be easy to work around, though, if you're +able to replay the browser session somehow. 
+ +# Copying + +Code on this git branch is Copyright 2024 Wojtek Kosior +<[koszko@koszko.org](mailto:koszko@koszko.org)>, released under the terms of +Creative Commons Zero v1.0. + +This is public domain software made and released as a gift to the public. You +can legally use it any way you want. However, I, the author, kindly request +(without legal requirement) that you don't integrate it into any proprietary or +otherwise harmful product. Please, make your derivative work free/libre +software and a gift to the public as well! diff --git a/trace-preamble.js b/trace-preamble.js new file mode 100644 index 00000000..75c899d0 --- /dev/null +++ b/trace-preamble.js @@ -0,0 +1,146 @@ +const tracing = (() => { + const log = []; + const objects = new Map(); + + let call_stack_top = null; + + function record_value(log_entry, name, value, as_array=false) { + log_entry[name] = value; + + for (const _value of as_array? value : [value]) { + if (_value !== null && _value !== undefined + && _value !== false && _value !== true) { + const relevant_log_entries = objects.get(_value) || []; + + relevant_log_entries.push(log_entry); + objects.set(_value, relevant_log_entries); + } + } + + return value; + } + + function with_log_entry(op_name, line, column, cb) { + const log_entry = { + op_name, line, column, + id: log.length, + parent_call: call_stack_top + }; + const saved_stack_top = call_stack_top; + + log.push(log_entry); + + call_stack_top = log_entry; + + try { + return cb(log_entry); + } catch(ex) { + record_value(log_entry, "error", ex); + + throw ex; + } finally { + call_stack_top = saved_stack_top; + } + } + + return { + get_objects: () => objects, + + get_log: () => log, + + record_binary: function(line, column, operation_name, operation, + left_producer, right_producer) { + function go(log_entry) { + const left = record_value(log_entry, "left", left_producer()); + + const right = record_value(log_entry, + "right", + right_producer()); + + const result = operation(left, 
right); + + return record_value(log_entry, "result", result); + } + + return with_log_entry(operation_name, line, column, go); + }, + + record_lazy_binary: function(line, column, operation_name, operation, + left_producer, right_producer) { + function go(log_entry) { + const left = record_value(log_entry, "left", left_producer()); + + const result = operation(left, right_producer); + + return record_value(log_entry, "result", result); + } + + return with_log_entry(operation_name, line, column, go); + }, + + record_call: function(line, column, function_producer, optional, + args_producer) { + function go(log_entry) { + const function_object = record_value(log_entry, + "function_object", + function_producer()); + + const record_args = () => record_value(log_entry, + "args", + args_producer(), + true); + + const result = optional ? + function_object?.(...record_args()) : + function_object(...record_args()); + + return record_value(log_entry, "result", result); + } + + return with_log_entry("call", line, column, go); + }, + + record_prop_call: function(line, column, this_producer, + property_optional, property_producer, + optional, args_producer) { + function go(log_entry) { + const bound_this = record_value(log_entry, + "bound_this", + this_producer()); + + const record_property = () => record_value( + log_entry, "property", property_producer() + ); + + const function_object = + record_value(log_entry, + "function_object", + property_optional ? 
+ bound_this?.[record_property()] : + bound_this[record_property()]); + + if ((function_object === null || + function_object === undefined) && + optional) + return undefined; + + const record_args = () => record_value(log_entry, + "args", + args_producer(), + true); + + const result = Function.apply.call(function_object, + bound_this, + record_args()); + + return record_value(log_entry, "result", result); + } + + return with_log_entry( + `obj${property_optional}[prop]${optional}() call`, + line, column, + go + ); + } + }; +})(); diff --git a/trace-template-for-BINARY.js b/trace-template-for-BINARY.js new file mode 100644 index 00000000..bb423124 --- /dev/null +++ b/trace-template-for-BINARY.js @@ -0,0 +1,8 @@ +tracing.record_binary( + /*line**/LINE_NUMBER/**/, + /*col**/COL_NUMBER/**/, + "/*operator*/", + (a, b) => a /*operator**/+ b, + () => (/*left**/LEFT_OPERAND/**/), + () => (/*right**/RIGHT_OPERAND/**/) +) diff --git a/trace-template-for-CALL.js b/trace-template-for-CALL.js new file mode 100644 index 00000000..f1592d3b --- /dev/null +++ b/trace-template-for-CALL.js @@ -0,0 +1,7 @@ +tracing.record_call( + /*line**/LINE_NUMBER/**/, + /*col**/COL_NUMBER/**/, + () => (/*expression**/FUN_EXPR/**/), + "/*optional*/", + () => ((...args) => args)/*parented_args**/(...COMMA_SEPARATED_ARGS) +) diff --git a/trace-template-for-CALL.old.js b/trace-template-for-CALL.old.js new file mode 100644 index 00000000..34d1be25 --- /dev/null +++ b/trace-template-for-CALL.old.js @@ -0,0 +1,10 @@ +(functionObject => + ((...traced_args) => tracing.record_call_end( + tracing.record_call_start( + /*line**/LINE_NUMBER/**/, + /*col**/COL_NUMBER/**/, + ...traced_args), + () => (functionObject/*optional*/(...traced_args)) + )) + /*parented_args**/(...COMMA_SEPARATED_ARGS)/**/) +(/*expression**/FUN_EXPR/**/) diff --git a/trace-template-for-LAZY_BINARY.js b/trace-template-for-LAZY_BINARY.js new file mode 100644 index 00000000..256b5dcb --- /dev/null +++ b/trace-template-for-LAZY_BINARY.js @@ 
-0,0 +1,8 @@ +tracing.record_lazy_binary( + /*line**/LINE_NUMBER/**/, + /*col**/COL_NUMBER/**/, + "/*operator*/", + (a, b) => a /*operator**/|| b(), + () => (/*left**/LEFT_OPERAND/**/), + () => (/*right**/RIGHT_OPERAND/**/) +) diff --git a/trace-template-for-PROPERTY_CALL.js b/trace-template-for-PROPERTY_CALL.js new file mode 100644 index 00000000..a4471215 --- /dev/null +++ b/trace-template-for-PROPERTY_CALL.js @@ -0,0 +1,9 @@ +tracing.record_prop_call( + /*line**/LINE_NUMBER/**/, + /*col**/COL_NUMBER/**/, + () => (/*this_expression**/OBJ_EXPR/**/), + "/*property_optional*/", + () => (/*property_expression**/"PROP_NAME"/**/), + "/*optional*/", + () => ((...args) => args)/*parented_args**/(...COMMA_SEPARATED_ARGS) +) |