/**
* Hachette modify HTML document as it loads and reconstruct HTML code from it
*
* Copyright (C) 2021 Wojtek Kosior
* Redistribution terms are gathered in the `copyright' file.
*/
/*
* IMPORTS_START
* IMPORT gen_nonce
* IMPORT csp_rule
* IMPORT is_csp_header_name
* IMPORT sanitize_csp_header
* IMPORT sanitize_attributes
* IMPORTS_END
*/
/*
* Functions that sanitize elements. The script blocking measures are, when
* possible, going to be applied together with CSP rules injected using
* webRequest.
*/
const blocked = "blocked";

/*
 * Disable attribute `attr' of `node' by moving its value to `blocked-<attr>'
 * and removing the original attribute.
 *
 * Disabling attributes this way allows them to still be relatively easily
 * accessed in case they contain some useful data. If `blocked-<attr>' is
 * already present, its value is first moved to `blocked-blocked-<attr>' (and
 * so on), so that no previously stored value gets overwritten.
 *
 * `node' has to provide hasAttribute(), getAttribute(), setAttribute() and
 * removeAttribute(). When `attr' is absent, only a no-op removeAttribute()
 * call is made.
 */
function block_attribute(node, attr)
{
    /*
     * Find the first free name in the `blocked-...-<attr>' sequence. The parts
     * are joined with "-" so that the names probed here are exactly the names
     * written by the loop below.
     */
    const construct_name = [attr];
    while (node.hasAttribute(construct_name.join("-")))
        construct_name.unshift(blocked);

    /* Shift the values outwards, starting from the most "blocked" one. */
    while (construct_name.length > 1) {
        construct_name.shift();
        const name = construct_name.join("-");
        node.setAttribute(`${blocked}-${name}`, node.getAttribute(name));
    }

    node.removeAttribute(attr);
}
/*
 * Sanitize a <script> element.
 *
 * A script whose `data-hachette-deleteme' attribute equals the policy nonce is
 * removed from the document and flagged with `hachette_deleted' and
 * `hachette_ignore'. Unless `data.policy.allow' is set, the script's `type'
 * attribute is additionally disabled with block_attribute() and replaced with
 * "application/json".
 */
function sanitize_script(script, data)
{
    const deleteme_mark = script.getAttribute("data-hachette-deleteme");
    const {policy} = data;

    if (deleteme_mark === policy.nonce) {
        script.remove();
        script.hachette_deleted = true;
        script.hachette_ignore = true;
    }

    /* Note: the type is also neutralized for a script that was just removed. */
    if (!policy.allow) {
        block_attribute(script, "type");
        script.setAttribute("type", "application/json");
    }
}
/*
 * Inject a <meta http-equiv="Content-Security-Policy"> element at the very
 * beginning of `head', unless `data.policy.allow' is set.
 *
 * The rule text comes from csp_rule() called with the policy nonce. The new
 * element is flagged with `hachette_ignore' and queued at the front of
 * `data.new_added' together with `head' as its parent.
 */
function inject_csp(head, data)
{
    const {policy} = data;
    if (policy.allow)
        return;

    const csp_meta = document.createElement("meta");
    csp_meta.hachette_ignore = true;
    csp_meta.setAttribute("http-equiv", "Content-Security-Policy");
    csp_meta.setAttribute("content", csp_rule(policy.nonce));

    head.prepend(csp_meta);
    data.new_added.unshift([csp_meta, head]);
}
/*
 * Neutralize a CSP rule delivered through a <meta http-equiv=...> element.
 *
 * Nothing happens when the element has no `content' or when
 * is_csp_header_name() does not recognize its `http-equiv' value. Otherwise
 * the `content' attribute is disabled with block_attribute(). A sanitized
 * value (from sanitize_csp_header()) is then written back when scripts are
 * allowed by the policy or when is_csp_header_name() also accepts the name
 * with `false' as its second argument.
 *
 * NOTE(review): the exact meaning of the second argument of
 * is_csp_header_name() is defined outside this file section - confirm there.
 */
function sanitize_http_equiv_csp_rule(meta, data)
{
    const header_name = meta.getAttribute("http-equiv");
    const value = meta.content;

    if (!value)
        return;
    if (!is_csp_header_name(header_name, !data.policy.allow))
        return;

    block_attribute(meta, "content");

    const write_back =
          data.policy.allow || is_csp_header_name(header_name, false);
    if (write_back)
        meta.content = sanitize_csp_header({value}, data.policy).value;
}
/*
 * Apply the sanitizing routine matching the tag of `node' (if there is one)
 * and, unless `data.policy.allow' is set, sanitize the node's attributes.
 */
function sanitize_node(node, data)
{
    switch (node.tagName) {
    case "SCRIPT":
        sanitize_script(node, data);
        break;
    case "HEAD":
        inject_csp(node, data);
        break;
    case "META":
        sanitize_http_equiv_csp_rule(node, data);
        break;
    default:
        /* Nodes of other kinds get no tag-specific treatment. */
        break;
    }

    if (!data.policy.allow)
        sanitize_attributes(node, data);
}
/*
* Instead of calling writer directly with multiple small chunks of
* reconstructed HTML code, we utilize `setTimeout()' to only have it called
* once, asynchronously.
*/
/*
 * Flush all chunks gathered in `data.chunks' to `data.writer' as one string
 * and start a fresh chunk list. If processing has been marked as finished and
 * a `finisher' callback is present, call it afterwards.
 */
function do_write_callback(data)
{
    const gathered_html = data.chunks.join("");
    data.writer(gathered_html);
    data.chunks = [];

    if (!data.finished)
        return;
    if (data.finisher)
        data.finisher();
}
/*
 * Queue `chunk' for writing and (re)schedule a single zero-delay flush.
 *
 * Any flush scheduled by a previous call is cancelled first, so consecutive
 * synchronous calls result in only one do_write_callback() invocation.
 */
function do_write(chunk, data)
{
    data.chunks.push(chunk);

    const flush = () => do_write_callback(data);

    clearTimeout(data.write_timeout);
    data.write_timeout = setTimeout(flush, 0);
}
/*
 * Shared XMLSerializer instance. finish_serializing_node() uses it as a
 * fallback for nodes whose `outerHTML' is empty or missing.
 */
const serializer = new XMLSerializer();
/*
 * Mark `node' as started and, if a writer is configured, write out the HTML
 * that opens it.
 *
 * The opening code is obtained from a shallow clone of the node: the clone's
 * text content is set to the `data.uniq' marker and capture group 1 of
 * `data.uniq_reg', matched against the clone's `outerHTML', is written.
 *
 * NOTE(review): `data.uniq_reg' is defined elsewhere; presumably its group 1
 * captures everything that precedes the marker - confirm there.
 */
function start_serializing_node(node, data)
{
    node.hachette_started = true;

    if (data.writer) {
        const shallow_copy = node.cloneNode(false);
        shallow_copy.textContent = data.uniq;

        const match = data.uniq_reg.exec(shallow_copy.outerHTML);
        do_write(match[1], data);
    }
}
/*
 * Finish serializing `node' along with the chain of nodes reachable from it
 * through consecutive `hachette_last_added' properties.
 *
 * The chain is processed from its far end back to `node'. Every node on it is
 * removed from the document and flagged with `hachette_ignore'. If a writer is
 * configured:
 *   - for a node that was started earlier, only capture group 2 of
 *     `data.uniq_reg' (matched against the node's `outerHTML' after its text
 *     content got replaced with the `data.uniq' marker) is written;
 *   - any other node is written whole, using `outerHTML' or, when that is
 *     empty or missing, the XMLSerializer.
 *
 * A falsy `node' results in no work being done.
 */
function finish_serializing_node(node, data)
{
    const chain = [];
    for (let link = node; link; link = link.hachette_last_added)
        chain.push(link);

    for (let idx = chain.length - 1; idx >= 0; idx--) {
        const finished = chain[idx];

        finished.remove();
        finished.hachette_ignore = true;

        if (!data.writer)
            continue;

        if (finished.hachette_started) {
            finished.textContent = data.uniq;
            do_write(data.uniq_reg.exec(finished.outerHTML)[2], data);
        } else {
            const html =
                  finished.outerHTML || serializer.serializeToString(finished);
            do_write(html, data);
        }
    }
}
/*
 * Queue the nodes that were already present under `data.html_root' before the
 * first node reported to us.
 *
 * Runs at most once per `data' object (guarded by
 * `data.processed_initial_nodes'). It starts serialization of the root and
 * then walks the tree under it in document order, stopping when `node' is
 * reached (or, when `node' is undefined, after the whole tree got visited).
 * Each visited node is recorded as a `[node, parent]' pair and all the pairs
 * are put at the beginning of the `data.new_added' queue.
 */
function process_initial_nodes(node, data)
{
    if (data.processed_initial_nodes)
        return;

    data.processed_initial_nodes = true;

    start_serializing_node(data.html_root, data);

    const new_added = [];
    /* The node to visit next is at the front, followed by its ancestors. */
    const nodes_to_process = [data.html_root];

    while (nodes_to_process.length > 0) {
        let current = nodes_to_process.shift();

        /* Descend first... */
        if (current.firstChild) {
            if (current.firstChild === node)
                break;
            nodes_to_process.unshift(current.firstChild, current);
            new_added.push([current.firstChild, current]);
            continue;
        }

        /* ...otherwise climb up until some node has a next sibling. */
        while (current && !current.nextSibling)
            current = nodes_to_process.shift();

        /*
         * Never step to a sibling of the root itself: such a node lies outside
         * of the processed tree and would get queued with an undefined parent,
         * which handle_added_node() is unable to cope with.
         */
        if (!current || current === data.html_root ||
            current.nextSibling === node)
            break;

        nodes_to_process.unshift(current.nextSibling);
        /* With the sibling put at index 0, its parent sits at index 1. */
        new_added.push([current.nextSibling, nodes_to_process[1]]);
    }

    data.new_added.unshift(...new_added);
}
/*
* Important! Due to some weirdness node.parentElement is not always correct
* in MutationRecords under Chromium. Track node relations manually.
*/
/*
 * Process a node that appeared in the document under `true_parent', together
 * with any nodes queued in `data.new_added' in the meantime.
 *
 * For every queued `[node, parent]' pair that has not been marked as deleted:
 * the parent's serialization is started if needed, the node gets sanitized
 * (unless flagged with `hachette_ignore'), is handed to the optional
 * `data.node_eater' callback and finally the node previously added under the
 * same parent gets its serialization finished.
 */
function handle_added_node(node, true_parent, data)
{
    /*
     * Functions we call here might cause new nodes to be injected or found
     * that require processing before the one we got in function argument.
     * We rely on those functions putting the node(s) they create/find at the
     * very beginning of the `new_added' queue and (for created nodes) setting
     * their `hachette_ignore' property, based on which their MutationRecord
     * will not be processed. A function can also mark a node already in the
     * `new_added' queue as not eligible for processing by setting its
     * `hachette_deleted' property.
     */
    process_initial_nodes(node, data);

    data.new_added.push([node, true_parent]);

    while (data.new_added.length > 0) {
        const [child, parent] = data.new_added.shift();

        /* Deletion of a parent extends to everything added under it. */
        if (parent.hachette_deleted)
            child.hachette_deleted = true;
        if (child.hachette_deleted)
            continue;

        if (!parent.hachette_started)
            start_serializing_node(parent, data);

        if (!child.hachette_ignore)
            sanitize_node(child, data);

        /* Sanitizing might have just marked the node as deleted. */
        if (child.hachette_deleted)
            continue;

        if (data.node_eater)
            data.node_eater(child, parent);

        /* The previously added sibling is complete now; `child' is not yet. */
        finish_serializing_node(parent.hachette_last_added, data);
        parent.hachette_last_added = child;
    }
}
/*
 * Pass every node added in the given mutation records to handle_added_node(),
 * skipping nodes flagged with `hachette_ignore'. Nodes added under a flagged
 * target get flagged (and hence skipped) as well.
 */
function handle_mutation(mutations, data)
{
    /*
     * Chromium: for an unknown reason mutation.target is not always the same as
     * node.parentElement. The former is the correct one.
     */
    for (const record of mutations) {
        for (const added of record.addedNodes) {
            /* Nodes added by ourselves are not to be processed. */
            if (record.target.hachette_ignore)
                added.hachette_ignore = true;

            if (!added.hachette_ignore)
                handle_added_node(added, record.target, data);
        }
    }
}
function finish_processing(data)
{
process_initial_nodes(undefined, data);
/*
* The `finisher' callback should be called, if provided. Normally our
* function that performs the last write does it after seeing `finished'
* set to `true'. If, however, there's no `writer' callback and hence no
* writes to perform, we need to take care of calling `finisher' here.
*/
data.finished = true;
handle_mutation(data.observer.takeRecords(), data);
data.observer.disconnect();
/*
* Additional whitespace that was after `