diff options
Diffstat (limited to 'src/hydrilla/proxy/policies')
18 files changed, 2011 insertions, 0 deletions
diff --git a/src/hydrilla/proxy/policies/__init__.py b/src/hydrilla/proxy/policies/__init__.py new file mode 100644 index 0000000..93c3d4f --- /dev/null +++ b/src/hydrilla/proxy/policies/__init__.py @@ -0,0 +1,18 @@ +# SPDX-License-Identifier: CC0-1.0 + +# Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +# +# Available under the terms of Creative Commons Zero v1.0 Universal. + +from .base import PolicyPriority, Policy, PolicyFactory, response_work_data + +from .payload import PayloadPolicyFactory + +from .payload_resource import PayloadResourcePolicyFactory + +from .rule import RuleBlockPolicyFactory, RuleAllowPolicyFactory + +from .misc import FallbackAllowPolicy, FallbackBlockPolicy, ErrorBlockPolicy, \ + MitmItPagePolicyFactory + +from .web_ui import WebUIMainPolicyFactory, WebUILandingPolicyFactory diff --git a/src/hydrilla/proxy/policies/base.py b/src/hydrilla/proxy/policies/base.py new file mode 100644 index 0000000..967e2c4 --- /dev/null +++ b/src/hydrilla/proxy/policies/base.py @@ -0,0 +1,363 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Base definitions for policies for altering HTTP requests. +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. 
Although I request that you do not make use of this +# code in a proprietary program, I am not going to enforce this in +# court. + +""" +..... +""" + +import enum +import re +import threading +import dataclasses as dc +import typing as t + +from abc import ABC, abstractmethod +from hashlib import sha256 +from base64 import b64encode + +import jinja2 + +from immutables import Map + +from ... import translations +from ... import url_patterns +from ... import common_jinja_templates +from .. import state +from .. import http_messages +from .. import csp + + +_info_loader = jinja2.PackageLoader( + __package__, + package_path = 'info_pages_templates' +) +_combined_loader = common_jinja_templates.combine_with_loaders([_info_loader]) +_jinja_info_env = jinja2.Environment( + loader = _combined_loader, + autoescape = jinja2.select_autoescape(['html.jinja']), + lstrip_blocks = True, + extensions = ['jinja2.ext.i18n', 'jinja2.ext.do'] +) +_jinja_info_env.globals['url_patterns'] = url_patterns +_jinja_info_lock = threading.Lock() + + +_jinja_script_loader = jinja2.PackageLoader( + __package__, + package_path = 'injectable_scripts' +) +_jinja_script_env = jinja2.Environment( + loader = _jinja_script_loader, + autoescape = False, + lstrip_blocks = True, + extensions = ['jinja2.ext.do'] +) +_jinja_script_lock = threading.Lock() + +def get_script_template(template_file_name: str) -> jinja2.Template: + with _jinja_script_lock: + return _jinja_script_env.get_template(template_file_name) + + +response_work_data = threading.local() + +def response_nonce() -> str: + """ + When called multiple times during consume_response(), each time returns the + same unpredictable string unique to this response. The string is used as a + nonce for script elements. 
+ """ + return response_work_data.nonce + + +class PolicyPriority(int, enum.Enum): + """....""" + _ONE = 1 + _TWO = 2 + _THREE = 3 + + +class MsgProcessOpt(enum.Enum): + """....""" + MUST = True + MUST_NOT = False + + +MessageInfo = t.Union[ + http_messages.RequestInfo, + http_messages.ResponseInfo +] + + +# We're doing *very* simple doctype matching for now. If a site wanted, it could +# trick us into getting this wrong. +doctype_re = re.compile(r'^\s*<!doctype[^>]*>', re.IGNORECASE) + + +UTF8_BOM = b'\xEF\xBB\xBF' +BOMs = ( + (UTF8_BOM, 'utf-8'), + (b'\xFE\xFF', 'utf-16be'), + (b'\xFF\xFE', 'utf-16le') +) + + +# mypy needs to be corrected: +# https://stackoverflow.com/questions/70999513/conflict-between-mix-ins-for-abstract-dataclasses/70999704#70999704 +@dc.dataclass(frozen=True) # type: ignore[misc] +class Policy(ABC): + _process_request: t.ClassVar[t.Optional[MsgProcessOpt]] = None + _process_response: t.ClassVar[t.Optional[MsgProcessOpt]] = None + anticache: t.ClassVar[bool] = True + + priority: t.ClassVar[PolicyPriority] + + haketilo_settings: state.HaketiloGlobalSettings + + @property + def current_popup_settings(self) -> state.PopupSettings: + return self.haketilo_settings.default_popup_jsallowed + + def should_process_request( + self, + request_info: http_messages.BodylessRequestInfo + ) -> bool: + return self._process_request == MsgProcessOpt.MUST + + def should_process_response( + self, + request_info: http_messages.RequestInfo, + response_info: http_messages.AnyResponseInfo + ) -> bool: + if self._process_response is not None: + return self._process_response.value + + return (self.current_popup_settings.popup_enabled and + http_messages.is_likely_a_page(request_info, response_info)) + + def _get_info_template(self, template_file_name: str) -> jinja2.Template: + with _jinja_info_lock: + chosen_locale = self.haketilo_settings.locale + if chosen_locale not in translations.supported_locales: + chosen_locale = None + + if chosen_locale is None: + 
chosen_locale = translations.default_locale + + trans = translations.translation(chosen_locale) + _jinja_info_env.install_gettext_translations(trans) # type: ignore + return _jinja_info_env.get_template(template_file_name) + + + def _csp_to_clear(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Union[t.Sequence[str], t.Literal['all']]: + return () + + def _csp_to_add(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Mapping[str, t.Sequence[str]]: + return Map() + + def _csp_to_extend(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Mapping[str, t.Sequence[str]]: + if (self.current_popup_settings.popup_enabled and + http_info.is_likely_a_page): + nonce_source = f"'nonce-{response_nonce()}'" + directives = ( + 'script-src', + 'script-src-elem', + 'style-src', + 'frame-src' + ) + return dict((directive, [nonce_source]) for directive in directives) + else: + return Map() + + def _modify_response_headers(self, http_info: http_messages.FullHTTPInfo) \ + -> http_messages.IHeaders: + csp_to_clear = self._csp_to_clear(http_info) + csp_to_add = self._csp_to_add(http_info) + csp_to_extend = self._csp_to_extend(http_info) + + if len(csp_to_clear) + len(csp_to_extend) + len(csp_to_add) == 0: + return http_info.response_info.headers + + return csp.modify( + headers = http_info.response_info.headers, + clear = csp_to_clear, + add = csp_to_add, + extend = csp_to_extend + ) + + def _modify_response_document( + self, + http_info: http_messages.FullHTTPInfo, + encoding: t.Optional[str] + ) -> t.Union[str, bytes]: + popup_settings = self.current_popup_settings + + if popup_settings.popup_enabled: + nonce = response_nonce() + + popup_page = self.make_info_page(http_info) + if popup_page is None: + template = self._get_info_template( + 'special_page_info.html.jinja' + ) + popup_page = template.render( + url = http_info.request_info.url.orig_url + ) + + template = get_script_template('popup.js.jinja') + popup_script = template.render( + popup_page_b64 = 
b64encode(popup_page.encode()).decode(), + nonce_b64 = b64encode(nonce.encode()).decode(), + # TODO: add an option to configure popup style in the web UI. + # Then start passing the real style value. + #popup_style = popup_settings.style.value + popup_style = 'D' + ) + + if encoding is None: + encoding = 'utf-8' + + body_bytes = http_info.response_info.body + body = body_bytes.decode(encoding, errors='replace') + + match = doctype_re.match(body) + doctype_decl_len = 0 if match is None else match.end() + + dotype_decl = body[0:doctype_decl_len] + doc_rest = body[doctype_decl_len:] + script_tag = f'<script nonce="{nonce}">{popup_script}</script>' + + return dotype_decl + script_tag + doc_rest + else: + return http_info.response_info.body + + def _modify_response_body(self, http_info: http_messages.FullHTTPInfo) \ + -> bytes: + if not http_info.is_likely_a_page: + return http_info.response_info.body + + data = http_info.response_info.body + + _, encoding = http_info.response_info.deduce_content_type() + + # A UTF BOM overrides encoding specified by the header. + for bom, encoding_name in BOMs: + if data.startswith(bom): + encoding = encoding_name + + new_data = self._modify_response_document(http_info, encoding) + + if isinstance(new_data, str): + # Appending a three-byte Byte Order Mark (BOM) will force the + # browser to decode this as UTF-8 regardless of the 'Content-Type' + # header. See + # https://www.w3.org/International/tests/repository/html5/the-input-byte-stream/results-basics#precedence + new_data = UTF8_BOM + new_data.encode() + + return new_data + + def consume_request(self, request_info: http_messages.RequestInfo) \ + -> t.Optional[MessageInfo]: + # We're not using @abstractmethod because not every Policy needs it and + # we don't want to force child classes into implementing dummy methods. + raise NotImplementedError( + 'This kind of policy does not consume requests.' 
+ ) + + def consume_response(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Optional[http_messages.ResponseInfo]: + try: + new_headers = self._modify_response_headers(http_info) + new_body = self._modify_response_body(http_info) + except Exception as e: + # In the future we might want to actually describe eventual errors. + # For now, we're just printing the stack trace. + import traceback + + error_info_list = traceback.format_exception( + type(e), + e, + e.__traceback__ + ) + + return http_messages.ResponseInfo.make( + status_code = 500, + headers = (('Content-Type', 'text/plain; charset=utf-8'),), + body = '\n'.join(error_info_list).encode() + ) + + if (new_headers is http_info.response_info.headers and + new_body is http_info.response_info.body): + return None + + return dc.replace( + http_info.response_info, + headers = new_headers, + body = new_body + ) + + def make_info_page(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Optional[str]: + return None + + +@dc.dataclass(frozen=True, unsafe_hash=True) # type: ignore[misc] +class PolicyFactory(ABC): + """....""" + builtin: bool + + @abstractmethod + def make_policy(self, haketilo_state: state.HaketiloState) \ + -> t.Optional[Policy]: + """....""" + ... 
+ + def __lt__(self, other: 'PolicyFactory'): + """....""" + return sorting_keys.get(self.__class__.__name__, 999) < \ + sorting_keys.get(other.__class__.__name__, 999) + +sorting_order = ( + 'WebUIMainPolicyFactory', + 'WebUILandingPolicyFactory', + + 'MitmItPagePolicyFactory', + + 'PayloadResourcePolicyFactory', + + 'PayloadPolicyFactory', + + 'RuleBlockPolicyFactory', + 'RuleAllowPolicyFactory', + + 'FallbackPolicyFactory' +) + +sorting_keys = Map((cls, name) for name, cls in enumerate(sorting_order)) diff --git a/src/hydrilla/proxy/policies/info_pages_templates/info_base.html.jinja b/src/hydrilla/proxy/policies/info_pages_templates/info_base.html.jinja new file mode 100644 index 0000000..9268c92 --- /dev/null +++ b/src/hydrilla/proxy/policies/info_pages_templates/info_base.html.jinja @@ -0,0 +1,97 @@ +{# +SPDX-License-Identifier: GPL-3.0-or-later OR CC-BY-SA-4.0 + +Proxy info page with information about other page - base template. + +This file is part of Hydrilla&Haketilo. + +Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> + +Dual licensed under +* GNU General Public License v3.0 or later and +* Creative Commons Attribution Share Alike 4.0 International. + +You can choose to use either of these licenses or both. + + +I, Wojtek Kosior, thereby promise not to sue for violation of this +file's licenses. Although I request that you do not make use of this +code in a proprietary work, I am not going to enforce this in court. 
+#} +{% extends "base.html.jinja" %} + +{% macro hkt_doc_link(page_name) %} + {% set doc_url = 'https://hkt.mitm.it/doc/' ~ page_name %} + {{ doc_link(doc_url) }} +{% endmacro %} + +{% block style %} + {{ super() }} + + #main { + padding: 0 10px; + } +{% endblock %} + +{% block head %} + {{ super() }} + + <title>{{ _('info.base.title') }}</title> +{% endblock head %} + +{% block main %} + <h3> + {{ _('info.base.heading.page_info') }} + {{ hkt_doc_link('popup') }} + </h3> + + {{ label(_('info.base.page_url_label')) }} + + <p> + {{ url }} + </p> + + <div class="horizontal-separator"></div> + + {% call label(_('info.base.page_policy_label')) %} + {{ hkt_doc_link('policy_selection') }} + {% endcall %} + + <p class="has-colored-links"> + {% block site_policy required %}{% endblock %} + </p> + + {% block main_rest %} + {% endblock %} + + {% block options %} + <div class="horizontal-separator"></div> + + {{ label(_('info.base.more_config_options_label')) }} + + {% set site_pattern = url_patterns.pattern_for_domain(url)|urlencode %} + {% set page_pattern = url_patterns.normalize_pattern(url)|urlencode %} + + {% + for pattern, hkt_url_fmt, but_text in [ + (site_pattern, 'https://hkt.mitm.it/rules/viewbypattern?pattern={}', + _('info.base.this_site_script_blocking_button')), + + (site_pattern, 'https://hkt.mitm.it/import?pattern={}', + _('info.base.this_site_payload_button')), + + (page_pattern, 'https://hkt.mitm.it/rules/viewbypattern?pattern={}', + _('info.base.this_page_script_blocking_button')), + + (page_pattern, 'https://hkt.mitm.it/import?pattern={}', + _('info.base.this_page_payload_button')) + ] + %} + {% set hkt_url = hkt_url_fmt.format(pattern) %} + {% set classes = "green-button block-with-bottom-margin" %} + <a class="{{classes}}" href="{{ hkt_url }}" target="_blank"> + {{ but_text }} + </a> + {% endfor %} + {% endblock options %} +{% endblock main %} diff --git a/src/hydrilla/proxy/policies/info_pages_templates/js_error_blocked_info.html.jinja 
b/src/hydrilla/proxy/policies/info_pages_templates/js_error_blocked_info.html.jinja new file mode 100644 index 0000000..181b219 --- /dev/null +++ b/src/hydrilla/proxy/policies/info_pages_templates/js_error_blocked_info.html.jinja @@ -0,0 +1,22 @@ +{# +SPDX-License-Identifier: CC0-1.0 + +Proxy info page with information about page with JS blocked after an error. + +This file is part of Hydrilla&Haketilo. + +Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +#} +{% extends "info_base.html.jinja" %} + +{% block site_policy %} + {{ _('info.js_error_blocked.html')|safe }} +{% endblock %} + +{% block main_rest %} + {% if settings.advanced_user %} + {{ label(_('info.js_error_blocked.stacktrace')) }} + + {% call verbatim() %}{{ traceback }}{% endcall %} + {% endif %} +{% endblock %} diff --git a/src/hydrilla/proxy/policies/info_pages_templates/js_fallback_allowed_info.html.jinja b/src/hydrilla/proxy/policies/info_pages_templates/js_fallback_allowed_info.html.jinja new file mode 100644 index 0000000..71f3151 --- /dev/null +++ b/src/hydrilla/proxy/policies/info_pages_templates/js_fallback_allowed_info.html.jinja @@ -0,0 +1,14 @@ +{# +SPDX-License-Identifier: CC0-1.0 + +Proxy info page with information about page with JS allowed by default policy. + +This file is part of Hydrilla&Haketilo. + +Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +#} +{% extends "info_base.html.jinja" %} + +{% block site_policy %} + {{ _('info.js_fallback_allowed') }} +{% endblock %} diff --git a/src/hydrilla/proxy/policies/info_pages_templates/js_fallback_blocked_info.html.jinja b/src/hydrilla/proxy/policies/info_pages_templates/js_fallback_blocked_info.html.jinja new file mode 100644 index 0000000..1b4ad51 --- /dev/null +++ b/src/hydrilla/proxy/policies/info_pages_templates/js_fallback_blocked_info.html.jinja @@ -0,0 +1,15 @@ +{# +SPDX-License-Identifier: CC0-1.0 + +Proxy info page with information about page with JS blocked by default policy. + +This file is part of Hydrilla&Haketilo. 
+ +Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +#} +{% extends "info_base.html.jinja" %} + +{% block site_policy %} + {{ _('info.js_fallback_blocked') }} + {{ hkt_doc_link('script_blocking') }} +{% endblock %} diff --git a/src/hydrilla/proxy/policies/info_pages_templates/js_rule_allowed_info.html.jinja b/src/hydrilla/proxy/policies/info_pages_templates/js_rule_allowed_info.html.jinja new file mode 100644 index 0000000..fe74602 --- /dev/null +++ b/src/hydrilla/proxy/policies/info_pages_templates/js_rule_allowed_info.html.jinja @@ -0,0 +1,14 @@ +{# +SPDX-License-Identifier: CC0-1.0 + +Proxy info page with information about page with JS allowed by a rule. + +This file is part of Hydrilla&Haketilo. + +Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +#} +{% extends "js_rule_info.html.jinja" %} + +{% block site_policy %} + {{ format_html_with_rule_url(_('info.js_allowed.html.rule{url}_is_used')) }} +{% endblock %} diff --git a/src/hydrilla/proxy/policies/info_pages_templates/js_rule_blocked_info.html.jinja b/src/hydrilla/proxy/policies/info_pages_templates/js_rule_blocked_info.html.jinja new file mode 100644 index 0000000..3f396a8 --- /dev/null +++ b/src/hydrilla/proxy/policies/info_pages_templates/js_rule_blocked_info.html.jinja @@ -0,0 +1,15 @@ +{# +SPDX-License-Identifier: CC0-1.0 + +Proxy info page with information about page with JS blocked by a rule. + +This file is part of Hydrilla&Haketilo. 
+ +Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +#} +{% extends "js_rule_info.html.jinja" %} + +{% block site_policy %} + {{ format_html_with_rule_url(_('info.js_blocked.html.rule{url}_is_used')) }} + {{ hkt_doc_link('script_blocking') }} +{% endblock %} diff --git a/src/hydrilla/proxy/policies/info_pages_templates/js_rule_info.html.jinja b/src/hydrilla/proxy/policies/info_pages_templates/js_rule_info.html.jinja new file mode 100644 index 0000000..1c0c662 --- /dev/null +++ b/src/hydrilla/proxy/policies/info_pages_templates/js_rule_info.html.jinja @@ -0,0 +1,39 @@ +{# +SPDX-License-Identifier: GPL-3.0-or-later OR CC-BY-SA-4.0 + +Proxy info page with information about page with JS blocked or allowed by a +rule - template for further extending. + +This file is part of Hydrilla&Haketilo. + +Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> + +Dual licensed under +* GNU General Public License v3.0 or later and +* Creative Commons Attribution Share Alike 4.0 International. + +You can choose to use either of these licenses or both. + + +I, Wojtek Kosior, thereby promise not to sue for violation of this +file's licenses. Although I request that you do not make use of this +code in a proprietary work, I am not going to enforce this in court. 
+#} +{% extends "info_base.html.jinja" %} + +{% macro format_html_with_rule_url(msg_fmt) %} + {% set url_fmt = 'https://hkt.mitm.it/rules/viewbypattern?pattern={pattern}' %} + {{ msg_fmt.format(url=url_fmt.format(pattern=pattern)|e)|safe }} +{% endmacro %} + +{% block main_rest %} + <div class="horizontal-separator"></div> + + {% call label(_('info.rule.matched_pattern_label')) %} + {{ hkt_doc_link('url_patterns') }} + {% endcall %} + + <p> + {{ pattern }} + </p> +{% endblock %} diff --git a/src/hydrilla/proxy/policies/info_pages_templates/payload_info.html.jinja b/src/hydrilla/proxy/policies/info_pages_templates/payload_info.html.jinja new file mode 100644 index 0000000..e66e685 --- /dev/null +++ b/src/hydrilla/proxy/policies/info_pages_templates/payload_info.html.jinja @@ -0,0 +1,50 @@ +{# +SPDX-License-Identifier: GPL-3.0-or-later OR CC-BY-SA-4.0 + +Proxy info page with information about page with payload. + +This file is part of Hydrilla&Haketilo. + +Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> + +Dual licensed under +* GNU General Public License v3.0 or later and +* Creative Commons Attribution Share Alike 4.0 International. + +You can choose to use either of these licenses or both. + + +I, Wojtek Kosior, thereby promise not to sue for violation of this +file's licenses. Although I request that you do not make use of this +code in a proprietary work, I am not going to enforce this in court. 
+#} +{% extends "info_base.html.jinja" %} + +{% macro format_html_with_package_identifier_and_url(msg_fmt) %} + {% set package_identifier = payload_data.mapping_identifier|e %} + {% set url_fmt = 'https://hkt.mitm.it/package/viewbypayload/{payload_id}/{package_identifier}' %} + {% + set url = url_fmt.format( + payload_id = payload_data.ref.id, + package_identifier = package_identifier + ) + %} + {{ msg_fmt.format(identifier=package_identifier, url=url|e)|safe }} +{% endmacro %} + +{% block site_policy %} + {% set fmt = _('info.payload.html.package_{identifier}{url}_is_used') %} + {{ format_html_with_package_identifier_and_url(fmt) }} +{% endblock %} + +{% block main_rest %} + <div class="horizontal-separator"></div> + + {% call label(_('info.payload.matched_pattern_label')) %} + {{ hkt_doc_link('url_patterns') }} + {% endcall %} + + <p> + {{ payload_data.pattern }} + </p> +{% endblock %} diff --git a/src/hydrilla/proxy/policies/info_pages_templates/special_page_info.html.jinja b/src/hydrilla/proxy/policies/info_pages_templates/special_page_info.html.jinja new file mode 100644 index 0000000..2f7a9d3 --- /dev/null +++ b/src/hydrilla/proxy/policies/info_pages_templates/special_page_info.html.jinja @@ -0,0 +1,17 @@ +{# +SPDX-License-Identifier: CC0-1.0 + +Proxy info page with information about page handled by special policy. + +This file is part of Hydrilla&Haketilo. + +Copyright (C) 2022 Wojtek Kosior <koszko@koszko.org> +#} +{% extends "info_base.html.jinja" %} + +{% block site_policy %} + {{ _('info.special_page') }} +{% endblock %} + +{% block options %} +{% endblock %} diff --git a/src/hydrilla/proxy/policies/injectable_scripts/page_init_script.js.jinja b/src/hydrilla/proxy/policies/injectable_scripts/page_init_script.js.jinja new file mode 100644 index 0000000..f3398ef --- /dev/null +++ b/src/hydrilla/proxy/policies/injectable_scripts/page_init_script.js.jinja @@ -0,0 +1,151 @@ +{# +SPDX-License-Identifier: GPL-3.0-or-later + +Haketilo page APIs code template. 
+ +This file is part of Hydrilla&Haketilo. + +Copyright (C) 2021,2022 Wojtek Kosior + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +As additional permission under GNU GPL version 3 section 7, you +may distribute forms of that code without the copy of the GNU +GPL normally required by section 4, provided you include this +license notice and, in case of non-source distribution, a URL +through which recipients can access the Corresponding Source. +If you modify file(s) with this exception, you may extend this +exception to your version of the file(s), but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. + +As a special exception to the GPL, any HTML file which merely +makes function calls to this code, and for that purpose +includes it by reference shall be deemed a separate work for +copyright law purposes. If you modify this code, you may extend +this exception to your version of the code, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. + + +I, Wojtek Kosior, thereby promise not to sue for violation of this +file's license. Although I request that you do not make use of this +code in a proprietary program, I am not going to enforce this in court. +#} + +(function(){ + /* + * Snapshot some variables that other code could theoretically redefine + * later. 
We're not making the effort to protect from redefinition of + * prototype properties right now. + */ + const console = window.console; + const fetch = window.fetch; + const JSON = window.JSON; + const URL = window.URL; + const Array = window.Array; + const Uint8Array = window.Uint8Array; + const CustomEvent = window.CustomEvent; + const window_dispatchEvent = window.dispatchEvent; + + /* Get values from the proxy. */ + function decode_jinja(str) { + return decodeURIComponent(atob(str)); + } + const unique_token = decode_jinja("{{ unique_token_encoded }}"); + const assets_base_url = decode_jinja("{{ assets_base_url_encoded }}"); + window.haketilo_version = JSON.parse( + decode_jinja("{{ haketilo_version }}") + ); + + /* Make it possible to serialize an Error object. */ + function error_data_jsonifiable(error) { + const jsonifiable = {}; + for (const property of ["name", "message", "fileName", "lineNumber"]) + jsonifiable[property] = error[property]; + + return jsonifiable; + } + + /* Make it possible to serialize a Uint8Array. 
*/ + function uint8_to_hex(array) { + return [...array].map(b => ("0" + b.toString(16)).slice(-2)).join(""); + } + + async function on_unrestricted_http_request(event) { + const name = "haketilo_CORS_bypass"; + + if (typeof event.detail !== "object" || + event.detail === null || + typeof event.detail.id !== "string" || + typeof event.detail.data !== "string") { + console.error(`Unrestricted HTTP: Invalid detail.`, event.detail); + return; + } + + try { + const data = JSON.parse(event.detail.data); + + const params = new URLSearchParams({ + target_url: data.url, + extra_headers: JSON.stringify(data.headers || []) + }); + const replacement_url = assets_base_url + "api/unrestricted_http"; + const replacement_url_obj = new URL(replacement_url); + replacement_url_obj.search = params; + + const response = await fetch(replacement_url_obj.href, data.init); + const response_buffer = await response.arrayBuffer(); + + const true_headers_serialized = + response.headers.get("x-haketilo-true-headers"); + + if (true_headers_serialized === null) + throw new Error("Unrestricted HTTP: The 'X-Haketilo-True-Headers' HTTP response header is missing. 
Are we connected to Haketilo proxy?") + + const true_headers = JSON.parse( + decodeURIComponent(true_headers_serialized) + ); + + const bad_format_error_msg = + "Unrestricted HTTP: The 'X-Haketilo-True-Headers' HTTP response header has invalid format."; + + if (!Array.isArray(true_headers)) + throw new Error(bad_format_error_msg); + + for (const [header, value] of true_headers) { + if (typeof header !== "string" || typeof value !== "string") + throw new Error(bad_format_error_msg); + } + + var result = { + status: response.status, + statusText: response.statusText, + headers: true_headers, + body: uint8_to_hex(new Uint8Array(response_buffer)) + }; + } catch(e) { + var result = {error: error_data_jsonifiable(e)}; + } + + const response_name = `${name}-${event.detail.id}`; + const detail = JSON.stringify(result); + window_dispatchEvent(new CustomEvent(response_name, {detail})); + } + + window.addEventListener( + "haketilo_CORS_bypass", + on_unrestricted_http_request + ); +})(); diff --git a/src/hydrilla/proxy/policies/injectable_scripts/popup.js.jinja b/src/hydrilla/proxy/policies/injectable_scripts/popup.js.jinja new file mode 100644 index 0000000..593673b --- /dev/null +++ b/src/hydrilla/proxy/policies/injectable_scripts/popup.js.jinja @@ -0,0 +1,221 @@ +{# +SPDX-License-Identifier: GPL-3.0-or-later + +Haketilo popup display script. + +This file is part of Hydrilla&Haketilo. + +Copyright (C) 2021,2022 Wojtek Kosior + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. 
+ +As additional permission under GNU GPL version 3 section 7, you +may distribute forms of that code without the copy of the GNU +GPL normally required by section 4, provided you include this +license notice and, in case of non-source distribution, a URL +through which recipients can access the Corresponding Source. +If you modify file(s) with this exception, you may extend this +exception to your version of the file(s), but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. + +As a special exception to the GPL, any HTML file which merely +makes function calls to this code, and for that purpose +includes it by reference shall be deemed a separate work for +copyright law purposes. If you modify this code, you may extend +this exception to your version of the code, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. + + +I, Wojtek Kosior, thereby promise not to sue for violation of this +file's license. Although I request that you do not make use of this +code in a proprietary program, I am not going to enforce this in court. +#} + +(function(){ + document.currentScript.remove(); + + /* + * To slightly decrease the chance of accidental popup breakage we snapshot + * methods that other code might redefine. 
+ */ + function get_setter(obj, name) { + return Object.getOwnPropertyDescriptor(obj, name).set; + } + + const ElementPrototype = [0, 0, 0] + .reduce(n => Object.getPrototypeOf(n), document.documentElement); + + const prepend_fun = ElementPrototype.prepend; + const setattr_fun = ElementPrototype.setAttribute; + const remove_fun = ElementPrototype.remove; + const setinner_fun = get_setter(ElementPrototype, "innerHTML"); + const open_fun = window.open; + + const shortcut = "HKT"; + const nonce = atob("{{nonce_b64}}"); + const popup_style = "{{popup_style}}"; + const popup_html = atob("{{popup_page_b64}}"); + const popup_container = document.createElement("div"); + const popup_frame = document.createElement("iframe"); + + function make_style(styles_obj) { + return Object.entries(styles_obj) + .map(([key, val]) => `${key}: ${val} !important`) + .join(';'); + } + + const frame_style = make_style({ + "position": "absolute", + "left": "50%", + "top": "50%", + "transform": "translate(-50%, -50%)", + "display": "block", + "visibility": "visible", + "min-width": "initial", + "width": "600px", + "max-width": "calc(100vw - 20px)", + "min-height": "initial", + "height": "700px", + "max-height": "calc(100vh - 20px)", + "background-color": "#fff", + "opacity": "100%", + "margin": 0, + "padding": 0, + "border": "none", + "border-radius": "5px" + }); + + const container_style = make_style({ + "position": "fixed", + "left": "0", + "top": "0", + "transform": "initial", + "z-index": 2147483647, + "display": "block", + "visibility": "visible", + "min-width": "100vw", + "max-width": "100vw", + "min-height": "100vh", + "max-height": "100vh", + "background-color": "#0008", + "opacity": "100%", + "margin": 0, + "padding": 0, + "border": "none", + "border-radius": 0 + }); + + const popup_blob_opts = {type: "text/html;charset=UTF-8"}; + const popup_blob = new Blob([popup_html], popup_blob_opts); + const popup_url = URL.createObjectURL(popup_blob); + + function show_popup_dialog() { + 
setattr_fun.call(popup_frame, "srcdoc", popup_html); + setattr_fun.call(popup_frame, "nonce", nonce); + setattr_fun.call(popup_frame, "style", frame_style); + + setattr_fun.call(popup_container, "style", container_style); + setinner_fun.call(popup_container, ""); + prepend_fun.call(popup_container, popup_frame); + + prepend_fun.call(document.body, popup_container); + } + + let popup_newtab_wanted = false; + + function show_popup_newtab() { + /* + * We cannot open popup directly here because browsers block window + * creation attempts from "keypress" event handlers. Instead, we set a + * flag to have "click" event handler open the popup. + */ + popup_newtab_wanted = true; + console.info(`You typed "${shortcut}". Please click anywhere on the page to show Haketilo page information.`); + } + + function show_popup() { + if (popup_style === "T") { + show_popup_newtab(); + } else { + /* popup_syle === "D" */ + show_popup_dialog(); + } + } + + function hide_popup_dialog() { + remove_fun.call(popup_container); + } + + let letters_matched = 0; + + function matches_previous(letter) { + return letters_matched > 0 && letter === shortcut[letters_matched - 1]; + } + + function match_letter(letter) { + if (letter !== shortcut[letters_matched] && !matches_previous(letter)) + letters_matched = 0; + + if (letter === shortcut[letters_matched]) { + if (++letters_matched === shortcut.length) { + letters_matched = 0; + return true; + } + } + + return false; + } + + function consume_keypress(event) { + if (!event.isTrusted) + return; + + if (match_letter(event.key)) + show_popup(); + } + + function cancel_event(event) { + event.stopImmediatePropagation(); + event.stopPropagation(); + event.preventDefault(); + } + + function consume_click(event) { + if (!event.isTrusted) + return; + + if (popup_style === "T") { + if (popup_newtab_wanted) { + popup_newtab_wanted = false; + cancel_event(event); + window.open( + popup_url, + "_blank", + "popup,width=600px,height=700px" + ); + } + } else { + 
/* popup_syle === "D" */ + if (event.target === popup_container) { + hide_popup_dialog(); + cancel_event(event); + } + } + } + + document.addEventListener("keypress", consume_keypress, {capture: true}); + document.addEventListener("click", consume_click, {capture: true}); +})(); diff --git a/src/hydrilla/proxy/policies/misc.py b/src/hydrilla/proxy/policies/misc.py new file mode 100644 index 0000000..e789b29 --- /dev/null +++ b/src/hydrilla/proxy/policies/misc.py @@ -0,0 +1,110 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Miscellaneous policies. +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use of this +# code in a proprietary program, I am not going to enforce this in +# court. + +""" +..... +""" + +import enum +import traceback as tb +import dataclasses as dc +import typing as t + +from abc import ABC, abstractmethod + +from .. import state +from .. import http_messages +from . 
import base +from .rule import AllowPolicy, BlockPolicy + + +class FallbackAllowPolicy(AllowPolicy): + priority = base.PolicyPriority._ONE + + def make_info_page(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Optional[str]: + template = self._get_info_template( + 'js_fallback_allowed_info.html.jinja' + ) + return template.render(url=http_info.request_info.url.orig_url) + + +class FallbackBlockPolicy(BlockPolicy): + priority = base.PolicyPriority._ONE + + def make_info_page(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Optional[str]: + template = self._get_info_template( + 'js_fallback_blocked_info.html.jinja' + ) + return template.render(url=http_info.request_info.url.orig_url) + + +@dc.dataclass(frozen=True) +class ErrorBlockPolicy(BlockPolicy): + error: Exception + + @property + def traceback(self) -> str: + lines = tb.format_exception(None, self.error, self.error.__traceback__) + return ''.join(lines) + + def make_info_page(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Optional[str]: + template = self._get_info_template('js_error_blocked_info.html.jinja') + return template.render( + url = http_info.request_info.url.orig_url, + settings = self.haketilo_settings, + traceback = self.traceback + ) + + +class MitmItPagePolicy(base.Policy): + """ + A special policy class for handling of the magical mitm.it domain. It causes + request and response not to be modified in any way and also (unlike + FallbackAllowPolicy) prevents them from being streamed. 
+ """ + _process_request = base.MsgProcessOpt.MUST + _process_response = base.MsgProcessOpt.MUST + anticache = False + + priority = base.PolicyPriority._THREE + + def consume_request(self, request_info: http_messages.RequestInfo) -> None: + return None + + def consume_response(self, http_info: http_messages.FullHTTPInfo) -> None: + return None + +@dc.dataclass(frozen=True, unsafe_hash=True) +class MitmItPagePolicyFactory(base.PolicyFactory): + builtin: bool = True + + def make_policy(self, haketilo_state: state.HaketiloState) \ + -> MitmItPagePolicy: + return MitmItPagePolicy(haketilo_state.get_settings()) diff --git a/src/hydrilla/proxy/policies/payload.py b/src/hydrilla/proxy/policies/payload.py new file mode 100644 index 0000000..3660eac --- /dev/null +++ b/src/hydrilla/proxy/policies/payload.py @@ -0,0 +1,271 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Policies for applying payload injections to HTTP requests. +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use of this +# code in a proprietary program, I am not going to enforce this in +# court. + +""" +..... 
+""" + +import dataclasses as dc +import typing as t + +from urllib.parse import urlencode + +from itsdangerous.url_safe import URLSafeSerializer +import bs4 # type: ignore + +from ...exceptions import HaketiloException +from ...url_patterns import ParsedUrl +from .. import csp +from .. import state +from .. import http_messages +from . import base + +@dc.dataclass(frozen=True) # type: ignore[misc] +class PayloadAwarePolicy(base.Policy): + """....""" + payload_data: state.PayloadData + + def _assets_base_url(self, url: ParsedUrl) -> str: + token = self.payload_data.unique_token + + base_path_segments = (*self.payload_data.pattern_path_segments, token) + + return f'{url.url_without_path}/{"/".join(base_path_segments)}/' + + def _payload_details_to_signed_query_string( + self, + _salt: str, + **extra_keys: str + ) -> str: + params: t.Mapping[str, str] = { + 'payload_id': self.payload_data.ref.id, + **extra_keys + } + + serializer = URLSafeSerializer(self.payload_data.global_secret, _salt) + + return urlencode({'details': serializer.dumps(params)}) + + +@dc.dataclass(frozen=True) # type: ignore[misc] +class PayloadAwarePolicyFactory(base.PolicyFactory): + """....""" + payload_key: state.PayloadKey + + @property + def payload_ref(self) -> state.PayloadRef: + """....""" + return self.payload_key.ref + + def __lt__(self, other: base.PolicyFactory) -> bool: + """....""" + if isinstance(other, type(self)): + return self.payload_key < other.payload_key + + return super().__lt__(other) + + +def block_attr(element: bs4.PageElement, attr_name: str) -> None: + """ + Disable HTML node attributes by prepending `blocked-'. This allows them to + still be relatively easily accessed in case they contain some useful data. 
+ """ + blocked_value = element.attrs.pop(attr_name, None) + + while blocked_value is not None: + attr_name = f'blocked-{attr_name}' + next_blocked_value = element.attrs.pop(attr_name, None) + element.attrs[attr_name] = blocked_value + + blocked_value = next_blocked_value + +@dc.dataclass(frozen=True) +class PayloadInjectPolicy(PayloadAwarePolicy): + _process_response = base.MsgProcessOpt.MUST + + priority = base.PolicyPriority._TWO + + @property + def current_popup_settings(self) -> state.PopupSettings: + return self.haketilo_settings.default_popup_payloadon + + def _csp_to_clear(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Sequence[str]: + return ['script-src'] + + def _csp_to_add(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Mapping[str, t.Sequence[str]]: + allowed_origins = [self._assets_base_url(http_info.request_info.url)] + + if self.payload_data.eval_allowed: + allowed_origins.append("'unsafe-eval'") + + return { + 'script-src': allowed_origins, + 'script-src-elem': ["'none'"], + 'script-src-attr': ["'none'"] + } + + def _script_urls(self, url: ParsedUrl) -> t.Iterable[str]: + base_url = self._assets_base_url(url) + payload_ref = self.payload_data.ref + + yield base_url + 'api/page_init_script.js' + + for path in payload_ref.get_script_paths(): + yield base_url + '/'.join(('static', *path)) + + def _modify_response_document( + self, + http_info: http_messages.FullHTTPInfo, + encoding: t.Optional[str] + ) -> t.Union[bytes, str]: + markup = super()._modify_response_document(http_info, encoding) + if isinstance(markup, str): + encoding = None + + soup = bs4.BeautifulSoup( + markup = markup, + from_encoding = encoding, + features = 'html5lib' + ) + + # Inject scripts. 
+ script_parent = soup.find('body') or soup.find('html') + if script_parent is None: + return http_info.response_info.body + + for script_url in self._script_urls(http_info.request_info.url): + tag = bs4.Tag(name='script', attrs={'src': script_url}) + script_parent.append(tag) + + # Remove Content Security Policy that could possibly block injected + # scripts. + for meta in soup.select('head meta[http-equiv]'): + header_name = meta.attrs.get('http-equiv', '').lower().strip() + if header_name in csp.enforce_header_names: + block_attr(meta, 'http-equiv') + block_attr(meta, 'content') + + return soup.decode() + + def make_info_page(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Optional[str]: + return self._get_info_template('payload_info.html.jinja').render( + url = http_info.request_info.url.orig_url, + payload_data = self.payload_data + ) + + +class _PayloadHasProblemsError(HaketiloException): + pass + +class AutoPayloadInjectPolicy(PayloadInjectPolicy): + priority = base.PolicyPriority._ONE + + def consume_response(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Optional[http_messages.ResponseInfo]: + try: + if self.payload_data.ref.has_problems(): + raise _PayloadHasProblemsError() + + self.payload_data.ref.ensure_items_installed() + + return super().consume_response(http_info) + except (state.RepoCommunicationError, state.FileInstallationError, + _PayloadHasProblemsError) as ex: + extra_params: dict[str, str] = { + 'next_url': http_info.response_info.url.orig_url + } + if isinstance(ex, state.FileInstallationError): + extra_params['repo_id'] = ex.repo_id + extra_params['file_sha256'] = ex.sha256 + + query = self._payload_details_to_signed_query_string( + _salt = 'auto_install_error', + **extra_params + ) + + redirect_url = 'https://hkt.mitm.it/auto_install_error?' + query + msg = 'Error occured when installing payload. Redirecting.' 
+ + return http_messages.ResponseInfo.make( + status_code = 303, + headers = [('Location', redirect_url)], + body = msg.encode() + ) + + +@dc.dataclass(frozen=True) +class PayloadSuggestPolicy(PayloadAwarePolicy): + _process_request = base.MsgProcessOpt.MUST + _process_response = base.MsgProcessOpt.MUST_NOT + + priority = base.PolicyPriority._ONE + + def consume_request(self, request_info: http_messages.RequestInfo) \ + -> http_messages.ResponseInfo: + query = self._payload_details_to_signed_query_string( + _salt = 'package_suggestion', + next_url = request_info.url.orig_url + ) + + redirect_url = 'https://hkt.mitm.it/package_suggestion?' + query + msg = 'A package was found that could be used on this site. Redirecting.' + + return http_messages.ResponseInfo.make( + status_code = 303, + headers = [('Location', redirect_url)], + body = msg.encode() + ) + + +@dc.dataclass(frozen=True, unsafe_hash=True) +class PayloadPolicyFactory(PayloadAwarePolicyFactory): + """....""" + def make_policy(self, haketilo_state: state.HaketiloState) \ + -> t.Optional[base.Policy]: + haketilo_settings = haketilo_state.get_settings() + + try: + payload_data = self.payload_ref.get_data() + except: + return None + + if payload_data.explicitly_enabled: + return PayloadInjectPolicy(haketilo_settings, payload_data) + + mode = haketilo_settings.mapping_use_mode + + if mode == state.MappingUseMode.QUESTION: + return PayloadSuggestPolicy(haketilo_settings, payload_data) + + if mode == state.MappingUseMode.WHEN_ENABLED: + return None + + # mode == state.MappingUseMode.AUTO + return AutoPayloadInjectPolicy(haketilo_settings, payload_data) diff --git a/src/hydrilla/proxy/policies/payload_resource.py b/src/hydrilla/proxy/policies/payload_resource.py new file mode 100644 index 0000000..0d73242 --- /dev/null +++ b/src/hydrilla/proxy/policies/payload_resource.py @@ -0,0 +1,398 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Policies for resolving HTTP requests with local resources. 
+# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use of this +# code in a proprietary program, I am not going to enforce this in +# court. + +""" +We make file resources available to HTTP clients by mapping them +at: + http(s)://<pattern-matching_origin>/<pattern_path>/<token>/ +where <token> is a per-session secret unique for every mapping. +For example, a payload with pattern like the following: + http*://***.example.com/a/b/** +could cause resources to be mapped (among others) at each of: + https://example.com/a/b/**/Da2uiF2UGfg/ + https://www.example.com/a/b/**/Da2uiF2UGfg/ + http://gnome.vs.kde.example.com/a/b/**/Da2uiF2UGfg/ + +Unauthorized web pages running in the user's browser are expected to be +unable to guess the secret. This way we stop them from spying on the +user and from interfering with Haketilo's normal operation. + +This is only a soft prevention method. With some mechanisms +(e.g. service workers), under certain scenarios, it might be possible +to bypass it. Thus, to make the risk slightly smaller, we also block +the unauthorized accesses that we can detect. 
+ +Since a web page authorized to access the resources may only be served +when the corresponding mapping is enabled (or AUTO mode is on), we +consider accesses to non-enabled mappings' resources a security breach +and block them by responding with 403 Forbidden. +""" + +import dataclasses as dc +import typing as t +import json + +from base64 import b64encode +from urllib.parse import quote, parse_qs, urlparse, urlencode, urljoin + +from ...translations import smart_gettext as _ +from ...url_patterns import ParsedUrl +from ...versions import haketilo_version +from .. import state +from .. import http_messages +from . import base +from .payload import PayloadAwarePolicy, PayloadAwarePolicyFactory + + +def encode_string_for_js(string: str) -> str: + return b64encode(quote(string).encode()).decode() + + +AnyValue = t.TypeVar('AnyValue', bound=object) + +def header_keys(headers: t.Iterable[tuple[str, AnyValue]]) -> frozenset[str]: + return frozenset(header.lower() for header, _ in headers) + +def _merge_headers( + standard_headers: t.Iterable[tuple[str, t.Optional[str]]], + overridable_headers_keys: frozenset[str], + native_headers: http_messages.IHeaders, + extra_headers: t.Iterable[tuple[str, str]] +) -> t.Iterable[tuple[str, str]]: + standard_keys = header_keys(standard_headers) + standard_iterator = iter(standard_headers) + native_keys = header_keys(native_headers.items()) + + selected_base: list[tuple[str, str]] = [] + processed: set[str] = set() + + for header, _ in native_headers.items(): + header_l = header.lower() + + if header_l in processed or header_l not in standard_keys: + continue + + for standard_header_l, chosen_value in standard_iterator: + if standard_header_l not in native_keys: + if chosen_value is not None: + selected_base.append((standard_header_l, chosen_value)) + elif standard_header_l == header_l: + processed.add(header_l) + + if header_l in overridable_headers_keys: + chosen_value = native_headers.get(header_l, chosen_value) + + if 
chosen_value is not None: + selected_base.append((header, chosen_value)) + + break + + for standard_header_l, standard_value in standard_iterator: + if standard_value is not None: + selected_base.append((standard_header_l, standard_value)) + + extra_keys = header_keys(extra_headers) + extra_iterator = iter(extra_headers) + + result: list[tuple[str, str]] = [] + processed = set() + + for header, value in selected_base: + header_l = header.lower() + + if header_l in processed: + continue + + if header_l in extra_keys: + for extra_header, extra_value in extra_iterator: + extra_header_l = extra_header.lower() + + processed.add(extra_header_l) + + result.append((extra_header, extra_value)) + + if extra_header_l == header_l: + break + else: + result.append((header, value)) + + result.extend(extra_iterator) + + return result + +request_standard_headers: t.Iterable[tuple[str, t.Optional[str]]] = ( + ('user-agent', None), + ('accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'), + ('accept-language', 'en-US,en;q=0.5'), + ('accept-encoding', None), + ('dnt', '1'), + ('connection', None), + ('upgrade-insecure-requests', '1'), + ('sec-fetch-dest', 'document'), + ('sec-fetch-mode', 'navigate'), + ('sec-fetch-site', 'none'), + ('sec-fetch-user', '?1'), + ('te', 'trailers') +) + +auto_overridable_request_headers = frozenset(( + 'user-agent', + 'accept-language', + 'accept-encoding', + 'dnt' +)) + +def merge_request_headers( + native_headers: http_messages.IHeaders, + extra_headers: t.Iterable[tuple[str, str]] +) -> t.Iterable[tuple[str, str]]: + return _merge_headers( + standard_headers = request_standard_headers, + overridable_headers_keys = auto_overridable_request_headers, + native_headers = native_headers, + extra_headers = extra_headers + ) + +response_standard_headers: t.Iterable[tuple[str, t.Optional[str]]] = ( + ('cache-control', 'max-age=0, private, must-revalidate'), + ('connection', None), + ('content-length', None), + 
('content-type', None), + ('date', None), + ('keep-alive', None), + ('server', None) +) + +auto_overridable_response_headers = frozenset( + header.lower() + for header, value in response_standard_headers + if value is None +) + +def merge_response_headers( + native_headers: http_messages.IHeaders, + extra_headers: t.Iterable[tuple[str, str]] +) -> t.Iterable[tuple[str, str]]: + return _merge_headers( + standard_headers = response_standard_headers, + overridable_headers_keys = auto_overridable_response_headers, + native_headers = native_headers, + extra_headers = extra_headers + ) + + +MessageInfo = t.Union[ + http_messages.ResponseInfo, + http_messages.RequestInfo +] + +@dc.dataclass(frozen=True) +class PayloadResourcePolicy(PayloadAwarePolicy): + _process_request = base.MsgProcessOpt.MUST + + priority = base.PolicyPriority._THREE + + def extract_resource_path(self, request_url: ParsedUrl) -> tuple[str, ...]: + # Payload resource pattern has path of the form: + # "/some/arbitrary/segments/<per-session_token>/***" + # + # Corresponding requests shall have path of the form: + # "/some/arbitrary/segments/<per-session_token>/actual/resource/path" + # + # Here we need to extract the "/actual/resource/path" part. 
+ segments_to_drop = len(self.payload_data.pattern_path_segments) + 1 + return request_url.path_segments[segments_to_drop:] + + def should_process_response( + self, + request_info: http_messages.RequestInfo, + response_info: http_messages.AnyResponseInfo + ) -> bool: + return self.extract_resource_path(request_info.url) \ + == ('api', 'unrestricted_http') + + def _make_file_resource_response(self, path: tuple[str, ...]) \ + -> http_messages.ResponseInfo: + try: + file_data = self.payload_data.ref.get_file_data(path) + except state.MissingItemError: + return resource_blocked_response + + if file_data is None: + return http_messages.ResponseInfo.make( + status_code = 404, + headers = [('Content-Type', 'text/plain; charset=utf-8')], + body =_('api.file_not_found').encode() + ) + + return http_messages.ResponseInfo.make( + status_code = 200, + headers = [('Content-Type', file_data.mime_type)], + body = file_data.contents + ) + + def _make_api_response( + self, + path: tuple[str, ...], + request_info: http_messages.RequestInfo + ) -> MessageInfo: + if path[0] == 'page_init_script.js': + template = base.get_script_template('page_init_script.js.jinja') + + token = self.payload_data.unique_token + base_url = self._assets_base_url(request_info.url) + ver_str = json.dumps(haketilo_version) + js = template.render( + unique_token_encoded = encode_string_for_js(token), + assets_base_url_encoded = encode_string_for_js(base_url), + haketilo_version = encode_string_for_js(ver_str) + ) + + return http_messages.ResponseInfo.make( + status_code = 200, + headers = [('Content-Type', 'application/javascript')], + body = js.encode() + ) + + if path[0] == 'unrestricted_http': + try: + assert self.payload_data.cors_bypass_allowed + + params = parse_qs(request_info.url.query) + target_url, = params['target_url'] + extra_headers_str, = params['extra_headers'] + + assert urlparse(target_url).scheme in ('http', 'https') + + extra_headers = json.loads(extra_headers_str) + assert 
isinstance(extra_headers, list) + for header, value in extra_headers: + assert isinstance(header, str) + assert isinstance(value, str) + + result_headers = merge_request_headers( + native_headers = request_info.headers, + extra_headers = extra_headers + ) + + return http_messages.RequestInfo.make( + url = target_url, + method = request_info.method, + headers = result_headers, + body = request_info.body + ) + except: + return resource_blocked_response + else: + return resource_blocked_response + + def consume_request(self, request_info: http_messages.RequestInfo) \ + -> MessageInfo: + resource_path = self.extract_resource_path(request_info.url) + + if resource_path == (): + return resource_blocked_response + elif resource_path[0] == 'static': + return self._make_file_resource_response(resource_path[1:]) + elif resource_path[0] == 'api': + return self._make_api_response(resource_path[1:], request_info) + else: + return resource_blocked_response + + def consume_response(self, http_info: http_messages.FullHTTPInfo) \ + -> http_messages.ResponseInfo: + """ + This method shall only be called for responses to unrestricted HTTP API + requests. Its purpose is to sanitize response headers and smuggle their + original data using an additional header. + """ + serialized = json.dumps([*http_info.response_info.headers.items()]) + extra_headers = [('X-Haketilo-True-Headers', quote(serialized)),] + + # Greetings, adventurous code dweller! It's amazing you made it that + # deep. I hope you're having a good day. If not, read Isaiah 49:15 :) + if (300 <= http_info.response_info.status_code < 400): + location = http_info.response_info.headers.get('location') + if location is not None: + orig_params = parse_qs(http_info.request_info.url.query) + orig_extra_headers_str, = orig_params['extra_headers'] + + new_query = urlencode({ + 'target_url': location, + 'extra_headers': orig_extra_headers_str + }) + + orig_url = http_info.request_info.url.orig_url + new_url = urljoin(orig_url, '?' 
+ new_query) + + extra_headers.append(('location', new_url)) + + merged_headers = merge_response_headers( + native_headers = http_info.response_info.headers, + extra_headers = extra_headers + ) + + return dc.replace(http_info.response_info, headers=merged_headers) + + +resource_blocked_response = http_messages.ResponseInfo.make( + status_code = 403, + headers = [('Content-Type', 'text/plain; charset=utf-8')], + body = _('api.resource_not_enabled_for_access').encode() +) + +@dc.dataclass(frozen=True) +class BlockedResponsePolicy(base.Policy): + _process_request = base.MsgProcessOpt.MUST + _process_response = base.MsgProcessOpt.MUST_NOT + + priority = base.PolicyPriority._THREE + + def consume_request(self, request_info: http_messages.RequestInfo) \ + -> http_messages.ResponseInfo: + return resource_blocked_response + + +@dc.dataclass(frozen=True, unsafe_hash=True) # type: ignore[misc] +class PayloadResourcePolicyFactory(PayloadAwarePolicyFactory): + """....""" + def make_policy(self, haketilo_state: state.HaketiloState) \ + -> t.Union[PayloadResourcePolicy, BlockedResponsePolicy]: + """....""" + haketilo_settings = haketilo_state.get_settings() + + try: + payload_data = self.payload_ref.get_data() + except state.MissingItemError: + return BlockedResponsePolicy(haketilo_settings) + + if not payload_data.explicitly_enabled and \ + haketilo_settings.mapping_use_mode != \ + state.MappingUseMode.AUTO: + return BlockedResponsePolicy(haketilo_settings) + + return PayloadResourcePolicy(haketilo_settings, payload_data) diff --git a/src/hydrilla/proxy/policies/rule.py b/src/hydrilla/proxy/policies/rule.py new file mode 100644 index 0000000..e318a7f --- /dev/null +++ b/src/hydrilla/proxy/policies/rule.py @@ -0,0 +1,122 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Policies for blocking and allowing JS in pages fetched with HTTP. +# +# This file is part of Hydrilla&Haketilo. 
+# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use of this +# code in a proprietary program, I am not going to enforce this in +# court. + +""" +..... +""" + +import dataclasses as dc +import typing as t + +from ...url_patterns import ParsedPattern +from .. import csp +from .. import state +from ..import http_messages +from . 
import base + + +class AllowPolicy(base.Policy): + priority = base.PolicyPriority._TWO + + +script_csp_directives = ('script-src', 'script-src-elem', 'script-src-attr') + +class BlockPolicy(base.Policy): + _process_response = base.MsgProcessOpt.MUST + + priority = base.PolicyPriority._TWO + + @property + def current_popup_settings(self) -> state.PopupSettings: + return self.haketilo_settings.default_popup_jsblocked + + def _csp_to_clear(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Sequence[str]: + return script_csp_directives + + def _csp_to_add(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Mapping[str, t.Sequence[str]]: + return dict((d, ["'none'"]) for d in script_csp_directives) + + +@dc.dataclass(frozen=True) +class RuleAllowPolicy(AllowPolicy): + pattern: ParsedPattern + + def make_info_page(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Optional[str]: + template = self._get_info_template('js_rule_allowed_info.html.jinja') + return template.render( + url = http_info.request_info.url.orig_url, + pattern = self.pattern.orig_url + ) + + +@dc.dataclass(frozen=True) +class RuleBlockPolicy(BlockPolicy): + pattern: ParsedPattern + + def make_info_page(self, http_info: http_messages.FullHTTPInfo) \ + -> t.Optional[str]: + template = self._get_info_template('js_rule_blocked_info.html.jinja') + return template.render( + url = http_info.request_info.url.orig_url, + pattern = self.pattern.orig_url + ) + + +@dc.dataclass(frozen=True, unsafe_hash=True) # type: ignore[misc] +class RulePolicyFactory(base.PolicyFactory): + """....""" + pattern: ParsedPattern + + def __lt__(self, other: base.PolicyFactory) -> bool: + """....""" + if type(other) is not type(self): + return super().__lt__(other) + + assert isinstance(other, RulePolicyFactory) + + return self.pattern < other.pattern + + +@dc.dataclass(frozen=True, unsafe_hash=True) # type: ignore[misc] +class RuleBlockPolicyFactory(RulePolicyFactory): + """....""" + def make_policy(self, haketilo_state: 
state.HaketiloState) \ + -> RuleBlockPolicy: + """....""" + return RuleBlockPolicy(haketilo_state.get_settings(), self.pattern) + + +@dc.dataclass(frozen=True, unsafe_hash=True) # type: ignore[misc] +class RuleAllowPolicyFactory(RulePolicyFactory): + """....""" + def make_policy(self, haketilo_state: state.HaketiloState) \ + -> RuleAllowPolicy: + """....""" + return RuleAllowPolicy(haketilo_state.get_settings(), self.pattern) diff --git a/src/hydrilla/proxy/policies/web_ui.py b/src/hydrilla/proxy/policies/web_ui.py new file mode 100644 index 0000000..1c32ea9 --- /dev/null +++ b/src/hydrilla/proxy/policies/web_ui.py @@ -0,0 +1,74 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +# Policy for serving the web UI from within mitmproxy. +# +# This file is part of Hydrilla&Haketilo. +# +# Copyright (C) 2022 Wojtek Kosior +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <https://www.gnu.org/licenses/>. +# +# +# I, Wojtek Kosior, thereby promise not to sue for violation of this +# file's license. Although I request that you do not make use of this +# code in a proprietary program, I am not going to enforce this in +# court. + +""" +..... +""" + +import dataclasses as dc +import typing as t + +from ...translations import smart_gettext as _ +from .. import state +from .. import http_messages +from .. import web_ui +from . 
import base + + +@dc.dataclass(frozen=True) +class WebUIPolicy(base.Policy): + _process_request = base.MsgProcessOpt.MUST + _process_response = base.MsgProcessOpt.MUST_NOT + + priority = base.PolicyPriority._THREE + + haketilo_state: state.HaketiloState + ui_domain: web_ui.UIDomain + + def consume_request(self, request_info: http_messages.RequestInfo) \ + -> http_messages.ResponseInfo: + return web_ui.process_request( + request_info = request_info, + state = self.haketilo_state, + ui_domain = self.ui_domain + ) + +@dc.dataclass(frozen=True, unsafe_hash=True) +class WebUIPolicyFactory(base.PolicyFactory): + ui_domain: t.ClassVar[web_ui.UIDomain] + + def make_policy(self, haketilo_state: state.HaketiloState) -> WebUIPolicy: + haketilo_settings = haketilo_state.get_settings() + return WebUIPolicy(haketilo_settings, haketilo_state, self.ui_domain) + +@dc.dataclass(frozen=True, unsafe_hash=True) +class WebUIMainPolicyFactory(WebUIPolicyFactory): + ui_domain = web_ui.UIDomain.MAIN + +@dc.dataclass(frozen=True, unsafe_hash=True) +class WebUILandingPolicyFactory(WebUIPolicyFactory): + ui_domain = web_ui.UIDomain.LANDING_PAGE |