From fe43bd552aaacd649b0e00afada01d07ad8dae9a Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 21 Oct 2022 21:42:56 +0200 Subject: [proxy] facilitate injecting non-payload script to all pages --- src/hydrilla/proxy/addon.py | 3 +- src/hydrilla/proxy/http_messages.py | 20 ++++--- src/hydrilla/proxy/policies/base.py | 69 +++++++++++++++++++--- .../policies/injectable_scripts/popup.js.jinja | 50 ++++++++++++++++ src/hydrilla/proxy/policies/misc.py | 2 +- src/hydrilla/proxy/policies/payload.py | 25 +++++--- src/hydrilla/proxy/policies/payload_resource.py | 28 ++++----- src/hydrilla/proxy/policies/rule.py | 8 ++- src/hydrilla/proxy/policies/web_ui.py | 3 +- src/hydrilla/proxy/state.py | 24 ++++++++ src/hydrilla/proxy/state_impl/base.py | 8 +-- src/hydrilla/proxy/state_impl/concrete_state.py | 11 +++- 12 files changed, 200 insertions(+), 51 deletions(-) create mode 100644 src/hydrilla/proxy/policies/injectable_scripts/popup.js.jinja diff --git a/src/hydrilla/proxy/addon.py b/src/hydrilla/proxy/addon.py index d5b0537..68b3cd5 100644 --- a/src/hydrilla/proxy/addon.py +++ b/src/hydrilla/proxy/addon.py @@ -238,7 +238,8 @@ class HaketiloAddon: try: parsed_url = url_patterns.parse_url(flow.request.url) except url_patterns.HaketiloURLException as e: - policy = policies.ErrorBlockPolicy(error=e) + haketilo_settings = self.state.get_settings() + policy = policies.ErrorBlockPolicy(haketilo_settings, error=e) parsed_url = url_patterns.dummy_url else: policy = self.state.select_policy(parsed_url) diff --git a/src/hydrilla/proxy/http_messages.py b/src/hydrilla/proxy/http_messages.py index 9aab510..718022f 100644 --- a/src/hydrilla/proxy/http_messages.py +++ b/src/hydrilla/proxy/http_messages.py @@ -235,15 +235,9 @@ class ResponseInfo(HasHeadersMixin, _BaseResponseInfoFields): AnyResponseInfo = t.Union[BodylessResponseInfo, ResponseInfo] -@dc.dataclass(frozen=True) -class FullHTTPInfo: - request_info: RequestInfo - response_info: ResponseInfo - - def is_likely_a_page( - 
request_info: t.Union[BodylessRequestInfo, RequestInfo], - response_info: t.Union[BodylessResponseInfo, ResponseInfo] + request_info: AnyRequestInfo, + response_info: AnyResponseInfo ) -> bool: fetch_dest = request_info.headers.get('sec-fetch-dest') if fetch_dest is None: @@ -261,3 +255,13 @@ def is_likely_a_page( # account. In the future we might also want to consider the # Content-Disposition header. return mime is not None and 'html' in mime + + +@dc.dataclass(frozen=True) +class FullHTTPInfo: + request_info: RequestInfo + response_info: ResponseInfo + + @property + def is_likely_a_page(self) -> bool: + return is_likely_a_page(self.request_info, self.response_info) diff --git a/src/hydrilla/proxy/policies/base.py b/src/hydrilla/proxy/policies/base.py index 7ce8663..0c37185 100644 --- a/src/hydrilla/proxy/policies/base.py +++ b/src/hydrilla/proxy/policies/base.py @@ -29,11 +29,17 @@ ..... """ +import enum +import re import dataclasses as dc import typing as t -import enum +from threading import Lock from abc import ABC, abstractmethod +from hashlib import sha256 +from base64 import b64encode + +import jinja2 from immutables import Map @@ -43,6 +49,19 @@ from .. import http_messages from .. import csp +loader = jinja2.PackageLoader(__package__, package_path='injectable_scripts') +jinja_env = jinja2.Environment( + loader = loader, + lstrip_blocks = True, + autoescape = False +) +jinja_lock = Lock() + + +popup_script = jinja_env.get_template('popup.js.jinja').render() +popup_script_sha256_bytes = sha256(popup_script.encode()).digest() +popup_script_sha256_b64 = b64encode(popup_script_sha256_bytes).decode() + class PolicyPriority(int, enum.Enum): """....""" _ONE = 1 @@ -55,6 +74,11 @@ MessageInfo = t.Union[ ] +# We're doing *very* simple doctype matching for now. If a site wanted, it could +# trick us into getting this wrong. 
+doctype_re = re.compile(r'^\s*<!doctype[^<>]*>', re.IGNORECASE) + + UTF8_BOM = b'\xEF\xBB\xBF' BOMs = ( (UTF8_BOM, 'utf-8'), @@ -63,14 +87,22 @@ BOMs = ( ) +# mypy needs to be corrected: +# https://stackoverflow.com/questions/70999513/conflict-between-mix-ins-for-abstract-dataclasses/70999704#70999704 +@dc.dataclass(frozen=True) # type: ignore[misc] class Policy(ABC): - """....""" _process_request: t.ClassVar[bool] = False _process_response: t.ClassVar[bool] = False anticache: t.ClassVar[bool] = True priority: t.ClassVar[PolicyPriority] + haketilo_settings: state.HaketiloGlobalSettings + + @property + def current_popup_settings(self) -> state.PopupSettings: + return self.haketilo_settings.default_popup_jsallowed + def should_process_request( self, request_info: http_messages.BodylessRequestInfo @@ -82,7 +114,11 @@ class Policy(ABC): request_info: http_messages.RequestInfo, response_info: http_messages.AnyResponseInfo ) -> bool: - return self._process_response + if self._process_response: + return True + + return (self.current_popup_settings.popup_enabled and + http_messages.is_likely_a_page(request_info, response_info)) def _csp_to_clear(self, http_info: http_messages.FullHTTPInfo) \ -> t.Union[t.Sequence[str], t.Literal['all']]: @@ -94,7 +130,11 @@ class Policy(ABC): def _csp_to_extend(self, http_info: http_messages.FullHTTPInfo) \ -> t.Mapping[str, t.Sequence[str]]: - return Map() + if (self.current_popup_settings.popup_enabled and + http_info.is_likely_a_page): + return {'script-src': [f"'sha256-{popup_script_sha256_b64}'"]} + else: + return Map() def _modify_response_headers(self, http_info: http_messages.FullHTTPInfo) \ -> http_messages.IHeaders: @@ -117,7 +157,24 @@ class Policy(ABC): http_info: http_messages.FullHTTPInfo, encoding: t.Optional[str] ) -> t.Union[str, bytes]: - return http_info.response_info.body + popup_settings = self.current_popup_settings + + if (popup_settings.popup_enabled and + http_info.is_likely_a_page): + if encoding is None: + encoding = 'utf-8'
+ + body_bytes = http_info.response_info.body + body = body_bytes.decode(encoding, errors='replace') + + match = doctype_re.match(body) + doctype_decl_len = 0 if match is None else match.end() + + dotype_decl = body[0:doctype_decl_len] + doc_rest = body[doctype_decl_len:] + return f'{dotype_decl}<script>{popup_script}</script>{doc_rest}' + else: + return http_info.response_info.body def _modify_response_body(self, http_info: http_messages.FullHTTPInfo) \ -> bytes: @@ -188,8 +245,6 @@ class Policy(ABC): ) -# mypy needs to be corrected: -# https://stackoverflow.com/questions/70999513/conflict-between-mix-ins-for-abstract-dataclasses/70999704#70999704 @dc.dataclass(frozen=True, unsafe_hash=True) # type: ignore[misc] class PolicyFactory(ABC): """....""" diff --git a/src/hydrilla/proxy/policies/injectable_scripts/popup.js.jinja b/src/hydrilla/proxy/policies/injectable_scripts/popup.js.jinja new file mode 100644 index 0000000..653b7df --- /dev/null +++ b/src/hydrilla/proxy/policies/injectable_scripts/popup.js.jinja @@ -0,0 +1,50 @@ +{# +SPDX-License-Identifier: GPL-3.0-or-later + +Haketilo popup display script. + +This file is part of Hydrilla&Haketilo. + +Copyright (C) 2021,2022 Wojtek Kosior + +This program is free software: you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +As additional permission under GNU GPL version 3 section 7, you +may distribute forms of that code without the copy of the GNU +GPL normally required by section 4, provided you include this +license notice and, in case of non-source distribution, a URL +through which recipients can access the Corresponding Source.
+If you modify file(s) with this exception, you may extend this +exception to your version of the file(s), but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. + +As a special exception to the GPL, any HTML file which merely +makes function calls to this code, and for that purpose +includes it by reference shall be deemed a separate work for +copyright law purposes. If you modify this code, you may extend +this exception to your version of the code, but you are not +obligated to do so. If you do not wish to do so, delete this +exception statement from your version. + +You should have received a copy of the GNU General Public License +along with this program. If not, see <https://www.gnu.org/licenses/>. + + +I, Wojtek Kosior, thereby promise not to sue for violation of this +file's license. Although I request that you do not make use of this +code in a proprietary program, I am not going to enforce this in court. +#} + +(function(){ + console.log('TODO: make Haketilo able to actually display a popup') + document.currentScript.remove(); +})(); diff --git a/src/hydrilla/proxy/policies/misc.py b/src/hydrilla/proxy/policies/misc.py index acce164..af6c144 100644 --- a/src/hydrilla/proxy/policies/misc.py +++ b/src/hydrilla/proxy/policies/misc.py @@ -81,4 +81,4 @@ class MitmItPagePolicyFactory(base.PolicyFactory): def make_policy(self, haketilo_state: state.HaketiloState) \ -> MitmItPagePolicy: - return MitmItPagePolicy() + return MitmItPagePolicy(haketilo_state.get_settings()) diff --git a/src/hydrilla/proxy/policies/payload.py b/src/hydrilla/proxy/policies/payload.py index 8aaf845..76a1202 100644 --- a/src/hydrilla/proxy/policies/payload.py +++ b/src/hydrilla/proxy/policies/payload.py @@ -49,12 +49,12 @@ class PayloadAwarePolicy(base.Policy): """....""" payload_data: state.PayloadData - def _assets_base_url(self, request_url: ParsedUrl): + def _assets_base_url(self, url: ParsedUrl) -> str: token = self.payload_data.unique_token
base_path_segments = (*self.payload_data.pattern_path_segments, token) - return f'{request_url.url_without_path}/{"/".join(base_path_segments)}/' + return f'{url.url_without_path}/{"/".join(base_path_segments)}/' def _payload_details_to_signed_query_string( self, @@ -110,6 +110,10 @@ class PayloadInjectPolicy(PayloadAwarePolicy): priority: t.ClassVar[base.PolicyPriority] = base.PolicyPriority._TWO + @property + def current_popup_settings(self) -> state.PopupSettings: + return self.haketilo_settings.default_popup_payloadon + def _csp_to_clear(self, http_info: http_messages.FullHTTPInfo) \ -> t.Sequence[str]: return ['script-src'] @@ -141,8 +145,12 @@ class PayloadInjectPolicy(PayloadAwarePolicy): http_info: http_messages.FullHTTPInfo, encoding: t.Optional[str] ) -> t.Union[bytes, str]: + markup = super()._modify_response_document(http_info, encoding) + if isinstance(markup, str): + encoding = None + soup = bs4.BeautifulSoup( - markup = http_info.response_info.body, + markup = markup, from_encoding = encoding, features = 'html5lib' ) @@ -236,22 +244,23 @@ class PayloadPolicyFactory(PayloadAwarePolicyFactory): """....""" def make_policy(self, haketilo_state: state.HaketiloState) \ -> t.Optional[base.Policy]: - """....""" + haketilo_settings = haketilo_state.get_settings() + try: payload_data = self.payload_ref.get_data() except: return None if payload_data.explicitly_enabled: - return PayloadInjectPolicy(payload_data) + return PayloadInjectPolicy(haketilo_settings, payload_data) - mode = haketilo_state.get_settings().mapping_use_mode + mode = haketilo_settings.mapping_use_mode if mode == state.MappingUseMode.QUESTION: - return PayloadSuggestPolicy(payload_data) + return PayloadSuggestPolicy(haketilo_settings, payload_data) if mode == state.MappingUseMode.WHEN_ENABLED: return None # mode == state.MappingUseMode.AUTO - return AutoPayloadInjectPolicy(payload_data) + return AutoPayloadInjectPolicy(haketilo_settings, payload_data) diff --git 
a/src/hydrilla/proxy/policies/payload_resource.py b/src/hydrilla/proxy/policies/payload_resource.py index 07226cb..d8e5ea5 100644 --- a/src/hydrilla/proxy/policies/payload_resource.py +++ b/src/hydrilla/proxy/policies/payload_resource.py @@ -56,12 +56,9 @@ import dataclasses as dc import typing as t import json -from threading import Lock from base64 import b64encode from urllib.parse import quote, parse_qs, urlparse, urlencode, urljoin -import jinja2 - from ...translations import smart_gettext as _ from ...url_patterns import ParsedUrl from ...versions import haketilo_version @@ -71,15 +68,6 @@ from . import base from .payload import PayloadAwarePolicy, PayloadAwarePolicyFactory -loader = jinja2.PackageLoader(__package__, package_path='injectable_scripts') -jinja_env = jinja2.Environment( - loader = loader, - lstrip_blocks = True, - autoescape = False -) -jinja_lock = Lock() - - def encode_string_for_js(string: str) -> str: return b64encode(quote(string).encode()).decode() @@ -274,8 +262,10 @@ class PayloadResourcePolicy(PayloadAwarePolicy): request_info: http_messages.RequestInfo ) -> MessageInfo: if path[0] == 'page_init_script.js': - with jinja_lock: - template = jinja_env.get_template('page_init_script.js.jinja') + with base.jinja_lock: + template = base.jinja_env.get_template( + 'page_init_script.js.jinja' + ) token = self.payload_data.unique_token base_url = self._assets_base_url(request_info.url) ver_str = json.dumps(haketilo_version) @@ -396,14 +386,16 @@ class PayloadResourcePolicyFactory(PayloadAwarePolicyFactory): def make_policy(self, haketilo_state: state.HaketiloState) \ -> t.Union[PayloadResourcePolicy, BlockedResponsePolicy]: """....""" + haketilo_settings = haketilo_state.get_settings() + try: payload_data = self.payload_ref.get_data() except state.MissingItemError: - return BlockedResponsePolicy() + return BlockedResponsePolicy(haketilo_settings) if not payload_data.explicitly_enabled and \ - haketilo_state.get_settings().mapping_use_mode != \ + 
haketilo_settings.mapping_use_mode != \ state.MappingUseMode.AUTO: - return BlockedResponsePolicy() + return BlockedResponsePolicy(haketilo_settings) - return PayloadResourcePolicy(payload_data) + return PayloadResourcePolicy(haketilo_settings, payload_data) diff --git a/src/hydrilla/proxy/policies/rule.py b/src/hydrilla/proxy/policies/rule.py index c62f473..2a6d8cb 100644 --- a/src/hydrilla/proxy/policies/rule.py +++ b/src/hydrilla/proxy/policies/rule.py @@ -52,6 +52,10 @@ class BlockPolicy(base.Policy): priority: t.ClassVar[base.PolicyPriority] = base.PolicyPriority._TWO + @property + def current_popup_settings(self) -> state.PopupSettings: + return self.haketilo_settings.default_popup_jsblocked + def _csp_to_clear(self, http_info: http_messages.FullHTTPInfo) \ -> t.Sequence[str]: return script_csp_directives @@ -94,7 +98,7 @@ class RuleBlockPolicyFactory(RulePolicyFactory): def make_policy(self, haketilo_state: state.HaketiloState) \ -> RuleBlockPolicy: """....""" - return RuleBlockPolicy(self.pattern) + return RuleBlockPolicy(haketilo_state.get_settings(), self.pattern) @dc.dataclass(frozen=True, unsafe_hash=True) # type: ignore[misc] @@ -103,4 +107,4 @@ class RuleAllowPolicyFactory(RulePolicyFactory): def make_policy(self, haketilo_state: state.HaketiloState) \ -> RuleAllowPolicy: """....""" - return RuleAllowPolicy(self.pattern) + return RuleAllowPolicy(haketilo_state.get_settings(), self.pattern) diff --git a/src/hydrilla/proxy/policies/web_ui.py b/src/hydrilla/proxy/policies/web_ui.py index 284d062..74a0655 100644 --- a/src/hydrilla/proxy/policies/web_ui.py +++ b/src/hydrilla/proxy/policies/web_ui.py @@ -62,7 +62,8 @@ class WebUIPolicyFactory(base.PolicyFactory): ui_domain: t.ClassVar[web_ui.UIDomain] def make_policy(self, haketilo_state: state.HaketiloState) -> WebUIPolicy: - return WebUIPolicy(haketilo_state, self.ui_domain) + haketilo_settings = haketilo_state.get_settings() + return WebUIPolicy(haketilo_settings, haketilo_state, self.ui_domain) 
@dc.dataclass(frozen=True, unsafe_hash=True) class WebUIMainPolicyFactory(WebUIPolicyFactory): diff --git a/src/hydrilla/proxy/state.py b/src/hydrilla/proxy/state.py index 7fb7bac..f047a68 100644 --- a/src/hydrilla/proxy/state.py +++ b/src/hydrilla/proxy/state.py @@ -511,6 +511,26 @@ class MappingUseMode(Enum): QUESTION = 'Q' +class PopupStyle(Enum): + """ + DIALOG - Make popup open inside an iframe on the current page. + + TAB - Make popup open in a new tab. + """ + DIALOG = 'D' + TAB = 'T' + +@dc.dataclass(frozen=True) +class PopupSettings: + # We'll implement button later. + #button_trigger: bool + keyboard_trigger: bool + style: PopupStyle + + @property + def popup_enabled(self) -> bool: + return self.keyboard_trigger #or self.button_trigger + @dc.dataclass(frozen=True) class HaketiloGlobalSettings: """....""" @@ -519,6 +539,10 @@ class HaketiloGlobalSettings: advanced_user: bool repo_refresh_seconds: int + default_popup_jsallowed: PopupSettings + default_popup_jsblocked: PopupSettings + default_popup_payloadon: PopupSettings + class Logger(ABC): @abstractmethod diff --git a/src/hydrilla/proxy/state_impl/base.py b/src/hydrilla/proxy/state_impl/base.py index 7437d52..f8291d8 100644 --- a/src/hydrilla/proxy/state_impl/base.py +++ b/src/hydrilla/proxy/state_impl/base.py @@ -210,15 +210,15 @@ class HaketiloStateWithFields(st.HaketiloState): best_priority = policy.priority best_policy = policy except Exception as e: - return policies.ErrorBlockPolicy(error=e) + return policies.ErrorBlockPolicy(self.settings, error=e) if best_policy is not None: return best_policy - if self.get_settings().default_allow_scripts: - return policies.FallbackAllowPolicy() + if self.settings.default_allow_scripts: + return policies.FallbackAllowPolicy(self.settings) else: - return policies.FallbackBlockPolicy() + return policies.FallbackBlockPolicy(self.settings) @abstractmethod def import_items(self, malcontent_path: Path, repo_id: int = 1) -> None: diff --git 
a/src/hydrilla/proxy/state_impl/concrete_state.py b/src/hydrilla/proxy/state_impl/concrete_state.py index c28e360..2dd8810 100644 --- a/src/hydrilla/proxy/state_impl/concrete_state.py +++ b/src/hydrilla/proxy/state_impl/concrete_state.py @@ -113,11 +113,20 @@ def load_settings(cursor: sqlite3.Cursor) -> st.HaketiloGlobalSettings: (default_allow_scripts, advanced_user, repo_refresh_seconds, mapping_use_mode), = cursor.fetchall() + default_popup_settings = st.PopupSettings( + keyboard_trigger = True, + style = st.PopupStyle.TAB + ) + return st.HaketiloGlobalSettings( default_allow_scripts = default_allow_scripts, advanced_user = advanced_user, repo_refresh_seconds = repo_refresh_seconds, - mapping_use_mode = st.MappingUseMode(mapping_use_mode) + mapping_use_mode = st.MappingUseMode(mapping_use_mode), + + default_popup_jsallowed = default_popup_settings, + default_popup_jsblocked = default_popup_settings, + default_popup_payloadon = default_popup_settings ) @dc.dataclass -- cgit v1.2.3