aboutsummaryrefslogtreecommitdiff
path: root/src/hydrilla/proxy/policies/payload_resource.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/hydrilla/proxy/policies/payload_resource.py')
-rw-r--r--src/hydrilla/proxy/policies/payload_resource.py398
1 files changed, 398 insertions, 0 deletions
diff --git a/src/hydrilla/proxy/policies/payload_resource.py b/src/hydrilla/proxy/policies/payload_resource.py
new file mode 100644
index 0000000..0d73242
--- /dev/null
+++ b/src/hydrilla/proxy/policies/payload_resource.py
@@ -0,0 +1,398 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+# Policies for resolving HTTP requests with local resources.
+#
+# This file is part of Hydrilla&Haketilo.
+#
+# Copyright (C) 2022 Wojtek Kosior
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <https://www.gnu.org/licenses/>.
+#
+#
+# I, Wojtek Kosior, thereby promise not to sue for violation of this
+# file's license. Although I request that you do not make use of this
+# code in a proprietary program, I am not going to enforce this in
+# court.
+
+"""
+We make file resources available to HTTP clients by mapping them
+at:
+ http(s)://<pattern-matching_origin>/<pattern_path>/<token>/
+where <token> is a per-session secret unique for every mapping.
+For example, a payload with pattern like the following:
+ http*://***.example.com/a/b/**
+Could cause resources to be mapped (among others) at each of:
+ https://example.com/a/b/**/Da2uiF2UGfg/
+ https://www.example.com/a/b/**/Da2uiF2UGfg/
+ http://gnome.vs.kde.example.com/a/b/**/Da2uiF2UGfg/
+
+Unauthorized web pages running in the user's browser are exected to be
+unable to guess the secret. This way we stop them from spying on the
+user and from interfering with Haketilo's normal operation.
+
+This is only a soft prevention method. With some mechanisms
+(e.g. service workers), under certain scenarios, it might be possible
+to bypass it. Thus, to make the risk slightly smaller, we also block
+the unauthorized accesses that we can detect.
+
+Since a web page authorized to access the resources may only be served
+when the corresponding mapping is enabled (or AUTO mode is on), we
+consider accesses to non-enabled mappings' resources a security breach
+and block them by responding with 403 Forbidden.
+"""
+
+import dataclasses as dc
+import typing as t
+import json
+
+from base64 import b64encode
+from urllib.parse import quote, parse_qs, urlparse, urlencode, urljoin
+
+from ...translations import smart_gettext as _
+from ...url_patterns import ParsedUrl
+from ...versions import haketilo_version
+from .. import state
+from .. import http_messages
+from . import base
+from .payload import PayloadAwarePolicy, PayloadAwarePolicyFactory
+
+
+def encode_string_for_js(string: str) -> str:
+ return b64encode(quote(string).encode()).decode()
+
+
+AnyValue = t.TypeVar('AnyValue', bound=object)
+
+def header_keys(headers: t.Iterable[tuple[str, AnyValue]]) -> frozenset[str]:
+ return frozenset(header.lower() for header, _ in headers)
+
+def _merge_headers(
+ standard_headers: t.Iterable[tuple[str, t.Optional[str]]],
+ overridable_headers_keys: frozenset[str],
+ native_headers: http_messages.IHeaders,
+ extra_headers: t.Iterable[tuple[str, str]]
+) -> t.Iterable[tuple[str, str]]:
+ standard_keys = header_keys(standard_headers)
+ standard_iterator = iter(standard_headers)
+ native_keys = header_keys(native_headers.items())
+
+ selected_base: list[tuple[str, str]] = []
+ processed: set[str] = set()
+
+ for header, _ in native_headers.items():
+ header_l = header.lower()
+
+ if header_l in processed or header_l not in standard_keys:
+ continue
+
+ for standard_header_l, chosen_value in standard_iterator:
+ if standard_header_l not in native_keys:
+ if chosen_value is not None:
+ selected_base.append((standard_header_l, chosen_value))
+ elif standard_header_l == header_l:
+ processed.add(header_l)
+
+ if header_l in overridable_headers_keys:
+ chosen_value = native_headers.get(header_l, chosen_value)
+
+ if chosen_value is not None:
+ selected_base.append((header, chosen_value))
+
+ break
+
+ for standard_header_l, standard_value in standard_iterator:
+ if standard_value is not None:
+ selected_base.append((standard_header_l, standard_value))
+
+ extra_keys = header_keys(extra_headers)
+ extra_iterator = iter(extra_headers)
+
+ result: list[tuple[str, str]] = []
+ processed = set()
+
+ for header, value in selected_base:
+ header_l = header.lower()
+
+ if header_l in processed:
+ continue
+
+ if header_l in extra_keys:
+ for extra_header, extra_value in extra_iterator:
+ extra_header_l = extra_header.lower()
+
+ processed.add(extra_header_l)
+
+ result.append((extra_header, extra_value))
+
+ if extra_header_l == header_l:
+ break
+ else:
+ result.append((header, value))
+
+ result.extend(extra_iterator)
+
+ return result
+
+request_standard_headers: t.Iterable[tuple[str, t.Optional[str]]] = (
+ ('user-agent', None),
+ ('accept', 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8'),
+ ('accept-language', 'en-US,en;q=0.5'),
+ ('accept-encoding', None),
+ ('dnt', '1'),
+ ('connection', None),
+ ('upgrade-insecure-requests', '1'),
+ ('sec-fetch-dest', 'document'),
+ ('sec-fetch-mode', 'navigate'),
+ ('sec-fetch-site', 'none'),
+ ('sec-fetch-user', '?1'),
+ ('te', 'trailers')
+)
+
+auto_overridable_request_headers = frozenset((
+ 'user-agent',
+ 'accept-language',
+ 'accept-encoding',
+ 'dnt'
+))
+
+def merge_request_headers(
+ native_headers: http_messages.IHeaders,
+ extra_headers: t.Iterable[tuple[str, str]]
+) -> t.Iterable[tuple[str, str]]:
+ return _merge_headers(
+ standard_headers = request_standard_headers,
+ overridable_headers_keys = auto_overridable_request_headers,
+ native_headers = native_headers,
+ extra_headers = extra_headers
+ )
+
+response_standard_headers: t.Iterable[tuple[str, t.Optional[str]]] = (
+ ('cache-control', 'max-age=0, private, must-revalidate'),
+ ('connection', None),
+ ('content-length', None),
+ ('content-type', None),
+ ('date', None),
+ ('keep-alive', None),
+ ('server', None)
+)
+
+auto_overridable_response_headers = frozenset(
+ header.lower()
+ for header, value in response_standard_headers
+ if value is None
+)
+
+def merge_response_headers(
+ native_headers: http_messages.IHeaders,
+ extra_headers: t.Iterable[tuple[str, str]]
+) -> t.Iterable[tuple[str, str]]:
+ return _merge_headers(
+ standard_headers = response_standard_headers,
+ overridable_headers_keys = auto_overridable_response_headers,
+ native_headers = native_headers,
+ extra_headers = extra_headers
+ )
+
+
+MessageInfo = t.Union[
+ http_messages.ResponseInfo,
+ http_messages.RequestInfo
+]
+
+@dc.dataclass(frozen=True)
+class PayloadResourcePolicy(PayloadAwarePolicy):
+ _process_request = base.MsgProcessOpt.MUST
+
+ priority = base.PolicyPriority._THREE
+
+ def extract_resource_path(self, request_url: ParsedUrl) -> tuple[str, ...]:
+ # Payload resource pattern has path of the form:
+ # "/some/arbitrary/segments/<per-session_token>/***"
+ #
+ # Corresponding requests shall have path of the form:
+ # "/some/arbitrary/segments/<per-session_token>/actual/resource/path"
+ #
+ # Here we need to extract the "/actual/resource/path" part.
+ segments_to_drop = len(self.payload_data.pattern_path_segments) + 1
+ return request_url.path_segments[segments_to_drop:]
+
+ def should_process_response(
+ self,
+ request_info: http_messages.RequestInfo,
+ response_info: http_messages.AnyResponseInfo
+ ) -> bool:
+ return self.extract_resource_path(request_info.url) \
+ == ('api', 'unrestricted_http')
+
+ def _make_file_resource_response(self, path: tuple[str, ...]) \
+ -> http_messages.ResponseInfo:
+ try:
+ file_data = self.payload_data.ref.get_file_data(path)
+ except state.MissingItemError:
+ return resource_blocked_response
+
+ if file_data is None:
+ return http_messages.ResponseInfo.make(
+ status_code = 404,
+ headers = [('Content-Type', 'text/plain; charset=utf-8')],
+ body =_('api.file_not_found').encode()
+ )
+
+ return http_messages.ResponseInfo.make(
+ status_code = 200,
+ headers = [('Content-Type', file_data.mime_type)],
+ body = file_data.contents
+ )
+
+ def _make_api_response(
+ self,
+ path: tuple[str, ...],
+ request_info: http_messages.RequestInfo
+ ) -> MessageInfo:
+ if path[0] == 'page_init_script.js':
+ template = base.get_script_template('page_init_script.js.jinja')
+
+ token = self.payload_data.unique_token
+ base_url = self._assets_base_url(request_info.url)
+ ver_str = json.dumps(haketilo_version)
+ js = template.render(
+ unique_token_encoded = encode_string_for_js(token),
+ assets_base_url_encoded = encode_string_for_js(base_url),
+ haketilo_version = encode_string_for_js(ver_str)
+ )
+
+ return http_messages.ResponseInfo.make(
+ status_code = 200,
+ headers = [('Content-Type', 'application/javascript')],
+ body = js.encode()
+ )
+
+ if path[0] == 'unrestricted_http':
+ try:
+ assert self.payload_data.cors_bypass_allowed
+
+ params = parse_qs(request_info.url.query)
+ target_url, = params['target_url']
+ extra_headers_str, = params['extra_headers']
+
+ assert urlparse(target_url).scheme in ('http', 'https')
+
+ extra_headers = json.loads(extra_headers_str)
+ assert isinstance(extra_headers, list)
+ for header, value in extra_headers:
+ assert isinstance(header, str)
+ assert isinstance(value, str)
+
+ result_headers = merge_request_headers(
+ native_headers = request_info.headers,
+ extra_headers = extra_headers
+ )
+
+ return http_messages.RequestInfo.make(
+ url = target_url,
+ method = request_info.method,
+ headers = result_headers,
+ body = request_info.body
+ )
+ except:
+ return resource_blocked_response
+ else:
+ return resource_blocked_response
+
+ def consume_request(self, request_info: http_messages.RequestInfo) \
+ -> MessageInfo:
+ resource_path = self.extract_resource_path(request_info.url)
+
+ if resource_path == ():
+ return resource_blocked_response
+ elif resource_path[0] == 'static':
+ return self._make_file_resource_response(resource_path[1:])
+ elif resource_path[0] == 'api':
+ return self._make_api_response(resource_path[1:], request_info)
+ else:
+ return resource_blocked_response
+
+ def consume_response(self, http_info: http_messages.FullHTTPInfo) \
+ -> http_messages.ResponseInfo:
+ """
+ This method shall only be called for responses to unrestricted HTTP API
+ requests. Its purpose is to sanitize response headers and smuggle their
+ original data using an additional header.
+ """
+ serialized = json.dumps([*http_info.response_info.headers.items()])
+ extra_headers = [('X-Haketilo-True-Headers', quote(serialized)),]
+
+ # Greetings, adventurous code dweller! It's amazing you made it that
+ # deep. I hope you're having a good day. If not, read Isaiah 49:15 :)
+ if (300 <= http_info.response_info.status_code < 400):
+ location = http_info.response_info.headers.get('location')
+ if location is not None:
+ orig_params = parse_qs(http_info.request_info.url.query)
+ orig_extra_headers_str, = orig_params['extra_headers']
+
+ new_query = urlencode({
+ 'target_url': location,
+ 'extra_headers': orig_extra_headers_str
+ })
+
+ orig_url = http_info.request_info.url.orig_url
+ new_url = urljoin(orig_url, '?' + new_query)
+
+ extra_headers.append(('location', new_url))
+
+ merged_headers = merge_response_headers(
+ native_headers = http_info.response_info.headers,
+ extra_headers = extra_headers
+ )
+
+ return dc.replace(http_info.response_info, headers=merged_headers)
+
+
+resource_blocked_response = http_messages.ResponseInfo.make(
+ status_code = 403,
+ headers = [('Content-Type', 'text/plain; charset=utf-8')],
+ body = _('api.resource_not_enabled_for_access').encode()
+)
+
+@dc.dataclass(frozen=True)
+class BlockedResponsePolicy(base.Policy):
+ _process_request = base.MsgProcessOpt.MUST
+ _process_response = base.MsgProcessOpt.MUST_NOT
+
+ priority = base.PolicyPriority._THREE
+
+ def consume_request(self, request_info: http_messages.RequestInfo) \
+ -> http_messages.ResponseInfo:
+ return resource_blocked_response
+
+
+@dc.dataclass(frozen=True, unsafe_hash=True) # type: ignore[misc]
+class PayloadResourcePolicyFactory(PayloadAwarePolicyFactory):
+ """...."""
+ def make_policy(self, haketilo_state: state.HaketiloState) \
+ -> t.Union[PayloadResourcePolicy, BlockedResponsePolicy]:
+ """...."""
+ haketilo_settings = haketilo_state.get_settings()
+
+ try:
+ payload_data = self.payload_ref.get_data()
+ except state.MissingItemError:
+ return BlockedResponsePolicy(haketilo_settings)
+
+ if not payload_data.explicitly_enabled and \
+ haketilo_settings.mapping_use_mode != \
+ state.MappingUseMode.AUTO:
+ return BlockedResponsePolicy(haketilo_settings)
+
+ return PayloadResourcePolicy(haketilo_settings, payload_data)