about summary refs log tree commit diff
diff options
context:
space:
mode:
author Wojtek Kosior <koszko@koszko.org> 2022-10-20 21:57:47 +0200
committer Wojtek Kosior <koszko@koszko.org> 2022-10-20 21:57:47 +0200
commit 55b95d70b24cfa1b4703ae442a3c6d1781cc95aa (patch)
tree c6a33b401af373d7591e46e21dede0609313a140
parent 3beab050c339c51c484af9bcd8248ba8ebbbf4d4 (diff)
download haketilo-hydrilla-55b95d70b24cfa1b4703ae442a3c6d1781cc95aa.tar.gz
haketilo-hydrilla-55b95d70b24cfa1b4703ae442a3c6d1781cc95aa.zip
[proxy] rework internal HTTP headers representation
-rw-r--r-- src/hydrilla/proxy/addon.py 93
-rw-r--r-- src/hydrilla/proxy/http_messages.py 202
-rw-r--r-- src/hydrilla/proxy/policies/__init__.py 2
-rw-r--r-- src/hydrilla/proxy/policies/base.py 15
-rw-r--r-- src/hydrilla/proxy/policies/misc.py 15
-rw-r--r-- src/hydrilla/proxy/policies/payload.py 94
-rw-r--r-- src/hydrilla/proxy/policies/payload_resource.py 61
-rw-r--r-- src/hydrilla/proxy/policies/rule.py 23
-rw-r--r-- src/hydrilla/proxy/policies/web_ui.py 2
-rw-r--r-- src/hydrilla/proxy/state_impl/concrete_state.py 9
-rw-r--r-- src/hydrilla/proxy/web_ui/root.py 4
-rw-r--r-- src/hydrilla/url_patterns.py 3
12 files changed, 305 insertions, 218 deletions
diff --git a/src/hydrilla/proxy/addon.py b/src/hydrilla/proxy/addon.py
index ae03ecc..de864fc 100644
--- a/src/hydrilla/proxy/addon.py
+++ b/src/hydrilla/proxy/addon.py
@@ -46,49 +46,25 @@ from mitmproxy.script import concurrent
from ..exceptions import HaketiloException
from ..translations import smart_gettext as _
-from ..url_patterns import parse_url, ParsedUrl
+from .. import url_patterns
from .state_impl import ConcreteHaketiloState
from . import state
from . import policies
from . import http_messages
-DefaultGetValue = t.TypeVar('DefaultGetValue', str, None)
-
-class MitmproxyHeadersWrapper():
- """...."""
- def __init__(self, headers: http.Headers) -> None:
- """...."""
- self.headers = headers
-
- __getitem__ = lambda self, key: self.headers[key]
- get_all = lambda self, key: self.headers.get_all(key)
-
- @t.overload
- def get(self, key: str) -> t.Optional[str]:
- ...
- @t.overload
- def get(self, key: str, default: DefaultGetValue) \
- -> t.Union[str, DefaultGetValue]:
- ...
- def get(self, key, default = None):
- value = self.headers.get(key)
-
- if value is None:
- return default
- else:
- return t.cast(str, value)
-
- def items(self) -> t.Iterable[tuple[str, str]]:
- """...."""
- return self.headers.items(multi=True)
-
-
class LoggerToMitmproxy(state.Logger):
def warn(self, msg: str) -> None:
ctx.log.warn(f'Haketilo: {msg}')
+def safe_parse_url(url: str) -> url_patterns.ParsedUrl:
+ try:
+ return url_patterns.parse_url(url)
+ except url_patterns.HaketiloURLException:
+ return url_patterns.dummy_url
+
+
@dc.dataclass
class FlowHandling:
flow: http.HTTPFlow
@@ -114,11 +90,10 @@ class FlowHandling:
if self._bl_response_info is None:
assert self.flow.response is not None
- headers = self.flow.response.headers
- self._bl_response_info = http_messages.BodylessResponseInfo(
- url = parse_url(self.flow.request.url),
+ self._bl_response_info = http_messages.BodylessResponseInfo.make(
+ url = safe_parse_url(self.flow.request.url),
status_code = self.flow.response.status_code,
- headers = MitmproxyHeadersWrapper(headers)
+ headers = self.flow.response.headers
)
return self._bl_response_info
@@ -131,12 +106,15 @@ class FlowHandling:
return self.bl_response_info.with_body(body)
@staticmethod
- def make(flow: http.HTTPFlow, policy: policies.Policy, url: ParsedUrl) \
- -> 'FlowHandling':
- bl_request_info = http_messages.BodylessRequestInfo(
+ def make(
+ flow: http.HTTPFlow,
+ policy: policies.Policy,
+ url: url_patterns.ParsedUrl
+ ) -> 'FlowHandling':
+ bl_request_info = http_messages.BodylessRequestInfo.make(
url = url,
method = flow.request.method,
- headers = MitmproxyHeadersWrapper(flow.request.headers)
+ headers = flow.request.headers
)
return FlowHandling(flow, policy, bl_request_info)
@@ -157,10 +135,6 @@ class PassedOptions:
self.haketilo_launch_browser is not None)
-magical_mitm_it_url_reg = re.compile(r'^http://mitm.it(/.*)?$')
-dummy_url = parse_url('http://dummy.replacement.url')
-
-
@dc.dataclass
class HaketiloAddon:
initial_options: PassedOptions = PassedOptions()
@@ -257,16 +231,13 @@ class HaketiloAddon:
handling = self.handling_dict.get(id(flow))
if handling is None:
- parsed_url = dummy_url
-
- if magical_mitm_it_url_reg.match(flow.request.url):
- policy = policies.DoNothingPolicy()
+ try:
+ parsed_url = url_patterns.parse_url(flow.request.url)
+ except url_patterns.HaketiloURLException as e:
+ policy = policies.ErrorBlockPolicy(builtin=True, error=e)
+ parsed_url = url_patterns.dummy_url
else:
- try:
- parsed_url = parse_url(flow.request.url)
- policy = self.state.select_policy(parsed_url)
- except HaketiloException as e:
- policy = policies.ErrorBlockPolicy(builtin=True, error=e)
+ policy = self.state.select_policy(parsed_url)
handling = FlowHandling.make(flow, policy, parsed_url)
@@ -330,16 +301,18 @@ class HaketiloAddon:
result = handling.policy.consume_request(handling.request_info)
if result is not None:
- if isinstance(result, http_messages.ProducedRequest):
- flow.request.url = result.url
+ mitmproxy_headers = http.Headers(result.headers.items_bin())
+
+ if isinstance(result, http_messages.RequestInfo):
+ flow.request.url = result.url.orig_url
flow.request.method = result.method
- flow.request.headers = http.Headers(result.headers)
+ flow.request.headers = mitmproxy_headers
flow.request.set_content(result.body or None)
else:
- # isinstance(result, http_messages.ProducedResponse)
+ # isinstance(result, http_messages.ResponseInfo)
flow.response = http.Response.make(
status_code = result.status_code,
- headers = http.Headers(result.headers),
+ headers = mitmproxy_headers,
content = result.body
)
@@ -370,8 +343,10 @@ class HaketiloAddon:
response_info = handling.response_info
)
if result is not None:
+ headers_bin = result.headers.items_bin()
+
flow.response.status_code = result.status_code
- flow.response.headers = http.Headers(result.headers)
+ flow.response.headers = http.Headers(headers_bin)
flow.response.set_content(result.body)
self.forget_flow_handling(flow)
diff --git a/src/hydrilla/proxy/http_messages.py b/src/hydrilla/proxy/http_messages.py
index dbf2c63..1bed103 100644
--- a/src/hydrilla/proxy/http_messages.py
+++ b/src/hydrilla/proxy/http_messages.py
@@ -29,6 +29,7 @@
.....
"""
+import re
import dataclasses as dc
import typing as t
import sys
@@ -38,13 +39,42 @@ if sys.version_info >= (3, 8):
else:
from typing_extensions import Protocol
+import mitmproxy.http
+
from .. import url_patterns
DefaultGetValue = t.TypeVar('DefaultGetValue', str, None)
+class _MitmproxyHeadersWrapper():
+ def __init__(self, headers: mitmproxy.http.Headers) -> None:
+ self.headers = headers
+
+ __getitem__ = lambda self, key: self.headers[key]
+ get_all = lambda self, key: self.headers.get_all(key)
+
+ @t.overload
+ def get(self, key: str) -> t.Optional[str]:
+ ...
+ @t.overload
+ def get(self, key: str, default: DefaultGetValue) \
+ -> t.Union[str, DefaultGetValue]:
+ ...
+ def get(self, key, default = None):
+ value = self.headers.get(key)
+
+ if value is None:
+ return default
+ else:
+ return t.cast(str, value)
+
+ def items(self) -> t.Iterable[tuple[str, str]]:
+ return self.headers.items(multi=True)
+
+ def items_bin(self) -> t.Iterable[tuple[bytes, bytes]]:
+ return tuple((key.encode(), val.encode()) for key, val in self.items())
+
class IHeaders(Protocol):
- """...."""
def __getitem__(self, key: str) -> str: ...
def get_all(self, key: str) -> t.Iterable[str]: ...
@@ -59,65 +89,165 @@ class IHeaders(Protocol):
def items(self) -> t.Iterable[tuple[str, str]]: ...
-def encode_headers_items(headers: t.Iterable[tuple[str, str]]) \
- -> t.Iterable[tuple[bytes, bytes]]:
- """...."""
- for name, value in headers:
- yield name.encode(), value.encode()
+ def items_bin(self) -> t.Iterable[tuple[bytes, bytes]]: ...
+
+_AnyHeaders = t.Union[
+ t.Iterable[tuple[bytes, bytes]],
+ t.Iterable[tuple[str, str]],
+ mitmproxy.http.Headers,
+ IHeaders
+]
+
+def make_headers(headers: _AnyHeaders) -> IHeaders:
+ if not isinstance(headers, mitmproxy.http.Headers):
+ if isinstance(headers, t.Iterable):
+ headers = tuple(headers)
+ if not headers or isinstance(headers[0][0], str):
+ headers = ((key.encode(), val.encode()) for key, val in headers)
+
+ headers = mitmproxy.http.Headers(headers)
+ else:
+ # isinstance(headers, IHeaders)
+ return headers
+
+ return _MitmproxyHeadersWrapper(headers)
+
+
+_AnyUrl = t.Union[str, url_patterns.ParsedUrl]
+
+def make_parsed_url(url: t.Union[str, url_patterns.ParsedUrl]) \
+ -> url_patterns.ParsedUrl:
+ return url_patterns.parse_url(url) if isinstance(url, str) else url
+
+
+# For details of 'Content-Type' header's structure, see:
+# https://datatracker.ietf.org/doc/html/rfc7231#section-3.1.1.1
+content_type_reg = re.compile(r'''
+^
+(?P<mime>[\w-]+/[\w-]+)
+\s*
+(?:
+ ;
+ (?:[^;]*;)* # match possible parameter other than "charset"
+)
+\s*
+charset= # no whitespace allowed in parameter as per RFC
+(?P<encoding>
+ [\w-]+
+ |
+ "[\w-]+" # quotes are optional per RFC
+)
+(?:;[^;]+)* # match possible parameter other than "charset"
+$ # forbid possible dangling characters after closing '"'
+''', re.VERBOSE | re.IGNORECASE)
@dc.dataclass(frozen=True)
-class ProducedRequest:
- """...."""
- url: str
- method: str
- headers: t.Iterable[tuple[bytes, bytes]]
- body: bytes
+class HasHeadersMixin:
+ headers: IHeaders
+
+ def deduce_content_type(self) -> tuple[t.Optional[str], t.Optional[str]]:
+ content_type = self.headers.get('content-type')
+ if content_type is None:
+ return (None, None)
+
+ match = content_type_reg.match(content_type)
+ if match is None:
+ return (None, None)
+
+ mime, encoding = match.group('mime'), match.group('encoding')
+
+ if encoding is not None:
+ encoding = encoding.lower()
+
+ return mime, encoding
+
@dc.dataclass(frozen=True)
-class BodylessRequestInfo:
+class _BaseRequestInfoFields:
url: url_patterns.ParsedUrl
method: str
headers: IHeaders
+@dc.dataclass(frozen=True)
+class BodylessRequestInfo(HasHeadersMixin, _BaseRequestInfoFields):
def with_body(self, body: bytes) -> 'RequestInfo':
return RequestInfo(self.url, self.method, self.headers, body)
+ @staticmethod
+ def make(
+ url: t.Union[str, url_patterns.ParsedUrl],
+ method: str,
+ headers: _AnyHeaders
+ ) -> 'BodylessRequestInfo':
+ url = make_parsed_url(url)
+ return BodylessRequestInfo(url, method, make_headers(headers))
+
@dc.dataclass(frozen=True)
-class RequestInfo(BodylessRequestInfo):
+class RequestInfo(HasHeadersMixin, _BaseRequestInfoFields):
body: bytes
- def make_produced_request(self) -> ProducedRequest:
- return ProducedRequest(
- url = self.url.orig_url,
- method = self.method,
- headers = encode_headers_items(self.headers.items()),
- body = self.body
- )
+ @staticmethod
+ def make(
+ url: _AnyUrl = url_patterns.dummy_url,
+ method: str = 'GET',
+ headers: _AnyHeaders = (),
+ body: bytes = b''
+ ) -> 'RequestInfo':
+ return BodylessRequestInfo.make(url, method, headers).with_body(body)
-@dc.dataclass(frozen=True)
-class ProducedResponse:
- """...."""
- status_code: int
- headers: t.Iterable[tuple[bytes, bytes]]
- body: bytes
@dc.dataclass(frozen=True)
-class BodylessResponseInfo:
- """...."""
+class _BaseResponseInfoFields:
url: url_patterns.ParsedUrl
status_code: int
headers: IHeaders
+@dc.dataclass(frozen=True)
+class BodylessResponseInfo(HasHeadersMixin, _BaseResponseInfoFields):
def with_body(self, body: bytes) -> 'ResponseInfo':
return ResponseInfo(self.url, self.status_code, self.headers, body)
+ @staticmethod
+ def make(
+ url: t.Union[str, url_patterns.ParsedUrl],
+ status_code: int,
+ headers: _AnyHeaders
+ ) -> 'BodylessResponseInfo':
+ url = make_parsed_url(url)
+ return BodylessResponseInfo(url, status_code, make_headers(headers))
+
@dc.dataclass(frozen=True)
-class ResponseInfo(BodylessResponseInfo):
+class ResponseInfo(HasHeadersMixin, _BaseResponseInfoFields):
body: bytes
- def make_produced_response(self) -> ProducedResponse:
- return ProducedResponse(
- status_code = self.status_code,
- headers = encode_headers_items(self.headers.items()),
- body = self.body
- )
+ @staticmethod
+ def make(
+ url: _AnyUrl = url_patterns.dummy_url,
+ status_code: int = 404,
+ headers: _AnyHeaders = (),
+ body: bytes = b''
+ ) -> 'ResponseInfo':
+ bl_info = BodylessResponseInfo.make(url, status_code, headers)
+ return bl_info.with_body(body)
+
+
+def is_likely_a_page(
+ request_info: t.Union[BodylessRequestInfo, RequestInfo],
+ response_info: t.Union[BodylessResponseInfo, ResponseInfo]
+) -> bool:
+ fetch_dest = request_info.headers.get('sec-fetch-dest')
+ if fetch_dest is None:
+ if 'html' in request_info.headers.get('accept', ''):
+ fetch_dest = 'document'
+ else:
+ fetch_dest = 'unknown'
+
+ if fetch_dest not in ('document', 'iframe', 'frame', 'embed', 'object'):
+ return False
+
+ mime, encoding = response_info.deduce_content_type()
+
+ # Right now out of all response headers we're only taking Content-Type into
+ # account. In the future we might also want to consider the
+ # Content-Disposition header.
+ return mime is not None and 'html' in mime
diff --git a/src/hydrilla/proxy/policies/__init__.py b/src/hydrilla/proxy/policies/__init__.py
index e958cbd..2276177 100644
--- a/src/hydrilla/proxy/policies/__init__.py
+++ b/src/hydrilla/proxy/policies/__init__.py
@@ -13,6 +13,6 @@ from .payload_resource import PayloadResourcePolicyFactory
from .rule import RuleBlockPolicyFactory, RuleAllowPolicyFactory
from .misc import FallbackAllowPolicy, FallbackBlockPolicy, ErrorBlockPolicy, \
- DoNothingPolicy
+ MitmItPagePolicyFactory
from .web_ui import WebUIMainPolicyFactory, WebUILandingPolicyFactory
diff --git a/src/hydrilla/proxy/policies/base.py b/src/hydrilla/proxy/policies/base.py
index fcdbf9d..8ea792f 100644
--- a/src/hydrilla/proxy/policies/base.py
+++ b/src/hydrilla/proxy/policies/base.py
@@ -48,9 +48,9 @@ class PolicyPriority(int, enum.Enum):
_TWO = 2
_THREE = 3
-ProducedMessage = t.Union[
- http_messages.ProducedRequest,
- http_messages.ProducedResponse
+MessageInfo = t.Union[
+ http_messages.RequestInfo,
+ http_messages.ResponseInfo
]
class Policy(ABC):
@@ -75,7 +75,7 @@ class Policy(ABC):
return self._process_response
def consume_request(self, request_info: http_messages.RequestInfo) \
- -> t.Optional[ProducedMessage]:
+ -> t.Optional[MessageInfo]:
raise NotImplementedError(
'This kind of policy does not consume requests.'
)
@@ -84,7 +84,7 @@ class Policy(ABC):
self,
request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> t.Optional[http_messages.ProducedResponse]:
+ ) -> t.Optional[http_messages.ResponseInfo]:
raise NotImplementedError(
'This kind of policy does not consume responses.'
)
@@ -109,6 +109,11 @@ class PolicyFactory(ABC):
sorting_keys.get(other.__class__.__name__, 999)
sorting_order = (
+ 'WebUIMainPolicyFactory',
+ 'WebUILandingPolicyFactory',
+
+ 'MitmItPagePolicyFactory',
+
'PayloadResourcePolicyFactory',
'PayloadPolicyFactory',
diff --git a/src/hydrilla/proxy/policies/misc.py b/src/hydrilla/proxy/policies/misc.py
index 71692b3..81875a2 100644
--- a/src/hydrilla/proxy/policies/misc.py
+++ b/src/hydrilla/proxy/policies/misc.py
@@ -58,16 +58,19 @@ class ErrorBlockPolicy(BlockPolicy):
builtin: bool = True
-class DoNothingPolicy(base.Policy):
+
+class MitmItPagePolicy(base.Policy):
"""
A special policy class for handling of the magical mitm.it domain. It causes
- request and response not to be modified in any way, and also (unlike
+ request and response not to be modified in any way and also (unlike
FallbackAllowPolicy) prevents them from being streamed.
"""
_process_request: t.ClassVar[bool] = True
_process_response: t.ClassVar[bool] = True
anticache: t.ClassVar[bool] = False
+ priority: t.ClassVar[base.PolicyPriority] = base.PolicyPriority._THREE
+
def consume_request(self, request_info: http_messages.RequestInfo) -> None:
return None
@@ -79,3 +82,11 @@ class DoNothingPolicy(base.Policy):
return None
builtin: bool = True
+
+@dc.dataclass(frozen=True, unsafe_hash=True)
+class MitmItPagePolicyFactory(base.PolicyFactory):
+ builtin: bool = True
+
+ def make_policy(self, haketilo_state: state.HaketiloState) \
+ -> MitmItPagePolicy:
+ return MitmItPagePolicy()
diff --git a/src/hydrilla/proxy/policies/payload.py b/src/hydrilla/proxy/policies/payload.py
index 5b71af7..b89a1c1 100644
--- a/src/hydrilla/proxy/policies/payload.py
+++ b/src/hydrilla/proxy/policies/payload.py
@@ -31,7 +31,6 @@
import dataclasses as dc
import typing as t
-import re
from urllib.parse import urlencode
@@ -91,45 +90,6 @@ class PayloadAwarePolicyFactory(base.PolicyFactory):
return super().__lt__(other)
-# For details of 'Content-Type' header's structure, see:
-# https://datatracker.ietf.org/doc/html/rfc7231#section-3.1.1.1
-content_type_reg = re.compile(r'''
-^
-(?P<mime>[\w-]+/[\w-]+)
-\s*
-(?:
- ;
- (?:[^;]*;)* # match possible parameter other than "charset"
-)
-\s*
-charset= # no whitespace allowed in parameter as per RFC
-(?P<encoding>
- [\w-]+
- |
- "[\w-]+" # quotes are optional per RFC
-)
-(?:;[^;]+)* # match possible parameter other than "charset"
-$ # forbid possible dangling characters after closing '"'
-''', re.VERBOSE | re.IGNORECASE)
-
-def deduce_content_type(headers: http_messages.IHeaders) \
- -> tuple[t.Optional[str], t.Optional[str]]:
- """...."""
- content_type = headers.get('content-type')
- if content_type is None:
- return (None, None)
-
- match = content_type_reg.match(content_type)
- if match is None:
- return (None, None)
-
- mime, encoding = match.group('mime'), match.group('encoding')
-
- if encoding is not None:
- encoding = encoding.lower()
-
- return mime, encoding
-
UTF8_BOM = b'\xEF\xBB\xBF'
BOMs = (
(UTF8_BOM, 'utf-8'),
@@ -174,15 +134,17 @@ class PayloadInjectPolicy(PayloadAwarePolicy):
))
def _modify_headers(self, response_info: http_messages.ResponseInfo) \
- -> t.Iterable[tuple[bytes, bytes]]:
- """...."""
- for header_name, header_value in response_info.headers.items():
- if header_name.lower() not in csp.header_names_and_dispositions:
- yield header_name.encode(), header_value.encode()
+ -> http_messages.IHeaders:
+ new_headers = []
+
+ for key, val in response_info.headers.items():
+ if key.lower() not in csp.header_names_and_dispositions:
+ new_headers.append((key, val))
new_csp = self._new_csp(response_info.url)
+ new_headers.append(('Content-Security-Policy', new_csp))
- yield b'Content-Security-Policy', new_csp.encode()
+ return http_messages.make_headers(new_headers)
def _script_urls(self, url: ParsedUrl) -> t.Iterable[str]:
"""...."""
@@ -231,22 +193,18 @@ class PayloadInjectPolicy(PayloadAwarePolicy):
def _consume_response_unsafe(
self,
+ request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> http_messages.ProducedResponse:
- """...."""
- new_response = response_info.make_produced_response()
-
+ ) -> http_messages.ResponseInfo:
new_headers = self._modify_headers(response_info)
+ new_response = dc.replace(response_info, headers=new_headers)
- new_response = dc.replace(new_response, headers=new_headers)
-
- mime, encoding = deduce_content_type(response_info.headers)
- if mime is None or 'html' not in mime.lower():
+ if not http_messages.is_likely_a_page(request_info, response_info):
return new_response
data = response_info.body
- if data is None:
- data = b''
+
+ _, encoding = response_info.deduce_content_type()
# A UTF BOM overrides encoding specified by the header.
for bom, encoding_name in BOMs:
@@ -261,9 +219,9 @@ class PayloadInjectPolicy(PayloadAwarePolicy):
self,
request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> http_messages.ProducedResponse:
+ ) -> http_messages.ResponseInfo:
try:
- return self._consume_response_unsafe(response_info)
+ return self._consume_response_unsafe(request_info, response_info)
except Exception as e:
# TODO: actually describe the errors
import traceback
@@ -274,10 +232,10 @@ class PayloadInjectPolicy(PayloadAwarePolicy):
e.__traceback__
)
- return http_messages.ProducedResponse(
- 500,
- ((b'Content-Type', b'text/plain; charset=utf-8'),),
- '\n'.join(error_info_list).encode()
+ return http_messages.ResponseInfo.make(
+ status_code = 500,
+ headers = (('Content-Type', 'text/plain; charset=utf-8'),),
+ body = '\n'.join(error_info_list).encode()
)
@@ -292,7 +250,7 @@ class AutoPayloadInjectPolicy(PayloadInjectPolicy):
self,
request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> http_messages.ProducedResponse:
+ ) -> http_messages.ResponseInfo:
try:
if self.payload_data.ref.has_problems():
raise _PayloadHasProblemsError()
@@ -317,9 +275,9 @@ class AutoPayloadInjectPolicy(PayloadInjectPolicy):
redirect_url = 'https://hkt.mitm.it/auto_install_error?' + query
msg = 'Error occured when installing payload. Redirecting.'
- return http_messages.ProducedResponse(
+ return http_messages.ResponseInfo.make(
status_code = 303,
- headers = [(b'Location', redirect_url.encode())],
+ headers = [('Location', redirect_url)],
body = msg.encode()
)
@@ -332,7 +290,7 @@ class PayloadSuggestPolicy(PayloadAwarePolicy):
priority: t.ClassVar[base.PolicyPriority] = base.PolicyPriority._ONE
def consume_request(self, request_info: http_messages.RequestInfo) \
- -> http_messages.ProducedResponse:
+ -> http_messages.ResponseInfo:
query = self._payload_details_to_signed_query_string(
_salt = 'package_suggestion',
next_url = request_info.url.orig_url
@@ -341,9 +299,9 @@ class PayloadSuggestPolicy(PayloadAwarePolicy):
redirect_url = 'https://hkt.mitm.it/package_suggestion?' + query
msg = 'A package was found that could be used on this site. Redirecting.'
- return http_messages.ProducedResponse(
+ return http_messages.ResponseInfo.make(
status_code = 303,
- headers = [(b'Location', redirect_url.encode())],
+ headers = [('Location', redirect_url)],
body = msg.encode()
)
diff --git a/src/hydrilla/proxy/policies/payload_resource.py b/src/hydrilla/proxy/policies/payload_resource.py
index ae6a490..10a43e6 100644
--- a/src/hydrilla/proxy/policies/payload_resource.py
+++ b/src/hydrilla/proxy/policies/payload_resource.py
@@ -219,9 +219,9 @@ def merge_response_headers(
)
-ProducedAny = t.Union[
- http_messages.ProducedResponse,
- http_messages.ProducedRequest
+MessageInfo = t.Union[
+ http_messages.ResponseInfo,
+ http_messages.RequestInfo
]
@dc.dataclass(frozen=True)
@@ -251,31 +251,30 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
== ('api', 'unrestricted_http')
def _make_file_resource_response(self, path: tuple[str, ...]) \
- -> http_messages.ProducedResponse:
- """...."""
+ -> http_messages.ResponseInfo:
try:
file_data = self.payload_data.ref.get_file_data(path)
except state.MissingItemError:
return resource_blocked_response
if file_data is None:
- return http_messages.ProducedResponse(
- 404,
- [(b'Content-Type', b'text/plain; charset=utf-8')],
- _('api.file_not_found').encode()
+ return http_messages.ResponseInfo.make(
+ status_code = 404,
+ headers = [('Content-Type', 'text/plain; charset=utf-8')],
+ body =_('api.file_not_found').encode()
)
- return http_messages.ProducedResponse(
- 200,
- ((b'Content-Type', file_data.mime_type.encode()),),
- file_data.contents
+ return http_messages.ResponseInfo.make(
+ status_code = 200,
+ headers = [('Content-Type', file_data.mime_type)],
+ body = file_data.contents
)
def _make_api_response(
self,
path: tuple[str, ...],
request_info: http_messages.RequestInfo
- ) -> ProducedAny:
+ ) -> MessageInfo:
if path[0] == 'page_init_script.js':
with jinja_lock:
template = jinja_env.get_template('page_init_script.js.jinja')
@@ -288,10 +287,10 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
haketilo_version = encode_string_for_js(ver_str)
)
- return http_messages.ProducedResponse(
- 200,
- ((b'Content-Type', b'application/javascript'),),
- js.encode()
+ return http_messages.ResponseInfo.make(
+ status_code = 200,
+ headers = [('Content-Type', 'application/javascript')],
+ body = js.encode()
)
if path[0] == 'unrestricted_http':
@@ -315,13 +314,10 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
extra_headers = extra_headers
)
- result_headers_bytes = \
- [(h.encode(), v.encode()) for h, v in result_headers]
-
- return http_messages.ProducedRequest(
+ return http_messages.RequestInfo.make(
url = target_url,
method = request_info.method,
- headers = result_headers_bytes,
+ headers = result_headers,
body = request_info.body
)
except:
@@ -330,7 +326,7 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
return resource_blocked_response
def consume_request(self, request_info: http_messages.RequestInfo) \
- -> ProducedAny:
+ -> MessageInfo:
resource_path = self.extract_resource_path(request_info.url)
if resource_path == ():
@@ -346,7 +342,7 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
self,
request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> http_messages.ProducedResponse:
+ ) -> http_messages.ResponseInfo:
"""
This method shall only be called for responses to unrestricted HTTP API
requests. Its purpose is to sanitize response headers and smuggle their
@@ -375,17 +371,17 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
extra_headers = extra_headers
)
- return http_messages.ProducedResponse(
+ return http_messages.ResponseInfo.make(
status_code = response_info.status_code,
- headers = [(h.encode(), v.encode()) for h, v in merged_headers],
+ headers = merged_headers,
body = response_info.body,
)
-resource_blocked_response = http_messages.ProducedResponse(
- 403,
- [(b'Content-Type', b'text/plain; charset=utf-8')],
- _('api.resource_not_enabled_for_access').encode()
+resource_blocked_response = http_messages.ResponseInfo.make(
+ status_code = 403,
+ headers = [('Content-Type', 'text/plain; charset=utf-8')],
+ body = _('api.resource_not_enabled_for_access').encode()
)
@dc.dataclass(frozen=True)
@@ -396,8 +392,7 @@ class BlockedResponsePolicy(base.Policy):
priority: t.ClassVar[base.PolicyPriority] = base.PolicyPriority._THREE
def consume_request(self, request_info: http_messages.RequestInfo) \
- -> http_messages.ProducedResponse:
- """...."""
+ -> http_messages.ResponseInfo:
return resource_blocked_response
diff --git a/src/hydrilla/proxy/policies/rule.py b/src/hydrilla/proxy/policies/rule.py
index 2e9443e..8272d2f 100644
--- a/src/hydrilla/proxy/policies/rule.py
+++ b/src/hydrilla/proxy/policies/rule.py
@@ -50,13 +50,14 @@ class BlockPolicy(base.Policy):
priority: t.ClassVar[base.PolicyPriority] = base.PolicyPriority._TWO
def _modify_headers(self, response_info: http_messages.ResponseInfo) \
- -> t.Iterable[tuple[bytes, bytes]]:
- """...."""
+ -> http_messages.IHeaders:
+ new_headers = []
+
csp_policies = csp.extract(response_info.headers)
- for header_name, header_value in response_info.headers.items():
- if header_name.lower() not in csp.header_names_and_dispositions:
- yield header_name.encode(), header_value.encode()
+ for key, val in response_info.headers.items():
+ if key.lower() not in csp.header_names_and_dispositions:
+ new_headers.append((key, val))
for policy in csp_policies:
if policy.disposition != 'enforce':
@@ -68,7 +69,7 @@ class BlockPolicy(base.Policy):
policy = dc.replace(policy, directives=directives.finish())
- yield policy.header_name.encode(), policy.serialize().encode()
+ new_headers.append((policy.header_name, policy.serialize()))
extra_csp = ';'.join((
"script-src 'none'",
@@ -76,19 +77,19 @@ class BlockPolicy(base.Policy):
"script-src-attr 'none'"
))
- yield b'Content-Security-Policy', extra_csp.encode()
+ new_headers.append(('Content-Security-Policy', extra_csp))
+
+ return http_messages.make_headers(new_headers)
def consume_response(
self,
request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> http_messages.ProducedResponse:
- new_response = response_info.make_produced_response()
-
+ ) -> http_messages.ResponseInfo:
new_headers = self._modify_headers(response_info)
- return dc.replace(new_response, headers=new_headers)
+ return dc.replace(response_info, headers=new_headers)
@dc.dataclass(frozen=True)
class RuleAllowPolicy(AllowPolicy):
diff --git a/src/hydrilla/proxy/policies/web_ui.py b/src/hydrilla/proxy/policies/web_ui.py
index f35b0b7..284d062 100644
--- a/src/hydrilla/proxy/policies/web_ui.py
+++ b/src/hydrilla/proxy/policies/web_ui.py
@@ -50,7 +50,7 @@ class WebUIPolicy(base.Policy):
ui_domain: web_ui.UIDomain
def consume_request(self, request_info: http_messages.RequestInfo) \
- -> http_messages.ProducedResponse:
+ -> http_messages.ResponseInfo:
return web_ui.process_request(
request_info = request_info,
state = self.haketilo_state,
diff --git a/src/hydrilla/proxy/state_impl/concrete_state.py b/src/hydrilla/proxy/state_impl/concrete_state.py
index 83522cf..c28e360 100644
--- a/src/hydrilla/proxy/state_impl/concrete_state.py
+++ b/src/hydrilla/proxy/state_impl/concrete_state.py
@@ -223,6 +223,15 @@ class ConcreteHaketiloState(base.HaketiloStateWithFields):
item = web_ui_landing_factory
)
+ mitm_it_page_pattern = 'http://mitm.it/***'
+ mitm_it_page_factory = policies.MitmItPagePolicyFactory()
+
+ parsed_pattern, = url_patterns.parse_pattern(mitm_it_page_pattern)
+ new_policy_tree = new_policy_tree.register(
+ parsed_pattern = parsed_pattern,
+ item = mitm_it_page_factory
+ )
+
# Put script blocking/allowing rules in policy tree.
cursor.execute('SELECT pattern, allow_scripts FROM rules;')
diff --git a/src/hydrilla/proxy/web_ui/root.py b/src/hydrilla/proxy/web_ui/root.py
index 57dc958..3120d0e 100644
--- a/src/hydrilla/proxy/web_ui/root.py
+++ b/src/hydrilla/proxy/web_ui/root.py
@@ -191,7 +191,7 @@ def process_request(
request_info: http_messages.RequestInfo,
state: st.HaketiloState,
ui_domain: _app.UIDomain = _app.UIDomain.MAIN
-) -> http_messages.ProducedResponse:
+) -> http_messages.ResponseInfo:
path = '/'.join(('', *request_info.url.path_segments))
if (request_info.url.has_trailing_slash):
path += '/'
@@ -218,7 +218,7 @@ def process_request(
in flask_response.headers
]
- return http_messages.ProducedResponse(
+ return http_messages.ResponseInfo.make(
status_code = flask_response.status_code,
headers = headers_bytes,
body = flask_response.data
diff --git a/src/hydrilla/url_patterns.py b/src/hydrilla/url_patterns.py
index cc68820..5e62a28 100644
--- a/src/hydrilla/url_patterns.py
+++ b/src/hydrilla/url_patterns.py
@@ -240,3 +240,6 @@ def normalize_pattern(url_pattern: str) -> str:
reconstructed = replace_scheme_regex.sub('http*', reconstructed)
return reconstructed
+
+
+dummy_url = parse_url('http://dummy.replacement.url')