about | summary | refs | log | tree | commit | diff
path: root/src/hydrilla/proxy/policies
diff options
context:
space:
mode:
Diffstat (limited to 'src/hydrilla/proxy/policies')
-rw-r--r-- src/hydrilla/proxy/policies/__init__.py 2
-rw-r--r-- src/hydrilla/proxy/policies/base.py 15
-rw-r--r-- src/hydrilla/proxy/policies/misc.py 15
-rw-r--r-- src/hydrilla/proxy/policies/payload.py 94
-rw-r--r-- src/hydrilla/proxy/policies/payload_resource.py 61
-rw-r--r-- src/hydrilla/proxy/policies/rule.py 23
-rw-r--r-- src/hydrilla/proxy/policies/web_ui.py 2
7 files changed, 91 insertions, 121 deletions
diff --git a/src/hydrilla/proxy/policies/__init__.py b/src/hydrilla/proxy/policies/__init__.py
index e958cbd..2276177 100644
--- a/src/hydrilla/proxy/policies/__init__.py
+++ b/src/hydrilla/proxy/policies/__init__.py
@@ -13,6 +13,6 @@ from .payload_resource import PayloadResourcePolicyFactory
from .rule import RuleBlockPolicyFactory, RuleAllowPolicyFactory
from .misc import FallbackAllowPolicy, FallbackBlockPolicy, ErrorBlockPolicy, \
- DoNothingPolicy
+ MitmItPagePolicyFactory
from .web_ui import WebUIMainPolicyFactory, WebUILandingPolicyFactory
diff --git a/src/hydrilla/proxy/policies/base.py b/src/hydrilla/proxy/policies/base.py
index fcdbf9d..8ea792f 100644
--- a/src/hydrilla/proxy/policies/base.py
+++ b/src/hydrilla/proxy/policies/base.py
@@ -48,9 +48,9 @@ class PolicyPriority(int, enum.Enum):
_TWO = 2
_THREE = 3
-ProducedMessage = t.Union[
- http_messages.ProducedRequest,
- http_messages.ProducedResponse
+MessageInfo = t.Union[
+ http_messages.RequestInfo,
+ http_messages.ResponseInfo
]
class Policy(ABC):
@@ -75,7 +75,7 @@ class Policy(ABC):
return self._process_response
def consume_request(self, request_info: http_messages.RequestInfo) \
- -> t.Optional[ProducedMessage]:
+ -> t.Optional[MessageInfo]:
raise NotImplementedError(
'This kind of policy does not consume requests.'
)
@@ -84,7 +84,7 @@ class Policy(ABC):
self,
request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> t.Optional[http_messages.ProducedResponse]:
+ ) -> t.Optional[http_messages.ResponseInfo]:
raise NotImplementedError(
'This kind of policy does not consume responses.'
)
@@ -109,6 +109,11 @@ class PolicyFactory(ABC):
sorting_keys.get(other.__class__.__name__, 999)
sorting_order = (
+ 'WebUIMainPolicyFactory',
+ 'WebUILandingPolicyFactory',
+
+ 'MitmItPagePolicyFactory',
+
'PayloadResourcePolicyFactory',
'PayloadPolicyFactory',
diff --git a/src/hydrilla/proxy/policies/misc.py b/src/hydrilla/proxy/policies/misc.py
index 71692b3..81875a2 100644
--- a/src/hydrilla/proxy/policies/misc.py
+++ b/src/hydrilla/proxy/policies/misc.py
@@ -58,16 +58,19 @@ class ErrorBlockPolicy(BlockPolicy):
builtin: bool = True
-class DoNothingPolicy(base.Policy):
+
+class MitmItPagePolicy(base.Policy):
"""
A special policy class for handling of the magical mitm.it domain. It causes
- request and response not to be modified in any way, and also (unlike
+ request and response not to be modified in any way and also (unlike
FallbackAllowPolicy) prevents them from being streamed.
"""
_process_request: t.ClassVar[bool] = True
_process_response: t.ClassVar[bool] = True
anticache: t.ClassVar[bool] = False
+ priority: t.ClassVar[base.PolicyPriority] = base.PolicyPriority._THREE
+
def consume_request(self, request_info: http_messages.RequestInfo) -> None:
return None
@@ -79,3 +82,11 @@ class DoNothingPolicy(base.Policy):
return None
builtin: bool = True
+
+@dc.dataclass(frozen=True, unsafe_hash=True)
+class MitmItPagePolicyFactory(base.PolicyFactory):
+ builtin: bool = True
+
+ def make_policy(self, haketilo_state: state.HaketiloState) \
+ -> MitmItPagePolicy:
+ return MitmItPagePolicy()
diff --git a/src/hydrilla/proxy/policies/payload.py b/src/hydrilla/proxy/policies/payload.py
index 5b71af7..b89a1c1 100644
--- a/src/hydrilla/proxy/policies/payload.py
+++ b/src/hydrilla/proxy/policies/payload.py
@@ -31,7 +31,6 @@
import dataclasses as dc
import typing as t
-import re
from urllib.parse import urlencode
@@ -91,45 +90,6 @@ class PayloadAwarePolicyFactory(base.PolicyFactory):
return super().__lt__(other)
-# For details of 'Content-Type' header's structure, see:
-# https://datatracker.ietf.org/doc/html/rfc7231#section-3.1.1.1
-content_type_reg = re.compile(r'''
-^
-(?P<mime>[\w-]+/[\w-]+)
-\s*
-(?:
- ;
- (?:[^;]*;)* # match possible parameter other than "charset"
-)
-\s*
-charset= # no whitespace allowed in parameter as per RFC
-(?P<encoding>
- [\w-]+
- |
- "[\w-]+" # quotes are optional per RFC
-)
-(?:;[^;]+)* # match possible parameter other than "charset"
-$ # forbid possible dangling characters after closing '"'
-''', re.VERBOSE | re.IGNORECASE)
-
-def deduce_content_type(headers: http_messages.IHeaders) \
- -> tuple[t.Optional[str], t.Optional[str]]:
- """...."""
- content_type = headers.get('content-type')
- if content_type is None:
- return (None, None)
-
- match = content_type_reg.match(content_type)
- if match is None:
- return (None, None)
-
- mime, encoding = match.group('mime'), match.group('encoding')
-
- if encoding is not None:
- encoding = encoding.lower()
-
- return mime, encoding
-
UTF8_BOM = b'\xEF\xBB\xBF'
BOMs = (
(UTF8_BOM, 'utf-8'),
@@ -174,15 +134,17 @@ class PayloadInjectPolicy(PayloadAwarePolicy):
))
def _modify_headers(self, response_info: http_messages.ResponseInfo) \
- -> t.Iterable[tuple[bytes, bytes]]:
- """...."""
- for header_name, header_value in response_info.headers.items():
- if header_name.lower() not in csp.header_names_and_dispositions:
- yield header_name.encode(), header_value.encode()
+ -> http_messages.IHeaders:
+ new_headers = []
+
+ for key, val in response_info.headers.items():
+ if key.lower() not in csp.header_names_and_dispositions:
+ new_headers.append((key, val))
new_csp = self._new_csp(response_info.url)
+ new_headers.append(('Content-Security-Policy', new_csp))
- yield b'Content-Security-Policy', new_csp.encode()
+ return http_messages.make_headers(new_headers)
def _script_urls(self, url: ParsedUrl) -> t.Iterable[str]:
"""...."""
@@ -231,22 +193,18 @@ class PayloadInjectPolicy(PayloadAwarePolicy):
def _consume_response_unsafe(
self,
+ request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> http_messages.ProducedResponse:
- """...."""
- new_response = response_info.make_produced_response()
-
+ ) -> http_messages.ResponseInfo:
new_headers = self._modify_headers(response_info)
+ new_response = dc.replace(response_info, headers=new_headers)
- new_response = dc.replace(new_response, headers=new_headers)
-
- mime, encoding = deduce_content_type(response_info.headers)
- if mime is None or 'html' not in mime.lower():
+ if not http_messages.is_likely_a_page(request_info, response_info):
return new_response
data = response_info.body
- if data is None:
- data = b''
+
+ _, encoding = response_info.deduce_content_type()
# A UTF BOM overrides encoding specified by the header.
for bom, encoding_name in BOMs:
@@ -261,9 +219,9 @@ class PayloadInjectPolicy(PayloadAwarePolicy):
self,
request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> http_messages.ProducedResponse:
+ ) -> http_messages.ResponseInfo:
try:
- return self._consume_response_unsafe(response_info)
+ return self._consume_response_unsafe(request_info, response_info)
except Exception as e:
# TODO: actually describe the errors
import traceback
@@ -274,10 +232,10 @@ class PayloadInjectPolicy(PayloadAwarePolicy):
e.__traceback__
)
- return http_messages.ProducedResponse(
- 500,
- ((b'Content-Type', b'text/plain; charset=utf-8'),),
- '\n'.join(error_info_list).encode()
+ return http_messages.ResponseInfo.make(
+ status_code = 500,
+ headers = (('Content-Type', 'text/plain; charset=utf-8'),),
+ body = '\n'.join(error_info_list).encode()
)
@@ -292,7 +250,7 @@ class AutoPayloadInjectPolicy(PayloadInjectPolicy):
self,
request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> http_messages.ProducedResponse:
+ ) -> http_messages.ResponseInfo:
try:
if self.payload_data.ref.has_problems():
raise _PayloadHasProblemsError()
@@ -317,9 +275,9 @@ class AutoPayloadInjectPolicy(PayloadInjectPolicy):
redirect_url = 'https://hkt.mitm.it/auto_install_error?' + query
msg = 'Error occured when installing payload. Redirecting.'
- return http_messages.ProducedResponse(
+ return http_messages.ResponseInfo.make(
status_code = 303,
- headers = [(b'Location', redirect_url.encode())],
+ headers = [('Location', redirect_url)],
body = msg.encode()
)
@@ -332,7 +290,7 @@ class PayloadSuggestPolicy(PayloadAwarePolicy):
priority: t.ClassVar[base.PolicyPriority] = base.PolicyPriority._ONE
def consume_request(self, request_info: http_messages.RequestInfo) \
- -> http_messages.ProducedResponse:
+ -> http_messages.ResponseInfo:
query = self._payload_details_to_signed_query_string(
_salt = 'package_suggestion',
next_url = request_info.url.orig_url
@@ -341,9 +299,9 @@ class PayloadSuggestPolicy(PayloadAwarePolicy):
redirect_url = 'https://hkt.mitm.it/package_suggestion?' + query
msg = 'A package was found that could be used on this site. Redirecting.'
- return http_messages.ProducedResponse(
+ return http_messages.ResponseInfo.make(
status_code = 303,
- headers = [(b'Location', redirect_url.encode())],
+ headers = [('Location', redirect_url)],
body = msg.encode()
)
diff --git a/src/hydrilla/proxy/policies/payload_resource.py b/src/hydrilla/proxy/policies/payload_resource.py
index ae6a490..10a43e6 100644
--- a/src/hydrilla/proxy/policies/payload_resource.py
+++ b/src/hydrilla/proxy/policies/payload_resource.py
@@ -219,9 +219,9 @@ def merge_response_headers(
)
-ProducedAny = t.Union[
- http_messages.ProducedResponse,
- http_messages.ProducedRequest
+MessageInfo = t.Union[
+ http_messages.ResponseInfo,
+ http_messages.RequestInfo
]
@dc.dataclass(frozen=True)
@@ -251,31 +251,30 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
== ('api', 'unrestricted_http')
def _make_file_resource_response(self, path: tuple[str, ...]) \
- -> http_messages.ProducedResponse:
- """...."""
+ -> http_messages.ResponseInfo:
try:
file_data = self.payload_data.ref.get_file_data(path)
except state.MissingItemError:
return resource_blocked_response
if file_data is None:
- return http_messages.ProducedResponse(
- 404,
- [(b'Content-Type', b'text/plain; charset=utf-8')],
- _('api.file_not_found').encode()
+ return http_messages.ResponseInfo.make(
+ status_code = 404,
+ headers = [('Content-Type', 'text/plain; charset=utf-8')],
+ body =_('api.file_not_found').encode()
)
- return http_messages.ProducedResponse(
- 200,
- ((b'Content-Type', file_data.mime_type.encode()),),
- file_data.contents
+ return http_messages.ResponseInfo.make(
+ status_code = 200,
+ headers = [('Content-Type', file_data.mime_type)],
+ body = file_data.contents
)
def _make_api_response(
self,
path: tuple[str, ...],
request_info: http_messages.RequestInfo
- ) -> ProducedAny:
+ ) -> MessageInfo:
if path[0] == 'page_init_script.js':
with jinja_lock:
template = jinja_env.get_template('page_init_script.js.jinja')
@@ -288,10 +287,10 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
haketilo_version = encode_string_for_js(ver_str)
)
- return http_messages.ProducedResponse(
- 200,
- ((b'Content-Type', b'application/javascript'),),
- js.encode()
+ return http_messages.ResponseInfo.make(
+ status_code = 200,
+ headers = [('Content-Type', 'application/javascript')],
+ body = js.encode()
)
if path[0] == 'unrestricted_http':
@@ -315,13 +314,10 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
extra_headers = extra_headers
)
- result_headers_bytes = \
- [(h.encode(), v.encode()) for h, v in result_headers]
-
- return http_messages.ProducedRequest(
+ return http_messages.RequestInfo.make(
url = target_url,
method = request_info.method,
- headers = result_headers_bytes,
+ headers = result_headers,
body = request_info.body
)
except:
@@ -330,7 +326,7 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
return resource_blocked_response
def consume_request(self, request_info: http_messages.RequestInfo) \
- -> ProducedAny:
+ -> MessageInfo:
resource_path = self.extract_resource_path(request_info.url)
if resource_path == ():
@@ -346,7 +342,7 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
self,
request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> http_messages.ProducedResponse:
+ ) -> http_messages.ResponseInfo:
"""
This method shall only be called for responses to unrestricted HTTP API
requests. Its purpose is to sanitize response headers and smuggle their
@@ -375,17 +371,17 @@ class PayloadResourcePolicy(PayloadAwarePolicy):
extra_headers = extra_headers
)
- return http_messages.ProducedResponse(
+ return http_messages.ResponseInfo.make(
status_code = response_info.status_code,
- headers = [(h.encode(), v.encode()) for h, v in merged_headers],
+ headers = merged_headers,
body = response_info.body,
)
-resource_blocked_response = http_messages.ProducedResponse(
- 403,
- [(b'Content-Type', b'text/plain; charset=utf-8')],
- _('api.resource_not_enabled_for_access').encode()
+resource_blocked_response = http_messages.ResponseInfo.make(
+ status_code = 403,
+ headers = [('Content-Type', 'text/plain; charset=utf-8')],
+ body = _('api.resource_not_enabled_for_access').encode()
)
@dc.dataclass(frozen=True)
@@ -396,8 +392,7 @@ class BlockedResponsePolicy(base.Policy):
priority: t.ClassVar[base.PolicyPriority] = base.PolicyPriority._THREE
def consume_request(self, request_info: http_messages.RequestInfo) \
- -> http_messages.ProducedResponse:
- """...."""
+ -> http_messages.ResponseInfo:
return resource_blocked_response
diff --git a/src/hydrilla/proxy/policies/rule.py b/src/hydrilla/proxy/policies/rule.py
index 2e9443e..8272d2f 100644
--- a/src/hydrilla/proxy/policies/rule.py
+++ b/src/hydrilla/proxy/policies/rule.py
@@ -50,13 +50,14 @@ class BlockPolicy(base.Policy):
priority: t.ClassVar[base.PolicyPriority] = base.PolicyPriority._TWO
def _modify_headers(self, response_info: http_messages.ResponseInfo) \
- -> t.Iterable[tuple[bytes, bytes]]:
- """...."""
+ -> http_messages.IHeaders:
+ new_headers = []
+
csp_policies = csp.extract(response_info.headers)
- for header_name, header_value in response_info.headers.items():
- if header_name.lower() not in csp.header_names_and_dispositions:
- yield header_name.encode(), header_value.encode()
+ for key, val in response_info.headers.items():
+ if key.lower() not in csp.header_names_and_dispositions:
+ new_headers.append((key, val))
for policy in csp_policies:
if policy.disposition != 'enforce':
@@ -68,7 +69,7 @@ class BlockPolicy(base.Policy):
policy = dc.replace(policy, directives=directives.finish())
- yield policy.header_name.encode(), policy.serialize().encode()
+ new_headers.append((policy.header_name, policy.serialize()))
extra_csp = ';'.join((
"script-src 'none'",
@@ -76,19 +77,19 @@ class BlockPolicy(base.Policy):
"script-src-attr 'none'"
))
- yield b'Content-Security-Policy', extra_csp.encode()
+ new_headers.append(('Content-Security-Policy', extra_csp))
+
+ return http_messages.make_headers(new_headers)
def consume_response(
self,
request_info: http_messages.RequestInfo,
response_info: http_messages.ResponseInfo
- ) -> http_messages.ProducedResponse:
- new_response = response_info.make_produced_response()
-
+ ) -> http_messages.ResponseInfo:
new_headers = self._modify_headers(response_info)
- return dc.replace(new_response, headers=new_headers)
+ return dc.replace(response_info, headers=new_headers)
@dc.dataclass(frozen=True)
class RuleAllowPolicy(AllowPolicy):
diff --git a/src/hydrilla/proxy/policies/web_ui.py b/src/hydrilla/proxy/policies/web_ui.py
index f35b0b7..284d062 100644
--- a/src/hydrilla/proxy/policies/web_ui.py
+++ b/src/hydrilla/proxy/policies/web_ui.py
@@ -50,7 +50,7 @@ class WebUIPolicy(base.Policy):
ui_domain: web_ui.UIDomain
def consume_request(self, request_info: http_messages.RequestInfo) \
- -> http_messages.ProducedResponse:
+ -> http_messages.ResponseInfo:
return web_ui.process_request(
request_info = request_info,
state = self.haketilo_state,