From 2e5b690e84fd168ba169c17581f59b8e1d1f430e Mon Sep 17 00:00:00 2001 From: Wojtek Kosior Date: Fri, 7 Oct 2022 10:30:38 +0200 Subject: [proxy] support unrestricted HTTP requests API in Haketilo payloads --- src/hydrilla/proxy/addon.py | 118 ++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 60 deletions(-) (limited to 'src/hydrilla/proxy/addon.py') diff --git a/src/hydrilla/proxy/addon.py b/src/hydrilla/proxy/addon.py index c1069bc..2185bcb 100644 --- a/src/hydrilla/proxy/addon.py +++ b/src/hydrilla/proxy/addon.py @@ -48,13 +48,13 @@ from mitmproxy.script import concurrent from ..exceptions import HaketiloException from ..translations import smart_gettext as _ -from ..url_patterns import parse_url +from ..url_patterns import parse_url, ParsedUrl from .state_impl import ConcreteHaketiloState from . import policies from . import http_messages -DefaultGetValue = t.TypeVar('DefaultGetValue', object, None) +DefaultGetValue = t.TypeVar('DefaultGetValue', str, None) class MitmproxyHeadersWrapper(): """....""" @@ -65,9 +65,14 @@ class MitmproxyHeadersWrapper(): __getitem__ = lambda self, key: self.headers[key] get_all = lambda self, key: self.headers.get_all(key) - def get(self, key: str, default: DefaultGetValue = None) \ + @t.overload + def get(self, key: str) -> t.Optional[str]: + ... + @t.overload + def get(self, key: str, default: DefaultGetValue) \ -> t.Union[str, DefaultGetValue]: - """....""" + ... + def get(self, key, default = None): value = self.headers.get(key) if value is None: @@ -79,6 +84,13 @@ class MitmproxyHeadersWrapper(): """....""" return self.headers.items(multi=True) + +@dc.dataclass(frozen=True) +class FlowHandlingData: + request_url: ParsedUrl + policy: policies.Policy + + @dc.dataclass class HaketiloAddon: """ @@ -87,8 +99,8 @@ class HaketiloAddon: configured: bool = False configured_lock: Lock = dc.field(default_factory=Lock) - flow_policies: dict[int, policies.Policy] = dc.field(default_factory=dict) - policies_lock: Lock = dc.field(default_factory=Lock) + flows_data: dict[int, FlowHandlingData] = dc.field(default_factory=dict) + flows_data_lock: Lock = dc.field(default_factory=Lock) state: t.Optional[ConcreteHaketiloState] = None @@ -121,37 +133,32 @@ class HaketiloAddon: self.configured = True - def try_get_policy(self, flow: http.HTTPFlow, fail_ok: bool = True) -> \ - t.Optional[policies.Policy]: - """....""" - with self.policies_lock: - policy = self.flow_policies.get(id(flow)) + def get_handling_data(self, flow: http.HTTPFlow) -> FlowHandlingData: + policy: policies.Policy - if policy is None: - try: - parsed_url = parse_url(flow.request.url) - except HaketiloException: - if fail_ok: - return None - else: - raise + assert self.state is not None - assert self.state is not None + with self.flows_data_lock: + handling_data = self.flows_data.get(id(flow)) - policy = self.state.select_policy(parsed_url) + if handling_data is None: + try: + parsed_url = parse_url(flow.request.url) + policy = self.state.select_policy(parsed_url) + except HaketiloException as e: + policy = policies.ErrorBlockPolicy(builtin=True, error=e) - with self.policies_lock: - self.flow_policies[id(flow)] = policy + handling_data = FlowHandlingData(parsed_url, policy) - return policy + with self.flows_data_lock: + self.flows_data[id(flow)] = handling_data - def get_policy(self, flow: http.HTTPFlow) -> policies.Policy: - return t.cast(policies.Policy, self.try_get_policy(flow, fail_ok=False)) + return handling_data - def forget_policy(self, flow: http.HTTPFlow) -> None: + def forget_handling_data(self, flow: http.HTTPFlow) -> None: """....""" - with self.policies_lock: - self.flow_policies.pop(id(flow), None) + with self.flows_data_lock: + self.flows_data.pop(id(flow), None) @contextmanager def http_safe_event_handling(self, flow: http.HTTPFlow) -> t.Iterator: @@ -172,19 +179,10 @@ class HaketiloAddon: headers = [(b'Content-Type', b'text/plain; charset=utf-8')] ) - self.forget_policy(flow) + self.forget_handling_data(flow) @concurrent def requestheaders(self, flow: http.HTTPFlow) -> None: - # TODO: don't account for mitmproxy 6 in the code - # Mitmproxy 6 causes even more strange behavior than described below. - # This cannot be easily worked around. Let's just use version 8 and - # make an APT package for it. - """ - Under mitmproxy 8 this handler deduces an appropriate policy for flow's - URL and assigns it to the flow. Under mitmproxy 6 the URL is not yet - available at this point, so the handler effectively does nothing. - """ with self.http_safe_event_handling(flow): referrer = flow.request.headers.get('referer') if referrer is not None: @@ -194,13 +192,13 @@ class HaketiloAddon: # visited before. flow.request.headers.pop('referer', None) - policy = self.try_get_policy(flow) + handling_data = self.get_handling_data(flow) + policy = handling_data.policy - if policy is not None: - if not policy.process_request: - flow.request.stream = True - if policy.anticache: - flow.request.anticache() + if not policy.should_process_request(handling_data.request_url): + flow.request.stream = True + if policy.anticache: + flow.request.anticache() @concurrent def request(self, flow: http.HTTPFlow) -> None: @@ -211,25 +209,23 @@ class HaketiloAddon: return with self.http_safe_event_handling(flow): - policy = self.get_policy(flow) + handling_data = self.get_handling_data(flow) request_info = http_messages.RequestInfo( - url = parse_url(flow.request.url), + url = handling_data.request_url, method = flow.request.method, headers = MitmproxyHeadersWrapper(flow.request.headers), body = flow.request.get_content(strict=False) or b'' ) - result = policy.consume_request(request_info) + result = handling_data.policy.consume_request(request_info) if result is not None: if isinstance(result, http_messages.ProducedRequest): - flow.request = http.Request.make( - url = result.url, - method = result.method, - headers = http.Headers(result.headers), - content = result.body - ) + flow.request.url = result.url + flow.request.method = result.method + flow.request.headers = http.Headers(result.headers) + flow.request.set_content(result.body or None) else: # isinstance(result, http_messages.ProducedResponse) flow.response = http.Response.make( @@ -245,9 +241,10 @@ class HaketiloAddon: assert flow.response is not None with self.http_safe_event_handling(flow): - policy = self.get_policy(flow) + handling_data = self.get_handling_data(flow) + policy = handling_data.policy - if not policy.process_response: + if not policy.should_process_response(handling_data.request_url): flow.response.stream = True @concurrent @@ -261,22 +258,23 @@ class HaketiloAddon: return with self.http_safe_event_handling(flow): - policy = self.get_policy(flow) + handling_data = self.get_handling_data(flow) response_info = http_messages.ResponseInfo( url = parse_url(flow.request.url), + orig_url = handling_data.request_url, status_code = flow.response.status_code, headers = MitmproxyHeadersWrapper(flow.response.headers), body = flow.response.get_content(strict=False) or b'' ) - result = policy.consume_response(response_info) + result = handling_data.policy.consume_response(response_info) if result is not None: flow.response.status_code = result.status_code flow.response.headers = http.Headers(result.headers) flow.response.set_content(result.body) - self.forget_policy(flow) + self.forget_handling_data(flow) def tls_clienthello(self, data: tls.ClientHelloData): if data.context.server.address is None: @@ -291,4 +289,4 @@ class HaketiloAddon: def error(self, flow: http.HTTPFlow) -> None: """....""" - self.forget_policy(flow) + self.forget_handling_data(flow) -- cgit v1.2.3