author    Wojtek Kosior <koszko@koszko.org>  2022-09-12 13:55:35 +0200
committer Wojtek Kosior <koszko@koszko.org>  2022-09-28 14:03:18 +0200
commit    8e022103636121b13d2ad63d61b84ca927e4aeb1 (patch)
tree      1a84f7071a5c9fa25df96fe258a31e36e51efb16 /src/hydrilla/url_patterns.py
parent    146c5467ac53eaa80e74979600a6587976740bd6 (diff)
[proxy] Add support for script blocking/allowing rules
Diffstat (limited to 'src/hydrilla/url_patterns.py')
-rw-r--r--  src/hydrilla/url_patterns.py  32
1 file changed, 24 insertions(+), 8 deletions(-)
diff --git a/src/hydrilla/url_patterns.py b/src/hydrilla/url_patterns.py
index 278827a..1b5fa10 100644
--- a/src/hydrilla/url_patterns.py
+++ b/src/hydrilla/url_patterns.py
@@ -57,7 +57,7 @@ class ParsedUrl:
     path_segments: tuple[str, ...] = dc.field(hash=False, compare=False)
     query: str = dc.field(hash=False, compare=False)
     has_trailing_slash: bool = dc.field(hash=False, compare=False)
-    port: int = dc.field(hash=False, compare=False)
+    port: t.Optional[int] = dc.field(hash=False, compare=False)
 
     @property
     def url_without_path(self) -> str:
@@ -67,12 +67,12 @@ class ParsedUrl:
         netloc = '.'.join(reversed(self.domain_labels))
 
         if self.port is not None and \
-           default_ports[scheme] != self.port:
+           default_ports.get(scheme) != self.port:
             netloc += f':{self.port}'
 
         return f'{scheme}://{netloc}'
 
-    def _reconstruct_url(self) -> str:
+    def reconstruct_url(self) -> str:
         """...."""
         path = '/'.join(('', *self.path_segments))
         if self.has_trailing_slash:
@@ -82,7 +82,7 @@ class ParsedUrl:
 
     def path_append(self: ParsedUrlType, *new_segments: str) -> ParsedUrlType:
         """...."""
-        new_url = self._reconstruct_url()
+        new_url = self.reconstruct_url()
         if not self.has_trailing_slash:
             new_url += '/'
@@ -114,8 +114,11 @@ ParsedPattern = t.NewType('ParsedPattern', ParsedUrl)
 # actually supported by Hydrilla server and Haketilo proxy.
 supported_schemes = 'http', 'https', 'ftp', 'file'
 
-def _parse_pattern_or_url(url: str, orig_url: str, is_pattern: bool = False) \
-    -> ParsedUrl:
+def _parse_pattern_or_url(
+    url: str,
+    orig_url: str,
+    is_pattern: bool = False
+) -> ParsedUrl:
     """...."""
     if not is_pattern:
         assert orig_url == url
@@ -164,7 +167,7 @@ def _parse_pattern_or_url(url: str, orig_url: str, is_pattern: bool = False) \
     else:
         raise HaketiloException(_('err.url_{}.bad_port').format(url))
 
-    port = t.cast(int, explicit_port or default_ports.get(parse_result.scheme))
+    port = explicit_port or default_ports.get(parse_result.scheme)
 
     # Make URL's hostname into a list of labels in reverse order. E.g.
     # 'https://a.bc..de.fg.com/h/i/' -> ['com', 'fg', 'de', 'bc', 'a']
@@ -215,8 +218,21 @@ def parse_pattern(url_pattern: str) -> t.Iterator[ParsedPattern]:
         patterns = [url_pattern]
 
     for pat in patterns:
-        yield ParsedPattern(_parse_pattern_or_url(pat, url_pattern, True))
+        yield ParsedPattern(
+            _parse_pattern_or_url(pat, url_pattern, True)
+        )
 
 def parse_url(url: str) -> ParsedUrl:
     """...."""
     return _parse_pattern_or_url(url, url)
+
+
+def normalize_pattern(url_pattern: str) -> str:
+    parsed = next(parse_pattern(url_pattern))
+
+    reconstructed = parsed.reconstruct_url()
+
+    if url_pattern.startswith('http*'):
+        reconstructed = replace_scheme_regex.sub('http*', reconstructed)
+
+    return reconstructed
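A minimal usage sketch of the interface after this commit (not part of the patch; the module path and example URLs are assumptions, and replace_scheme_regex is assumed to be defined elsewhere in url_patterns.py):

    # Illustrative only: exercises the now-public reconstruct_url(), the
    # Optional[int] port field and the new normalize_pattern() helper.
    from hydrilla.url_patterns import parse_url, normalize_pattern

    url = parse_url('https://example.com:443/some/path')
    print(url.port)               # 443; schemes without an entry in
                                  # default_ports may now leave port as None
    print(url.url_without_path)   # expected 'https://example.com'
                                  # (default port omitted)
    print(url.reconstruct_url())  # expected 'https://example.com/some/path'

    # normalize_pattern() re-serializes a pattern through the parser while
    # preserving a leading 'http*' wildcard scheme.
    print(normalize_pattern('http*://example.com/a/b/'))

This also shows why the normalization relies on reconstruct_url() being public: the pattern is parsed, re-serialized, and only the wildcard scheme is patched back in afterwards.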