diff options
Diffstat (limited to 'src/hydrilla/url_patterns.py')
-rw-r--r-- | src/hydrilla/url_patterns.py | 32 |
1 files changed, 24 insertions, 8 deletions
diff --git a/src/hydrilla/url_patterns.py b/src/hydrilla/url_patterns.py index 278827a..1b5fa10 100644 --- a/src/hydrilla/url_patterns.py +++ b/src/hydrilla/url_patterns.py @@ -57,7 +57,7 @@ class ParsedUrl: path_segments: tuple[str, ...] = dc.field(hash=False, compare=False) query: str = dc.field(hash=False, compare=False) has_trailing_slash: bool = dc.field(hash=False, compare=False) - port: int = dc.field(hash=False, compare=False) + port: t.Optional[int] = dc.field(hash=False, compare=False) @property def url_without_path(self) -> str: @@ -67,12 +67,12 @@ class ParsedUrl: netloc = '.'.join(reversed(self.domain_labels)) if self.port is not None and \ - default_ports[scheme] != self.port: + default_ports.get(scheme) != self.port: netloc += f':{self.port}' return f'{scheme}://{netloc}' - def _reconstruct_url(self) -> str: + def reconstruct_url(self) -> str: """....""" path = '/'.join(('', *self.path_segments)) if self.has_trailing_slash: @@ -82,7 +82,7 @@ class ParsedUrl: def path_append(self: ParsedUrlType, *new_segments: str) -> ParsedUrlType: """....""" - new_url = self._reconstruct_url() + new_url = self.reconstruct_url() if not self.has_trailing_slash: new_url += '/' @@ -114,8 +114,11 @@ ParsedPattern = t.NewType('ParsedPattern', ParsedUrl) # actually supported by Hydrilla server and Haketilo proxy. supported_schemes = 'http', 'https', 'ftp', 'file' -def _parse_pattern_or_url(url: str, orig_url: str, is_pattern: bool = False) \ - -> ParsedUrl: +def _parse_pattern_or_url( + url: str, + orig_url: str, + is_pattern: bool = False +) -> ParsedUrl: """....""" if not is_pattern: assert orig_url == url @@ -164,7 +167,7 @@ def _parse_pattern_or_url(url: str, orig_url: str, is_pattern: bool = False) \ else: raise HaketiloException(_('err.url_{}.bad_port').format(url)) - port = t.cast(int, explicit_port or default_ports.get(parse_result.scheme)) + port = explicit_port or default_ports.get(parse_result.scheme) # Make URL's hostname into a list of labels in reverse order. E.g. # 'https://a.bc..de.fg.com/h/i/' -> ['com', 'fg', 'de', 'bc', 'a'] @@ -215,8 +218,21 @@ def parse_pattern(url_pattern: str) -> t.Iterator[ParsedPattern]: patterns = [url_pattern] for pat in patterns: - yield ParsedPattern(_parse_pattern_or_url(pat, url_pattern, True)) + yield ParsedPattern( + _parse_pattern_or_url(pat, url_pattern, True) + ) def parse_url(url: str) -> ParsedUrl: """....""" return _parse_pattern_or_url(url, url) + + +def normalize_pattern(url_pattern: str) -> str: + parsed = next(parse_pattern(url_pattern)) + + reconstructed = parsed.reconstruct_url() + + if url_pattern.startswith('http*'): + reconstructed = replace_scheme_regex.sub('http*', reconstructed) + + return reconstructed |