ParsedUrlType = t.TypeVar('ParsedUrlType', bound='ParsedUrl')

@dc.dataclass(frozen=True, unsafe_hash=True, order=True)
class ParsedUrl:
    """
    Immutable record holding a URL (or URL pattern) split into components.

    Only `orig_url` participates in hashing, equality and ordering; all the
    other fields are declared with `hash=False, compare=False`. Two instances
    with the same `orig_url` are therefore equal even if their remaining
    fields differ.
    """
    # The only field used by __hash__(), __eq__() and __lt__() & co.
    orig_url:           str
    scheme:             str             = dc.field(hash=False, compare=False)
    # Stored in the reverse of the order used in the URL's host part (they are
    # reversed again when the host is rebuilt in `url_without_path`).
    domain_labels:      tuple[str, ...] = dc.field(hash=False, compare=False)
    path_segments:      tuple[str, ...] = dc.field(hash=False, compare=False)
    has_trailing_slash: bool            = dc.field(hash=False, compare=False)
    # NOTE(review): annotated as `int`, yet `url_without_path` guards against
    # `None` - presumably some schemes carry no port; confirm and consider
    # `t.Optional[int]`.
    port:               int             = dc.field(hash=False, compare=False)

    @property
    def url_without_path(self) -> str:
        """
        Return '<scheme>://<host>[:<port>]' rebuilt from the parsed fields.

        The port is appended only when it is set and differs from the default
        port registered for the scheme in the module-level `default_ports`
        mapping.
        """
        scheme = self.scheme

        netloc = '.'.join(reversed(self.domain_labels))

        # .get() instead of indexing: a scheme with no registered default port
        # must not raise KeyError; in that case an explicit port is always
        # included in the result.
        if self.port is not None and \
           default_ports.get(scheme) != self.port:
            netloc += f':{self.port}'

        return f'{scheme}://{netloc}'

    def _reconstruct_url(self) -> str:
        """
        Return the full URL rebuilt from the parsed fields.

        The path is made of `path_segments`, each preceded by '/', plus a
        final '/' when `has_trailing_slash` is set.
        """
        path = '/'.join(('', *self.path_segments))
        if self.has_trailing_slash:
            path += '/'

        return self.url_without_path + path

    def path_append(self: ParsedUrlType, *new_segments: str) -> ParsedUrlType:
        """
        Return a copy of this object with `new_segments` appended to its path.

        The copy's `orig_url` is the URL rebuilt from the parsed fields with
        the new segments joined onto it, and its `has_trailing_slash` is
        `False`. When called without any segments there is nothing to append
        and the object itself is returned unchanged.
        """
        # Without this guard the result would have an `orig_url` ending in
        # '/' while claiming `has_trailing_slash = False`.
        if not new_segments:
            return self

        new_url = self._reconstruct_url()
        if not self.has_trailing_slash:
            new_url += '/'

        new_url += '/'.join(new_segments)

        return dc.replace(
            self,
            orig_url           = new_url,
            path_segments      = (*self.path_segments, *new_segments),
            has_trailing_slash = False
        )

# Distinct static type for parsed URL patterns; at runtime a ParsedPattern is
# just the ParsedUrl object it was created from.
ParsedPattern = t.NewType('ParsedPattern', ParsedUrl)

# # We sometimes need a dummy pattern that means "match everything".
# catchall_pattern = ParsedPattern(
#     ParsedUrl(
#         orig_url           = '',
#         scheme             = '',
#         domain_labels      = ('***',),
#         path_segments      = ('***',),
#         has_trailing_slash = False,
#         port               = 0
#     )
# )

# URLs with those schemes will be recognized but not all of them have to be
# actually supported by Hydrilla server and Haketilo proxy.
@@ -163,7 +200,7 @@ def _parse_pattern_or_url(url: str, orig_url: str, is_pattern: bool = False) \ replace_scheme_regex = re.compile(r'^[^:]*') -def parse_pattern(url_pattern: str) -> t.Sequence[ParsedUrl]: +def parse_pattern(url_pattern: str) -> t.Iterator[ParsedPattern]: """....""" if url_pattern.startswith('http*:'): patterns = [ @@ -173,8 +210,8 @@ def parse_pattern(url_pattern: str) -> t.Sequence[ParsedUrl]: else: patterns = [url_pattern] - return tuple(_parse_pattern_or_url(pat, url_pattern, True) - for pat in patterns) + for pat in patterns: + yield ParsedPattern(_parse_pattern_or_url(pat, url_pattern, True)) def parse_url(url: str) -> ParsedUrl: """....""" -- cgit v1.2.3