aboutsummaryrefslogtreecommitdiff
path: root/src/hydrilla/url_patterns.py
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2022-07-27 15:56:24 +0200
committerWojtek Kosior <koszko@koszko.org>2022-08-10 17:25:05 +0200
commit879c41927171efc8d77d1de2739b18e2eb57580f (patch)
treede0e78afe2ea49e58c9bf2c662657392a00139ee /src/hydrilla/url_patterns.py
parent52d12a4fa124daa1595529e3e7008276a7986d95 (diff)
downloadhaketilo-hydrilla-879c41927171efc8d77d1de2739b18e2eb57580f.tar.gz
haketilo-hydrilla-879c41927171efc8d77d1de2739b18e2eb57580f.zip
unfinished partial work
Diffstat (limited to 'src/hydrilla/url_patterns.py')
-rw-r--r--src/hydrilla/url_patterns.py91
1 files changed, 64 insertions, 27 deletions
diff --git a/src/hydrilla/url_patterns.py b/src/hydrilla/url_patterns.py
index 8e80379..0a242e3 100644
--- a/src/hydrilla/url_patterns.py
+++ b/src/hydrilla/url_patterns.py
@@ -41,36 +41,73 @@ import dataclasses as dc
from immutables import Map
-from hydrilla.translations import smart_gettext as _
-from hydrilla.exceptions import HaketiloException
+from .translations import smart_gettext as _
+from .exceptions import HaketiloException
default_ports: t.Mapping[str, int] = Map(http=80, https=443, ftp=21)
-@dc.dataclass(frozen=True, unsafe_hash=True)
+ParsedUrlType = t.TypeVar('ParsedUrlType', bound='ParsedUrl')
+
+@dc.dataclass(frozen=True, unsafe_hash=True, order=True)
class ParsedUrl:
"""...."""
- orig_url: str # orig_url used in __hash__()
- scheme: str = dc.field(hash=False)
- domain_labels: tuple[str, ...] = dc.field(hash=False)
- path_segments: tuple[str, ...] = dc.field(hash=False)
- has_trailing_slash: bool = dc.field(hash=False)
- port: int = dc.field(hash=False)
-
- # def reconstruct_url(self) -> str:
- # """...."""
- # scheme = self.orig_scheme
-
- # netloc = '.'.join(reversed(self.domain_labels))
- # if scheme == self.scheme and \
- # self.port is not None and \
- # default_ports[scheme] != self.port:
- # netloc += f':{self.port}'
-
- # path = '/'.join(('', *self.path_segments))
- # if self.has_trailing_slash:
- # path += '/'
+ orig_url: str # the only field used in __hash__(), __eq__() and ordering
+ scheme: str = dc.field(hash=False, compare=False)
+ domain_labels: tuple[str, ...] = dc.field(hash=False, compare=False)
+ path_segments: tuple[str, ...] = dc.field(hash=False, compare=False)
+ has_trailing_slash: bool = dc.field(hash=False, compare=False)
+ port: int = dc.field(hash=False, compare=False)
+
+ @property
+ def url_without_path(self) -> str:
+ """Return 'scheme://host', plus ':port' only if a port is set and is not the scheme's default."""
+ scheme = self.scheme
+
+ netloc = '.'.join(reversed(self.domain_labels))
+
+ if self.port is not None and \
+ default_ports[scheme] != self.port:
+ netloc += f':{self.port}'
+
+ return f'{scheme}://{netloc}'
+
+ def _reconstruct_url(self) -> str:
+ """Rebuild the URL text: url_without_path, then the '/'-joined path segments and any trailing slash."""
+ path = '/'.join(('', *self.path_segments))
+ if self.has_trailing_slash:
+ path += '/'
+
+ return self.url_without_path + path
+
+ def path_append(self: ParsedUrlType, *new_segments: str) -> ParsedUrlType:
+ """Return a copy with new_segments appended to the path, orig_url rebuilt to match, and no trailing slash."""
+ new_url = self._reconstruct_url()
+ if not self.has_trailing_slash:
+ new_url += '/'
+
+ new_url += '/'.join(new_segments)
+
+ return dc.replace(
+ self,
+ orig_url = new_url,
+ path_segments = tuple((*self.path_segments, *new_segments)),
+ has_trailing_slash = False
+ )
+
+ParsedPattern = t.NewType('ParsedPattern', ParsedUrl)
+
+# # We sometimes need a dummy pattern that means "match everything".
+# catchall_pattern = ParsedPattern(
+# ParsedUrl(
+# orig_url = '<dummy_catchall_url_pattern>',
+# scheme = '<dummy_all-scheme>',
+# domain_labels = ('***',),
+# path_segments = ('***',),
+# has_trailing_slash = False,
+# port = 0
+# )
+# )
- # return f'{scheme}://{netloc}{path}'
# URLs with those schemes will be recognized but not all of them have to be
# actually supported by Hydrilla server and Haketilo proxy.
@@ -163,7 +200,7 @@ def _parse_pattern_or_url(url: str, orig_url: str, is_pattern: bool = False) \
replace_scheme_regex = re.compile(r'^[^:]*')
-def parse_pattern(url_pattern: str) -> t.Sequence[ParsedUrl]:
+def parse_pattern(url_pattern: str) -> t.Iterator[ParsedPattern]:
"""...."""
if url_pattern.startswith('http*:'):
patterns = [
@@ -173,8 +210,8 @@ def parse_pattern(url_pattern: str) -> t.Sequence[ParsedUrl]:
else:
patterns = [url_pattern]
- return tuple(_parse_pattern_or_url(pat, url_pattern, True)
- for pat in patterns)
+ for pat in patterns:
+ yield ParsedPattern(_parse_pattern_or_url(pat, url_pattern, True))
def parse_url(url: str) -> ParsedUrl:
"""...."""