about summary refs log tree commit diff
path: root/src/hydrilla/proxy/http_messages.py
diff options
context:
space:
mode:
Diffstat (limited to 'src/hydrilla/proxy/http_messages.py')
-rw-r--r-- src/hydrilla/proxy/http_messages.py 33
1 files changed, 5 insertions, 28 deletions
diff --git a/src/hydrilla/proxy/http_messages.py b/src/hydrilla/proxy/http_messages.py
index 718022f..74f1f02 100644
--- a/src/hydrilla/proxy/http_messages.py
+++ b/src/hydrilla/proxy/http_messages.py
@@ -30,6 +30,7 @@
"""
import re
+import cgi
import dataclasses as dc
import typing as t
import sys
@@ -120,42 +121,18 @@ def make_parsed_url(url: t.Union[str, url_patterns.ParsedUrl]) \
return url_patterns.parse_url(url) if isinstance(url, str) else url
-# For details of 'Content-Type' header's structure, see:
-# https://datatracker.ietf.org/doc/html/rfc7231#section-3.1.1.1
-content_type_reg = re.compile(r'''
-^
-(?P<mime>[\w-]+/[\w-]+)
-\s*
-(?:
- ;
- (?:[^;]*;)* # match possible parameter other than "charset"
-)
-\s*
-charset= # no whitespace allowed in parameter as per RFC
-(?P<encoding>
- [\w-]+
- |
- "[\w-]+" # quotes are optional per RFC
-)
-(?:;[^;]+)* # match possible parameter other than "charset"
-$ # forbid possible dangling characters after closing '"'
-''', re.VERBOSE | re.IGNORECASE)
-
@dc.dataclass(frozen=True)
class HasHeadersMixin:
headers: IHeaders
def deduce_content_type(self) -> tuple[t.Optional[str], t.Optional[str]]:
- content_type = self.headers.get('content-type')
- if content_type is None:
- return (None, None)
-
- match = content_type_reg.match(content_type)
- if match is None:
+ content_type_header = self.headers.get('content-type')
+ if content_type_header is None:
return (None, None)
- mime, encoding = match.group('mime'), match.group('encoding')
+ mime, options = cgi.parse_header(content_type_header)
+ encoding = options.get('charset')
if encoding is not None:
encoding = encoding.lower()