src/hydrilla/proxy/csp.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196

# SPDX-License-Identifier: GPL-3.0-or-later

# Tools for working with Content Security Policy headers.
#
# This file is part of Hydrilla&Haketilo.
#
# Copyright (C) 2022 Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
#
# I, Wojtek Kosior, thereby promise not to sue for violation of this
# file's license. Although I request that you do not make use of this
# code in a proprietary program, I am not going to enforce this in
# court.

"""
.....
"""

import re
import typing as t
import dataclasses as dc

from immutables import Map, MapMutation

from . import http_messages


# Lowercase names of the HTTP headers that carry an enforced Content Security
# Policy (as opposed to a report-only one).
enforce_header_names = (
    'content-security-policy',
    'x-content-security-policy',
    'x-webkit-csp',
)

# Lowercase names of all CSP-related headers: the enforcing ones followed by
# the report-only one.
header_names = enforce_header_names + ('content-security-policy-report-only',)

@dc.dataclass
class ContentSecurityPolicy:
    """
    A single Content Security Policy together with the name of the HTTP header
    it is carried in.

    The modifying methods (`remove()`, `extend()`) do not change the object
    they are called on. They return a new policy instead.
    """
    # Maps each (lowercase, when produced by `deserialize()`) directive name to
    # the sequence of that directive's value tokens.
    directives:  'Map[str, t.Sequence[str]]'
    # Header name returned by `serialize()`.
    header_name: str = 'Content-Security-Policy'
    # Stored as given; no method of this class interprets it.
    disposition: str = 'enforce'

    def remove(self, directives: t.Sequence[str]) -> 'ContentSecurityPolicy':
        """
        Return a copy of this policy without the directives named in
        `directives`. Names that are not present in the policy are ignored.
        """
        mutation = self.directives.mutate()

        for name in directives:
            mutation.pop(name, None)

        return dc.replace(self, directives = mutation.finish())

    def extend(self, directives: t.Mapping[str, t.Sequence[str]]) \
        -> 'ContentSecurityPolicy':
        """
        Return a copy of this policy in which every directive that is also
        present in `directives` has the given extra values appended.

        Directives that are not already part of the policy are *not* added.
        """
        mutation = self.directives.mutate()

        for name, extras in directives.items():
            if name in mutation:
                mutation[name] = (*mutation[name], *extras)

        return dc.replace(self, directives = mutation.finish())

    def serialize(self) -> tuple[str, str]:
        """
        Produces (name, value) pair suitable for use as an HTTP header.

        If a deserialized policy is being reserialized, the resulting value is
        not guaranteed to be the same as the original one. It shall be merely
        semantically equivalent.
        """
        serialized_directives = []
        for name, value_seq in self.directives.items():
            if not value_seq:
                # A directive without values (e.g. `sandbox`) has to stay
                # value-less. `all()` below is vacuously true for an empty
                # sequence and would otherwise turn it into `name 'none'`.
                serialized_directives.append(name)
                continue

            if all(val == "'none'" for val in value_seq):
                # Collapse repeated 'none' tokens into a single one.
                value_seq = ["'none'"]
            else:
                # 'none' is meaningless next to other values, e.g. after the
                # directive got extended. Drop it so the other values apply.
                value_seq = [val for val in value_seq if val != "'none'"]

            serialized_directives.append(f'{name} {" ".join(value_seq)}')

        return (self.header_name, ';'.join(serialized_directives))

    @staticmethod
    def deserialize(
            serialized:  str,
            header_name: str,
            disposition: str = 'enforce'
    ) -> 'ContentSecurityPolicy':
        """
        Parses the policy as required by W3C Working Draft.

        Extra whitespace information, invalid/empty directives and the order of
        directives are not preserved, only the semantically-relevant information
        is.

        `header_name` and `disposition` are stored in the resulting object
        unchanged.
        """
        # For more info, see:
        # https://www.w3.org/TR/CSP3/#parse-serialized-policy
        empty_directives: Map[str, t.Sequence[str]] = Map()

        directives = empty_directives.mutate()

        for serialized_directive in serialized.split(';'):
            # Directives containing non-ASCII characters are skipped.
            if not serialized_directive.isascii():
                continue

            tokens = serialized_directive.split()
            if not tokens:
                # Empty or whitespace-only directive.
                continue

            # Directive names are case-insensitive, values are kept verbatim.
            directive_name = tokens.pop(0).lower()
            directive_value = tokens

            # Specs mention giving warnings for duplicate directive names but
            # from our proxy's perspective this is not important right now.
            # Only the first occurrence of a directive is kept.
            if directive_name in directives:
                continue

            directives[directive_name] = directive_value

        return ContentSecurityPolicy(
            directives  = directives.finish(),
            header_name = header_name,
            disposition = disposition
        )

# def extract(headers: http_messages.IHeaders) \
#     -> tuple[ContentSecurityPolicy, ...]:
#     """...."""
#     csp_policies = []

#     for header_name, disposition in header_names_and_dispositions:
#         for serialized_list in headers.get_all(header_name):
#             for serialized in serialized_list.split(','):
#                 policy = ContentSecurityPolicy.deserialize(
#                     serialized,
#                     header_name,
#                     disposition
#                 )

#                 if policy.directives != Map():
#                     csp_policies.append(policy)

#     return tuple(csp_policies)

def modify(
        headers:       http_messages.IHeaders,
        clear:         t.Union[t.Sequence[str], t.Literal['all']] = (),
        extend:        t.Mapping[str, t.Sequence[str]]            = Map(),
        add:           t.Mapping[str, t.Sequence[str]]            = Map(),
) -> http_messages.IHeaders:
    """
    This function modifies the CSP Headers. The following actions are performed
    *in order*
    1. report-only CSP headers are removed,
    2. directives with names in `clear` are removed (if `clear` is the string
       'all', every existing CSP header is dropped altogether),
    3. directives that could cause CSP reports to be sent ('report-to' and
       'report-uri') are removed,
    4. directives from `extend` are merged into the remaining existing
       directives, effectively loosening them,
    5. directives from `add` are added in a separate Content-Security-Policy
       header; values from `extend` are merged into them as well.

    Policies that end up with no directives are not emitted. Non-CSP headers
    are passed through unchanged. A new headers object is returned.

    No measures are yet implemented to prevent fingerprinting when serving HTTP
    responses with headers modified by this function. Please use wisely, you
    have been warned.
    """
    # Start with all non-CSP headers. The CSP ones worth keeping get re-added
    # below in their modified form.
    headers_list = [
        (key, val)
        for key, val in headers.items()
        if key.lower() not in header_names
    ]

    if clear != 'all':
        to_remove = (*clear, 'report-to', 'report-uri')

        # Only enforcing headers are reconstructed. Report-only ones have
        # already been filtered out above and must stay removed.
        for name in enforce_header_names:
            for serialized_list in headers.get_all(name):
                # One header value may hold several comma-separated policies.
                for serialized in serialized_list.split(','):
                    policy = ContentSecurityPolicy.deserialize(serialized, name)
                    policy = policy.remove(to_remove)
                    policy = policy.extend(extend)
                    if policy.directives:
                        headers_list.append(policy.serialize())

    # Truthiness instead of comparison to `Map()`, so that an empty mapping of
    # any type (e.g. `{}`) does not produce an empty CSP header.
    if add:
        csp_to_add = ContentSecurityPolicy(Map(add)).extend(extend)
        headers_list.append(csp_to_add.serialize())

    return http_messages.make_headers(headers_list)