aboutsummaryrefslogtreecommitdiff
path: root/doc/man
diff options
context:
space:
mode:
authorWojtek Kosior <koszko@koszko.org>2022-06-14 11:49:12 +0200
committerWojtek Kosior <koszko@koszko.org>2022-06-14 11:49:12 +0200
commitce480820e03122468a29db0cad1c8a97b62bf6c1 (patch)
treef39b1479ae3d0863e326622db66fafa31ff70c36 /doc/man
parent089f05520a0e295738378401a946f677e5d5623a (diff)
downloadhydrilla-builder-ce480820e03122468a29db0cad1c8a97b62bf6c1.tar.gz
hydrilla-builder-ce480820e03122468a29db0cad1c8a97b62bf6c1.zip
update hydrilla-builder manpage
Diffstat (limited to 'doc/man')
-rw-r--r--doc/man/man1/hydrilla-builder.139
1 files changed, 26 insertions, 13 deletions
diff --git a/doc/man/man1/hydrilla-builder.1 b/doc/man/man1/hydrilla-builder.1
index f58ab97..20825d2 100644
--- a/doc/man/man1/hydrilla-builder.1
+++ b/doc/man/man1/hydrilla-builder.1
@@ -6,10 +6,10 @@
.\"
.\" Available under the terms of Creative Commons Zero v1.0 Universal.
-.TH HYDRILLA-BUILDER 1 2022-04-22 "Hydrilla 1.0" "Hydrilla Manual"
+.TH HYDRILLA-BUILDER 1 2022-06-14 "Hydrilla 1.1" "Hydrilla Manual"
.SH NAME
-hydrilla-builder \- Generate packages to be served by Hydrilla
+hydrilla\-builder \- Generate packages to be served by Hydrilla
.SH SYNOPSIS
.B "hydrilla\-builder \-\-help"
@@ -21,19 +21,24 @@ hydrilla-builder \- Generate packages to be served by Hydrilla
names.)
.SH DESCRIPTION
-.I hydrilla-builder
+.I hydrilla\-builder
is a tool which takes a Hydrilla source package and generates files of a
built package, suitable for serving by the Hydrilla server.
-As of Hydrilla version 1.0
-.I hydrilla-builder
-does not yet perform nor trigger actions like compilation, minification or
-bundling of source code files. Its main function is to automate the process
-of computing SHA256 cryptographic sums of package files and including them
-in JSON definitions.
+The main function of
+.I hydrilla\-builder
+is to automate the process of computing SHA256 cryptographic sums of package
+files and including them in JSON definitions.
+
+This tool does not perform nor trigger actions like compilation, minification or
+bundling of source code files. When this is needed,
+.I hydrilla\-builder
+instead relies on facilities already provided by other software distribution
+systems like APT and extracts the requested files from .deb packages. This
+feature is called \*(lqpiggybacking\*(rq.
In addition,
-.B hydrilla\-builder
+.I hydrilla\-builder
can generate an SPDX report from source package if the
\*(lqreuse_generate_spdx_report\*(rq property is set to true in index.json.
@@ -65,17 +70,25 @@ will also be included in the generated source archive as
present in the source directory.
.TP
+.BI \-p " PIGGYBACK_PATH" "\fR,\fP \-\^\-piggyback\-files=" PIGGYBACK_PATH
+Read and write foreign package archives under
+.IR PIGGYBACK_PATH .
+If not specified, a default value is computed by appending
+\*(lq.foreign-packages\*(rq to the
+.I SOURCE
+directory path.
+
+.TP
.BI \-d " DESTINATION" "\fR,\fP \-\^\-dstdir=" DESTINATION
Write generated files under
.IR DESTINATION .
-Files are written in such way that
.I DESTINATION
-is valid for being passed to Hydrilla to serve packages from.
+can then be passed to Hydrilla to serve packages from.
.TP
.B \-\^\-version
Show version information for this instance of
-.I hydrilla-builder
+.I hydrilla\-builder
on the standard output and exit successfully.
.SH "EXIT STATUS"
href='#n255'>255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
# SPDX-License-Identifier: GPL-3.0-or-later

# Haketilo addon for Mitmproxy.
#
# This file is part of Hydrilla&Haketilo.
#
# Copyright (C) 2022 Wojtek Kosior
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.
#
#
# I, Wojtek Kosior, hereby promise not to sue for violation of this
# file's license. Although I request that you do not make use of this code
# in a proprietary program, I am not going to enforce this in court.

"""
This module contains the definition of a mitmproxy addon that gets instantiated
from addon script.
"""

# Enable using with Python 3.7.
from __future__ import annotations

import sys
import typing as t
import dataclasses as dc
import traceback as tb

from threading import Lock
from pathlib import Path
from contextlib import contextmanager
from urllib.parse import urlparse

from mitmproxy import tls, http, addonmanager, ctx
from mitmproxy.script import concurrent

from ..exceptions import HaketiloException
from ..translations import smart_gettext as _
from ..url_patterns import parse_url
from .state_impl import ConcreteHaketiloState
from . import policies
from . import http_messages


# Type variable for the `default` argument of MitmproxyHeadersWrapper.get();
# it lets that method's return annotation be "a str or whatever type the
# supplied default has".
DefaultGetValue = t.TypeVar('DefaultGetValue', object, None)

class MitmproxyHeadersWrapper():
    """
    Adapter exposing a mitmproxy headers object through a small lookup
    interface: indexing, get(), get_all() and items().
    """
    def __init__(self, headers: http.Headers) -> None:
        """Remember the headers object that all lookups are delegated to."""
        self.headers = headers

    def __getitem__(self, key):
        """Return the wrapped object's value for header `key`."""
        return self.headers[key]

    def get_all(self, key):
        """Delegate to the wrapped object's get_all() for header `key`."""
        return self.headers.get_all(key)

    def get(self, key: str, default: DefaultGetValue = None) \
        -> t.Union[str, DefaultGetValue]:
        """
        Return the value of header `key`, or `default` when the wrapped
        object reports no value (None) for it.
        """
        found = self.headers.get(key)

        return default if found is None else t.cast(str, found)

    def items(self) -> t.Iterable[tuple[str, str]]:
        """Return the wrapped object's items, requested with multi=True."""
        return self.headers.items(multi=True)

@dc.dataclass
class HaketiloAddon:
    """
    Mitmproxy addon that applies Haketilo policies to HTTP flows.

    For every flow a policy is selected from the Haketilo state based on the
    request URL and remembered until the flow finishes. The policy decides
    whether request/response bodies are streamed untouched or are handed over
    to the policy, which may then replace the request or the response.
    """
    # Whether configure() has already created `state`. Guarded by
    # `configured_lock`.
    configured:      bool = False
    configured_lock: Lock = dc.field(default_factory=Lock)

    # Policies selected for flows being processed, keyed by id(flow). Guarded
    # by `policies_lock`.
    flow_policies: dict[int, policies.Policy] = dc.field(default_factory=dict)
    policies_lock: Lock                       = dc.field(default_factory=Lock)

    # Haketilo state; None until configure() succeeds.
    state: t.Optional[ConcreteHaketiloState] = None

    def load(self, loader: addonmanager.Loader) -> None:
        """Register the 'haketilo_dir' option with mitmproxy."""
        loader.add_option(
            name     = 'haketilo_dir',
            typespec = str,
            default  = '~/.haketilo/',
            help     = "Point to a Haketilo data directory to use",
        )

    def configure(self, updated: set[str]) -> None:
        """
        Create the Haketilo state once the 'haketilo_dir' option gets set.

        Calls that do not concern 'haketilo_dir' are ignored. The state is
        made from the 'store' subdirectory of the configured directory. A
        later change of the option only results in a warning being logged.
        If making the state fails, the traceback is printed and the process
        exits with status 1.
        """
        if 'haketilo_dir' not in updated:
            return

        with self.configured_lock:
            if self.configured:
                ctx.log.warn(_('haketilo_dir_already_configured'))
                return

            try:
                # pathlib does not expand '~' by itself. Without expanduser()
                # the option's default value ('~/.haketilo/') would denote a
                # directory literally named '~' under the current working
                # directory instead of one in the user's home.
                haketilo_dir = Path(ctx.options.haketilo_dir).expanduser()

                self.state = ConcreteHaketiloState.make(haketilo_dir / 'store')
            except Exception as e:
                tb.print_exception(None, e, e.__traceback__)
                sys.exit(1)

            self.configured = True

    def try_get_policy(self, flow: http.HTTPFlow, fail_ok: bool = True) -> \
        t.Optional[policies.Policy]:
        """
        Return the policy remembered for `flow`, selecting and remembering
        one first if there is none yet.

        If the flow's request URL cannot be parsed (parse_url() raises
        HaketiloException), return None when `fail_ok` is true and re-raise
        the exception otherwise.
        """
        with self.policies_lock:
            policy = self.flow_policies.get(id(flow))

        if policy is None:
            try:
                parsed_url = parse_url(flow.request.url)
            except HaketiloException:
                if fail_ok:
                    return None
                else:
                    raise

            assert self.state is not None

            policy = self.state.select_policy(parsed_url)

            with self.policies_lock:
                self.flow_policies[id(flow)] = policy

        return policy

    def get_policy(self, flow: http.HTTPFlow) -> policies.Policy:
        """
        Like try_get_policy() but never return None; a URL that cannot be
        parsed makes this method raise HaketiloException instead.
        """
        return t.cast(policies.Policy, self.try_get_policy(flow, fail_ok=False))

    def forget_policy(self, flow: http.HTTPFlow) -> None:
        """Drop the policy remembered for `flow`, if any."""
        with self.policies_lock:
            self.flow_policies.pop(id(flow), None)

    @contextmanager
    def http_safe_event_handling(self, flow: http.HTTPFlow) -> t.Iterator:
        """
        Context manager that turns an exception raised in its body into an
        HTTP 500 response.

        On exception `flow.response` is replaced with a plain-text error page
        that includes the traceback, and the flow's policy is forgotten. The
        exception is not propagated. The addon is asserted to be configured
        before the body runs.
        """
        with self.configured_lock:
            assert self.configured

        try:
            yield
        except Exception as e:
            tb_string = ''.join(tb.format_exception(None, e, e.__traceback__))
            error_text = _('err.proxy.unknown_error_{}_try_again')\
                .format(tb_string)\
                .encode()
            flow.response = http.Response.make(
                status_code = 500,
                content     = error_text,
                headers     = [(b'Content-Type', b'text/plain; charset=utf-8')]
            )

            self.forget_policy(flow)

    @concurrent
    def requestheaders(self, flow: http.HTTPFlow) -> None:
        """
        Under mitmproxy 8 this handler deduces an appropriate policy for flow's
        URL and assigns it to the flow. Under mitmproxy 6 the URL is not yet
        available at this point, so the handler effectively does nothing.

        Additionally, a 'referer' header pointing at 'hkt.mitm.it' is removed
        from requests going to any other host. If a policy was found, the
        request gets streamed when the policy does not process requests and
        gets its caching headers stripped when the policy asks for that.
        """
        # TODO: don't account for mitmproxy 6 in the code
        # Mitmproxy 6 causes even more strange behavior than described above.
        # This cannot be easily worked around. Let's just use version 8 and
        # make an APT package for it.
        with self.http_safe_event_handling(flow):
            referrer = flow.request.headers.get('referer')
            if referrer is not None:
                if urlparse(referrer).netloc == 'hkt.mitm.it' and \
                   urlparse(flow.request.url).netloc != 'hkt.mitm.it':
                    # Do not reveal to the site that Haketilo meta-site was
                    # visited before.
                    flow.request.headers.pop('referer', None)

            policy = self.try_get_policy(flow)

            if policy is not None:
                if not policy.process_request:
                    flow.request.stream = True
                if policy.anticache:
                    flow.request.anticache()

    @concurrent
    def request(self, flow: http.HTTPFlow) -> None:
        """
        Hand the complete request over to the flow's policy.

        Streamed requests are skipped. Otherwise the policy's
        consume_request() result, if not None, replaces either the request
        (when it is a ProducedRequest) or the response (otherwise) of the
        flow.
        """
        if flow.request.stream:
            return

        with self.http_safe_event_handling(flow):
            policy = self.get_policy(flow)

            request_info = http_messages.RequestInfo(
                url     = parse_url(flow.request.url),
                method  = flow.request.method,
                headers = MitmproxyHeadersWrapper(flow.request.headers),
                body    = flow.request.get_content(strict=False) or b''
            )

            result = policy.consume_request(request_info)

            if result is not None:
                if isinstance(result, http_messages.ProducedRequest):
                    flow.request = http.Request.make(
                        url     = result.url,
                        method  = result.method,
                        headers = http.Headers(result.headers),
                        content = result.body
                    )
                else:
                    # isinstance(result, http_messages.ProducedResponse)
                    flow.response = http.Response.make(
                        status_code = result.status_code,
                        headers     = http.Headers(result.headers),
                        content     = result.body
                    )

    def responseheaders(self, flow: http.HTTPFlow) -> None:
        """
        Enable streaming of the response body when the flow's policy does
        not process responses.
        """
        assert flow.response is not None

        with self.http_safe_event_handling(flow):
            policy = self.get_policy(flow)

            if not policy.process_response:
                flow.response.stream = True

    @concurrent
    def response(self, flow: http.HTTPFlow) -> None:
        """
        Hand the complete response over to the flow's policy and forget the
        policy afterwards.

        Streamed responses are not passed to the policy. Otherwise the
        policy's consume_response() result, if not None, overwrites the
        status code, headers and body of the response.
        """
        assert flow.response is not None

        if flow.response.stream:
            # The policy is not needed anymore. Forget it here as well;
            # otherwise the entry would stay in `flow_policies` forever and
            # could be picked up by a later flow that happens to get the same
            # id().
            self.forget_policy(flow)
            return

        with self.http_safe_event_handling(flow):
            policy = self.get_policy(flow)

            response_info = http_messages.ResponseInfo(
                url         = parse_url(flow.request.url),
                status_code = flow.response.status_code,
                headers     = MitmproxyHeadersWrapper(flow.response.headers),
                body        = flow.response.get_content(strict=False) or b''
            )

            result = policy.consume_response(response_info)
            if result is not None:
                flow.response.status_code = result.status_code
                flow.response.headers     = http.Headers(result.headers)
                flow.response.set_content(result.body)

            self.forget_policy(flow)

    def tls_clienthello(self, data: tls.ClientHelloData) -> None:
        """
        Set `establish_server_tls_first` on the client hello data, except
        when the server address is unknown or the connection goes to
        'hkt.mitm.it' (or one of its subdomains) on port 443.
        """
        if data.context.server.address is None:
            return

        host, port = data.context.server.address
        if (host == 'hkt.mitm.it' or host.endswith('.hkt.mitm.it')) and \
           port == 443:
            return

        data.establish_server_tls_first = True

    def error(self, flow: http.HTTPFlow) -> None:
        """Forget the policy of a flow that ended with an error."""
        self.forget_policy(flow)