%% SPDX-License-Identifier: CC0-1.0 %% %% Copyright (C) 2025 Woj. Kosior \documentclass[a4paper,12pt,twoside,notitlepage]{report} \usepackage{rotating} \usepackage[ margin=2.5cm, marginparwidth=1.5cm, marginparsep=2mm, foot=1cm, head=1cm ]{geometry} \renewcommand{\baselinestretch}{1.5} \newcommand{\smallStretchValue}{1.2} \newenvironment{smallStretch}{\setstretch{\smallStretchValue}}{} \usepackage{titlesec} \titleformat {\chapter} % command [hang] % shape {\bfseries\Large} % format {\thechapter{}.{ }} % label {0pt} % sep {} % before-code [] % after-code \usepackage{fancyhdr} \usepackage[T1]{fontenc} \usepackage{xurl} \usepackage[style=alphabetic,backend=biber,urldate=long,maxbibnames=9]{biblatex} \usepackage[polish,english]{babel} \DefineBibliographyStrings{english}{% mathesis = {Master's thesis}, } \addbibresource{doc.bib} \renewcommand*\finalnamedelim{, \addspace{}and\addspace{}} \usepackage{mathptmx} \usepackage{fontspec} \usepackage{newunicodechar} \newfontfamily{\fallbackfont}{DejaVu Sans}[Scale=MatchLowercase] \DeclareTextFontCommand{\textfallback}{\fallbackfont} \newunicodechar{│}{\textfallback{│}} \newunicodechar{├}{\textfallback{├}} \newunicodechar{─}{\textfallback{─}} \newunicodechar{˷}{,,} \usepackage{footnote} \usepackage{caption} \usepackage{setspace} \captionsetup[lstlisting]{ font={stretch=\smallStretchValue} } % fix to prevent listings' changed stretch from affecting their captions \usepackage[table]{xcolor} \usepackage{array} \usepackage{calc} % https://tex.stackexchange.com/questions/249040/scale-column-of-a-table-in-percentage-of-textwidth#answer-249043 \newdimen\netTableWidth \newcommand{\columnsCount}[1]{% \netTableWidth=\dimexpr \linewidth - #1\tabcolsep - #1\tabcolsep - #1\arrayrulewidth -2\arrayrulewidth \relax% } \newenvironment{fancycell} {% \hspace{0.1in}% \nolinebreak% \begin{minipage}[t]{\dimexpr\linewidth-0.1in\relax} \raggedright \hspace{-0.15in}% } {% \end{minipage}% \vspace{0.07in} } \usepackage{tikz} \newcommand{\lightbullet}{% \begin{tikzpicture} \pgfmathsetlengthmacro\Xheight{height("X")} \draw (0, 0) node[ inner sep=0, anchor=east, minimum height=\Xheight ] {}; \fill[radius=0.25*\Xheight] circle; \end{tikzpicture}% } \def\cellitems{\def\item{\par \noindent\hbox to1.2em{\hss\lightbullet\hss\hss\hss}\hangindent=1.5em }} \usepackage[hyperfootnotes=false,pdftex]{hyperref} \hypersetup{ colorlinks = true, linkbordercolor = ., linkcolor = [rgb]{0,0.2,0}, % to be changed after ToC&friends urlcolor = [rgb]{0,0,0.5}, citecolor = [rgb]{0.5,0,0} } \definecolor{PATCHGREEN}{rgb}{0,0.5,0} \definecolor{PATCHRED}{rgb}{0.5,0,0} \definecolor{TEXTGRAY}{gray}{0.2} \definecolor{FRAMEGRAY}{gray}{0.6} \usepackage{listings} \usepackage{accsupp} \renewcommand{\lstlistlistingname}{List of Listings} \lstset{ basicstyle=\fontsize{7}{9}\selectfont\ttfamily, % lineskip=-0.2em, numbers=left, numberstyle=\fontsize{7}{9}\color{TEXTGRAY}, inputencoding=utf8, escapeinside={(*}{*)}, extendedchars=true, breaklines=true, postbreak=\mbox{% \hspace{-5em}% {\BeginAccSupp{ActualText={}}% \textcolor{TEXTGRAY}{$\hookrightarrow$}% \space% \EndAccSupp{}% } }, breakautoindent=false, captionpos=b, frame=tbrl, rulecolor=\color{FRAMEGRAY} } \lstdefinelanguage{diffoscope}{ delim = [s][\bfseries]{@@}{@@}, moredelim = [l][\color{PATCHRED}]{\ -}, moredelim = [l][\color{PATCHGREEN}]{\ +} } \lstdefinelanguage{shc-patch}{ delim = [s][\bfseries]{@@}{@@}, moredelim = [l][\color{PATCHRED}]{-shell}, moredelim = [l][\color{PATCHRED}\bfseries]{---}, moredelim = [l][\color{PATCHGREEN}]{+shell}, moredelim = 
[l][\color{PATCHGREEN}\bfseries]{+++} } \definecolor{tango-string}{HTML}{5c3566} \definecolor{tango-comment}{HTML}{5f615c} \definecolor{tango-keyword}{HTML}{346604} \lstdefinelanguage{guix-package-definition}[]{Lisp}{ stringstyle = \color{tango-string}, commentstyle = \itshape\color{tango-comment}, keywordsprefix = {\#:}, keywords = {}, keywordstyle = \color{tango-string}, keywords = [2]{list,lambda,for-each,define-public}, keywordstyle = [2]\color{tango-keyword}, keywords = [3]{\#t}, keywordstyle = [3]\bfseries } \lstdefinelanguage{guix-commit}[]{guix-package-definition}{ delim = [s][\bfseries]{@@}{@@}, moredelim = [l][\color{PATCHRED}]{-\ }, moredelim = [l][\color{PATCHGREEN}]{+\ } } \lstdefinelanguage{shell-command}{ delim = [s][\color{tango-keyword}]{--}{\ }, alsoletter = {\\}, keywords = {\\}, keywordstyle = \color{tango-string} } \lstdefinelanguage{package-lock-json}{ stringstyle = \color{tango-string}, string = [b]{"} } \newcommand{\code}[1]{\mbox{\color{TEXTGRAY}\fontsize{8}{10}\texttt{#1}}} %% https://tex.stackexchange.com/questions/320342/lstinputlisting-ranges-and-unicode-characters \makeatletter \lst@InputCatcodes \def\lst@DefEC{% \lst@CCECUse \lst@ProcessLetter ^^80^^81^^82^^83^^84^^85^^86^^87^^88^^89^^8a^^8b^^8c^^8d^^8e^^8f% ^^90^^91^^92^^93^^94^^95^^96^^97^^98^^99^^9a^^9b^^9c^^9d^^9e^^9f% ^^a0^^a1^^a2^^a3^^a4^^a5^^a6^^a7^^a8^^a9^^aa^^ab^^ac^^ad^^ae^^af% ^^b0^^b1^^b2^^b3^^b4^^b5^^b6^^b7^^b8^^b9^^ba^^bb^^bc^^bd^^be^^bf% ^^c0^^c1^^c2^^c3^^c4^^c5^^c6^^c7^^c8^^c9^^ca^^cb^^cc^^cd^^ce^^cf% ^^d0^^d1^^d2^^d3^^d4^^d5^^d6^^d7^^d8^^d9^^da^^db^^dc^^dd^^de^^df% ^^e0^^e1^^e2^^e3^^e4^^e5^^e6^^e7^^e8^^e9^^ea^^eb^^ec^^ed^^ee^^ef% ^^f0^^f1^^f2^^f3^^f4^^f5^^f6^^f7^^f8^^f9^^fa^^fb^^fc^^fd^^fe^^ff% ^^^^2502^^^^251c^^^^2500% `│', `├' and `─' ^^00} \lst@RestoreCatcodes \makeatother %% was useful when I had no chapters and current chapters were sections % \usepackage[section]{placeins} % https://aty.sdsu.edu/bibliog/latex/floats.html % Alter some LaTeX defaults for better treatment of figures: % See p.105 of "TeX Unbound" for suggested values. % See pp. 199-200 of Lamport's "LaTeX" book for details. % General parameters, for ALL pages: \renewcommand{\topfraction}{0.9} % max fraction of floats at top \renewcommand{\bottomfraction}{0.8} % max fraction of floats at bottom % Parameters for TEXT pages (not float pages): \setcounter{topnumber}{2} \setcounter{bottomnumber}{2} \setcounter{totalnumber}{4} % 2 may work better \setcounter{dbltopnumber}{2} % for 2-column pages \renewcommand{\dbltopfraction}{0.9} % fit big float above 2-col. text \renewcommand{\textfraction}{0.07} % allow minimal text w. figs % Parameters for FLOAT pages (not text pages): \renewcommand{\floatpagefraction}{0.7} % require fuller float pages % N.B.: floatpagefraction MUST be less than topfraction !! 
\renewcommand{\dblfloatpagefraction}{0.7} % require fuller float pages %% \usepackage{float} %% \newfloat{flist}{H}{dummyfile} %% \newenvironment{fitemize} %% {\begin{itemize}} %% {\end{itemize}} %% %% {\begin{flist}\begin{itemize}} %% %% {\end{itemize}\vspace{-2.5em}\end{flist}} %% \newenvironment{fenumerate} %% {\begin{enumerate}} %% {\end{enumerate}} %% %% {\begin{flist}\begin{enumerate}} %% %% {\end{enumerate}\vspace{-2.5em}\end{flist}} \usepackage{enumitem} \usepackage{svg} \usepackage{graphics} %% \newcommand{\workNote}[1]{\emph{\color{red}\footnotesize(#1)}} %% \newcommand{\TODONote}[1]{\emph{\color{red}\footnotesize(TODO: #1)}} %% \newcommand{\marginNote}[2][9in]{% %% \marginpar{% %% \setstretch{1.1}% %% \begin{turn}{90}% %% \begin{minipage}{#1}% %% \workNote{#2}% %% \end{minipage}% %% \end{turn}% %% }% %% } \newcommand{\givemeatilde}{% {\raisebox{0.5ex}{\texttildelow}}% } \newcommand{\tresholdDate}{April 14th, 2025} \newcommand{\tresholdGuixCommit}{\code{143faecec3}} \newcommand{\tresholdDateAndCommit} {\tresholdDate{}, GNU Guix Git commit \tresholdGuixCommit{}} \newcommand{\debianTresholdDate}{June 3rd, 2025} \input{definitions-computed-from-results.tex} \fancypagestyle{fancyplain}{ % \fancyfoot[C]{Kraków, 2025} \renewcommand{\headrulewidth}{0pt} % remove lines as well \renewcommand{\footrulewidth}{0pt} } \title{Software Provenance Assurance through Reproducible Builds} % https://tex.stackexchange.com/questions/15804/how-to-use-the-content-of-title-as-a-reference \makeatletter \let\inserttitle\@title \makeatother \newcommand{\insertauthor}{Wojciech Kosior} \hypersetup{ pdftitle = {\inserttitle}, pdfauthor = {\insertauthor} } \begin{document} \newpage \pagenumbering{roman} \titlepage \thispagestyle{fancyplain} \fancyhf{} \fancyfoot[C]{Kraków, 2025} \mbox{} \vspace{0.5in} \begin{smallStretch} \fontfamily{cmr}\selectfont \sffamily \begin{center} \large \includesvg[ width=0.2\linewidth, inkscapelatex=false ]{agh.svg} \vspace{0.2in} \MakeUppercase{\small\bfseries {\large{}AGH} {\large{}U}niversity of {\large{}K}rakow} \vspace{0.2in} \MakeUppercase{\small\bfseries THE FACULTY OF COMPUTER SCIENCE, \\ ELECTRONICS AND TELECOMMUNICATIONS} \vspace{0.45in} Master's thesis \vspace{0.45in} \textit{\inserttitle} \vspace{0.2in} \textit{\normalsize{}Potwierdzanie autentyczności oprogramowania \\ poprzez powtarzalność kompilacji} \vspace{0.45in} Keywords: software supply chain threats, reproducible builds, software provenance, software packaging, npm Registry \end{center} \vspace*{\fill} \renewcommand{\arraystretch}{\baselinestretch} \columnsCount{2} \begin{tabular}{ m{.35\netTableWidth} m{.65\netTableWidth} } Author: & \insertauthor \\ Major: & Cybersecurity \\ Supervisor: & dr hab.\ inż.\ Piotr Pacyna, prof.\ AGH \end{tabular} \end{smallStretch} \clearpage \chapter*{Acknowledgements} I could have decided not to enroll for the MSc course in the first place. But, having experienced some failures in my career, I started asking what God would like me to do. I recalled that even though at various moments it was unpleasant to be a student, university was a place that suited me more that places I have been to afterwards. I assumed that maybe -- just maybe -- God did not want me to succeed anywhere else because He wants me to be happy here, in the academia. And so I am, finishing this unusual piece of research. Thank God. I also want to thank my wife, Joanna, who supported me even though she regularly had to suffer listening about my boring computer topics. 
I thank my father, who at times seems to care for my successful defence even more than I do.
Finally, I must also thank my supervisor, who -- luckily for me -- was courageous enough to undertake the supervision of a thesis in the software supply chain field, which few students have pursued before.
Thank you!

\begin{smallStretch}
\clearpage
\setcounter{tocdepth}{1}
\tableofcontents
\clearpage
\phantomsection{}
\addcontentsline{toc}{chapter}{\listfigurename}
\listoffigures
\clearpage
\phantomsection{}
\addcontentsline{toc}{chapter}{\lstlistlistingname}
\lstlistoflistings
\clearpage
\phantomsection{}
\addcontentsline{toc}{chapter}{\listtablename}
\listoftables
\end{smallStretch}
\clearpage
\phantomsection{}
\addcontentsline{toc}{chapter}{\abstractname}
\begin{abstract}
Software faces the risk of contamination at multiple stages of its creation and distribution.
One such stage is the software's build, where its initial form -- the source code -- is transformed into a form suitable for distribution.
A build is called reproducible when it yields bit-to-bit identical outputs when repeated.
A reproducible build can be secured by repeating it on multiple infrastructures and comparing the outputs.
This decreases the risk of the software being contaminated through the build infrastructure used.
Certain software projects -- in particular, the Debian operating system -- already leverage reproducible builds as a security tool.
Meanwhile, several software ecosystems rely on repositories whose packages cannot be reliably rebuilt and tested for reproducibility.
An example is the popular repository called the npm Registry.
The software available through the npm Registry gets included in reproducibility-focused distributions like Debian at a slow pace.
The great number and complexity of dependency relations between npm packages were hypothesized to be the primary hindrances to this process.
In this work, a statistical analysis of the npm ecosystem was performed and existing approaches to reproducibility in the context of dependency resolution were identified.
Additionally, alternative approaches were proposed that would allow for easier packaging of npm software while making reproducibility achievable.
To verify the stated hypotheses, an experiment was performed in which builds of the most popular npm projects were attempted.
Projects were built multiple times to test what subset of their npm Registry dependencies could be removed without causing the build to fail.
The complexity and size of a project's minimal dependency tree were found not to be related to the likelihood of the project having a corresponding Debian package.
The results lead to the conclusion that even numerous and complex dependency relations can be handled in existing reproducibility-focused software distributions.
This means that employing the proposed new approaches is not necessary to apply reproducibility to npm software in the future.
It also means that the inclusion of npm software in reproducibility-focused software distributions is mostly hindered by other factors that need to be countered.
These factors are pointed out at the end of this work.
\end{abstract}
\clearpage
\selectlanguage{polish}
\phantomsection{}
\addcontentsline{toc}{chapter}{\abstractname}
\begin{abstract}
Bezpieczeństwo oprogramowania może zostać zagrożone na różnych etapach jego tworzenia i~dystrybucji.
Jednym z~nich jest szeroko rozumiana kompilacja oprogramowania, gdzie jego pierwotna postać -- kod źródłowy -- jest przekształcana do postaci odpowiedniej dla dystrybucji.
Proces kompilacji nazywamy powtarzalnym, jeśli przy wielokrotnym przeprowadzeniu daje wyniki bit do bitu identyczne. Powtarzalny proces kompilacji może zostać zabezpieczony poprzez przeprowadzenie go na różnych infrastrukturach i~porównanie wyników. Zmniejsza to ryzyko zanieczyszczenia kodu oprogramowania przez użytą infrastrukturę. Pewne oprogramowanie -- w~szczególności system Debian -- już wykorzystuje powtarzalność jako narzędzie bezpieczeństwa. Jednocześnie, niektóre ekosystemy oprogramowania polegają na repozytoriach, których pakiety nie mogą być w~sposób niezawodny przekompilowane i~sprawdzone pod kątem powtarzalności. Przykładem jest popularne repozytorium o~nazwie npm Registry. Oprogramowanie dostępne przez~npm Registry jest też, aczkolwiek w~wolnym tempie, włączane do dystrybucji typu Debian dbających o~powtarzalność kompilacji. Według postawionej hipotezy to duża liczba i~złożoność relacji zależności między pakietami npm są głównymi utrudnieniami w~tym procesie. W~ramach pracy została wykonana analiza statystyczna ekosystemu npm oraz zostały zidentyfikowane istniejące podejścia do powtarzalności w~kontekście procesu rozwiązywania zależności. Dodatkowo, zostały zaproponowane alternatywne podejścia, w~których pakowanie oprogramowania npm miałoby być łatwiejsze, a~powtarzalność byłaby wciąż osiągalna. Dla zweryfikowania postawionych hipotez przeprowadzony został eksperyment -- próba kompilacji najpopularniejszych projektów npm. Projekty kompilowano wielokrotnie, sprawdzając, jaka część ich zależności z~npm Registry może zostać usunięta z~zachowaniem powodzenia procesu kompilacji. Złożoność i~rozmiar minimalnego drzewa zależności projektu okazały się nie być powiązane z~prawdopodobieństwiem istnienia odpowiadającego pakietu w~Debianie. Wyniki prowadzą do wniosku, że istniejące dystrubucje oprogramowania dbające o~powtarzalność mogą sobie poradzić także z~licznymi i~złożonymi relacjami zależności. Oznacza to, że wprowadzenie zaproponowanych nowych podejść nie jest konieczne, żeby można było w~przyszłości zastosować powtarzalność do oprogramowania npm. Oznacza to też, że włączanie oprogramowania npm do dystrybucji oprogramowania dbających o~powtarzalność jest w~głównej mierze powstrzymywane przez inne czynniki wymagające zwalczenia. Zostały one wskazane pod koniec pracy. \end{abstract} \selectlanguage{english} \chapter{Introduction} \hypersetup{ linkcolor = [rgb]{0,0.5,0} } \renewcommand{\arraystretch}{1.5} \pagenumbering{arabic} \fancypagestyle{plain}{ \renewcommand{\headrulewidth}{0pt} \renewcommand{\footrulewidth}{0pt} } \rowcolors{2}{gray!20}{white} %% W opisie warto dodac uzupelnienie - wyjasnienie szerokiego podloza problemu, %% ktory jest glownym tematem. Moznaby zacząc od kwestii problemu "supply chain %% management" w ogolnosci (w gospodarce) - wymienic problemy, troski, obawy i %% potrzeby. Most products of modern industry are composed or made using a number of half-products and tools. These tend to come from different producers, who themselves rely on their suppliers. Such half-products might be produced with violations of human rights, unecologically, without meeting certain quality standards, or with patent violations. The assembly from half-products also creates an opportunity for deliberate sabotage on part of the supplier. So far businesses have not always successfully mitigated these threats, which later reverberated in many ways. 
%% W kolejnym akapicie podac krotką charakterystyki problemow z tym związanych,
%% ale juz w kontekscie procesu wytworczego oprogramowania (powtorzyc istotne
%% zagadnienia "supply ..." lub uszczegolowic)
Just as a design is often used to manufacture physical products, code written in a programming language is used to produce software in its target form, e.g., an executable, a firmware image, or a collection of files.
This process of producing software in its target form can be referred to as a software \textbf{build}.
Items resulting from it are \textbf{build outputs}.
A software build process overview is presented in Figure~\ref{fig:build-process-overview}.
Depending on the programming languages and technologies in use, the build might encompass different actions, e.g., macro processing, compilation, linking, bundling, or compressed archive creation.
It can also utilize a multitude of different tools.
Adjusting the build process to meet certain requirements is often a complex task that requires understanding the workings of the various tools involved.
In some software distribution projects, hundreds of lines of scripts are maintained to allow proper building of a single piece of software written by another party.
Builds can even involve applying changes to upstream code, often through the use of patches, i.e., machine-readable files describing changes to project code.

\begin{figure}[htpb]
  \centering
  \includesvg[width=\linewidth,inkscapelatex=false]{build-process-overview.svg}
  \caption{Overview of a sample software build process.}
  \label{fig:build-process-overview}
\end{figure}

Similarly to physical products, software is made using preexisting elements delivered by other parties.
These are often called \textbf{dependencies}.
They include
\begin{itemize}
\item runtime dependencies -- components to be distributed inside or alongside the final program and used during its execution, for example reusable modules of code called libraries or special fonts, and
\item development dependencies -- elements not needed during the program's execution but useful to its developers, further classifiable as
  \begin{itemize}
  \item build dependencies -- compilers, linkers, test frameworks, etc.\ needed in the build process, typically able to function non-interactively and invoked through some layer of automation, sometimes categorized further, for example into native and non-native or into build and test dependencies, and
  \item other development tools -- tools to work with software that are not needed in the actual build process, more often manually operated, like IDEs\footnote{Integrated Development Environments} and their plugins, debuggers, or linters.
  \end{itemize}
\end{itemize}

\section{Problem formulation}

\textbf{If either an external dependency suffers from contamination, the infrastructure handling the build is compromised, or the organization or individuals attempt sabotage, then a backdoor or other security vulnerability can be implanted in the software being built.
In a basic setting, each dependency, the infrastructure, and the organization are all single points of failure.
The last two of these points can be secured through additional verification of build outputs that utilizes software reproducibility.
This work aims at exploiting reproducibility to secure the software build process, with a special focus on eliminating the gaps that would leave single points of failure.}
A reproducible build is one that produces the same, bit-to-bit identical outputs when repeated.
For example, the resulting program executables are bit-to-bit identical.
This concept assumes that a set of \textbf{build inputs} with the same contents is used in every repetition.
E.g., program sources and dependencies in the exact same versions are used.
As a consequence, one prerequisite of reproducibility is \textbf{hermeticity} -- the quality of a build process that depends exclusively on a set of predefined dependencies.
A hermetic build must depend neither on changeable network resources nor on the machine's installed software other than the build's formal inputs.
Hermeticity is usually ensured by performing the software build inside a minimal, isolated environment, often a container utilizing Linux namespaces.
Multiparty verification of a build's reproducible output can help increase confidence that the built software is not contaminated due to a compromise of the infrastructure underpinning the build environment or malicious actions of infrastructure operators.
The verification fails if contamination is present in the output of one build and not in those of the others.
The overviews of successful and unsuccessful verification performed by the end user -- a scheme that does not create unnecessary single points of failure -- are presented in Figures~\ref{fig:rebuilds-no-contamination-diagram} and~\ref{fig:rebuilds-contamination-diagram}, respectively.
Contamination is represented by a frowning face.
The extra confidence coming from verification can empower both software vendors willing to reduce the risk of distributing compromised code and software consumers wanting to secure their operations.

\begin{figure}[htpb]
  \centering
  \includesvg[
    width=\linewidth,
    inkscapelatex=false
  ]{rebuilds-no-contamination-diagram.svg}
  \caption{Overview of a successful multiparty build verification process.}
  \label{fig:rebuilds-no-contamination-diagram}
\end{figure}

\begin{figure}[htpb]
  \centering
  \includesvg[
    width=\linewidth,
    inkscapelatex=false
  ]{rebuilds-contamination-diagram.svg}
  \caption{Overview of an unsuccessful build verification process.}
  \label{fig:rebuilds-contamination-diagram}
\end{figure}

Single-party verification is also applicable if only the infrastructure threats are considered.
Meanwhile, the party itself retains the ability to undetectably compromise software builds, i.e., implant backdoors.
Figure~\ref{fig:rebuilds-contamination-1party} depicts an occurrence of such a compromise while single-party verification of the build's reproducible output is taking place.
Contamination is represented by a frowning face.
In addition to the above, just as reproducible builds performed by a single organization are insufficient to protect against contamination introduced deterministically by the organization, reproducible builds performed on a single infrastructure would be insufficient to protect against contamination spreading deterministically from that infrastructure.

\begin{figure}[htpb]
  \centering
  \includesvg[
    width=\linewidth,
    inkscapelatex=false
  ]{rebuilds-contamination-1party.svg}
  \caption{Overview of a build verification process which only compared the outputs of builds performed by a single party and failed to detect malice.}
  \label{fig:rebuilds-contamination-1party}
\end{figure}

For software to be secured with reproducible builds, its build process has to gain the quality of reproducibility, where repetition of the same build produces output without variations.
Achieving that quality can itself be challenging.
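As a minimal illustration -- with hypothetical file names and an arbitrarily chosen timestamp -- Listing~\ref{lst:source-date-epoch-sketch} sketches how a small build can be repeated with a pinned timestamp and how the two outputs can then be compared with a dedicated tool.

\begin{lstlisting}[
  float=htpb,
  language=shell-command,
  caption={A hedged sketch of repeating a small build with a pinned timestamp and comparing the two outputs. File names are hypothetical.},
  label=lst:source-date-epoch-sketch,
  numbers=none
]
# Pin the timestamp picked up by tools honouring SOURCE_DATE_EPOCH
# (e.g. the __DATE__/__TIME__ macros of GCC 7 and later).
export SOURCE_DATE_EPOCH=1735689600   # 2025-01-01 00:00:00 UTC

gcc -o hello-build1 hello.c           # first build
gcc -o hello-build2 hello.c           # repetition, e.g. on another machine

# diffoscope prints a detailed report of any differences found
# and exits with status 0 when the two files are identical.
diffoscope hello-build1 hello-build2
\end{lstlisting}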
Achieving reproducibility involves identifying the sources of the build process's nondeterminism -- such as timestamps and a changing order of filenames in directory listings.
Identified sources need to be removed, e.g., by use of a fixed date in timestamps or by sorting the filenames obtained from directory scans.
Achieving this is nowadays easier because common sources of nondeterminism have already been investigated and workarounds have been implemented.
For example, since version 7 the GNU C Compiler checks for the existence of a \code{SOURCE\_DATE\_EPOCH} environment variable containing a time value.
It automatically uses this value in generated file timestamps~\cite{source-date-epoch}; such usage is sketched in Listing~\ref{lst:source-date-epoch-sketch}.
Additionally, dedicated tooling for investigating non-reproducibility issues has been developed, notably the \textbf{diffoscope} program~\cite{DBLP:journals/corr/abs-2104-06020}.
To decrease the chance of contamination from a compromised operating system, firmware, and hardware, the actual build -- once its reproducibility issues are resolved -- should be performed on infrastructures that differ as much as possible, except for the invariant set of build inputs and the environment configuration needed to ensure reproducible outputs.
This work does not address the challenges of avoiding nondeterminism in software builds.
Instead, the work's goal is to ensure that -- in practical scenarios -- the build inputs remain invariant in all build repetition attempts.
The work's second major concern is that all machine-performed operations -- even those deemed preparatory -- can have their effect on the build output controlled through reproducibility.
All of this, in turn, can make reproducible builds a more reliable and more complete security mechanism.
Despite their benefits, one should nevertheless realize that reproducible builds only address a particular subset of software supply chain threats -- ones affecting the build process -- and are mostly useful if other stages of that chain are also secured.
Some of the practices that can help with this are
\begin{enumerate}
\item making sure the software sources relied upon are audited against backdoors, at least informally, e.g., by virtue of being developed in the Bazaar model, where the public has a high chance of noticing malicious changes~\cite{raymond2001cathedral},
\item making sure the software sources do not get altered by any party after the verification from the step above, and
\item using reproducible builds and other mechanisms to verify software's dependencies, possibly recursively.
\end{enumerate}
One example of a threat not remediated through reproducible builds alone is the widely publicized XZ backdoor from 2024.
Among others, it targeted the Debian and Fedora software distributions.
The backdoor's activation code was only present in the official source release archives and not in the source repository, which is most often looked at by programmers~\cite{journals/corr/abs-2404-08987}.
When the software was built from the source archives, the backdoor code was linked in, but the build results were deterministic.
Attacks of this form would not be possible if the source archives were verified to correspond to the version-controlled software sources.

\chapter{Contemporary guidance and standards in the field of software supply chain security}

Threats related to the software build and delivery process have been known for a long time.
One interesting self-implanting compiler backdoor was described by Ken Thompson in his Turing Award lecture in 1984, ``Reflections on Trusting Trust''~\cite{Thompson:1984}.
Later signs of interest in supply chain threats in certain circles include, for example, David A.\ Wheeler's PhD dissertation titled ``Fully Countering Trusting Trust through Diverse Double-Compiling'' \cite{phd/basesearch/Wheeler09a} and, eventually, Bitcoin's use of the Gitian deterministic builder\footnote{before replacing Gitian with GNU Guix in 2021}.
Also, for many years certain software distributions have been compiling their software from sources on dedicated servers, in isolated environments with only the minimum dependencies for a given build.
One example of such a distribution is Debian.
At the same time, for the wider public it has been the norm to rely on prebuilt software that gives virtually no guarantee of having been built in a secure environment.
Multiple software repositories, like the npm Registry and PyPI\footnote{Python Package Index}, allow publishing developer-built software packages.
Such software -- built from a valid VCS\footnote{version control system} checkout but on the developer's infected machine -- could be maliciously modified and distributed to unaware software integrators wishing to utilize it in their projects.
Neither cryptographic signing of packages nor VCS source code audits would mitigate such attacks.
Several spectacular supply chain attacks of recent years became a catalyst for work on increasing the level of security.
In the case of the SolarWinds attack from 2020, also known under the name Sunburst, software distributed among reportedly more than $18\,000$ customers turned out to contain a backdoor implanted after a compromise of the vendor's infrastructure~\cite{conf/uic/SterleB21}.
It was exactly the kind of threat that reproducible builds address.
As a result of the event, SolarWinds Corporation suffered great economic losses and damage to its brand image.
Additionally, the company exposed thousands of customers to cyberattacks leveraging its compromised software.
All of this could have been avoided through reproducible verification of software build outputs.
As more attacks on software build and distribution are reported, software supply chain security becomes a hot topic.
It attracts the attention of public institutions and private organizations alike.
Some prominent undertakings by nonprofits are: the launch of OWASP's\footnote{Open Worldwide Application Security Project} SCVS\footnote{Software Component Verification Standard} in 2019, the foundation of OpenSSF\footnote{Open Source Security Foundation} in 2020, the launch of its SLSA\footnote{Supply Chain Levels for Software Artifacts} framework in 2021, Microsoft's donation of S2C2F\footnote{Secure Supply Chain Consumption Framework} to OpenSSF in 2022, as well as the publishing of CNCF's\footnote{Cloud Native Computing Foundation, a project of Linux Foundation} ``Software Supply Chain Best Practices'' in 2021.
State actors also weighed in by means of ``Securing the Software Supply Chain: Recommended Practices for Developers'' and two subsequent guides from 2022, developed by the ESF\footnote{Enduring Security Framework} partnership with support from CISA\footnote{Cybersecurity and Infrastructure Security Agency}, the NSA\footnote{National Security Agency}, and the Office of the Director of National Intelligence.
Another possibly relevant document is NSA's ``Recommendations for Software Bill of Materials (SBOM) Management'' from 2023.
\section{Software Component Verification Standard}

SCVS \cite{owasp-scvs} describes itself as a ``community-driven effort to establish a framework for identifying activities, controls, and best practices, which can help in identifying and reducing risk in a software supply chain''.
Despite being developed by OWASP, it is generic and not limited in scope to web applications.
The authors recognize the unfeasibility of applying all good practices and threat mitigations at every phase of every software project and categorize their security requirements into three levels, each implying the previous one and extending it.
Practices listed in SCVS are grouped into six topics and formulated briefly.
They are agnostic about the technology stack and data formats in use.
A lengthy explanation of the importance of the prescribed actions is not part of the document.
As of version 1.0 of the standard, level 2 requirements include a method to locate ``specific source codes in version control'' that correspond to a given version of a third-party package from a software repository.
While it is stated that the correspondence must be verifiable, further details are not given.
An SBOM\footnote{software bill of materials} and a repeatable build process are required for an application being developed but not for third-party components.
Additionally, the listed practices regarding the build environment mention neither the goal of reproducibility nor the weaker hermeticity.
While the authors might have -- justifiably -- judged such rules unfeasible given the current state of the software ecosystem, this omission is interesting from the point of view of this work.
Threats that could not be addressed a few years ago in a generic setting might be remediable now in the context of one or several technology stacks.

\section{Supply Chain Levels for Software Artifacts}

SLSA \cite{slsa} uses a similar but conceptually more complex categorization than SCVS.
Practices are to be assigned to so-called ``tracks'', which correspond to different aspects of software supply chain security and which might use different numbers of security levels.
As of framework version 1.0, there only exists a ``Build'' track with three levels, not counting the empty zeroth level.
In addition to the specification of requirements, SLSA documents different threats grouped into those concerning the source, dependencies, build, availability, and verification of an artifact.
Historical examples of attacks using some of these techniques are listed in the documentation.
Finally, it also includes instructions on how to apply SLSA and how to use it with attestation formats from the in-toto framework~\cite{conf/uss/Torres-AriasAKC19}.
Many aspects of the build process are addressed in the specified requirements set, but the qualities of hermeticity and reproducibility were removed from the set at the drafting stage.
SLSA explicitly calls verified reproducible builds one of multiple methods of implementing the requirements.
In the context of the particular threat of compromised infrastructure, the framework's focus is instead on stronger security controls for the build platform.
The platform, however, remains a single point of failure.
Incidents like that of SolarWinds could still occur.
Reproducibility and hermeticity might be re-introduced in subsequent revisions of SLSA, as explained on its ``Future directions'' page.
The specification currently also does not cover the recursive application of its requirements to the input artifacts used.
It is nonetheless suggested that users could apply SLSA independently to transitive dependencies.
This approach is presented as a possible mitigation of attacks like the one performed on the event-stream library in 2018.

\section{Secure Supply Chain Consumption Framework}

S2C2F \cite{s2c2f} is complementary to SLSA in that it embraces the software consumer's point of view.
It introduces four ``levels of maturity'' of requirements, with the highest level mandating a consumer-performed rebuild of all artifacts.
Having the artifact built reproducibly by several third parties is mentioned as an alternative approach.
Neither method is presented as more secure, even though a local rebuild still suffers from being a single point of failure.

\section{``Software Supply Chain Best Practices''}

As of version 1, this paper \cite{cncf-sscp} recognizes three categories of risk environments and three categories of assurance requirements.
These are -- in both cases -- ``low'', ``moderate'', and ``high''.
A methodology for securing the software supply chain is presented in five stages, with four themes -- ``Verification'', ``Automation'', ``Authorization in Controlled Environments'', and ``Secure Authentication'' -- recurring across them.
Recommendations are organized into paragraphs rather than tables or lists.
The authors point towards existing tools useful for some of the tasks, notably the in-toto framework~\cite{conf/uss/Torres-AriasAKC19} and the Rebuilderd system~\cite{drexel2025reproduciblebuildsinsightsindependent}.
At the same time, they openly admit that some of the practices they describe might require extra effort to implement because certain challenges have not yet been countered by the supply chain industry.
Reproducible builds are presented as potentially leverageable when high assurance is needed.
The topic is discussed in more detail than in the previous documents from OWASP and OpenSSF.
In addition, hermeticity is included as a recommendation for high-risk and high-assurance environments.
Recursive dependencies are treated with the same care as the direct ones, consistently with the authors' statement that ``a supply chain's security is defined by its weakest link''.
The issue of bootstrapping a system image for builds is also discussed in the paper.

\section{``Securing the Software Supply Chain: Recommended Practices Guide''}

The series was developed by a public-private working group with members from both the industry and U.S.\ government agencies.
It is described as informational only and does not define any standard.
Subsequent parts are addressed to software developers, suppliers -- who are considered to be ``liaising between the customer and software developer'' -- and customers.
Although the series does not group recommendations into levels, two mitigations in the first guide from August 2022 \cite{nsa-esf-recommended-practices-devs} are called ``advanced'' and described as providing ``additional protection''.
These are the hermetic and reproducible builds.
A suggestion is made that the same builds be performed ``in both cloud and on-premise environments'' and their outputs compared.
Additionally, the authors state that a justification should be required when it is impossible to perform a certain build reproducibly.
The text of this requirement was copied verbatim from the SLSA draft, dating back to before it was removed there.
The guide also recommends that images used to deploy the build environment should be created from sources except where ``there is an understanding of the provenance and trust of delivery''.
No statements explicitly concerning rebuilds of transitive dependencies of a product are made.

\section{``Recommendations for SBOM Management''}
\label{sec:recommendations-for-sbom}

The paper \cite{nsa-sbom-management} calls itself guidance.
It lists recommendations for general software suppliers and consumers but also dedicates a large part to users and owners of NSS\footnote{U.S.\ national security systems -- a specific category of information systems used on behalf of U.S.\ agencies}.
The document's primary focus is on functionalities that tools used to manage SBOMs should provide.
NSA's guidance concerns SBOMs, which hold information about the software components comprising the final product.
The guidance does not directly address build process threats and does not touch the topics of reproducibility and transitive dependencies of software.
In fact, the industry recognizes another type of bill of materials, not mentioned in the document, which is more relevant to the topic of reproducibility than the SBOM: the manufacturing bill of materials (MBOM).
In the context of software, an MBOM conveys information about all components needed for its build.
This also includes the project's build dependencies, which would not be recorded in an SBOM.
MBOMs are relevant from the reproducibility perspective because the information in them can make software rebuilds possible.
Even though MBOMs are not directly mentioned in version 1.1 of NSA's guidance, one of the recommendations present there is labeled as ``Scalable architecture''.
It is described as one that can also ``handle other types of BOMs''.

\section{Summary}

The published documents' attitudes to reproducibility and hermeticity range from agnosticism to suggestion and recommendation in the context of certain environments.
Reproducibility and its prerequisite -- hermeticity -- are difficult to achieve with a large subset of existing popular software projects.
This difficulty might explain the limited focus on these measures in documents other than CNCF's ``Software Supply Chain Best Practices''.
It appears that the means of securing the software supply chain which are more straightforward to employ are also more often recommended.
In that case, making reproducibility easier to achieve for all kinds of software projects should lead to it being more frequently discussed and therefore more broadly leveraged.

\chapter{Security tools leveraging reproducible builds}
\label{chp:existing-security-tools}

Several initiatives and pieces of software exist that are concerned with the verification of the reproducibility of software packages.
The champion of these efforts is the Reproducible Builds project, also affiliated with Debian.

\section{in-toto apt-transport for Debian packages}

The in-toto framework, developed under the CNCF, aims to secure the integrity of software supply chains~\cite{conf/uss/Torres-AriasAKC19}.
Debian GNU/Linux is an operating system distribution founded in 1993.
It provides thousands of pieces of software in the form of \textbf{packages} that can be installed in the system via Debian's package manager, APT\footnote{Advanced Package Tool}.
In 2018 it became possible to use in-toto together with Debian's APT to check that a package being installed has been verified through reproducible builds.
The package manager can be configured to abort installation if the package was not reproduced by at least $k$ independent rebuilders, with $k$ configurable by the user.
In the process, cryptographically signed attestations of rebuilt packages are fetched over the network from rebuilder URIs that are also configured by the user. \subsection{How a Debian package is made and rebuilt} \label{sub:how-debian-package-is-made} Most packages available in Debian contain software written by third parties, i.e., the \textbf{upstream}. That software was released in source form and subsequently integrated into Debian. A package typically has a maintainer who is a Debian volunteer taking care of the package, possibly with the aid of other people~\cite[Chapter~1]{debian-new-maintainers-guide}. Upon initial creation or a version update of a Debian package, its maintainer first prepares what is called a \textbf{source package}. It is the primary input of the build process that will produce the final, installable package. The installable package is also sometimes called a \textbf{binary package} to distinguish it from the source package. A single build process with a single source package can also prouce multiple binary packages. For example, a programming library written in the C programming language can have its dynamically linked binary and its header files placed in distinct binary packages. As of \debianTresholdDate{}, the current release of Debian -- Debian 12, codenamed ``Bookworm'' -- offered $63\,465$ packages for x86\_64 architecture, as found in its \code{main} pool. They were produced from $34\,217$ source packages. %% This is actually 38,546 buildinfo files and this many builds -- because %% separates builds are conducted with a single source pacakge to produce %% architecture-specific and architecture-independent outputs… %% gawk ' %% BEGIN{ %% delete registered_builds[""]; %% } %% /^Package:/{ %% source = 0; %% ver = 0; %% arch = 0; %% } %% /^Source:/{ %% source = $2; %% } %% /^Source:.*[(].*[)]/{ %% ver = gensub("Source: .*[(](.*)[)]", "\\1", 1); %% } %% /^Architecture:/{ %% arch = $2; %% } %% /^Filename:/{ %% prefix_dir = gensub("Filename: pool/main/([^/]+).*", "\\1", 1); %% if (!source) { %% source = gensub("Filename: pool/main/[^/]+/([^_/]+).*", "\\1", 1); %% } %% if (!ver) { %% ver = gensub("[^_]+_([^_]+).*", "\\1", 1); %% } %% source_id = source "_" ver; %% build_id = source_id "_" arch; %% dir_url = "https://buildinfos.debian.net/buildinfo-pool/" \ %% prefix_dir "/" source %% url = dir_url "/" build_id ".buildinfo"; %% if (!(build_id in registered_builds)) { %% print source_id " " build_id " " url; %% registered_builds[build_id] = 1; %% alt_url = dir_url "/" build_id "-source.buildinfo"; %% print source_id " " build_id " " alt_url; %% } %% } %% ' Packages > buildinfo-urls %% gawk '{print $1}' buildinfo-urls | sort | uniq | wc -l %% mkdir -p buildinfos %% process_line() { %% local SOURCE_ID=$1 %% local BUILD_ID=$2 %% local URL=$3 %% if [ ! 
%% -e buildinfos/$BUILD_ID.buildinfo ]; then
%% if [ -e buildinfos-dumb/$BUILD_ID.buildinfo ]; then
%% mv buildinfos-dumb/$BUILD_ID.buildinfo buildinfos/$BUILD_ID.buildinfo;
%% else
%% wget --no-verbose -O buildinfos/$BUILD_ID.buildinfo $URL \
%% >> buildinfos-download.log
%% fi
%% fi
%% }
%% while read LINE; do
%% process_line $LINE;
%% done < buildinfo-urls
An official Debian source package typically consists of
\begin{enumerate}
\item software sources taken from the upstream -- possibly with inappropriately licensed components removed and other changes applied to meet Debian guidelines -- taking the form of one or more compressed archives,
\item the package's recipe, taking the form of a compressed archive, including, among others,
  \begin{itemize}
  \item a list of the software's build and runtime dependencies, placed -- with other metadata -- in a file named \code{debian/control}, with an example in Listing~\ref{lst:debian-control-excerpts},
  \item optional patch files that describe Debian-specific changes which are to be applied to the upstream software as part of the automated build process, with an example in Listing~\ref{lst:debian-shc-patch}, and
  \item a script directing the build process, placed in a file named \code{debian/rules}, invoked as a Makefile, with an example in Listing~\ref{lst:debian-rules-excerpt}, and
  \end{itemize}
\item a text file with the \code{.dsc} suffix containing cryptographically signed source package metadata, including hashes of the compressed archives from the above points.
\end{enumerate}

\lstinputlisting[
  float=htpb,
  caption=Excerpts from a $176$-line-long \code{debian/control} file of the \code{nodejs} Debian package.,
  label=lst:debian-control-excerpts,
  numbers=none
]{debian-control-excerpt.txt}

\lstinputlisting[
  float=htpb,
  language=shc-patch,
  caption={A patch used by the Debian package \code{shc} to provide an upstream script with the correct path of the \code{rc} executable, as present in Debian.},
  label=lst:debian-shc-patch
]{debian-shc.patch}

\lstinputlisting[
  float=htpb,
  caption=Excerpt from a $2045$-line-long \code{debian/rules} Makefile of the \code{binutils} Debian package.,
  label=lst:debian-rules-excerpt,
  numbers=none
]{debian-rules-excerpt.txt}

The package maintainer is likely to perform one or several builds when working on a new or updated source package.
However, except for special cases, it is only the source package and not the maintainer-built binary packages that gets uploaded to what is called the \textbf{Debian archive}.
Uploaded source packages are ``built automatically by the build daemons in a controlled and predictable environment''~\cite[Chapter~5]{debian03developers}.
Besides producing binary packages, the build daemons also record the metadata of performed builds, which is later published with cryptographic signatures as \code{.buildinfo} files\footnote{which can be considered a type of the MBOM described in \ref{sec:recommendations-for-sbom}}.
For a given binary package, it is usually possible to locate and download its corresponding \code{.buildinfo} file.
That file contains, among other things, a list of names and versions of the Debian packages that were installed in the minimal build environment.
An example of a \code{.buildinfo} file is shown in Listing~\ref{lst:haskell-sth-buildinfo-excerpts}.
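As a rough illustration of the developer-facing side of this machinery, Listing~\ref{lst:debian-manual-rebuild-sketch} sketches how a source package can be fetched and rebuilt locally with standard Debian tooling.
The \code{shc} package serves purely as an example here; the commands assume \code{deb-src} entries in APT's configuration, and this manual procedure is distinct from the controlled environment used by the official build daemons.

\begin{lstlisting}[
  float=htpb,
  language=shell-command,
  caption={A sketch of manually rebuilding the \code{shc} Debian package from its source package (illustrative; not the procedure used by the build daemons).},
  label=lst:debian-manual-rebuild-sketch,
  numbers=none
]
# Fetch and unpack the source package (upstream archive, Debian
# recipe and the signed .dsc file).
apt-get source shc

# Install the build dependencies declared in debian/control
# (requires root privileges).
apt-get build-dep shc

# Build unsigned binary packages from the unpacked source tree.
cd shc-*/
dpkg-buildpackage -us -uc -b
\end{lstlisting}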
\lstinputlisting[
  float=htpb,
  caption=Excerpt from a $256$-line-long \code{.buildinfo} file of the \code{haskell-base-compat-batteries} Debian package.,
  label=lst:haskell-sth-buildinfo-excerpts,
  numbers=none
]{haskell-sth-buildinfo-excerpts.txt}

The \code{.buildinfo} files can be used by parties other than The Debian Project to rebuild the official packages, write in-toto metadata of the process, sign it, and subsequently serve it to the users.

\section{\code{guix challenge} command of GNU Guix}

GNU Guix is an operating system distribution and a package manager that appeared in 2013~\cite{conf/els/Courtes13}.
It implements the functional package management model described by Eelco Dolstra in ``The Purely Functional Software Deployment Model'' in 2006 \cite{phd/basesearch/Dolstra06} and pioneered by the Nix package manager.
For the avoidance of confusion with an unrelated ``GUIX'' U.S.\ trademark registered in 2019 and owned by Microsoft, the GNU Guix package manager shall be referred to with its ``GNU'' prefix throughout this document.
Similarly to Debian, GNU Guix relies on software written by upstream authors and makes it available in the form of installable packages.
However, the data formats, build mechanisms, and nomenclature differ.
The equivalent of Debian's binary package is referred to as a \textbf{substitute}.
Since 2015, GNU Guix provides a \code{guix challenge} command which ``allows users to challenge the authenticity of substitutes provided by a server''~\cite{gnu-guix-0.9.0-released}.
End users can invoke this command to compare the outputs of package builds performed on multiple infrastructures and report which packages were not built reproducibly -- either due to nondeterminism of the build process or because of a compromise of the build.
Listing~\ref{lst:guix-challenge-sketch} shows a possible invocation.

\subsection{How a GNU Guix package is made and built}
\label{sub:how-gnu-guix-package-is-made}

The GNU Guix package collection is determined by a set of package recipes.
Unlike Debian package recipes, these do not take the form of compressed archives.
Here, packages are defined in the Scheme programming language from the Lisp family.
A recipe consists of code that instantiates and populates a \code{<package>} data structure representing a package that can be built.
The recipe's code can use the facilities of the Turing-complete Scheme programming language to dynamically compute parts of this new \code{<package>} instance.
A \code{<package>} instance does -- in most cases -- get bound to a Scheme variable, which other recipes' code can reference.
Lists of a package's explicit build and runtime dependencies are typically constructed using references to other package variables.
A package recipe example is shown in Listing~\ref{lst:guix-python-axolotl-package}.
It defines a variable of the same name as the package and declares its explicit dependencies by referencing \code{python-protobuf} and two other package variables.
A Scheme code snippet is supplied to be executed during the hermetic build process.
It customizes the process by deleting unnecessary files.

\lstinputlisting[
  float=htpb,
  language=guix-package-definition,
  caption=Recipe of the \code{python-axolotl} GNU Guix package.,
  label=lst:guix-python-axolotl-package,
  numbers=none
]{guix-python-axolotl-package.scm}

The recipes of all official GNU Guix packages are kept and maintained in a single VCS repository.
As of \tresholdDate{}, this is a repository that houses both the recipes collection and the GNU Guix application, although this setup is not imposed by design and might change in the future.
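As an aside, Listing~\ref{lst:guix-challenge-sketch} shows what an invocation of the \code{guix challenge} command mentioned above might look like; the chosen package and the two public substitute servers are merely examples.

\begin{lstlisting}[
  float=htpb,
  language=shell-command,
  caption={A possible invocation of \code{guix challenge}, comparing the build outputs of the \code{coreutils} package across two public substitute servers (illustrative).},
  label=lst:guix-challenge-sketch,
  numbers=none
]
# Compare the locally known build output of the "coreutils" package
# with the outputs advertised by two substitute servers.
guix challenge coreutils \
  --substitute-urls="https://ci.guix.gnu.org https://bordeaux.guix.gnu.org"
\end{lstlisting}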
Recipes in the repository can also have accompanying patch files.
However, patches are used less frequently here than in Debian package recipes.
Regular expression substitutions performed by Scheme code are preferred by GNU Guix developers for trivial modifications of upstream sources.
Package recipes in GNU Guix generally reference remote software sources using URLs and hashes that are considered cryptographically secure.
The hashes are used to verify the sources' integrity upon download and make it possible to safely download them from fallback servers, among which is the archive of Software Heritage \cite{journals/corr/abs-2405-15516}.
Several remote resource formats are supported, including traditional compressed archives as well as repositories of popular VCSes.
Referencing VCS repositories of upstream projects -- although not practiced universally in the recipes collection -- allows the correspondence of build inputs to publicly reviewed sources to be more easily tracked.
The GNU Guix package manager is able to build packages locally, on the system on which their installation was requested.
Each deployment of GNU Guix comes -- by design -- with a copy of the recipes collection, such that only the source inputs -- identified by cryptographic hashes -- need to be downloaded for a build to be performed.
Local builds are fully automated, are performed in isolated environments created in the background, and are a first-class citizen in the model of GNU Guix.
For practical reasons, it is made possible to instead download prebuilt packages -- the substitutes that substitute their locally built equivalents.
The \code{guix challenge} command allows the build outputs advertised by the configured substitute servers to be compared with each other and with the outputs of local builds, when available.
The lack of automated integration of reproducibility verification with package deployment is a notable limitation of the \code{guix challenge} command of GNU Guix.
The command has to be invoked explicitly by the user.
As of \tresholdDate{}, there is no official way to \textbf{automatically} challenge the binary substitutes that GNU Guix downloads as part of other actions, such as software installation.
Thus, in practice, the command is less usable as an end user's preventive security tool and more as an investigation and internal verification aid.
Bitcoin Core, possibly the most famous piece of software using GNU Guix to reproducibly verify its binaries, does not rely on the \code{guix challenge} command and instead uses its own custom scripts to perform code signing.

\section{Continuous tests}

The tools presented so far allow end users to verify binaries before they are put in use.
The user can first learn whether a set of software packages was rebuilt with bit-to-bit identical results on independent infrastructure and can then make an informed decision whether to install the packages.
The benefit of this type of verification is that it leaves no single point of failure, except for the end user's device.
However, if the latter were compromised in the first place, no software-based scheme would reliably remediate that.
This scenario is therefore out of the scope of this work.
The drawback of this type of verification is that accidental non-reproducibility due to an overlooked source of nondeterminism in the build process leads to verification failures, as depicted in Figure~\ref{fig:rebuilds-inconclusive-diagram}.
Lag on the part of independent rebuilders can likewise make verification impossible, as also shown in the figure.
If reproducible builds are to be used as a preventive security measure, any such failure would need to stop the end user from performing the attempted software installation or update.
Until the percentage of reproducibly buildable software in distributions is close to 100\% and enough resources are invested in independent infrastructure performing continuous rebuilding, this problem can be prohibitive.

\begin{figure}[htpb]
  \centering
  \includesvg[
    width=\linewidth,
    inkscapelatex=false
  ]{rebuilds-inconclusive-diagram.svg}
  \caption{Overview of a verification process inconclusive due to a rebuilder's delay and a lack of determinism.}
  \label{fig:rebuilds-inconclusive-diagram}
\end{figure}

However, there are several projects where verification of reproducibility is performed by someone other than the end user.
Although this does not eliminate the single point of failure from the software installation process, such verification can still make supply chain attacks harder.
For example, if an organization performs internal tests of reproducibility and analyzes their results, it is more likely to detect code contamination early on and react to it.
As of \debianTresholdDate{}, the Reproducible Builds project performs continuous tests of the reproducibility of
\begin{itemize}
\item files from the coreboot, FreeBSD, NetBSD, and OpenWrt projects, as well as
\item packages from Debian repositories, with package reproducibility statistics being reported, as in Figure~\ref{fig:bookworm-stats-pkg-state}.
\end{itemize}

\begin{figure}[htpb]
  \centering
  \includegraphics[width=\linewidth]{bookworm-stats-pkg-state.png}
  \caption{Reproducibility of Debian Bookworm packages over time, as presented on Reproducible Builds' continuous tests website.}
  \label{fig:bookworm-stats-pkg-state}
\end{figure}

As of \debianTresholdDate{}, $33\,214$ source packages from Debian Bookworm were reported to have been rebuilt reproducibly for the x86\_64 architecture.
That means approximately 97\% reproducibility in the collection.
The remaining packages either could not be built on the Reproducible Builds infrastructure, for various possible reasons, or were built with outputs differing at the binary level.
The Reproducible Builds project also lists several other projects -- including the previously mentioned GNU Guix and its predecessor NixOS -- that monitor the reproducibility of their files and/or repository packages without relying on the Reproducible Builds' infrastructure~\cite{reproducible-builds-continuous}.
One notable undertaking in this category is the development of the Rebuilderd tool for reproducible rebuilds of packages from Arch Linux and, recently, other distributions~\cite{drexel2025reproduciblebuildsinsightsindependent}.
An application also exists that can consult a Rebuilderd instance to automatically verify the packages installed in the user's Arch Linux system~\cite{archlinux-repro}.
The continuous tests platform used by GNU Guix is capable of generating reproducibility reports, which are viewable on pages at \url{https://data.guix.gnu.org}.
Part of such a report is shown in Figure~\ref{fig:guix-pkg-repro-stats}.
According to it, there were $39\,344$ packages available for the x86\_64 architecture as of \tresholdDateAndCommit{}.
$35\,415$ of them -- approximately 90\% -- were rebuilt reproducibly, albeit with $2\,087$ remaining untested.
Frequent package updates and builders' lag are possible reasons for the large number of untested packages.
Out of all the successfully rebuilt GNU Guix packages, approximately 95\% had outputs that were bit-to-bit identical with those produced on another infrastructure. \begin{figure}[htpb] \centering \includegraphics[width=\linewidth]{guix-pkg-repro-stats.png} \caption{Reproducibility of GNU Guix packages as reported by its continuous tests platform.} \label{fig:guix-pkg-repro-stats} \end{figure} Unfortunately, several of the reproducibility tests listed on the Reproducible Builds website have become unmaintained. The testing for the Fedora and Alpine operating system distributions was disabled at some point. Although the reproducibility statistics of GNU Guix packages are still delivered, their web pages sometimes cannot be viewed due to timeouts, as also witnessed by the Internet Archive's Wayback Machine\footnote{\url{https://web.archive.org/web/20250625124729/https://data.guix.gnu.org/repository/1/branch/master/latest-processed-revision/package-reproducibility}}. \chapter{Applicability of reproducibility workflows to different software ecosystems} \label{chp:applicability-of-workflows} Current reproducible software distributions, like GNU Guix and Debian, are \textbf{system software distributions} -- ones that contain a collection of packages that can form a complete operating system. As such, a mixture of software technologies can be found in them. Certain programming languages and computing platforms have software ecosystems centered around them, for instance, the ecosystem of the Python programming language with CPython\footnote{the most popular implementation of the Python programming language, written in C} and PyPy being its most popular runtimes. These ecosystems evolve their own software package formats and workflows for building such packages. Many popular ecosystems have their dedicated package repositories, which usually serve as the primary distribution channels for software written for the ecosystem's computing platform. Such ecosystem-specific software repositories are often, although imprecisely, referred to as language-specific repositories. They are typically open to the public for registration and package creation. As such, they form environments of software with varying levels of quality and significance. Many ecosystem-specific repositories distribute software without all the metadata that is necessary to automate rebuilding it. Moreover, if a project uses multiple packages from such a repository, it relies on the security of each of the machines used by these packages' developers for builds and uploads. A partial remedy -- a facility to publish packages together with build provenance data cryptographically signed by a build service -- was employed by the ecosystem-specific repositories \textbf{npm Registry} and \textbf{PyPI}\footnote{Python Package Index} in 2023 and 2024, respectively. A dedicated build service -- with the most popular ones being GitHub Actions and GitLab CI/CD -- can be considered better secured than an average developer's computer, which benefits packages that are confirmed to have been built there. In addition, build provenance data identifies the source repository used in the build. However, even when well secured, a build service remains a single point of failure in the process. In addition, support for only one or a few selected build services -- as offered by the npm Registry as of \tresholdDate{} -- leads to vendor lock-in.
\section{Degree of inclusion in Debian and GNU Guix} Software utilizing the respective computing platforms and distributed primarily through an ecosystem-specific software repository might, at some point, also get included in a system software distribution. However, this has so far not happened with certain popular and strategically important pieces of software. One example is the Electron framework, which is used, among others, by the Signal application and the Visual Studio Code IDE. As of \tresholdDate{}, Electron is declared a development dependency by $4\,533$ packages in the npm Registry. At the same time, software distributions that test for package reproducibility usually lack Electron and Electron-based applications, as do Debian and GNU Guix. Another software distribution that tests for package reproducibility, NixOS, redistributes Electron's upstream binaries without actually building the software. In this case, the build itself is not being verified through reproducibility. Certain programming languages and computing platforms have seen more packaging progress in system software distributions. Let us consider the PyPI, npm, and crates ecosystems, which are centered around their respective repositories and technologies, as shown in Table \ref{tab:software-ecosystems}. For this work, repositories were chosen as a basis for distinguishing the ecosystems. It is a feasible criterion, although not the only possible one. There are overlaps of various sizes between different repositories, runtimes, and project management tools. Also, in some cases a software package has formal or informal dependencies that are not distributed through the repository that the package itself uses. \begin{table}[htpb] \caption{Considered software ecosystems.} \centering \label{tab:software-ecosystems} \footnotesize \columnsCount{4} \begin{tabular}{ >{\raggedright\arraybackslash}p{.31\netTableWidth} >{\raggedright\arraybackslash}p{.23\netTableWidth} >{\raggedright\arraybackslash}p{.23\netTableWidth} >{\raggedright\arraybackslash}p{.23\netTableWidth} } \rowcolor{gray!40} & \textbf{PyPI} & \textbf{npm} & \textbf{crates} \\ \textbf{repository} & PyPI & npm Registry & crates.io \\ \textbf{{primary} \mbox{programming} \mbox{languages}} & \cellitems \item Python, \item Cython & \cellitems \item JavaScript, \item TypeScript, \item WASM & \cellitems \item Rust \\ \textbf{\mbox{sample} \mbox{runtimes} or \mbox{compilers}} & \cellitems \item CPython, \item PyPy\ & \cellitems \item Node.js, \item Deno, \item Bun & \cellitems \item rustc \\ \textbf{sample project management tools} & \cellitems \item setuptools, \item Poetry, \item Hatch & \cellitems \item npm, \item Yarn, \item pnpm & \cellitems \item Cargo \end{tabular} \end{table} We shall compare the numbers of software projects from the chosen ecosystems that are packaged in the system software distributions described in detail in~\ref{chp:existing-security-tools}. The numbers presented in Table \ref{tab:ecosystem-packaged-numbers} were estimated based on snapshots of package collections offered by Debian Bookworm as of \debianTresholdDate{} and GNU Guix as of \tresholdDateAndCommit{}.
% grep -R node-build-system "$GUIX_CHECKOUT"/gnu/packages | wc -l
% grep -Re '\(pyproject\|python\)-build-system' "$GUIX_CHECKOUT"/gnu/packages | wc -l
%% $ grep -R dh-nodejs ../buildinfos | awk -F _ '{print $1 "_" $2}' | sort | uniq | wc -l
%% 998
%% grep -R dh-cargo ../buildinfos | awk -F _ '{print $1 "_" $2}' | sort | uniq | wc -l
%% 1424
%% $ grep -R dh-python ../buildinfos | awk -F _ '{print $1 "_" $2}' | sort | uniq | wc -l
%% 5312
\begin{table}[htpb] \caption{Estimated numbers of Debian and GNU Guix packages corresponding to software from considered ecosystems.} \centering \label{tab:ecosystem-packaged-numbers} \footnotesize \columnsCount{4} \begin{tabular}{ >{\raggedright\arraybackslash}p{.31\netTableWidth} >{\raggedright\arraybackslash}p{.23\netTableWidth} >{\raggedright\arraybackslash}p{.23\netTableWidth} >{\raggedright\arraybackslash}p{.23\netTableWidth} } \rowcolor{gray!40} & \textbf{PyPI} & \textbf{npm} & \textbf{crates} \\ \textbf{GNU Guix packages} & $3\,699$ & $55$ & $3\,704$ \\ estimate based on use counts of these \code{build-system}s in recipes & \code{pyproject-build-system}, \code{python-build-system} & \code{node-build-system} & \code{cargo-build-system} \\ \textbf{Debian packages} & $5\,312$ & $998$ & $1\,424$ \\ estimate based on counts of source packages referencing these debhelper packages & \code{dh-python} & \code{dh-nodejs} & \code{dh-cargo} \end{tabular} \end{table} The conclusion arises that, for some reason, npm packages are less likely to be packaged under the rigor of existing software distributions that utilize hermetic and reproducible builds. We can try to name the main causes and judge whether the difficulties could be worked around without sacrificing security. \section{Dependency tree sizes} It can be noticed that on average, npm projects have more recursive dependencies than, for example, Python projects~\cite{btao-wot-for-npm}. This means that packaging an end-user application written in JavaScript\footnote{also referred to by its official name: ECMAScript} typically requires more labor to bring the intermediate packages into the distribution -- an issue that has been talked about in the GNU Guix community for at least ten years~\cite{lets-package-jquery}. Large dependency trees can be partially caused by the JavaScript language historically having a relatively modest standard library. While such a design can bring some benefits, it might also lead to a proliferation of small libraries that have overlapping functionality. Independent, competing packages with similar purposes are then more likely to appear together in a single dependency tree. The creation of many small packages and eager use of dependencies for simple tasks -- all of which leads to larger dependency trees -- can also be attributed to the culture of developers working with the npm Registry~\cite{Abdalkareem2020}. \section{Age of the ecosystem} The npm tool first appeared in 2010. The PyPI ecosystem is older, with its repository being launched in 2002. It can therefore be argued that software from the latter has had more time to be included in Debian and several other software distributions. However, this is not sufficient to explain the lack of inclusion of npm packages in GNU Guix, which itself came into existence in 2012. Additionally, the crates ecosystem, which came into existence in 2014, is younger than both of the previously mentioned repositories. Despite that, software from it has a larger presence in Debian and GNU Guix than software from the npm ecosystem.
\section{Conflicting dependencies} \label{sec:conflicting-deps} System software distributions typically only allow a single version of a package to be installed at any given time. This rule is sometimes relaxed in various ways. For example, as of \debianTresholdDate{}, Debian Bookworm had distinct packages named \code{gcc-12} and \code{gcc-11}. Both of them provide the GNU C Compiler, albeit in different major versions. These packages can be installed side-by-side. GNU Guix, on the other hand, has facilities to create independent environments with different sets of packages in each. If multiple versions of the same package reside in different environments, they do not cause a conflict. There are also other nuances that provide some degree of flexibility. Nonetheless, an application that requires multiple versions of a single dependency is more difficult to include in such software distributions. This is a relatively small issue for, e.g., Python applications. Their runtime does not support simultaneous loading of multiple versions of the same Python library in the first place. That is, if it is possible to install a package's dependencies from PyPI and use that package, it means there are no conflicting dependencies. At the same time, npm and the Node.js runtime allow multiple versions of the same library to appear in the dependency tree of a project. \subsection{Support in npm\&Node.js} \label{sub:conflicting-deps-in-npm} Let us consider the dependency tree recorded in the \code{package-lock.json} file of the sigstore project repository\footnote{\url{https://raw.githubusercontent.com/sigstore/sigstore-js/759e4d9f706aa0bea883267009fa1da8f2705eab/package-lock.json}}. We shall look at the revision designated by Git commit \code{759e4d9f70} from Aug 5, 2024. Entries of interest are shown in Listing \ref{lst:occurances-of-tslib}. A library identified as \code{tslib} appears twice in the tree. There is a copy of version 2.6.3 and a copy of version 1.14.1. This happened because a dependency, \code{tsyringe}, has a requirement on a version of \code{tslib} that is at least 1.9.3 but lower than 2.0.0. Version 1.14.1 present in the tree satisfies this requirement. Another dependency, \code{pvtsutils}, requires \code{tslib} in a version that is at least 2.6.1 but lower than 3.0.0. Several other entries, omitted for clarity, have a different requirement on \code{tslib}. All these are satisfied by version 2.6.3. \lstinputlisting[ float=htpb, language=package-lock-json, caption=Multiple occurrences of the \code{tslib} package in a dependency tree., label=lst:occurances-of-tslib, numbers=none ]{occurances-of-tslib.txt} When this project's code is run and the \code{tsyringe} library tries to load \code{tslib}, the Node.js runtime instantiates version 1.14.1 of \code{tslib} from \code{tsyringe}'s private subtree. \code{tsyringe} then works with that instance of \code{tslib}. For all other parts of the project that attempt to load \code{tslib}, version 2.6.3 is instantiated and used. \subsection{Effects of Semantic Versioning} \label{sub:effects-of-semver} In the npm ecosystem a system called ``Semantic Versioning'' \cite{prestonwerner2013semantic} is widely applied. This system assumes that a software version is given as three numbers -- major, minor, and patch. E.g., 2.6.3. It also permits optional labels for pre-release and build metadata.
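To make the just-described format concrete, Listing~\ref{lst:semver-format-sketch} contains a small, purely illustrative sketch that decomposes version strings into the three numbers and the optional labels. The regular expression used there is a simplification; the official Semantic Versioning specification defines a stricter grammar.
\begin{lstlisting}[
  float=htpb,
  language=Python,
  numbers=none,
  caption={A simplified, illustrative decomposition of Semantic Versioning version strings.},
  label=lst:semver-format-sketch
]
import re

# MAJOR.MINOR.PATCH, optionally followed by "-" and a pre-release label
# and/or "+" and build metadata.  Simplified for illustration only.
SEMVER_PATTERN = re.compile(
    r"^(?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)"
    r"(?:-(?P<prerelease>[0-9A-Za-z.-]+))?"
    r"(?:\+(?P<build>[0-9A-Za-z.-]+))?$"
)

for version in ("2.6.3", "1.14.1", "3.0.0-beta.1+20250411"):
    print(version, SEMVER_PATTERN.match(version).groupdict())
\end{lstlisting}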
When Semantic Versioning is followed, a new software release that breaks backward compatibility with the previous release has its major version number increased and the other two numbers reset to zero. A release that adds new functionality without breaking backward compatibility has only its minor number increased, with the patch number reset to zero. Finally, a release that adds no new functionality -- typically a bugfix release -- has its patch number increased. Assume project \code{foo} utilizes a semantically versioned dependency \code{bar}. The developers could verify that \code{foo}'s code integrates properly with a particular version of \code{bar}, e.g., version 3.4.5. The developers would then record a requirement that in the future, either this version of \code{bar} or a later one -- but without compatibility-breaking changes -- can be used by \code{foo}. This means only \code{bar} versions between 3.4.5 and 4.0.0, excluding 4.0.0 itself, would satisfy the requirement. If \code{bar} then increases its major number in a new release, the developers of \code{foo} can ensure that its code integrates properly with the newer \code{bar}. They would then update the requirements in \code{foo} and the next release of \code{foo} could officially use the 4.x.x series of \code{bar}. It would, however, still be forbidden from using the hypothetical 5.x.x series that could bring subsequent compatibility breakages. In our example from~\ref{sub:conflicting-deps-in-npm}, the libraries \code{tsyringe} and \code{pvtsutils} both apply this approach to their dependency, \code{tslib}. As a result, \code{tsyringe} is protected from possible breaking changes introduced by version 2.0.0 of \code{tslib}, but at the same time it is impossible to satisfy all requirements of the project with just a single copy of \code{tslib}. In practice, there are sometimes tens or hundreds of such conflicts in a single dependency tree. The breaking changes that necessitate increasing a package's major version number sometimes concern a part of the package's functionality that a specific user does not rely upon. When a project's developers know or suspect that the requirements specified by certain dependencies could be safely loosened, they can forcibly override them. Such overrides, supported natively in npm, are used by some when addressing security vulnerabilities deep in a project's dependency tree. The same approach could be used to eliminate all dependency conflicts. However, with many overrides there is a lower chance of avoiding a breakage due to inter-package compatibility issues. \section{Difficult bootstrappability} GNU Guix and Debian are self-contained in the sense that build dependencies of their packages are also their packages. When packaging a program that requires, e.g., a C compiler to build, no problems arise -- C compilers are already present in these system software distributions and one of them can be used as a build dependency of the new package. However, packaging a program written in a new programming language requires a compiler or interpreter of that programming language to be present in the distribution in the first place. The same applies to other types of build tools, e.g., bundlers that amalgamate many JavaScript files into a few or a single file. Packaging a program for such a distribution involves first packaging all its build tools. Making a package buildable with only the tools from the distribution is sometimes referred to as \textbf{bootstrapping}.
\subsection{Self-depending software} \label{sub:self-depending-software} Certain tools exist that depend on themselves to build, making bootstrapping challenging. Selected examples from the npm ecosystem are presented in Table \ref{tab:self-depending-packages}. Packages in the table were ranked based on how many other npm Registry packages specified them as development dependencies as of \tresholdDate{}. The presented selection is by no means exhaustive; more highly popular self-depending npm packages might exist. \begin{table}[htpb] \caption{npm Registry packages that require themselves to build.} \label{tab:self-depending-packages} \centering \footnotesize \columnsCount{3} \begin{tabular}{ >{\raggedright\arraybackslash}p{.13\netTableWidth} >{\raggedright\arraybackslash}p{.27\netTableWidth} >{\raggedright\arraybackslash}p{.6\netTableWidth} } \rowcolor{gray!40} \textbf{name} & \textbf{popularity ranking} & \textbf{notes} \\ \code{typescript} & $1$ ($473\,235$ dependees) & the original implementation of the TypeScript programming language \\ \code{@babel/core} & $10$ ($138\,704$ dependees) & part of a JavaScript compiler, requiring itself indirectly through dependencies that themselves build with \code{@babel/core} \\ \code{rollup} & $26$ ($95\,965$ dependees) & a bundler \\ \code{gulp} & $40$ ($61\,077$ dependees) & a build system, requiring itself through its runtime dependency \code{gulp-cli} \\ \code{sucrase} & $1\,793$ ($528$ dependees) & an alternative to Babel, used as a proof of concept for bootstrapping a GNU Guix package \end{tabular} \end{table} In GNU Guix, the preferred approach to packaging a self-depending tool is making it bootstrappable~\cite{courtès2022buildingsecuresoftwaresupply}. This can happen by packaging a chain of historical versions of the tool, where each can be built with the nearest older packaged one, down to an early version that did not have a self-dependency. Sometimes it is possible to eliminate or shorten such a ``bootstrap chain'', for example by replacing a complex build tool with scripts or by using a bootstrappable drop-in replacement for some tool. The latter was an approach used to package the official, self-hosting implementation of the Rust programming language for GNU Guix in 2018. There, an unofficial Rust compiler, written in C++, was used to compile an official Rust release from July 2017~\cite{bootstraping-rust}. Bootstrapping helps prevent the ``Trusting Trust'' attack demonstrated by Ken Thompson in 1984, but as of today there is little evidence of this type of attack ever being used by threat actors. In some cases the software distributions under consideration make exceptions and allow a non-bootstrappable program prebuilt by another party to be made into a distribution package. For example, the set of OCaml and Haskell compilers in GNU Guix depends on such third party binaries that cannot be rebuilt from any package recipe in the distribution. In 2022, a proof-of-concept GNU Guix bootstrap of \code{sucrase}, a self-depending build tool from the npm ecosystem, was carried out~\cite{re-bringing-npm-to-guix}. \subsection{Recursive dependency closure} By a package's recursive development dependency closure we mean a set containing all its declared runtime dependencies and development dependencies, their runtime dependencies and development dependencies, etc. In other words, the closure is the minimal set that contains the dependencies of every one of its members and also those of the package for which the closure is being computed.
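Listing~\ref{lst:closure-computation-sketch} gives a minimal sketch of how such a closure can be computed. It assumes, purely for illustration, that the relevant Registry metadata has already been flattened into a dictionary mapping each package name to the set of names it declares as runtime or development dependencies; the package names appearing in the toy input are hypothetical.
\begin{lstlisting}[
  float=htpb,
  language=Python,
  numbers=none,
  caption={Illustrative sketch of a recursive development dependency closure computation.},
  label=lst:closure-computation-sketch
]
def dependency_closure(package, deps):
    """Return the recursive development dependency closure of `package`.

    `deps` maps a package name to the set of names it declares as runtime
    or development dependencies; names absent from `deps` (e.g. mistyped
    or unpublished packages) are treated as having no dependencies.
    """
    closure, queue = set(), [package]
    while queue:
        name = queue.pop()
        for dependency in deps.get(name, set()):
            if dependency not in closure:
                closure.add(dependency)
                queue.append(dependency)
    return closure

# Toy input; the real input would be derived from npm Registry metadata.
toy_deps = {
    "app": {"lib-a", "linter"},
    "lib-a": {"lib-b"},
    "lib-b": {"lib-a"},  # cycles are handled by the membership check
    "linter": set(),
}
print(sorted(dependency_closure("app", toy_deps)))  # lib-a, lib-b, linter
\end{lstlisting}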
The size of a package's recursive dependency closure can illustrate the complexity of the bootstrapping challenge. An attempt to compute such a closure was made for the npm package \code{typescript} as part of this work. npm Registry metadata limited to package releases from before \tresholdDate{} was used. For simplicity, version constraints were disregarded and all historical dependencies of packages were considered. The result was a set of $60\,843$ package names, with an additional $2\,433$ referenced names that do not exist in the Registry. These were largely the result of mistakes and possibly of private/unpublished packages. Of course, non-crucial tools like linters tend to be declared development dependencies and the closure of truly necessary dependencies would be much smaller, as also reported in~\ref{sec:typical-dep-tree-sizes}. Nonetheless, this example shows how difficult it is to reason about what is needed for bootstrapping tasks. \section{Inconvenience of system software distributions} \label{sec:inconvenience-of-distros} Despite looser security practices and more frequent reports of malicious packages in repositories like the npm Registry \cite{malicious-npm-techtarget-nichols,malicious-npm-infosec-muncaster,malicious-npm-cybernews-naprys,malicious-npm-bleep-toulas,malicious-npm-hacker-news-ravie}, many developers still prefer to work with them rather than with system software distributions. Packages in the latter are adjusted to work with and inside their distributions and are typically not compatible with the usual workflow of developers of, e.g., npm projects. For example, it could be difficult to use npm libraries from Debian for producing distribution files of a mobile application. Another deterrent is the delay with which newer releases of packages reach system software distributions. This limited interest in the availability of software from certain ecosystems in distributions like Debian and GNU Guix also leads to a decreased incentive for the authors of these distributions to work on it. \chapter{Overview of the npm ecosystem} \label{chp:npm-ecosystem-overview} Software from the npm ecosystem was found to be more challenging to turn into system software distribution packages. To give deeper insight into this problem, this chapter provides more information about the ecosystem, with a focus on dependency relations between npm packages. The npm Registry -- the software repository around which the npm ecosystem is centered -- was created as a distribution channel for JavaScript libraries, frameworks, and applications using the Node.js runtime. It was targeted towards server-side developers. The platform allows the general public to register accounts and publish software packages. Over the years, the npm Registry also became home to client-side JavaScript, i.e., software to be executed in web browsers. The repository is nowadays also used by related programming languages, notably TypeScript. As of \tresholdDate{}, the repository was serving over $3.5$ million published packages, many of which come in multiple versions. \section{Recognized dependency types} Projects using npm can use a structured format to list their dependencies, i.e., the npm packages they use. Four types of dependencies can be specified in a package's metadata kept in a file named \code{package.json} in the project's source directory. These types are described in Table~\ref{tab:npm-dep-types}. Throughout the rest of this work, the opposite of a dependency shall be called a \textbf{dependee}.
\begin{table}[htpb] \caption{Dependency types recognized by npm.} \label{tab:npm-dep-types} \centering \footnotesize \columnsCount{2} \begin{tabular}{ >{\raggedright\arraybackslash}p{.3\netTableWidth} >{\raggedright\arraybackslash}p{.7\netTableWidth} } \rowcolor{gray!40} \textbf{Metadata key} & \textbf{Meaning} \\ \code{dependencies} & Packages needed at runtime. \\ \code{devDependencies} & Packages needed or useful for development, often minifiers/bundlers, test frameworks, linters, and version control integration tools. \\ \code{optionalDependencies} & Similar to \code{dependencies} but only needed for some additional functionality. npm supports installing a package without its optional dependencies, but by default it does install them. \\ \code{peerDependencies} & Used to specify compatible versions of tools for which the dependee is a plugin. \end{tabular} \end{table} \section{Statistical analysis of the npm ecosystem} To determine which projects using the npm Registry are the most popular among developers, the dependency relations between packages were counted and analyzed. First, the metadata of all published packages in JSON format was downloaded. The download took place on the days following \tresholdDate{}. The metadata was processed to only include information about releases made before \tresholdDate{}, yielding $3\,519\,767$ package entries. The curl program was used to make requests to the Registry's CouchDB view at \url{https://replicate.npmjs.com/_all_docs}. It is worth noting that this API endpoint's functionality has since changed and other means would be necessary to download the entire Registry metadata again in the future. For the purpose of the rankings discussed next, dependees that were multiple versions of a single package were counted as one. Similarly, version constraints in dependency specifications were ignored. \subsection{The most popular dependencies -- changes over five years} \label{sub:npm-changes-5-years} One of several metrics of a package's popularity is its number of public dependees. Up to 2019, such a ranking of the $1\,000$ packages most often specified as others' dependencies used to be published by Andrei Kashcha~\cite{anvaka-rank-gist}. A similar list computed from newer data for the purpose of this work was used to check how much the set of the most popular packages changed between August 2019 and April 2025. The goal was to find out how many of the previously popular projects keep being chosen by developers and how many stopped being actively used, perhaps becoming legacy software. The overlap between the rankings is visualized in Figure~\ref{fig:common-2019-2025-percent}. For each natural $n$ in the range $[1, 1000]$, the $n$ most popular dependencies from both rankings were selected. The overlap of the selected packages from the first and the second ranking was computed and plotted, with the values of $n$ on the X axis of the figure. \begin{figure}[htpb] \centering \includesvg[width=\linewidth,inkscapelatex=false]{common_2019_2025_percent.svg} \caption{Overlap of the most popular npm dependencies from 2019 and 2025.} \label{fig:common-2019-2025-percent} \end{figure} The ``winners'' in 2019 and 2025 were \code{lodash} and \code{react} with $69\,147$ and $263\,038$ dependees, respectively. It can be seen that the roughly $150$ most depended-upon packages form a relatively stable forefront, with the further part of the ranking having changed more over the five years. Nevertheless, it is worth noting that certain packages with no new releases for several years still rank relatively high in 2025.
Examples are \code{lodash}, \code{request}, and \code{q} ranking third, $17$th, and $157$th, respectively. In conclusion, if certain software is intended to be used for more than a few years, dependencies for it must be considered more carefully when they are not from among the \givemeatilde150 most popular ones. Otherwise, the risk of a project's direct dependency becoming legacy software grows. However, often other characteristics of a package will determine whether it should be considered reliable. Ultimately, who maintains a package and how it could help the project are more relevant than a ranking position. \subsection{The most popular dependencies -- popularity thresholds} \label{sub:runtime-deps-popularity-tresholds} The numbers of dependees corresponding to ranking positions can be used to infer some qualities of the ecosystem. This correspondence is presented in Figure~\ref{fig:dependee-counts}. \begin{figure}[htpb] \centering \includesvg[width=\linewidth,inkscapelatex=false]{dependee_counts.svg} \caption{Number of packages using the most popular dependencies.} \label{fig:dependee-counts} \end{figure} In 2019, the $1000$th most popular dependency package in the npm Registry had $346$ dependees. By April 2025, the $1000$th package in the ranking already had $4\,771$ dependees. This reflects the growth of the entire ecosystem, whose package repository had about one million packages in July 2019 and about $3.5$ million packages in April 2025. However, this would by itself only explain a rise in dependee counts by about a ratio of $3.5$. The aforementioned increase from $346$ to $4\,771$ dependees is almost four times greater. This needs to be attributed to the growing complexity of projects, as there is a tendency to use more dependencies. A plausible additional explanation is a higher overlap of functionality between packages, i.e., situations where multiple popular libraries exist for a single task. \section{The most popular development dependencies} \label{chp:most-popular-dev-deps} In the context of supply chain security, the development dependencies are as important to research as the runtime dependencies. A popularity ranking similar to the previous one was compiled for packages occurring the most in the \code{devDependencies} collections of others. An analogous correspondence of ranking position to development dependee count is presented in Figure~\ref{fig:dev-dependee-counts}. No development dependencies ranking from 2019 was found that could be used for comparison. Instead, the runtime dependencies plot from Figure \ref{fig:dependee-counts} was re-included for easier reference. \begin{figure}[htpb] \centering \includesvg[width=\linewidth,inkscapelatex=false]{dev_dependee_counts.svg} \caption{Number of packages using the most popular development dependencies.} \label{fig:dev-dependee-counts} \end{figure} The first position belongs to \code{typescript} with $840\,161$ development dependees. A threshold of $2\,185$ development dependees needed to be reached by a package for it to be included in the ranking. The curve for development dependencies is steeper, meaning there is a clearer forefront. This in turn indicates that the functionality overlap mentioned in \ref{sub:runtime-deps-popularity-tresholds} is possibly a smaller problem in this case. \section{Overlap of the most popular runtime and development dependencies} \label{sec:overlap-of-runtime-and-dev-deps} It is possible for a popular development dependency to also be specified as a runtime dependency by some packages.
Realizing how often this happens can help judge whether certain kinds of issues are likely to occur in the ecosystem. The overlap of runtime and development dependencies is visualized in Figure~\ref{fig:common-nondev-dev-percent}, using the same approach as for the overlap in Figure \ref{fig:common-2019-2025-percent} discussed in \ref{sub:npm-changes-5-years}. \begin{figure}[htpb] \centering \includesvg[width=\linewidth,inkscapelatex=false]{common_nondev_dev_percent.svg} \caption{Overlap of the most popular npm runtime and development dependencies in 2025.} \label{fig:common-nondev-dev-percent} \end{figure} Since packages listed as \code{dependencies} are often libraries or frameworks and those listed as \code{devDependencies} are commonly applications, one could expect a smaller overlap than the roughly 15--30\% that was found. A possible explanation is that unlisting a package from \code{devDependencies} and instead including it among \code{dependencies} creates no major change for project developers. A command like \code{npm install} will still resolve that dependency and include it in the environment it creates. It is therefore possible that a non-negligible number of dependencies is incorrectly categorized by the dependees. It has long been known that among packages listed as \code{devDependencies} there are many that are not needed to merely rebuild a project. These could be automatic code formatters, tools responsible for integration with version control, etc.\ and they could be eliminated from automated builds to make them lighter on resources and to decrease the attack surface. Based on these results, it is reasonable to expect that something similar holds for runtime dependencies. This provides a justification for experiments aimed at eliminating the extraneous dependencies without breaking the functionality of packages. \chapter{Possible paradigms for hermeticity and reproducibility} Certain popular software technologies -- with npm being one of them -- prove difficult to combine with existing reproducibility-focused workflows. Possible causes of this were discussed in~\ref{chp:applicability-of-workflows}. The package creation workflows of the -- largely reproducible -- system software distributions Debian and GNU Guix were explained in \ref{sub:how-debian-package-is-made} and~\ref{sub:how-gnu-guix-package-is-made}, respectively. An explanation of the npm ecosystem followed in \ref{chp:npm-ecosystem-overview}. With all the above being considered, the ability to handle software with numerous dependencies -- which can have complex relationships -- appears relevant to the goal of rebuilding parts of the npm ecosystem hermetically and reproducibly. Based on the knowledge gathered, possible approaches to hermeticity and reproducibility in the context of dependency resolution shall be critically analyzed. They shall be classified as distinct paradigms. The introduced paradigms shall be discussed in the context of security and their applicability to the build process of npm packages. Paradigms 0 through 2 represent existing approaches. Paradigm 3 is an intermediate one that leads to 4, which is a generalization of the former. Paradigms 3 and 4 are an innovation originating from this work. They are meant to optimize the way build inputs are determined and also ensure that no unnecessary single points of failure are created which would not be secured through verified reproducibility.
The new paradigms are suggested as bases for hypothetical new software packaging workflows that would make hermeticity and reproducibility easier to achieve with software from, among others, the npm ecosystem. \section{Paradigm 0 -- lack of actual reproducibility} \label{sec:paradigm-0} If one is to build an npm package in the most basic way, with the use of commands like \code{npm install} and without a pre-computed dependency tree, then network connectivity is necessary. Without it, the npm tool cannot download package metadata from its repository, the npm Registry. But if network access is allowed, then the build -- and therefore its result -- might depend on downloaded code and data other than dependencies' metadata. Some commonly used npm packages require additional connections to function. For example, the \code{playwright-webkit} library, upon installation by npm, downloads executables from a third party server. That library is an intermediate test dependency of a popular web library, jQuery, used by about 74\% of the most popular websites~\cite{w3techs-javascript-library}. The author of an npm package can assign so-called \textbf{distribution tags} to its specific versions. The tag can be thought of as a string label that points to a package version. Tags can be used by dependees as an alternative way of specifying the depended version of the package. Some of the commonly used tag names are \code{next}, \code{beta}, and \code{latest}. When the developer publishes a new package version, the npm tool by default automatically assigns the \code{latest} tag to that version. The build process of an npm project relies on downloaded metadata of packages. As a result, if a dependency's author publishes a new version of it or alters its distribution tags, it might cause later rebuilds of the package to use a different set of inputs. The final result can be different, so it is not reproducible. Additionally, the repository constitutes a single point of failure because compromising the repository allows altering the served metadata of package versions. The compromised repository could, for example, spoof a package's distribution tags or hide the existence of certain versions of a package, thus allowing only vulnerable versions to be used. With files coming from a third party server, we have even weaker guarantees that they were not maliciously tampered with. A library that downloads such files during a package build could verify them. For example, its authors could make the library contain cryptographic hashes of the required external files. The library could then check that every downloaded file has a matching hash. Unfortunately, we have no mechanisms to mandate that this kind of verification takes place in cases like that of \code{playwright-webkit}. This means ad-hoc file downloads are a bad security practice. They would need to be eliminated or restricted for reproducibility to be leveraged. Despite the above, reproducibility tests of npm packages are actually attempted, with one example being Pronnoy Goswami's research \cite{goswami-reproducibility}. It has to be noted that the results of such tests are likely to be unstable, yielding different results if repeated at a later date. \section{Paradigm 1 -- inputs determined by human-maintained references} One of the possible methods of determining the set of build inputs is used by GNU Guix. Its package recipes contain references to dependency packages that have their versions predetermined.
As a result, questions like ``Should \code{foo} use its dependency \code{bar} in version 2.4.5 or 3.0.1?'' are already answered. An update to a package definition results in the updated package being used everywhere that particular definition was referenced. The update takes the form of a commit or a commit series in the Git VCS and is subject to review by co-authors of the distribution. If we fix a GNU Guix revision to be used, then in a package build -- which is hermetic by design -- all inputs are unambiguously determined. No repository of metadata can open the user to the risk of using incorrect dependency versions. In other words: the threat on the part of improperly defined dependency versions is of the same nature as that on the part of improperly written code. And -- as users of any kind of software -- we are bound to accept threats of this nature. Maintenance of this kind of system is, of course, more labor-intensive. Every alteration of a package recipe -- including software version updates -- is an update to GNU Guix' Git repository. Such an update involves labor of distribution maintainers, similarly to Debian's case. A sample list of $26$ consecutive commits to GNU Guix' repository -- with $15$ package updates among them -- is presented in Listing~\ref{lst:guix-package-update-commits}. The changes in the list were made by multiple contributors during an hour between 12:00 and 13:00 on April 11, 2025. The changes are listed oldest to newest. Details of the newest one are additionally shown. A non-negligible amount of work is clearly needed to handle many changes manually. The great number of small changes might therefore lead to an as-yet-unverified assumption that too great an effort is required of distribution maintainers. If true, this could hamper the growth of the software collection available through GNU Guix. \lstinputlisting[ float=htpb, language=guix-commit, caption={List of consecutive changes committed to GNU Guix, with contents of the bottommost one included for reference.}, label=lst:guix-package-update-commits, numbers=none ]{guix-package-update-commits.txt} In addition, many package managers following other paradigms can make use of permitted dependency version ranges declared by packages. This way npm, APT, and others can automatically avoid using incompatible dependency versions. However, Paradigm 1 does not allow such optimization to be employed. It is worth highlighting that in GNU Guix the URLs and hashes that comprise identification data of program sources are maintained together with package recipes, as can be seen in Listing~\ref{lst:guix-package-update-commits}. As explained, this approach might have additional consequences for the amount of distribution maintainers' labor. Nonetheless, it can also positively or negatively affect the chances of malicious sources being referenced in a recipe. This is an important supply chain issue to recognize, but it is independent of the concept of paradigms introduced in this chapter. \section{Paradigm 2 -- reproducibility not applied to dependency resolution} The problem of the dependency resolution process being unreproducible was explained in~\ref{sec:paradigm-0}. In this context, the actual package build can be partitioned into several distinct steps, for example \begin{enumerate} \item dependency resolution, \item dependency installation, \item code transformation/generation, \item automated tests, and \item installation/packing.
\end{enumerate} Steps 1 and 2 are sometimes performed together, for example as part of a single command invocation. However, in case of some package managers -- including npm -- the set of resolved dependencies with their versions can also be recorded for later reuse. It is done with a so-called \textbf{lockfile} -- a file that project developers can add to a VCS and which allows dependency installation to be repeated without re-downloading metadata or re-running the resolution algorithm. In npm projects this file is saved as \code{package-lock.json} or \code{npm-shrinkwrap.json}. With a precomputed \code{package-lock.json} we can therefore download the dependencies and use them as inputs of the hermetized build, narrowed to steps 2--5. Upstream software's original build procedures sporadically expect network access during these steps. The build process of the aforementioned jQuery is one of those few cases where this occurs. Such problems would need to be corrected manually, in package recipes of a hypothetical distribution applying Paradigm 2 to a broader population of npm packages. A typical solution in, e.g., Debian is a patch that eliminates such an access attempt or replaces it with a reference to a local, formally-approved input. If npm project authors fail to provide an appropriate lockfile -- which can happen -- it could be generated by one of the parties that rebuild the software. Step 1 would then need to be performed unhermetically, with network access. The obtained \code{package-lock.json} would then be treated as additional build metadata, distributed to the other parties. Whenever the build were to be repeated to verify the reproducibility of the result or for other purposes, the presence of this metadata would be required. The benefit of Paradigm 2 is that one can proceed in achieving reproducibility of most of the build process and further leverage it. In fact, comments in the source code of jQuery indicate that its developers -- to some extent and with disregard for possible changes in files being downloaded during the build process -- did actually work on making jQuery's build process deterministic when the \code{package-lock.json} is used. The main disadvantage of Paradigm 2 is that dependency resolution is still not secured by hermeticity nor reproducibility. Even when changes to a project's \code{package-lock.json} take the form of version control system commits, these are unlikely to be reviewed as carefully as ordinary software code changes. Dependency trees can be complex. \code{package-lock.json} files with over $1\,000$ entries are common. As a result, the shape of a particular resolved dependency tree is difficult to explain without additional tools. The described approach requires generalization to building a project that uses multiple repositories, e.g., npm Registry + Python Package Index + Rust Package Registry. That is because multiple dependency trees from multiple software ecosystems are involved. Theoretically, even in terms of a single ecosystem and a single repository, we might need to resolve multiple sets of dependencies in step 1. In effect, an actual collection of lockfiles would need to be treated as the aforementioned additional build metadata. \subsection{Debian implementation} Interestingly, a variant of Paradigm 2 can be found in Debian, which is considered one of the most reproducible software distributions.
That is because the package recipe shared as a \code{debian.tar.xz} file contains the names of direct build dependencies but not necessarily their precise versions or the names of indirect dependencies. It is actually in the \code{.buildinfo} files that the published packages' build environment metadata can be found. Much of this metadata is determined by the dependency resolution process, as performed by Debian's APT tool during the initial Debian package build. Although this does formally fall into the scope of Paradigm 2, Debian packagers' perspective is still similar to that of Paradigm 1 users. That is because -- as explained in~\ref{sec:conflicting-deps} -- a single Debian release typically only advertises a single version of a given package at any point in time. Unless multiple Debian releases are mixed together, this makes the input metadata of APT's dependency resolution process flat. This, in turn, makes packagers ultimately responsible for ensuring version compatibility between packages in this flat space. \section{Paradigm 3 -- deterministic dependency resolution inputs ensured} \label{sec:paradigm-3} For our dependency trees from Paradigm 2's step 1 to be secured through reproducibility, we need to be able to repeat the dependency resolution step using the same data about candidate dependency packages. Neither \code{.buildinfo} nor \code{package-lock.json} files preserve all metadata actually consulted by the resolution algorithm. They lack information about packages that were considered but rejected as final dependency tree members. As such, full dependency resolution cannot be performed based on just these files' contents. It can be argued that the risks this causes for Debian are small because the general public cannot create new packages that could then be immediately used as dependencies. Here, one of the most likely dependency resolution attack scenarios involves supplying the build with an outdated, faulty compiler package already present in the distribution. One theoretical attack utilizing a compiler bug was described in~\cite{deniable-backdoors-compiler-bugs}. In contrast, manipulation of \code{package-lock.json} in an npm package build can more easily lead to an attacker-published package being installed in the build environment. In the case of npm projects, one of the simplest solutions would be pointing the npm tool to a local mock of a repository server, speaking the HTTP protocol. The mock would function as a proxy that downloads required packages' metadata from the original npm Registry server, alters it, and returns it as responses to the npm tool's requests. Each response -- containing the metadata of all versions of a single npm package -- would be filtered so as not to include the versions of packages that were published after a chosen time threshold. The threshold could be, e.g., the release date of the project version being built. In repeated build attempts, the relevant metadata served by the mocked registry ought not to change. Corner cases shall occur, in the form of dependencies being removed from the official registry due to copyright claims or in the form of projects' dependence on particular developer-alterable distribution tags of npm packages. These problems should be rare enough to be fixable manually or with reasonable defaults. For example, a mock \code{latest} tag could be attached to the newest version of each npm package whose metadata is served. This approach does not completely eliminate the threat of the dependency resolution process being maliciously influenced.
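For illustration, Listing~\ref{lst:metadata-filtering-sketch} sketches the kind of date-based filtering described above for the metadata document of a single package; the assumed layout of that document (its \code{versions}, \code{time}, and \code{dist-tags} fields) follows the format served by the npm Registry. Such filtering can, of course, only be as trustworthy as the metadata it is given.
\begin{lstlisting}[
  float=htpb,
  language=Python,
  numbers=none,
  caption={Hypothetical sketch of date-based filtering of a single npm package's metadata, as could be performed by a registry mock.},
  label=lst:metadata-filtering-sketch
]
from datetime import datetime, timezone

def filter_package_metadata(metadata, cutoff):
    """Return a copy of one package's registry metadata with versions
    published after `cutoff` removed and a mock "latest" tag attached
    to the newest remaining version."""
    def published_before(version):
        timestamp = metadata["time"].get(version)
        if timestamp is None:
            return False
        published = datetime.fromisoformat(timestamp.replace("Z", "+00:00"))
        return published <= cutoff

    versions = {v: manifest for v, manifest in metadata["versions"].items()
                if published_before(v)}
    filtered = dict(metadata, versions=versions)
    if versions:
        # ISO 8601 timestamps of equal precision sort chronologically.
        newest = max(versions, key=lambda v: metadata["time"][v])
        filtered["dist-tags"] = {"latest": newest}
    return filtered

# Example threshold: cutoff = datetime(2025, 4, 1, tzinfo=timezone.utc)
\end{lstlisting}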
In particular, the packages' metadata could be maliciously modified even earlier, for example as a result of the official registry's infrastructure being compromised. However, compared to Paradigm 2, the number of moments when malicious modifications could occur is decreased. Similarly, the scope of what could be modified is more limited. To decrease the chances of the hypothesized attack on the registry being successful, additional means of detection and mitigation could be employed. For example, trusted third parties can serve as ``canaries'', publishing cryptographically signed information about what package metadata was being served by the repository as of a given date. The initial builder can also record the resolution metadata and make it available to rebuilders, effectively acting as one of the suggested canaries. The holy grail of avoiding a single point of failure -- one in the form of a centralized registry -- would be deriving the resolution metadata of packages from those packages themselves once they are also rebuilt locally. This would present a bootstrapping challenge that -- when solved -- would open the way to dependency resolution without major reliance on any centralized service. Regardless of the employed approach to securing the dependency resolution inputs, the actual concept of Paradigm 3 is to make the inputs plausibly deterministic and then repeat the dependency resolution process upon every repetition of a given package build. The remaining steps of the build process are performed analogously to those in Paradigm~2. The issue of generalization to projects utilizing multiple repositories is also analogous to that in Paradigm~2. \section{Paradigm 4 -- hermeticity relaxed and deterministic dynamic inputs allowed} One can notice that in paradigms 2 and 3 the first step, dependency resolution, is treated differently from the subsequent ones. The result of step 1 is a collection of one or more lockfiles that identify dependencies' files, e.g., through names and versions or URLs and hashes. A tool that implements a given paradigm would need to -- between steps 1 and 2 -- prepare an appropriate isolated environment for the package build, for example a Linux container. Lockfile-identified dependencies would need to be exposed inside. In Paradigm 3, the initial download of package metadata can happen through a locally-run mock of a repository server. That is, the isolated dependency resolution process has a service perform possibly hermeticity-violating actions on its behalf. Yet, care is taken to make the results of those actions deterministic. Paradigm 4 extends this approach to all steps of the build. The installation step, knowing the project's recursive dependencies identified by the lockfiles from step 1, could have the service supply the dependencies' files into the otherwise isolated build environment. There is no longer a need to provide separate isolated environments for two different parts of the build process -- step 1 and the chain of remaining steps. As long as the hermeticity-violating actions performed by the service on the build's behalf are deterministic, this should not make build results less reproducible. The process can be thought of as \textbf{eventually-hermetic}, because repeated builds are likely to request the exact same actions, requiring the same external data, which could be cached and reused, making subsequent runs network-independent.
At the same time, this approach \textbf{removes the need to have all build inputs identified in advance}, simplifying the entire build process. Let us provide another example of an action that could be deterministically carried out on behalf of the build -- checking out a Git repository revision. Under Paradigm 4 this could happen through the hermeticity-violating service, making the repository a \textbf{build input determined dynamically}. If the checkout operation uses a repository URL and, e.g., a Git tag\footnote{unrelated to an npm package's distribution tag and not to be confused with it}, it is by itself not deterministic -- the result can vary over time, for example due to tags being changed in the upstream repository. In this case, additional means -- like those already mentioned -- would be needed to ensure the determinism of the checkout action. However, no such extra measures are necessary if the checkout operation uses a commit hash made with an algorithm deemed cryptographically secure. It is preimage attack resistance, rather than collision resistance, that is of practical relevance here, making even SHA-1 applicable as of 2025. This approach makes the logic of an eventually-hermetic package build more straightforward. If, for example, step 3 required an extra resource or tool, in paradigms 1--3 that prerequisite would need to be identified beforehand. Under Paradigm 4 this is not necessary. \subsection{Security-oriented justification} How secure would Paradigm 4 be? Its security relies on the viability of the employed means of ensuring the determinism of dynamic inputs. A GNU Guix-like approach of maintaining the cryptographic hashes of all downloadable resources in a VCS is possible. While the collection of resources still needs to be identified in advance of any build, there is no longer a need to record exactly which ones are needed for which particular package build -- by itself a huge simplification. This makes Paradigm 4 no worse than -- seemingly the most secure -- Paradigm 1, as implemented in GNU Guix. However, the concept of paradigms -- as introduced by this work -- is not strictly dependent on the way of ensuring the integrity and determinism of software sources and of other build inputs. The approach of keeping hashes of packages' sources embedded in code kept in a VCS can be criticized. In theory, changes to the version-controlled recipes code -- with input resources' hashes -- are subject to review. However, despite the positive security aspects of human-conducted code reviews, such a system makes it easy for reviewers to lose vigilance -- especially when facing a ``flood'' of package recipe updates, as shown in Listing \ref{lst:guix-package-update-commits}. Some could argue that it would be beneficial to completely replace the version-controlled hashes of sources with a network of canaries that record the tagged revisions found in VCS repositories and the contents of software's published release files. This approach is applicable to Paradigms 4, 3, and 1 alike. It simply happens not to be employed by GNU Guix as of 2025. \chapter{Automated package builds experiment} \label{chp:experiment} The previous chapters of this work have led to the following hypotheses and questions.
%% \let\labelenumiOld\labelenumi
%% \newdimen\labelwidthOriginal
%% \labelwidthOriginal=\labelwidth
\newcounter{hyQuCounter}
\newcommand{\hyQuItem}[1]{%
  \stepcounter{hyQuCounter}%
  \item[#1 \thehyQuCounter{}.]%
}
\newcommand{\hypothesis}{\hyQuItem{HYPOTHESIS}}
\newcommand{\question}{\hyQuItem{QUESTION}}
\begin{description} \hypothesis{} The dependency tree sizes of npm packages and acceptance of conflicting dependencies by the platform appear to be the major sources of difficulty in packaging the npm ecosystem in reproducibility-focused distributions. Are these truly the main factors? \hypothesis{} Re-generation of npm lockfiles -- an operation necessary for the security improvement offered by proposed paradigms 3 and 4 over Paradigm 2 -- is expected to rarely cause npm package build failures which would not occur with developer-supplied lockfiles. Are such failures indeed uncommon? \hypothesis{} As speculated in \ref{sec:overlap-of-runtime-and-dev-deps}, both direct and indirect dependencies of npm projects are often unnecessary. Is this indeed the case? \question{} What are the typical sizes of dependency trees needed to build npm projects and how much can these trees typically be shrunk? \question{} How often do dependency conflicts actually occur in npm dependency trees and how often -- or to what extent -- can they usually be forcibly eliminated without causing breakage? \question{} Are forced removals of an npm project's dependencies and forced elimination of dependency conflicts likely to cause non-obvious breakages that only become apparent when a seemingly successfully-built package turns out to be dysfunctional or nonfunctional? If so, how best to avoid them? \hypothesis{} npm projects' dependencies are seldom specified by distribution tags and removal of distribution tags from npm dependency resolution metadata, with automatic addition of a mock \code{latest} tag as mentioned in~\ref{sec:paradigm-3}, is expected to cause few dependency resolution failures. Is this a valid assumption? \question{} Can we deliver a prototype that performs an npm project's dependency resolution as proposed in paradigms 3 and 4? \end{description} To verify and answer these, an experiment was conducted which involved automated build attempts of top npm projects, selected by their position in the npm Registry package rankings. The selected set consisted of projects belonging to the first $200$ of either the most popular \code{dependencies} or \code{devDependencies} as of \tresholdDate{}. Due to some overlap between the rankings, the actual size of the set was \allSelectedCount{}. The build procedure, described in the following subsection, was designed with the help of a trial-and-error approach. \section{Method and environment} The experiment was conducted on an x86\_64 machine. For details, see Listing~\ref{lst:cpu-info}. All operations were performed under version 5.15.0 of the Linux kernel. All filesystem operations were backed by an ext4 filesystem. No effort was made to employ means like disorderfs, described in~\cite{DBLP:journals/corr/abs-2104-06020}, because this work is not concerned with eliminating the traditional sources of nondeterminism. \lstinputlisting[ float=htpb, caption={Details of the processor used during the experiment, as reported by the cpuinfo utility.}, label=lst:cpu-info ]{cpu-info.txt} The diagram in Figure \ref{fig:experiment-activity} describes the flow of activities during the testing of a single npm project.
In the diagram, start and end are denoted by a filled circle and a circle with a white ring inside, respectively. Flow branches are represented by hexagons and merges -- by rhombuses. The particular operations present in the diagram are described further below.

\begin{figure}[htpb]
  \centering
  \includesvg[width=\linewidth,inkscapelatex=false]{experiment-activity.svg}
  \caption{Activity diagram describing the experiment as performed on each tested npm project.}
  \label{fig:experiment-activity}
\end{figure}

\subsection{Containerized environment creation}

For each tested project, the version to build was first selected as its highest non-pre-release package version published before \tresholdDate{}. The version publishing dates consulted were part of the package metadata downloaded from the npm Registry. For the selected version, the relevant Git repository URL and -- where possible -- the release's Git commit hash were extracted from the available metadata. The URL was sometimes present with a \code{git+} or \code{git:} prefix, which had to be dropped. The repository at the learned URL was then cloned to a local directory, with submodules included through the use of Git's \code{--recurse-submodules} option.

Upon successful retrieval of source repository contents, a semi-isolated environment, based on Linux containers, was created. Upon creation, the environment only had access to a minimal collection of software. It comprised
\begin{itemize}
\item the Node.js runtime, including the bundled npm application,
\item the Git version control tool,
\item the GNU Bash shell, which served as the POSIX-compliant shell used by the \code{exec} function of Node.js,
\item GNU Coreutils,
\item GNU which, invoked by the experiment's code,
\item GNU Guile and several Guile libraries, serving as the driver for the experiment's code, and
\item dependencies of the above, e.g., a C library.
\end{itemize}
The environment was created as a container shell managed by GNU Guix. The version of GNU Guix used was built from Git revision \tresholdGuixCommit{}, the last one before \tresholdDate{}. It featured the Node.js runtime in version 22.14.0, npm in version 10.9.2, and Git in version 2.49.0. Inside the environment, the applications listed above were available through the \code{PATH} variable and also symlinked under the \code{/bin} directory through the use of the \code{--emulate-fhs} option of \code{guix shell}.

The environment had no direct access to the outside network, allowing us to state that the experiment's results reflect the behavior of a hermetic build process. Network isolation also helped make sure that no dependency was installed ``on the side'', without being recorded in the project's lockfile. The environment was also isolated filesystem-wise, with specially prepared directories shared between the environment and the host. They were shared read-write or read-only, according to needs. Shared directories allowed the container's guest to
\begin{enumerate}
\item access the code used to drive the experiment,
\item access the npm project's cloned repository,
\item request npm packages' metadata and files from the host through named fifos,
\item receive them as files, and
\item persist lockfiles and final package files generated during build, for later inspection.
\end{enumerate}

\subsection{Source code checkout preparation}

Inside the just-created environment, a checkout of the npm project's appropriate revision was attempted in the following ways.
\begin{enumerate}
\item By switching to the commit hash previously extracted from the package metadata.
\item By switching to a Git tag identical -- as a string -- to the version being built.
\item By switching to a Git tag identical to the version being built, prefixed with the letter ``v''. E.g., for a project version 3.2.2 a switch to Git tag \code{v3.2.2} would be attempted.
\end{enumerate}
This sequence of tries was chosen based on findings from initial manual experiments and also on prior knowledge of common developer practices. In particular, it was found that a Git commit hash is not always advertised for a given npm package version. When it is, it sometimes corresponds to a revision that was never pushed to the project's public repository. This appears to be most often caused by automated release publishing software that makes a local commit as part of its operation. It was decided that in both cases -- of an unknown and of a nonexistent commit hash -- it is best to fall back to probable Git tags.

If directories named \code{node\_modules} or \code{dist} existed in a successfully checked-out source repository, they were deleted before the actual build attempt. These directories are used to store an npm project's installed dependencies and generated files, respectively. Although they are sometimes checked into version control, they are not sources per se and a hygienic npm project build should be performed without them.

It is worth noting that every build was conducted inside a full Git repository checkout, with access to the \code{.git} directory containing the project's history. This is unlike the practice of GNU Guix, Debian, and many other distributions, where build inputs typically do not include any version control metadata. The decision was made based on the following considerations.
\begin{enumerate}
\item Build procedures that rely on version control metadata most often use it for side tasks like the generation of a software authors list. These tasks are not highly relevant to our stated questions, but their failures could decrease the number of successful package builds that we seek to further analyze.
\item In actual distribution software packaging, the build process's reliance on version control metadata is considered easy to solve compared to the issues of dependencies.
\item While the availability of version control metadata could theoretically ease smuggling of backdoor code in XZ-style attacks, it would hardly be practical -- the backdoor would need to be somehow retrieved from version control history and invoked, in a hard-to-notice way. Building with full version control metadata is therefore a secure enough approach to be suggested for adoption by distributions.
\end{enumerate}

\subsection{Dependency resolution in a network-isolated environment}

Dependency resolution was performed with the help of a dummy \code{npm uninstall} command, as shown in Listing~\ref{lst:npm-uninstall}. The options used made npm
\begin{itemize}
\item refrain from attempting network requests unrelated to the actual dependency resolution,
\item refrain from actually installing the resolved dependencies or running their hooks,
\item update the project's lockfile to the current format if an older one was encountered, and
\item make requests to a local mock of npm's repository.
\end{itemize}
The command either created the \code{package-lock.json} file from scratch, wrote a new version of it based on an existing lockfile found, or left it unchanged. The latter happened whenever the existing \code{package-lock.json} was already in sync with the dependency constraints specified in the project's \code{package.json}.
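For orientation, a minimal sketch of an \code{npm uninstall} invocation with the properties listed earlier in this subsection could take the following shape. It is only an illustration, distinct from the actual command of Listing~\ref{lst:npm-uninstall}; the sketch assumes npm's standard configuration flags \code{-}\code{-package-lock-only}, \code{-}\code{-ignore-scripts}, \code{-}\code{-no-audit}, \code{-}\code{-no-fund}, and \code{-}\code{-registry}, and the package name passed to the command is a hypothetical placeholder that merely triggers a resolution run.

\begin{lstlisting}[language=shell-command, numbers=none]
npm uninstall \
    --package-lock-only \
    --ignore-scripts \
    --no-audit \
    --no-fund \
    --registry=http://localhost:8080/ \
    msc-experiment-nonexistent-package
\end{lstlisting}

With \code{-}\code{-package-lock-only}, npm only updates the lockfile without populating \code{node\_modules}, while \code{-}\code{-registry} redirects registry requests to the local mock described further below.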
It can be noted that npm would also automatically use an \code{npm-shrinkwrap.json} file over \code{package-lock.json} if the former were present. However, this was not the case for any of the npm projects tested.

\lstinputlisting[
  float=htpb,
  language=shell-command,
  caption=The npm command used to produce an up-to-date lockfile.,
  label=lst:npm-uninstall
]{npm-uninstall.txt}

For projects that utilize workspaces, an attempt was made to also add the workspace-related options \code{-}\code{-workspaces}, \code{-}\code{-include-workspace-root}, and \code{-}\code{-workspace} as appropriate to \code{npm uninstall} and subsequent npm invocations. Workspaces are a feature that allows multiple subprojects to be developed in subdirectories of a single npm parent project, with the parent and each subproject having its own \code{package.json} file. Despite the effort, all workspaced projects that were tested failed to build for other reasons. Several tested projects were found to use workspaces for smaller satellite utilities while having the project's main package described by the \code{package.json} file in the root directory of the repository. Those were built without any workspace-specific npm options.

A minimal server, written for this experiment, listened for HTTP requests on port 8080 inside the network-isolated build environment. It received npm package metadata requests and passed the requested package names via a fifo to a service running outside the container. The service downloaded the metadata and filtered it to only contain information about package versions published before \tresholdDate{}. The original npm distribution tags were stripped, but a mock \code{latest} tag was added for every package, pointing at its newest non-pre-release version from before \tresholdDate{}. The pruned pieces of metadata were supplied to the guest-side server for use as responses to the npm tool.

\subsection{Remaining build steps}

After successful dependency resolution, packages listed in the lockfile were installed with the help of an \code{npm ci} command, as shown in Listing~\ref{lst:npm-ci}. Analogously to the dependency resolution step, package file requests were sent through the HTTP protocol to the local port 8080 and were handled by the guest-side server, which in turn relied on the host-side service to perform the actual downloads on the guest's behalf.

\lstinputlisting[
  float=htpb,
  language=shell-command,
  caption=The npm command used to install dependencies.,
  label=lst:npm-ci
]{npm-ci.txt}

Successful installation of dependencies was followed by an invocation of a \code{build} action that npm projects conventionally define in their \code{package.json} files. The command used is shown in Listing~\ref{lst:npm-run-build}.

\lstinputlisting[
  float=htpb,
  language=shell-command,
  caption=The npm command used to invoke project-specific build operations.,
  label=lst:npm-run-build
]{npm-run-build.txt}

At this point, the \code{package-lock.json} file was copied to a subdirectory of the results directory to be persisted after the experiment's end. The same was done during subsequent builds, described further below, which involved modifications to the dependency tree. The collected lockfiles later allowed calculation of dependency tree sizes. When dependency tree modifications were found to cause changes to the built package, these lockfiles were also useful in understanding the exact reasons behind those changes.

As of \tresholdDate{}, built npm packages are distributed as \code{.tgz} archive files.
In the jargon they are called ``tarballs'' and in the case of npm packages they are compressed using the gzip algorithm. An \code{npm pack} command exists which can produce such an archive from a project's files. Although the same could be achieved with a traditional tar program, the npm command is convenient because -- among other things -- it automatically omits unneeded files like the \code{node\_modules} directory. The exact form of the command used to persist the built package is shown in Listing~\ref{lst:npm-pack}.

\lstinputlisting[
  float=htpb,
  language=shell-command,
  caption=The npm command used to create the built package file.,
  label=lst:npm-pack
]{npm-pack.txt}

\subsection{Repeated builds without upstream lockfiles}

For a project that was successfully built with the described procedure, the process was repeated with alterations. Upon each repetition, the repository was brought to a clean state and had any added Git hooks removed. However, re-creation of the entire semi-isolated environment was deemed unnecessary for the purpose of the experiment. After the repository was cleaned, each repeated build started with the Git revision checkout attempts described earlier.

The first alteration of the build was the removal of existing lockfiles recognized by npm or its alternatives: \code{npm-shrinkwrap.json}, \code{package-lock.json}, \code{yarn.lock}, and \code{pnpm-lock.yaml}. It happened right after the removal of version-controlled \code{node\_modules} and \code{dist} directories. The removal of lockfiles was done to force a full dependency resolution. If successful, the build in this form was performed twice to check if dependency resolution and lockfile generation in npm suffer from obvious nondeterminism issues. Additionally, all later builds of the project also involved the removal of existing lockfiles, besides other alterations.

\subsection{Elimination of unnecessary direct dependencies}

Each project known to build successfully with and without the removal of its version-controlled lockfile -- if any -- was tested further. The experiment checked whether it had the ability to build with each of its direct dependencies -- tried in reverse alphabetical order -- removed. E.g., a project with nine direct dependencies specified in its \code{package.json} -- including those listed as \code{dependencies}, \code{devDependencies}, and \code{optionalDependencies} but not \code{peerDependencies} -- was built nine times, each time with a different direct dependency newly removed. The build was considered successful when the npm commands all finished with zero status. After each such successful build, the tested dependency was recorded as unnecessary and was also removed in all subsequent build attempts, together with the dependency being tested in a given attempt. E.g., if five out of the first eight dependencies were found to be unnecessary, then the subsequent build was performed with the ninth dependency plus those five removed, i.e., a total of six dependencies were removed in that build.

The removal consisted of erasing the dependency's entry in the project's \code{package.json} file right after the lockfile deletion. However, the original \code{package.json} contents were always recorded and restored before the \code{npm pack} invocation. This was done to make the built package tarballs -- each of which contains a copy of the \code{package.json} -- easier to compare for other differences.
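For illustration, consider a hypothetical project whose \code{package.json} declares the dependencies shown below (both package names are made up for this example). Removing the direct development dependency \code{example-build-plugin} during a test build amounted to deleting its single entry from the file, while the original file was restored in full before \code{npm pack} was run.

\begin{lstlisting}[language=package-lock-json, numbers=none]
{
  "name": "example-project",
  "version": "1.0.0",
  "dependencies": {
    "example-runtime-helper": "^1.4.0"
  },
  "devDependencies": {
    "example-build-plugin": "^2.0.0"
  }
}
\end{lstlisting}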
Interestingly, for some projects \code{npm pack} did not place the \code{package.json} inside the tarball verbatim and instead generated a variant of that file with some fields changed in a way custom to the project. One such case, concerning the \code{@testing-library/user-event} package, is discussed in~\ref{sub:apparently-disfunctional-pkgs}.

All later builds of the project also involved the removal of dependencies identified at this point and the described restoration of the original \code{package.json} file.

\subsection{Elimination of unnecessary indirect dependencies}

With all apparently-unnecessary direct dependencies identified, the remaining indirect dependencies were tested. Because it is not straightforward to forcibly remove an indirect dependency from an npm project's dependency tree, a choice was made to instead attempt ``dummifying'' it. The npm feature of overrides -- mentioned in~\ref{sub:effects-of-semver} -- was used to force npm's resolution algorithm to always select a mocked, dummy version ``0.0.0-msc-experiment-dummy'' of a given dependency. At the same time, for the dependency package meant to be dummified, the local server providing packages' files and metadata on port 8080 would not respond with that package's real metadata. Instead, it would give a response indicating that the only available version of that package is the dummy version, which has no dependencies of its own. Additionally, it would serve the corresponding dummy package tarball with only minimal contents.

This process of identifying a project's unnecessary indirect dependencies was analogous to that concerning direct dependencies. It involved multiple builds -- more than a thousand in the case of one npm project tested. In each build a single tested indirect dependency -- together with the unnecessary indirect dependencies identified previously -- was dummified. Each time the overrides were added to the project's clean \code{package.json} file. The addition of overrides was carried out together with the removal of unnecessary direct dependencies from \code{package.json}. All of the build's npm commands had to finish with zero status for the tested dependency to be assumed dummifiable. Each time the \code{package-lock.json} from the last successful build was consulted to determine the next dependency to test. Applicable dependencies were tried in reverse alphabetical order. All later builds of the project also involved the dummification of indirect dependencies identified at this point.

During the entire experiment, whenever a dependency to override already had an upstream override specified in \code{package.json}, the original override was removed.

\subsection{Elimination of dependency conflicts}

Even after the elimination of unnecessary direct and indirect dependencies, a project's dependency tree could still contain extraneous conflicting dependencies. Subsequent builds were carried out to forcibly remove those conflicts where possible, utilizing overrides. For every dependency that occurred multiple times in multiple versions in the tree, a build attempt was made with an override which forced that package to be always used in the same, single version.
\begin{itemize}
\item If it happened to be both a direct and indirect dependency of the project -- it was overridden with the version that was previously used to satisfy the project's direct dependency.
\item If it was only an indirect dependency -- it was overridden with the highest of the versions in which it previously appeared.
\end{itemize}

Just like before, the build was repeated to identify every dependency conflict that -- when forcibly removed -- does not cause any npm invocation to finish with non-zero status.

\section{Build attempt results}
\label{sec:build-attempt-results}

Two projects were found not to actually exist as real pieces of software. I.e., their npm packages were placeholders. Another \skippedDueToLimitationsCount{} projects could not be tested due to limitations of the experiment's environment -- they used dependency packages that are distributed through servers other than the official npm Registry. This made the npm tool attempt to download these directly, which failed in a network-isolated environment.

The results from build attempts of the final \allTestedCount{} projects are presented in Figure~\ref{fig:status-counts}. Different types of failures were classified based on the first error reported in the build attempt. This means that, e.g., a project with unresolvable dependencies and a missing \code{build} action was classified as failing at the dependency resolution step.

\begin{figure}[htpb]
  \centering
  \includesvg[width=\linewidth,inkscapelatex=false]{status-counts.svg}
  \caption{Statuses of automated hermetized builds of top npm projects.}
  \label{fig:status-counts}
\end{figure}

\subsection{Projects whose source repositories failed to be cloned}

For projects in this category, sources could not be automatically retrieved. Either no repository URL was included in the published npm metadata of the package version or the published URL was not valid. Some packages were found to provide SSH URLs to their projects' GitHub repositories. Such URLs could not be used for anonymous cloning, despite the repositories themselves being -- at least in some cases -- public and anonymously cloneable through HTTP. A sample Git error message is presented in Listing~\ref{lst:ssh-clone-fail}. It was printed upon an attempt to use the \code{ssh://git@github.com/sinonjs/sinon.git} URL in a \code{git clone} command.

\lstinputlisting[
  float=htpb,
  caption=Error reported by Git upon an attempt to clone a repository using an SSH URL.,
  label=lst:ssh-clone-fail,
  numbers=none
]{ssh-clone-fail.txt}

There was also a single case where an HTTP URL pointed at a repository that no longer existed. Other interesting unworkable URLs were ones with a branch name appended\footnote{e.g., \code{https://github.com/emotion-js/emotion.git\#main}}. Some unworkable URLs also pointed to web pages of repositories' subdirectories\footnote{e.g., \code{https://github.com/babel/babel/tree/master/packages/babel-core/}}. In the vast majority of cases, a correction of the URL with the help of a simple regular expression could be attempted. Interestingly, none of the tested projects were found to use a VCS other than Git.

\subsection{Projects whose relevant source control revisions were not found}

For projects in this category, the Git source repository could be cloned but it contained neither the commit specified in the package's metadata nor a tag corresponding to the version number being built. Reasons included
\begin{itemize}
\item VCS revisions being tagged differently, e.g., \code{PACKAGE-NAME@VERSION},
\item a particular version's tag being missing, and
\item tags not being used at all.
\end{itemize}

\subsection{Projects that do not follow the conventions}
\label{sub:packages-not-following-conventions}

Projects in this category either lacked a \code{package.json} file in the repository's root or lacked a \code{build} action.
The size of this category seems to suggest that the conventions which we and others \cite{goswami-reproducibility} rely upon are very loosely followed. However, some projects classified here are trivial ones that simply do not require any operations to be performed as part of a \code{build} action. For example, the package \code{semver}, as distributed through the npm Registry, was found to only contain files that are present in its project's source repository. I.e., none of the files were created or modified as part of the build process performed by that project's developers. The files in \code{semver}'s built package archive were found identical to those in the relevant revision of the source repository, with the repository additionally holding some other files, e.g., test scripts. \code{semver} indeed requires no compilation or similar build steps. It has no need for a \code{build} action and therefore does not have one specified in its \code{package.json} file.

\subsection{Projects with dependency resolution failures}
\label{sub:resolution-failures}

Projects in this category had the \code{npm uninstall} command fail to create or update the lockfile. The predominant source of failure is related to peer dependency resolution, with a sample error message shown in Listing~\ref{lst:eresolve-error}. Simplifying, peer dependencies are a feature through which developers can forbid npm from creating a dependency conflict with a particular package. Typically, an add-on package specifies its base package -- which it enhances -- as its peer dependency. If the base package were specified as the add-on's regular dependency, npm's resolution algorithm could make the add-on package use its own copy of that base package. This is typically not the behavior the developer or user wants. Peer dependencies are a means to prevent it.

\lstinputlisting[
  float=htpb,
  caption=Error reported upon peer dependency resolution failure during \code{ts-node} project build.,
  label=lst:eresolve-error
]{eresolve-error.txt}

The exact behavior of peer dependencies has changed over the history of npm. One alternative package manager for the npm ecosystem -- Yarn -- is also known for behaving differently from npm in some situations. It is suspected that most of the projects in this category could have their dependencies resolved successfully with an older version of npm or with Yarn. It was found that \failedToResolveAndUsingYarnCount{} packages in this category do have a \code{yarn.lock} file in the VCS, indicating that their developers likely use Yarn rather than npm.

\subsection{Projects with invalid upstream lockfiles}

The \code{npm uninstall} command was invoked during every project build to make sure an up-to-date lockfile is in place. Despite that, for two packages a lockfile was left behind that \code{npm ci} later reported as invalid due to being out of sync with the project's \code{package.json}. One of these projects had a preexisting \code{package-lock.json} file and the other had a \code{yarn.lock} file\footnote{npm also reads a \code{yarn.lock} when no other lockfile is present}.

\subsection{Projects that expect network access to build}
\label{sub:expect-network-access}

Projects in this category failed to build due to unsuccessful network request attempts other than the attempts mentioned at the beginning of~\ref{sec:build-attempt-results}. The majority of build failures in this category occurred when a project's development dependency tried to download a web browser binary for browser-based tests.
Examples of other non-npm resources that projects tried to download were font files from Google Fonts and sources for automated native compilation of a library whose Node.js bindings package was being installed. It can be stated that network accesses during npm project builds are commonly made to facilitate installation of architecture-specific software binaries, as these are inconvenient to distribute through the architecture-agnostic npm Registry.

\subsection{Projects that require a build tool other than npm}

Projects in this category are known to require either Yarn or pnpm to build. They could be classified with certainty because either
\begin{itemize}
\item their \code{package.json} files contained special URLs or package names that npm could not handle, or
\item their build processes printed messages that explicitly informed the developer about the need to use a particular tool.
\end{itemize}
There are many more projects which likely rely on Yarn or pnpm but could not be classified here with certainty; see~\ref{sub:resolution-failures}.

\subsection{Projects with additional non-npm dependencies}

Projects in this category need additional tools that are not installable through npm. Unlike projects mentioned in~\ref{sub:expect-network-access}, these rely on the developer to install the additional tool. Experiment logs indicated failures upon searching for a Python executable and for configuration files of popular shells.

\subsection{Projects with other build failures}

Projects in this category failed to build due to reasons other than those discussed up to this point. Failures occurred due to problems like missing modules, a missing variable, and an operation hanging indefinitely.

\subsection{Projects that could be built only when using upstream lockfiles}

Projects in this category failed to build only after their upstream lockfiles were removed. After seemingly successful dependency resolution, errors were raised during TypeScript compilation. The errors almost certainly resulted from newer versions of the projects' dependencies being used. This occurred despite the use of Semantic Versioning and despite the dependency constraints declared by projects being respected.

\subsection{Projects built with both upstream and re-generated lockfiles}

Packages in this category are considered to have been built successfully, because all npm command invocations from the first two builds finished with zero status. There is no guarantee that the packages built are fully functional. For example, some projects like \code{@testing-library/react} rely on an additional tool called semantic-release, which is not invoked as part of \code{npm run build}. That tool is responsible for analyzing the project's change history and determining the right version number to be recorded in the project's \code{package.json} file~\cite{semantic-release}. When its use is omitted, the built package is reported as having a placeholder version, e.g., ``0.0.0-semantically-released''.

It is expected that a more polished and defect-free build process would often involve a dependency tree of several more or several fewer packages than in this experiment. Nonetheless, it was assumed that the dependency sets found necessary for successful \code{npm ci} and \code{npm run build} invocations do represent the characteristics of the projects well enough. Results presented through the rest of this chapter concern the dependency trees of projects from this very category.
\section{Dependency trees after removals of dependencies and their conflicts}
\label{sec:trees-after-pruning}

The sizes of the original dependency trees, produced in project builds with upstream lockfiles removed, are shown in Figure~\ref{fig:tree-size-stats}, together with the sizes of pruned trees. Each pair of boxes represents the experiment at a different stage. The left box of each pair shows the average number of project dependencies where all distinct versions of the same package are counted. E.g., an npm package \code{foo} that occurs in a dependency tree three times as \code{foo@1.2.3}, \code{foo@2.1.0}, and again \code{foo@2.1.0} is counted as two. The right box of each pair shows the average number of dependencies with all versions of a single package counted as one. E.g., the \code{foo} introduced before is now counted as one. The standard deviation of the sample is additionally plotted over every box. Individual projects' dependency tree sizes are plotted as circles over every box. \inDebianCount{} projects were found to also have their corresponding packages present in Debian Bookworm as of \debianTresholdDate{}. They are represented by filled, black circles. No clear relation between dependency tree sizes and presence in Debian can be seen at any stage of the experiment. Also consult Figure \ref{fig:tree-size-stats-no-bc} for a variant of this plot that omits builds of packages which appear to be nonfunctional due to aggressive dependency elimination.

\begin{figure}[htpb]
  \centering
  \includesvg[width=\linewidth,inkscapelatex=false]{tree-size-stats.svg}
  \caption{Dependency tree sizes of built npm projects.}
  \label{fig:tree-size-stats}
\end{figure}

\section{Dependency conflict counts}
\label{sec:dep-conflict-counts}

Projects were further categorized by the number of dependency conflicts that could not be removed with the method used. The categories are visualized in Figure~\ref{fig:unflattened-multiver-counts}. Counts of all projects and counts of projects having corresponding packages in Debian are shown. Also consult Figure \ref{fig:unflattened-multiver-counts-no-bc} for a variant of this plot that omits builds of packages which appear to be nonfunctional due to aggressive dependency elimination. As can be seen, most of the projects had few to no remaining dependency conflicts. Once again, there was no clear relation between the number of remaining dependency conflicts and presence in Debian. Given that distribution's norms, this suggests that the authors of the Debian packaging likely managed to further remove some conflicts that could not be eliminated with the experiment's method. They possibly used more invasive methods like source code patching.

\begin{figure}[htpb]
  \centering
  \includesvg[width=\linewidth,inkscapelatex=false]{unflattened-multiver-counts.svg}
  \caption{Counts of built projects with different numbers of unremovable dependency conflicts.}
  \label{fig:unflattened-multiver-counts}
\end{figure}

\section{Differences in build outputs produced during the experiment}

The repeated builds with upstream lockfiles removed had their produced package tarballs and generated \code{package-lock.json} files compared. The files produced on two build runs were found identical in the case of every successfully built project\footnote{npm authors should be credited for making \code{npm pack} produce tarballs without nondeterministic timestamps}. This does not yet guarantee that the builds and dependency resolutions are reproducible.
However, it does indicate that differences found after dependency removals, etc.\ are likely the effect of those alterations and not manifestations of build nondeterminism.

It was found that \withAnyTarballsDifferentCount{} out of \builtSuccessfullyCount{} successfully built projects had their package tarballs differ as the result of either dependency removals, dummifications, or conflict removals. All these cases were manually inspected. Two differences were found to be caused by reordered \code{package.json} entries and appear to be mere artifacts of this experiment's method of restoring the project's original \code{package.json} file before the \code{npm pack} invocation. Some of the more interesting cases are discussed below.

\subsection{Use of a different dependency version}
\label{sub:use-of-different-dep-ver}

Several times an alteration of the build process allowed npm's resolution algorithm to use a newer version of a dependency, which then caused the generation of different but still correct code. One example is the \code{concurrently} console application written in TypeScript. The package \code{typescript}, providing the TypeScript compiler, was specified as its direct development dependency. During the experiment this direct dependency was removed, but the package \code{typescript} still made its way into the dependency tree due to being required by another dependency -- \code{@hirez\_io/observer-spy}. Its constraint on \code{typescript}'s version was looser than that present before, causing version 5.8.3 to be used instead of the former 5.2.2. A sample of changes caused by the use of that newer TypeScript compiler is presented in Listing~\ref{lst:newer-typescript-changes}.

\lstinputlisting[
  float=htpb,
  language=diffoscope,
  caption=Excerpt from diffoscope's report of differences in built \code{concurrently} package tarballs.,
  label=lst:newer-typescript-changes,
  numbers=none
]{newer-typescript-changes.diffoscope}

It is worth noting that the dependency \code{@hirez\_io/observer-spy} -- even if not necessary by itself -- could not be eliminated with this experiment's method.

\subsection{Inlining of a dependency}
\label{sub:inlining-of-a-dep}

One similar case was that of the built \code{axios} package, which -- besides also showing generated code differences resulting from a changed compiler/bundler version -- had its dependency \code{proxy-from-env} treated differently, despite it always occurring in the same version 1.1.0. Initially, \code{proxy-from-env} was specified as \code{axios}' runtime dependency and was merely referenced from the code being generated during build. When, as part of the experiment, \code{proxy-from-env} was removed as the project's direct dependency, it remained present in the dependency tree due to being required by a certain development dependency. \code{proxy-from-env} was therefore itself flagged as an indirect development dependency, which made the bundler treat it differently and inline it in \code{axios}' generated code.

\subsection{Digest included in the generated package}

If a project's generated files include a hash of its dependency specifications or a similar derived value, it is an obvious source of difference in this experiment. One such project is \code{ts-jest}, which places a digest like \code{4ec902e59f1ac29ff410d624dcccf9b192920639} in a \code{.ts-jest-digest} file inside its package tarball.
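Differences like the ones described in this section were identified from diffoscope reports generated for pairs of persisted tarballs. A comparison similar to the excerpts shown in the listings of this chapter can be obtained with an invocation along the following lines; the file paths are hypothetical and only diffoscope's plain-text report output is assumed.

\begin{lstlisting}[language=shell-command, numbers=none]
diffoscope \
    --text differences-report.txt \
    results/original-build/package.tgz \
    results/pruned-build/package.tgz
\end{lstlisting}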
\subsection{Apparently dysfunctional built packages}
\label{sub:apparently-disfunctional-pkgs}

Several times a change to a project's dependency tree did actually cause a significant change to the build output. In multiple cases, a removed package could not be found by a bundler tool called Rollup, which then treated it as an ``external'' dependency that need not be bundled. Rollup merely issued a warning about a reference to the now-absent code module and proceeded without it. An example of this can be seen in Listing \ref{lst:warning-module-as-external} with an excerpt from the log of the \code{rollup-plugin-typescript2} project build. \code{rollup-plugin-typescript2} specified the package \code{object-hash} as a development dependency and had its code included in an amalgamated script file generated by Rollup. After \code{object-hash} was removed, the invocation of \code{rollup-plugin-typescript2}'s build action still finished with zero status, with the generated amalgamated script file being smaller by the size of the missing dependency. If the \code{rollup-plugin-typescript2} package built this way were to be later used, its code would likely encounter an error when trying to import the \code{object-hash} module.

\lstinputlisting[
  float=htpb,
  caption=The output of \code{npm run build} invocation with a missing dependency reported by Rollup.,
  label=lst:warning-module-as-external
]{warning-module-as-external.txt}

A single interesting case was that of the \code{@testing-library/user-event} project and the \code{package.json} file generated for its package tarballs. Several -- seemingly vital -- \code{package.json} keys were no longer present after the project's \code{typescript} dependency was removed. The change caused by \code{typescript}'s removal is shown in Listing~\ref{lst:removed-typescript-changes}.

\lstinputlisting[
  float=htpb,
  language=diffoscope,
  caption=Excerpt from diffoscope's report of differences in \code{package.json} files inside built \code{@testing-library/user-event} package tarballs.,
  label=lst:removed-typescript-changes,
  numbers=none
]{removed-typescript-changes.diffoscope}

In some cases, as subsequent dependencies of a project were eliminated, a bundler tool combined the dependencies in a different order, making amalgamated scripts difficult to compare with diff-like tools. There were also cases where the size of individual generated files would increase by an order of magnitude or even go up and down during a series of builds. One suspected reason for increasing amalgamated script size -- besides the one discussed in \ref{sub:inlining-of-a-dep} -- is polyfilling. It is the action of replacing newer JavaScript language constructs used by programmers with code that is compatible with older language runtimes. An older version of a build tool would typically aim to support more legacy runtimes, applying more polyfills and increasing the produced script sizes as a result.

Nonetheless, for the purpose of this experiment, whenever the nature and effect of changes in a build output were unclear, the package was considered one of the total of eight dysfunctional packages.

\subsection{Updated statistics}

We are interested in the relation between a project's dependency tree characteristics and its packageability for software distributions. The statistics presented in \ref{sec:trees-after-pruning} and \ref{sec:dep-conflict-counts} included eight projects with assumed dysfunctionalities introduced by this very experiment. Three of these do have corresponding Debian packages.
As these eight cases could make our results misleading, the statistics are now presented again, with the problematic projects not taken into account. Dependency tree sizes at various stages of the experiment are presented in Figure \ref{fig:tree-size-stats-no-bc}. Projects' categorization by the number of remaining dependency conflicts is visualized in Figure~\ref{fig:unflattened-multiver-counts-no-bc}.

\begin{figure}[htpb]
  \centering
  \includesvg[width=\linewidth,inkscapelatex=false]{tree-size-stats-no-bc.svg}
  \caption{Dependency tree sizes of built npm projects. Packages which appear to be nonfunctional due to aggressive dependency elimination are not included.}
  \label{fig:tree-size-stats-no-bc}
\end{figure}

\begin{figure}[htpb]
  \centering
  \includesvg[width=\linewidth,inkscapelatex=false]{unflattened-multiver-counts-no-bc.svg}
  \caption{Counts of built projects with different numbers of unremovable dependency conflicts. Packages which appear to be nonfunctional due to aggressive dependency elimination are not included.}
  \label{fig:unflattened-multiver-counts-no-bc}
\end{figure}

As one can see, even these ``cleaned'' results show no relation between a project's dependency tree size and its Debian presence.

\chapter{Conclusions}

The results of the conducted experiment allow the questions stated at the beginning of \ref{chp:experiment} to be answered.

\section{Naming the main hindrance to packaging the npm ecosystem}
\label{sub:naming-the-hindrance}

We expected that huge dependency trees and the presence of conflicting dependencies are the major obstacles to packaging npm projects into reproducibility-focused software distributions. The experiment's results show the contrary. If this hypothesis were true, we would see npm projects with more complex dependency trees less frequently packaged into Debian -- but there is no such relation.

What, then, are the most likely reasons for the relatively small amount of software from the npm ecosystem in Debian and GNU Guix? For the latter, the challenge of bootstrapping several popular, self-depending build tools appears relevant. Five of these were mentioned in \ref{sub:self-depending-software}. As of \debianTresholdDate{}, four of them -- \code{typescript}, \code{@babel/core}, \code{rollup}, and \code{gulp} -- are present in Debian, albeit under different names. The Debian packages of all four were also found to depend on themselves to build. Since GNU Guix's policies make it more difficult to add self-depending packages to the distribution, this explains the drastically different coverage of the npm ecosystem by these system software distributions. Aside from that, the incompatibility of JavaScript developers' workflows with system software distribution packages -- as highlighted in~\ref{sec:inconvenience-of-distros} -- should be considered the major issue.

\section{Developer-supplied lockfile being infrequently necessary}

As found, only two tested projects could be built with an upstream lockfile but failed when it was re-generated. Meanwhile, \builtSuccessfullyCount{} packages were built successfully in both ways. This is consistent with the expectations. Repetition of the dependency resolution is not a likely source of build failures.

\section{Indispensability of direct and indirect npm build dependencies}

It was found that a non-negligible subset of both direct and indirect npm project dependencies is unnecessary for a successful build.
The effect of the removal of unnecessary direct dependencies can be assessed by comparing the leftmost two pairs of boxes in Figure~\ref{fig:tree-size-stats-no-bc}. There is on average an almost threefold reduction in dependency tree sizes, although with a huge variance. The average number of direct dependencies shrank from \allDirectDepsAvg{} with a sample standard deviation of \allDirectDepsStdDev{} to \necessaryDepsAvg{} with a sample standard deviation of~\necessaryDepsStdDev{}. Comparing the second and third pair of boxes in Figure \ref{fig:tree-size-stats-no-bc} shows that almost half of the projects' remaining indirect dependencies are also not necessary during the build, again with a huge variance. The experiment's automated method of determining the unnecessary dependencies was not perfect, as shown in~\ref{sub:use-of-different-dep-ver}, but it was sufficient to allow the conclusion that, during packaging, the elimination of both kinds of dependencies can be worth attempting.

\section{Typical dependency tree sizes of npm projects}
\label{sec:typical-dep-tree-sizes}

The sizes of built projects' original dependency trees ranged from \origTreeMin{} to \origTreeMax{}, with a sample standard deviation of~\origTreeStdDev{}, as shown in Figure~\ref{fig:tree-size-stats-no-bc}. With the experiment's method, it was possible to reduce the tree sizes roughly fivefold on average. The final dependency tree sizes of about $160$ are not drastically higher than those in other software ecosystems. For example, as of \debianTresholdDate{}, the Debian package \code{python-xrt} was found to be built in an environment with $180$ installed packages named \code{python-*}, as evidenced in its \code{.buildinfo} file.

It is worth noting that the numbers discussed here and in the following section might be representative of only the more complex npm packages. As explained in~\ref{sub:packages-not-following-conventions}, there can be many popular npm projects like \code{semver} that do not require an actual \code{build} action, likely have fewer declared dependencies, and likely only require the npm tool to create the package tarball.

\section{Frequency of dependency conflicts in npm projects}

Among the npm projects built successfully, only one had no conflicting dependencies in its original tree. This shows that dependency conflicts are indeed a normal and accepted thing in the npm ecosystem. In the original dependency trees, the count of dependencies in conflict averaged \origTreeMultiverAvg{}, with a sample standard deviation of~\origTreeMultiverStdDev{}. In the case of more than half of the projects, the conflicts were completely eliminated. Several cases of unremovable conflicts remained, as can be seen in Figure~\ref{fig:unflattened-multiver-counts-no-bc}. However, this part of the results should not be considered entirely representative of the real state of affairs, as explained in~\ref{sec:dep-conflict-counts}. It is expected that through manual work the build process of many more npm packages can be made free of dependency conflicts.

\section{Package dysfunctionality caused by dependency tree reduction}

As witnessed in~\ref{sub:apparently-disfunctional-pkgs}, there is a non-negligible number of cases where forced removal of direct or indirect dependencies causes the built package to lack important pieces of code or to have other deficiencies. Many of these problems were caused by the Rollup bundler's liberal treatment of missing code modules and could be automatically detected, for example by searching the build logs for specific strings, as sketched below.
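A minimal sketch of such log-based detection follows. It assumes that the relevant Rollup warnings contain a stable phrase resembling the one excerpted in Listing~\ref{lst:warning-module-as-external}; the search pattern and the log file name used here are hypothetical and would need to be adjusted to the actually observed output.

\begin{lstlisting}[language=shell-command, numbers=none]
grep -F "as an external dependency" npm-run-build.log \
    && echo "suspicious build: a module may have been silently left out"
\end{lstlisting}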
Nevertheless, the risk of building dysfunctional packages appears relatively high, which would not be acceptable if this experiment's method were to be used for the preparation of package recipes in a reproducibility-oriented software distribution. Since in more than half of all cases the diffoscope reports on built package differences were found comprehensible, it is advisable to manually investigate dependency removals whose effects are unclear.

Additionally, the method itself proved to have the weakness of allowing a removed direct dependency to still appear as an indirect dependency, as shown in \ref{sub:use-of-different-dep-ver} and~\ref{sub:inlining-of-a-dep}. Although this does not appear to have led to built packages' dysfunctionalities during the experiment, it leaves space for improvement. One simple solution would be to eliminate direct dependencies through dummification, as was already done with indirect ones.

\section{Relevance of npm package distribution tags for successful dependency resolution}

The dependency resolution failures described in \ref{sub:resolution-failures} were all manually analyzed and none was found to be caused by a dependency specification referencing a distribution tag omitted in the experiment. Four of the built projects were found to have a direct or indirect dependency specified by the \code{latest} tag. Among others, \code{typescript} -- the most popular npm package according to the ranking in~\ref{chp:most-popular-dev-deps} -- requires its direct development dependency \code{@types/node} to be in the version tagged \code{latest}. In conclusion, the special \code{latest} tag should be present in npm dependency metadata to avoid needless dependency resolution failures. Fortunately, it can usually be derived from the available package versions. All other tags can in the vast majority of cases be omitted from the dependency resolution metadata, removing the need to rely on external, mutable npm distribution tag information.

\section{Prototype for npm dependency resolution under Paradigm 3}

The experiment's environment resembled that proposed in \ref{sec:paradigm-3} for Paradigm 3 for hermeticity and reproducibility. The approach with a host-side service performing requests on behalf of the isolated guest was indeed workable. The experiment also showed that this prototype could benefit from an added ability to provide the guest npm process with dependency package files hosted on sites other than the npm Registry. This would require additional work to have npm's package tarball requests reach the locally-running service. A solution could involve configuring npm to use a TLS-enabled HTTP proxy such as mitmproxy~\cite{mitmproxy}. While burdensome, it should be workable.

At the same time, the obtained results did not contain the expected indicators that Paradigm 1 -- and Paradigm 2 with flat input metadata of the dependency resolution step -- is insufficient in practice for handling the complex npm ecosystem. This means that the new paradigms, as proposed in this work, are not necessary for further progress in the field of reproducible software builds. However, paradigms 3 and 4 can still prove useful in addressing the bootstrappability and developer workflow issues named in~\ref{sub:naming-the-hindrance}.

\chapter{Summary}

Throughout the course of this work it was found that the software industry shows some modest interest in reproducible builds. Practical difficulties in applying reproducibility to software projects hinder the popularization of this security measure.
Software developed around popular technologies must become easier to rebuild hermetically and to test for reproducibility. This will allow reproducible builds to be more broadly recommended and mandated.

Even though the concept of end user verification of build reproducibility offers an increase in security confidence, the years after 2018 saw little progress in its application. Due to the issues of rebuilder lag and occasional build nondeterminism, continuous tests performed independently of end users should be considered a more practically viable security measure.

Even when build reproducibility is aimed at and tested for, software distributions' approaches differ. The metadata used to reproduce Debian packages was found insufficient to also reproducibly verify the results of dependency resolutions that package builds relied on. This is not a severe vulnerability. However, it motivates increased interest in purely functional package managers, whose design excludes the possibility of such a ``verification hole'' occurring.

Contrary to intuition, the traditional software packaging scheme of Debian can be applicable even to npm software with complex dependency graphs. One could still attempt to utilize the suggested Paradigm 3 or 4 to replace the existing approaches of Debian and GNU Guix. However, such efforts would offer no certain benefit.

Although npm packages tend to have large dependency tree sizes and conflicting dependencies, this is not the ultimate reason for the almost zero coverage of the npm ecosystem by GNU Guix. Instead, the issues of package bootstrappability appear to be the determining factor. These, however, are practically solvable.

The packages in reproducible software distributions must be bridged with developers' preferred workflows. If this does not happen, the distributions will not only be slow in including software from the npm Registry and similar repositories. The software already in those distributions will also fail to bring the security benefits that it could.

As long speculated, many of the declared dependencies of a typical npm project are not needed to build it. It was found that many indirect dependencies are also unnecessary. Their omission is crucial both to simplify packaging into software distributions and to reduce the software supply chain attack surface.

\chapter{Future work}

During the analysis of npm project builds it was found that projects exist which require no code transformations during build. One such case is described in detail in~\ref{sub:packages-not-following-conventions}. If identified, projects from this category could be packaged hermetically and reproducibly with minimal effort. Automated identification of such projects could be a future research goal.

After the experiment, bootstrappability was named a likely major reason for the small coverage of the npm ecosystem by GNU Guix. The finding of viable and optimal bootstrap chains of npm packages could therefore be the subject of further research.

Paradigms 3 and 4 for hermeticity and reproducibility were proposed to address the issue of incomprehensibly complex dependency relations between npm packages. It was found that in the context of reproducible builds, the issue is resolvable even without employing these paradigms. However, it is yet to be checked -- possibly in a new work -- whether these paradigms can actually make the software packaging process less labor-intensive and therefore more efficient.

This work touches on the topic of securing the inputs of software builds.
The applicability of methods like the suggested canaries could be further investigated.

A subset of the tested projects did not have their VCS revisions tagged in the expected way. The tagging habits of developers and means of automated identification of VCS revisions corresponding to releases could be researched. A possible approach to solving the problem of missing VCS tags could involve comparing commit dates with package version release dates. If devised, a successful method would benefit VCS-based software packaging, which appears desirable in the light of the XZ backdoor.

npm developers are not incentivized to make their software easily bootstrappable. A proof of concept of Ken Thompson's Trusting Trust attack~\cite{Thompson:1984} could be presented for one or more popular npm packages. It could help raise awareness of the supply chain issues and make the community interested in rebuildability and ultimately bootstrappability. The PoC could be a self-implanting backdoor in one of the self-depending build tools.

\clearpage
\phantomsection{}
\addcontentsline{toc}{chapter}{\refname}
\printbibliography

\end{document}