;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2016 Ricardo Wurmus ;;; Copyright © 2016, 2017 Ludovic Courtès ;;; Copyright © 2017 Stefan Reichör ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see . (define-module (gnu packages logging) #:use-module ((guix licenses) #:prefix license
aboutsummaryrefslogtreecommitdiff
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2015 David Thompson <davet@gnu.org>
;;; Copyright © 2017-2019, 2022, 2023 Ludovic Courtès <ludo@gnu.org>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

(define-module (gnu build linux-container)
  #:use-module (ice-9 format)
  #:use-module (ice-9 match)
  #:use-module (ice-9 rdelim)
  #:use-module (srfi srfi-98)
  #:use-module (guix build utils)
  #:use-module (guix build syscalls)
  #:use-module (gnu system file-systems)          ;<file-system>
  #:use-module ((gnu build file-systems) #:select (mount-file-system))
  #:export (user-namespace-supported?
            unprivileged-user-namespace-supported?
            setgroups-supported?
            %namespaces
            run-container
            call-with-container
            container-excursion
            container-excursion*))

(define (user-namespace-supported?)
  "Return #t if user namespaces are supported on this system."
  (file-exists? "/proc/self/ns/user"))

(define (unprivileged-user-namespace-supported?)
  "Return #t if user namespaces can be created by unprivileged users."
  (let ((userns-file "/proc/sys/kernel/unprivileged_userns_clone"))
    (if (file-exists? userns-file)
        (eqv? #\1 (call-with-input-file userns-file read-char))
        #t)))

(define (setgroups-supported?)
  "Return #t if the setgroups proc file, introduced in Linux-libre 3.19,
exists."
  (file-exists? "/proc/self/setgroups"))

(define %namespaces
  '(cgroup mnt pid ipc uts user net))

(define (call-with-clean-exit thunk)
  "Apply THUNK, but exit with a status code of 1 if it fails."
  (dynamic-wind
    (const #t)
    (lambda ()
      (thunk)

      ;; XXX: Somehow we sometimes get EBADF from write(2) or close(2) upon
      ;; exit (coming from fd finalizers) when used by the Shepherd.  To work
      ;; around that, exit forcefully so fd finalizers don't have a chance to
      ;; run and fail.
      (primitive-_exit 0))
    (lambda ()
      (primitive-_exit 1))))

(define (purify-environment)
  "Unset all environment variables."
  (for-each unsetenv
            (match (get-environment-variables)
              (((names . _) ...) names))))

;; The container setup procedure closely resembles that of the Docker
;; specification:
;; https://raw.githubusercontent.com/docker/libcontainer/master/SPEC.md
(define* (mount-file-systems root mounts #:key mount-/sys? mount-/proc?)
  "Mount the essential file systems and the those in MOUNTS, a list of
<file-system> objects, relative to ROOT; then make ROOT the new root directory
for the process."
  (define (scope dir)
    (string-append root dir))

  (define (touch file-name)
    (call-with-output-file file-name (const #t)))

  (define (bind-mount src dest)
    (mount src dest "none" MS_BIND))

  ;; Like mount, but creates the mount point if it doesn't exist.
  (define* (mount* source target type #:optional (flags 0) options
                   #:key (update-mtab? #f))
    (mkdir-p target)
    (mount source target type flags options #:update-mtab? update-mtab?))

  ;; The container's file system is completely ephemeral, sans directories
  ;; bind-mounted from the host.
  (mount "none" root "tmpfs")

  ;; A proc mount requires a new pid namespace.
  (when mount-/proc?
    (mount* "none" (scope "/proc") "proc"
            (logior MS_NOEXEC MS_NOSUID MS_NODEV)))

  ;; A sysfs mount requires the user to have the CAP_SYS_ADMIN capability in
  ;; the current network namespace.
  (when mount-/sys?
    (mount* "none" (scope "/sys") "sysfs"
            (logior MS_NOEXEC MS_NOSUID MS_NODEV MS_RDONLY)))

  (mount* "none" (scope "/dev") "tmpfs"
          (logior MS_NOEXEC MS_STRICTATIME)
          "mode=755")

  ;; Create essential device nodes via bind-mounting them from the
  ;; host, because a process within a user namespace cannot create
  ;; device nodes.
  (for-each (lambda (device)
              (when (file-exists? device)
                ;; Create the mount point file.
                (touch (scope device))
                (bind-mount device (scope device))))
            '("/dev/null"
              "/