;;; GNU Guix --- Functional package management for GNU ;;; Copyright © 2015 David Thompson <davet@gnu.org> ;;; Copyright © 2016-2017, 2019-2023 Ludovic Courtès <ludo@gnu.org> ;;; Copyright © 2019 Arun Isaac <arunisaac@systemreboot.net> ;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il> ;;; Copyright © 2020 Google LLC ;;; Copyright © 2022 Ricardo Wurmus <rekado@elephly.net> ;;; Copyright © 2023 Pierre Langlois <pierre.langlois@gmx.com> ;;; ;;; This file is part of GNU Guix. ;;; ;;; GNU Guix is free software; you can redistribute it and/or modify it ;;; under the terms of the GNU General Public License as published by ;;; the Free Software Foundation; either version 3 of the License, or (at ;;; your option) any later version. ;;; ;;; GNU Guix is distributed in the hope that it will be useful, but ;;; WITHOUT ANY WARRANTY; without even the implied warranty of ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ;;; GNU General Public License for more details. ;;; ;;; You should have received a copy of the GNU General Public License ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. (define-module (gnu system linux-container) #:use-module (ice-9 match) #:use-module (srfi srfi-1) #:use-module (guix config) #:use-module (guix store) #:use-module (guix gexp) #:use-module (guix derivations) #:use-module (guix monads) #:use-module (guix modules) #:use-module (gnu build linux-container) #:use-module (gnu services) #:use-module (gnu services base) #:use-module (gnu services networking) #:use-module (gnu services shepherd) #:use-module (gnu system) #:use-module (gnu system file-systems) #:export (system-container containerized-operating-system container-script eval/container)) (define* (container-essential-services os #:key shared-network?) "Return a list of essential services corresponding to OS, a non-containerized OS. This procedure essentially strips essential services from OS that are needed on the bare metal and not in a container." (define base (remove (lambda (service) (memq (service-kind service) (cons* (service-kind %linux-bare-metal-service) firmware-service-type system-service-type (if shared-network? (list hosts-service-type) '())))) (operating-system-default-essential-services os))) (cons (service system-service-type `(("locale" ,(operating-system-locale-directory os)))) ;; If network is to be shared with the host, remove network ;; configuration files from etc-service. (if shared-network? (modify-services base (etc-service-type files => (remove (match-lambda ((filename _) (member filename (map basename %network-configuration-files)))) files))) base))) (define dummy-networking-service-type (shepherd-service-type 'dummy-networking (const (shepherd-service (documentation "Provide loopback and networking without actually doing anything.") (provision '(loopback networking)) (start #~(const #t)))) #f (description "Provide loopback and networking without actually doing anything. This service is used by guest systems running in containers, where networking support is provided by the host."))) (define %nscd-container-caches ;; Similar to %nscd-default-caches but with smaller cache sizes. This allows ;; many containers to coexist on the same machine without exhausting RAM. (map (lambda (cache) (nscd-cache (inherit cache) (max-database-size (expt 2 18)))) ;256KiB %nscd-default-caches)) (define* (containerized-operating-system os mappings #:key shared-network? (extra-file-systems '())) "Return an operating system based on OS for use in a Linux container environment. MAPPINGS is a list of <file-system-mapping> to realize in the containerized OS. EXTRA-FILE-SYSTEMS is a list of file systems to add to OS." (define user-file-systems (remove (lambda (fs) (let ((target (file-system-mount-point fs)) (source (file-system-device fs))) (or (string=? target (%store-prefix)) (string=? target "/") (and (string? source) (string-prefix? "/dev/" source)) (string-prefix? "/dev/" target) (string-prefix? "/sys/" target)))) (operating-system-file-systems os))) (define (mapping->fs fs) (file-system (inherit (file-system-mapping->bind-mount fs)) (needed-for-boot? #t))) (define services-to-drop ;; Service types to filter from the original operating-system. Some of ;; these make no sense in a container (e.g., those that access ;; /dev/tty[0-9]), while others just need to be reinstantiated with ;; different configs that are better suited to containers. (append (list console-font-service-type mingetty-service-type agetty-service-type) (if shared-network? ;; Replace these with dummy-networking-service-type below. (list static-networking-service-type dhcp-client-service-type network-manager-service-type connman-service-type) (list)))) (define services-to-add ;; Many Guix services depend on a 'networking' shepherd ;; service, so make sure to provide a dummy 'networking' ;; service when we are sure that networking is already set up ;; in the host and can be used. That prevents double setup. (if shared-network? (list (service dummy-networking-service-type)) '())) (operating-system (inherit os) (swap-devices '()) ; disable swap (essential-services (container-essential-services this-operating-system #:shared-network? shared-network?)) (services (append services-to-add (filter-map (lambda (s) (cond ((memq (service-kind s) services-to-drop) #f) ((eq? nscd-service-type (service-kind s)) (service nscd-service-type (nscd-configuration (inherit (service-value s)) (caches %nscd-container-caches)))) ((eq? guix-service-type (service-kind s)) ;; Pass '--disable-chroot' so that ;; guix-daemon can build thing even in ;; Docker without '--privileged'. (service guix-service-type (guix-configuration (inherit (service-value s)) (extra-options (cons "--disable-chroot" (guix-configuration-extra-options (service-value s))))))) (else s))) (operating-system-user-services os)))) (file-systems (append (map mapping->fs (if shared-network? (append %network-file-mappings mappings) mappings)) extra-file-systems user-file-systems ;; Provide a dummy root file system so we can create ;; a 'boot-parameters' file. (list (file-system (mount-point "/") (device "nothing") (type "dummy"))))))) (define* (container-script os #:key (mappings '()) shared-network?) "Return a derivation of a script that runs OS as a Linux container. MAPPINGS is a list of <file-system> objects that specify the files/directories that will be shared with the host system." (define (mountable-file-system? file-system) ;; Return #t if FILE-SYSTEM should be mounted in the container. (and (not (string=? "/" (file-system-mount-point file-system))) (file-system-needed-for-boot? file-system))) (define (os-file-system-specs os) (map file-system->spec (filter mountable-file-system? (operating-system-file-systems os)))) (let* ((os (containerized-operating-system os (cons %store-mapping mappings) #:shared-network? shared-network? #:extra-file-systems %container-file-systems)) (specs (os-file-system-specs os))) (define script (with-imported-modules (source-module-closure '((guix build utils) (gnu build linux-container) (guix i18n) (guix diagnostics))) #~(begin (use-modules (gnu build linux-container) (gnu system file-systems) ;spec->file-system (guix build utils) (guix i18n) (guix diagnostics) (srfi srfi-1) (srfi srfi-37) (ice-9 match)) (define (show-help) (display (G_ "Usage: run-container [OPTION ...] Run the container with the given options.")) (newline) (display (G_ " --share=SPEC share host file system with read/write access according to SPEC")) (display (G_ " --expose=SPEC expose host file system directory as read-only according to SPEC")) (newline) (display (G_ " -h, --help display this help and exit")) (newline)) (define %options ;; Specifications of the command-line options. (list (option '(#\h "help") #f #f (lambda args (show-help) (exit 0))) (option '("share") #t #f (lambda (opt name arg result) (alist-cons 'file-system-mapping (specification->file-system-mapping arg #t) result))) (option '("expose") #t #f (lambda (opt name arg result) (alist-cons 'file-system-mapping (specification->file-system-mapping arg #f) result))))) (define (parse-options args options) (args-fold args options (lambda (opt name arg . rest) (report-error (G_ "~A: unrecognized option~%") name) (exit 1)) (lambda (op res) (cons op res)) '())) (define (explain pid) ;; XXX: We can't quite call 'bindtextdomain' so there's actually ;; no i18n. ;; XXX: Should we really give both options? 'guix container exec' ;; is a more verbose command. Hard to fail to enter the container ;; when we list two options. (info (G_ "system container is running as PID ~a~%") pid) (info (G_ "Run 'sudo guix container exec ~a /run/current-system/profile/bin/bash --login'\n") pid) (info (G_ "or run 'sudo nsenter -a -t ~a' to get a shell into it.~%") pid) (newline (guix-warning-port))) (let* ((opts (parse-options (cdr (command-line)) %options)) (mappings (filter-map (match-lambda (('file-system-mapping . mapping) mapping) (_ #f)) opts)) (file-systems (filter-map (lambda (fs) (let ((flags (file-system-flags fs))) (and (or (not (memq 'bind-mount flags)) (file-exists? (file-system-device fs))) fs))) (append (map file-system-mapping->bind-mount mappings) (map spec->file-system '#$specs))))) (call-with-container file-systems (lambda () (setenv "HOME" "/root") (setenv "TMPDIR" "/tmp") (setenv "GUIX_NEW_SYSTEM" #$os) (for-each mkdir-p '("/run" "/bin" "/etc" "/home" "/var")) (primitive-load (string-append #$os "/boot"))) ;; A range of 65536 uid/gids is used to cover 16 bits worth of ;; users and groups, which is sufficient for most cases. ;; ;; See: http://www.freedesktop.org/software/systemd/man/systemd-nspawn.html#--private-users= #:host-uids 65536 #:namespaces (if #$shared-network? (delq 'net %namespaces) %namespaces) #:process-spawned-hook explain))))) (gexp->script "run-container" script))) (define* (eval/container exp #:key (mappings '()) (namespaces %namespaces) (guest-uid 0) (guest-gid 0)) "Evaluate EXP, a gexp, in a new process executing in separate namespaces as listed in NAMESPACES. Add MAPPINGS, a list of <file-system-mapping>, to the set of directories visible in the process's mount namespace. Inside the namespaces, run code as GUEST-UID and GUEST-GID. Return the process' exit status as a monadic value. This is useful to implement processes that, unlike derivations, are not entirely pure and need to access the outside world or to perform side effects." (mlet %store-monad ((lowered (lower-gexp exp))) (define inputs (cons (lowered-gexp-guile lowered) (lowered-gexp-inputs lowered))) (define items (append (append-map derivation-input-output-paths inputs) (lowered-gexp-sources lowered))) (mbegin %store-monad (built-derivations inputs) (mlet %store-monad ((closure ((store-lift requisites) items))) (return (call-with-container (map file-system-mapping->bind-mount (append (map (lambda (item) (file-system-mapping (source item) (target source))) closure) mappings)) (lambda () (apply execl (string-append (derivation-input-output-path (lowered-gexp-guile lowered)) "/bin/guile") "guile" (append (append-map (lambda (directory) `("-L" ,directory)) (lowered-gexp-load-path lowered)) (append-map (lambda (directory) `("-C" ,directory)) (lowered-gexp-load-compiled-path lowered)) (list "-c" (object->string (lowered-gexp-sexp lowered)))))) #:namespaces namespaces #:guest-uid guest-uid #:guest-gid guest-gid))))))