path: root/gnu/system/linux-container.scm
blob: e6fd0f1315f7359df07f15f49ec5b6386dbcfce8 (about) (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2015 David Thompson <davet@gnu.org>
;;; Copyright © 2016, 2017, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2019 Arun Isaac <arunisaac@systemreboot.net>
;;; Copyright © 2020 Efraim Flashner <efraim@flashner.co.il>
;;; Copyright © 2020 Google LLC
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

(define-module (gnu system linux-container)
  #:use-module (ice-9 match)
  #:use-module (srfi srfi-1)
  #:use-module (guix config)
  #:use-module (guix store)
  #:use-module (guix gexp)
  #:use-module (guix derivations)
  #:use-module (guix monads)
  #:use-module (guix modules)
  #:use-module (gnu build linux-container)
  #:use-module (gnu services)
  #:use-module (gnu services base)
  #:use-module (gnu services networking)
  #:use-module (gnu services shepherd)
  #:use-module (gnu system)
  #:use-module (gnu system file-systems)
  #:export (system-container
            containerized-operating-system
            container-script
            eval/container))

(define* (container-essential-services os #:key shared-network?)
  "Return a list of essential services corresponding to OS, a
non-containerized OS.  This procedure essentially strips essential services
from OS that are needed on the bare metal and not in a container."
  (define base
    (remove (lambda (service)
              (memq (service-kind service)
                    (list (service-kind %linux-bare-metal-service)
                          firmware-service-type
                          system-service-type)))
            (operating-system-default-essential-services os)))

  (cons (service system-service-type
                 `(("locale" ,(operating-system-locale-directory os))))
        ;; If network is to be shared with the host, remove network
        ;; configuration files from etc-service.
        (if shared-network?
            (modify-services base
              (etc-service-type
               files => (remove
                         (match-lambda
                           ((filename _)
                            (member filename
                                    (map basename %network-configuration-files))))
                         files)))
            base)))

(define dummy-networking-service-type
  (shepherd-service-type
   'dummy-networking
   (const (shepherd-service
           (documentation "Provide loopback and networking without actually
doing anything.")
           (provision '(loopback networking))
           (start #~(const #t))))
   #f
   (description "Provide loopback and networking without actually doing
anything.  This service is used by guest systems running in containers, where
networking support is provided by the host.")))

(define %nscd-container-caches
  ;; Similar to %nscd-default-caches but with smaller cache sizes. This allows
  ;; many containers to coexist on the same machine without exhausting RAM.
  (map (lambda (cache)
         (nscd-cache
          (inherit cache)
          (max-database-size (expt 2 18)))) ;256KiB
       %nscd-default-caches))

(define* (containerized-operating-system os mappings
                                         #:key
                                         shared-network?
                                         (extra-file-systems '()))
  "Return an operating system based on OS for use in a Linux container
environment.  MAPPINGS is a list of <file-system-mapping> to realize in the
containerized OS.  EXTRA-FILE-SYSTEMS is a list of file systems to add to OS."
  (define user-file-systems
    (remove (lambda (fs)
              (let ((target (file-system-mount-point fs))
                    (source (file-system-device fs)))
                (or (string=? target (%store-prefix))
                    (string=? target "/")
                    (and (string? source)
                         (string-prefix? "/dev/" source))
                    (string-prefix? "/dev/" target)
                    (string-prefix? "/sys/" target))))
            (operating-system-file-systems os)))

  (define (mapping->fs fs)
    (file-system (inherit (file-system-mapping->bind-mount fs))
      (needed-for-boot? #t)))

  (define services-to-drop
    ;; Service types to filter from the original operating-system. Some of
    ;; these make no sense in a container (e.g., those that access
    ;; /dev/tty[0-9]), while others just need to be reinstantiated with
    ;; different configs that are better suited to containers.
    (append (list console-font-service-type
                  mingetty-service-type
                  agetty-service-type
                  ;; Reinstantiated below with smaller caches.
                  nscd-service-type)
            (if shared-network?
                ;; Replace these with dummy-networking-service-type below.
                (list
                 static-networking-service-type
                 dhcp-client-service-type
                 network-manager-service-type
                 connman-service-type
                 wicd-service-type)
                (list))))

  (define services-to-add
    (append
     ;; Many Guix services depend on a 'networking' shepherd
     ;; service, so make sure to provide a dummy 'networking'
     ;; service when we are sure that networking is already set up
     ;; in the host and can be used.  That prevents double setup.
     (if shared-network?
         (list (service dummy-networking-service-type))
         '())
     (list
      (nscd-service (nscd-configuration
                     (caches %nscd-container-caches))))))

  (operating-system
    (inherit os)
    (swap-devices '()) ; disable swap
    (essential-services (container-essential-services
                         this-operating-system
                         #:shared-network? shared-network?))
    (services (append (remove (lambda (service)
                                (memq (service-kind service)
                                      services-to-drop))
                              (operating-system-user-services os))
                      services-to-add))
    (file-systems (append (map mapping->fs
                               (if shared-network?
                                   (append %network-file-mappings mappings)
                                   mappings))
                          extra-file-systems
                          user-file-systems

                          ;; Provide a dummy root file system so we can create
                          ;; a 'boot-parameters' file.
                          (list (file-system
                                  (mount-point "/")
                                  (device "nothing")
                                  (type "dummy")))))))

(define* (container-script os #:key (mappings '()) shared-network?)
  "Return a derivation of a script that runs OS as a Linux container.
MAPPINGS is a list of <file-system> objects that specify the files/directories
that will be shared with the host system."
  (define (mountable-file-system? file-system)
    ;; Return #t if FILE-SYSTEM should be mounted in the container.
    (and (not (string=? "/" (file-system-mount-point file-system)))
         (file-system-needed-for-boot? file-system)))

  (define (os-file-system-specs os)
    (map file-system->spec
         (filter mountable-file-system?
                 (operating-system-file-systems os))))

  (let* ((os (containerized-operating-system
              os (cons %store-mapping mappings)
              #:shared-network? shared-network?
              #:extra-file-systems %container-file-systems))
         (specs (os-file-system-specs os)))

    (define script
      (with-imported-modules (source-module-closure
                              '((guix build utils)
                                (gnu build linux-container)
                                (guix i18n)
                                (guix diagnostics)))
        #~(begin
            (use-modules (gnu build linux-container)
                         (gnu system file-systems) ;spec->file-system
                         (guix build utils)
                         (guix i18n)
                         (guix diagnostics)
                         (srfi srfi-1))

            (define file-systems
              (filter-map (lambda (spec)
                            (let* ((fs    (spec->file-system spec))
                                   (flags (file-system-flags fs)))
                              (and (or (not (memq 'bind-mount flags))
                                       (file-exists? (file-system-device fs)))
                                   fs)))
                          '#$specs))

            (define (explain pid)
              ;; XXX: We can't quite call 'bindtextdomain' so there's actually
              ;; no i18n.
              ;; XXX: Should we really give both options? 'guix container exec'
              ;; is a more verbose command.  Hard to fail to enter the container
              ;; when we list two options.
              (info (G_ "system container is running as PID ~a~%") pid)
              (info (G_ "Run 'sudo guix container exec ~a /run/current-system/profile/bin/bash --login'\n")
                    pid)
              (info (G_ "or run 'sudo nsenter -a -t ~a' to get a shell into it.~%") pid)
              (newline (guix-warning-port)))

            (call-with-container file-systems
              (lambda ()
                (setenv "HOME" "/root")
                (setenv "TMPDIR" "/tmp")
                (setenv "GUIX_NEW_SYSTEM" #$os)
                (for-each mkdir-p '("/run" "/bin" "/etc" "/home" "/var"))
                (primitive-load (string-append #$os "/boot")))
              ;; A range of 65536 uid/gids is used to cover 16 bits worth of
              ;; users and groups, which is sufficient for most cases.
              ;;
              ;; See: http://www.freedesktop.org/software/systemd/man/systemd-nspawn.html#--private-users=
              #:host-uids 65536
              #:namespaces (if #$shared-network?
                               (delq 'net %namespaces)
                               %namespaces)
              #:process-spawned-hook explain))))

    (gexp->script "run-container" script)))

(define* (eval/container exp
                         #:key
                         (mappings '())
                         (namespaces %namespaces))
  "Evaluate EXP, a gexp, in a new process executing in separate namespaces as
listed in NAMESPACES.  Add MAPPINGS, a list of <file-system-mapping>, to the
set of directories visible in the process's mount namespace.  Return the
process' exit status as a monadic value.

This is useful to implement processes that, unlike derivations, are not
entirely pure and need to access the outside world or to perform side
effects."
  (mlet %store-monad ((lowered (lower-gexp exp)))
    (define inputs
      (cons (lowered-gexp-guile lowered)
            (lowered-gexp-inputs lowered)))

    (define items
      (append (append-map derivation-input-output-paths inputs)
              (lowered-gexp-sources lowered)))

    (mbegin %store-monad
      (built-derivations inputs)
      (mlet %store-monad ((closure ((store-lift requisites) items)))
        (return (call-with-container (map file-system-mapping->bind-mount
                                          (append (map (lambda (item)
                                                         (file-system-mapping
                                                          (source item)
                                                          (target source)))
                                                       closure)
                                                  mappings))
                  (lambda ()
                    (apply execl
                           (string-append (derivation-input-output-path
                                           (lowered-gexp-guile lowered))
                                          "/bin/guile")
                           "guile"
                           (append (append-map (lambda (directory)
                                                 `("-L" ,directory))
                                               (lowered-gexp-load-path lowered))
                                   (append-map (lambda (directory)
                                                 `("-C" ,directory))
                                               (lowered-gexp-load-compiled-path
                                                lowered))
                                   (list "-c"
                                         (object->string
                                          (lowered-gexp-sexp lowered))))))))))))