;;; GNU Guix --- Functional package management for GNU
;;; Copyright © 2017, 2018, 2019, 2020, 2022 Ludovic Courtès <ludo@gnu.org>
;;; Copyright © 2020 Mathieu Othacehe <othacehe@gnu.org>
;;; Copyright © 2022 Leo Nikkilä <hello@lnikki.la>
;;; Copyright © 2022 Arun Isaac <arunisaac@systemreboot.net>
;;;
;;; This file is part of GNU Guix.
;;;
;;; GNU Guix is free software; you can redistribute it and/or modify it
;;; under the terms of the GNU General Public License as published by
;;; the Free Software Foundation; either version 3 of the License, or (at
;;; your option) any later version.
;;;
;;; GNU Guix is distributed in the hope that it will be useful, but
;;; WITHOUT ANY WARRANTY; without even the implied warranty of
;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;;; GNU General Public License for more details.
;;;
;;; You should have received a copy of the GNU General Public License
;;; along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.

(define-module (gnu build shepherd)
  #:use-module (gnu system file-systems)
  #:use-module (gnu build linux-container)
  #:use-module (guix build utils)
  #:use-module (srfi srfi-1)
  #:use-module (srfi srfi-26)
  #:use-module (ice-9 match)
  ;; XXX: Lazy-bind the Shepherd to avoid a compile-time dependency.
  #:autoload (shepherd service) (fork+exec-command
                                 read-pid-file
                                 exec-command
                                 %precious-signals)
  #:autoload (shepherd system) (unblock-signals)
  #:export (default-mounts
            make-forkexec-constructor/container
            fork+exec-command/container))

;;; Commentary:
;;;
;;; This module provides extensions to the GNU Shepherd.  In particular, it
;;; provides a helper to start services in a container.
;;;
;;; Code:

(define (clean-up file)
  (when file
    (catch 'system-error
      (lambda ()
        (delete-file file))
      (lambda args
        (unless (= ENOENT (system-error-errno args))
          (apply throw args))))))

(define-syntax-rule (catch-system-error exp)
  (catch 'system-error
    (lambda ()
      exp)
    (const #f)))

(define (default-namespaces args)
  ;; Most daemons are here to talk to the network, and most of them expect to
  ;; run under a non-zero UID.
  (fold delq %namespaces '(net user)))

(define* (default-mounts #:key (namespaces (default-namespaces '())))
  (define (tmpfs directory)
    (file-system
      (device "none")
      (mount-point directory)
      (type "tmpfs")
      (check? #f)))

  (define accounts
    ;; This is for processes in the default user namespace but living in a
    ;; different mount namespace, so that they can lookup users.
    (list (file-system-mapping
           (source "/etc/passwd") (target source))
          (file-system-mapping
           (source "/etc/group") (target source))))

  (append (cons (tmpfs "/tmp") %container-file-systems)
          (let ((mappings `(,@(if (memq 'net namespaces)
                                  '()
                                  %network-file-mappings)
                            ,@(if (and (memq 'mnt namespaces)
                                       (not (memq 'user namespaces)))
                                  accounts
                                  '())

                            ;; Tell the process what timezone we're in.  This
                            ;; makes sure that, for instance, its syslog
                            ;; messages have the correct timestamp.
                            ,(file-system-mapping
                              (source "/etc/localtime")
                              (target source))

                            ,%store-mapping)))    ;XXX: coarse-grain
            (map file-system-mapping->bind-mount
                 (filter (lambda (mapping)
                           (file-exists? (file-system-mapping-source mapping)))
                         mappings)))))

(define* (read-pid-file/container pid pid-file #:key (max-delay 5))
  "Read PID-FILE in the container namespaces of PID, which exists in a
separate mount and PID name space.  Return the \"outer\" PID. "
  (match (container-excursion* pid
           (lambda ()
             ;; XXX: Trick for Shepherd 0.9: prevent 'read-pid-file' from
             ;; using (@ (fibers) sleep), which would try to suspend the
             ;; current task, which doesn't work in this extra process.
             (with-continuation-barrier
              (lambda ()
                (read-pid-file pid-file
                               #:max-delay max-delay)))))
    (#f
     ;; Send SIGTERM to the whole process group.
     (catch-system-error (kill (- pid) SIGTERM))
     #f)
    ((? integer? container-pid)
     ;; XXX: When COMMAND is started in a separate PID namespace, its
     ;; PID is always 1, but that's not what Shepherd needs to know.
     pid)))

(define* (exec-command* command #:key user group log-file pid-file
                        (supplementary-groups '())
                        (directory "/") (environment-variables (environ)))
  "Like 'exec-command', but first restore signal handles modified by
shepherd (PID 1)."
  ;; First restore the default handlers.
  (for-each (cut sigaction <> SIG_DFL) %precious-signals)

  ;; Unblock any signals that have been blocked by the parent process.
  (unblock-signals %precious-signals)

  (mkdir-p "/var/run")
  (clean-up pid-file)

  (exec-command command
                #:user user
                #:group group
                #:supplementary-groups supplementary-groups
                #:log-file log-file
                #:directory directory
                #:environment-variables environment-variables))

(define* (make-forkexec-constructor/container command
                                              #:key
                                              (namespaces
                                               (default-namespaces args))
                                              (mappings '())
                                              (user #f)
                                              (group #f)
                                              (supplementary-groups '())
                                              (log-file #f)
                                              pid-file
                                              (pid-file-timeout 5)
                                              (directory "/")
                                              (environment-variables
                                               (environ))
                                              #:rest args)
  "This is a variant of 'make-forkexec-constructor' that starts COMMAND in
NAMESPACES, a list of Linux namespaces such as '(mnt ipc).  MAPPINGS is the
list of <file-system-mapping> to make in the case of a separate mount
namespace, in addition to essential bind-mounts such /proc."
  (define container-directory
    (match command
      ((program _  ...)
       (string-append "/var/run/containers/" (basename program)))))

  (define auto-mappings
    `(,@(if log-file
            (list (file-system-mapping
                   (source log-file)
                   (target source)
                   (writable? #t)))
            '())))

  (define mounts
    (append (map file-system-mapping->bind-mount
                 (append auto-mappings mappings))
            (default-mounts #:namespaces namespaces)))

  (lambda args
    (mkdir-p container-directory)

    (when log-file
      ;; Create LOG-FILE so we can map it in the container.
      (unless (file-exists? log-file)
        (close (open log-file (logior O_CREAT O_APPEND O_CLOEXEC) #o640))
        (when user
          (let ((pw (getpwnam user)))
            (chown log-file (passwd:uid pw) (passwd:gid pw))))))

    (let ((pid (run-container container-directory
                              mounts namespaces 1
                              (lambda ()
                                (exec-command* command
                                               #:user user
                                               #:group group
                                               #:supplementary-groups
                                               supplementary-groups
                                               #:pid-file pid-file
                                               #:log-file log-file
                                               #:directory directory
                                               #:environment-variables
                                               environment-variables)))))
      (if pid-file
          (if (or (memq 'mnt namespaces) (memq 'pid namespaces))
              (read-pid-file/container pid pid-file
                                       #:max-delay pid-file-timeout)
              (read-pid-file pid-file #:max-delay pid-file-timeout))
          pid))))

(define* (fork+exec-command/container command
                                      #:key pid
                                      #:allow-other-keys
                                      #:rest args)
  "This is a variant of 'fork+exec-command' procedure, that joins the
namespaces of process PID beforehand.  If there is no support for containers,
on Hurd systems for instance, fallback to direct forking."
  (define (strip-pid args)
    ;; TODO: Replace with 'strip-keyword-arguments' when that no longer pulls
    ;; in (guix config).
    (let loop ((args args)
               (result '()))
      (match args
        (()
         (reverse result))
        ((#:pid _ . rest)
         (loop rest result))
        ((head . rest)
         (loop rest (cons head result))))))

  (let ((container-support? (file-exists? "/proc/self/ns")))
    (if (and container-support?
             (not (and pid (= pid (getpid)))))
        (container-excursion* pid
          (lambda ()
            ;; Note: In the Shepherd 0.9, 'fork+exec-command' expects to be
            ;; called from the shepherd process (because it creates a pipe to
            ;; capture stdout/stderr and spawns a logging fiber) so we cannot
            ;; use it here.
            (match (primitive-fork)
              (0 (dynamic-wind
                   (const #t)
                   (lambda ()
                     (apply exec-command* command (strip-pid args)))
                   (lambda ()
                     (primitive-_exit 127))))
              (pid pid))))               ;XXX: assuming the same PID namespace
        (apply fork+exec-command command (strip-pid args)))))

;; Local Variables:
;; eval: (put 'container-excursion* 'scheme-indent-function 1)
;; End:

;;; shepherd.scm ends here