aboutsummaryrefslogtreecommitdiff
;;; SPDX-License-Identifier: CC0-1.0
;;;
;;; Copyright (C) 2023-2025 Wojtek Kosior <koszko@koszko.org>

(define-module (cantius)
  #:use-module (scheme base)

  #:use-module ((srfi srfi-1) #:prefix s1:)
  #:use-module ((srfi srfi-11) #:select (let*-values))
  #:use-module ((srfi srfi-26) #:select (cut))
  #:use-module ((srfi srfi-35) #:prefix s35:)
  #:use-module ((srfi srfi-128) #:select (make-default-comparator))
  #:use-module ((srfi srfi-146) #:prefix s146:)
  #:use-module ((srfi srfi-247) #:select (define-syntactic-monad))

  #:use-module ((ice-9 format) #:select (format))
  #:use-module ((ice-9 exceptions) #:select
                (exception-with-message? exception-message))
  #:use-module ((ice-9 regex) #:prefix reg:)

  #:use-module ((system repl debug) #:select (terminal-width))
  #:use-module ((web http) #:select (read-headers))
  #:use-module ((web request) #:prefix req:)
  #:use-module ((web response) #:prefix res:)
  #:use-module ((web uri) #:prefix uri:)

  #:use-module ((fibers web server) #:select (run-server))
  #:use-module ((de-paul-records) #:select
                (deftype match match-let match-let*))

  #:duplicates (last))

;;;
;;; Source of Cantius, a HTTP server library for Guile, based on Fibers.
;;;



;;;
;;; Condition types.
;;;

(export &not-found)
(export not-found-condition?)
(s35:define-condition-type &not-found s35:&condition
  not-found-condition?)

(export &forbidden)
(export forbidden-condition?)
(s35:define-condition-type &forbidden s35:&condition
  forbidden-condition?)

(export &bad-request)
(export bad-request-condition?)
(s35:define-condition-type &bad-request s35:&condition
  bad-request-condition?)

(export &condition-with-http-code)
(export condition-with-http-code?)
(export condition-http-code)
(s35:define-condition-type &condition-with-http-code s35:&condition
  condition-with-http-code?
  (http-code condition-http-code))



;;;
;;; Cantius syntactic monad.
;;;

(eval-when (compile load eval)
  (define-public $-args
    '(cant$resource-roots
      cant$redirect/normalize-path?
      cant$redirect/remove-query?
      cant$default-headers
      cant$http-code-handlers
      cant$other-http-codes-handler
      cant$request
      cant$body
      cant$current-path
      cant$current-endpoint
      cant$endpoint-matched-path))

  (export $)
  (eval `(define-syntactic-monad $ . ,$-args) (current-module)))



;;;
;;; Multipart form parser.
;;;

(deftype form-data-part
  #:export? #t
  name
  headers
  binary?
  data)

(define (hex-escape bytestring)
  (with-output-to-string
    (lambda ()
      (let loop ((idx 0))
        (when (< idx (bytevector-length bytestring))
          (let ((byte (bytevector-u8-ref bytestring idx)))
            (cond ((or (> byte 127)
                       (= byte (char->integer #\\)))
                   (format #t "\\x~2,'0x" byte))
                  (else
                   (display (integer->char byte)))))
          (loop (1+ idx)))))))

(define (hex-unescape string)
  ;; We assume a backslash is always followed by `x' and 2 digits (this is
  ;; indeed the case if the string has been generated by `hex-escape' above).
  (define byte-length
    (- (string-length string)
       (* 3 (string-count string #\\))))

  (define bytestring
    (make-bytevector byte-length))

  (let loop ((byte-idx 0)
             (str-idx 0))

    (define (write-string str)
      (string-fold (lambda (char char-idx)
                     (bytevector-u8-set! bytestring char-idx
                                         (char->integer char))
                     (1+ char-idx))
                   byte-idx str))

    (match (string-contains string "\\x" str-idx)

      (#f
       (write-string (substring string str-idx))
       bytestring)

      ((and (= (cut substring string str-idx <>) text-until)
            escape-idx)
       (let* ((byte-idx* (write-string text-until))
              (str-idx* (+ 4 escape-idx))
              (escape-hex (substring string (+ 2 escape-idx) str-idx*))
              (byte (string->number escape-hex 16)))
         (bytevector-u8-set! bytestring byte-idx* byte)
         (loop (1+ byte-idx*) str-idx*))))))

(define %part-regex
  ;; Headers, 2 new lines, actual data and one new line at the end.
  (make-regexp "\r\n(([^\n]+\r\n)+\r\n)(.*)\r\n$"))

(define %backslash-regex
  (make-regexp "\\\\"))

(define-public (parse-multipart/form-data boundary data)
  ;; `data' is a bytevector but Scheme lacks a `bytevector-contains' kind of
  ;; procedure so we instead convert our data to ASCII-only string (with
  ;; non-ASCII bytes expressed as hex escapes) and use regexes to split this
  ;; string around the occurences of boundary in O(n) time.

  (define boundary*
    ;; A backslash is theoretically legal in boundary, let's make sure it's also
    ;; escaped.
    (format #f "--~a"
            (reg:regexp-substitute/global #f %backslash-regex boundary
                                          'pre "\\x5c" 'post)))

  (define data*
    (hex-escape data))

  (define parts
    (let loop ((idx 0)
               (parts '()))
      (match (string-contains data* boundary* idx)

        (#f
         (unless (string-prefix? "--\r\n" (substring data* idx))
           (raise (s35:condition
                   (&bad-request)
                   (s35:&message
                    (message (format #f "Last boundary in multipart/form-data ~
                                         should end with extra double hyphen ~
                                         followed by CRLF"))))))
         ;; Discard the last "part" (actually, the epilogue).
         (reverse parts))

        ((and (= (cut substring data* idx <>) part)
              (= (cut + <> (string-length boundary*)) idx*))
         ;; Discard the first "part" (actually, the preamble).
         (loop idx* (if (= idx 0)
                        parts
                        (cons part parts)))))))

  (map (lambda (part)
         (match-let* (((and (= (cut reg:match:substring <> 1) headers-string)
                            (= (cut reg:match:substring <> 3) data))
                       (regexp-exec %part-regex part))

                      (headers
                       (call-with-input-string headers-string read-headers))

                      (('form-data . (= (cut assq-ref <> 'name)
                                        (? string? name)))
                       (assq-ref headers 'content-disposition))

                      (content-transfer-encoding
                       (assq-ref headers 'content-transfer-encoding)))

           (unless (member content-transfer-encoding '(#f "binary"))
             (raise (s35:condition
                     (&condition-with-http-code
                      (http-code 415)) ;; Unsupported Media Type
                     (s35:&message
                      (message (format #f "Content-Transfer-Encoding of type ~
                                           \"~a\" is not currently supported ~
                                           for multipart/form-data parts, ~
                                           please use \"binary\" instead"
                                       content-transfer-encoding))))))

           (form-data-part
            name
            headers
            (binary? (not (member (assq-ref headers 'content-type)
                                  '(#f (text/plain)))))
            (data (let ((data-binary (hex-unescape data)))
                    (if binary?
                        data-binary
                        (utf8->string data-binary)))))))
       parts))

(export multipart/form-data)
($ define (multipart/form-data)
  (match (req:request-content-type cant$request)

    (('multipart/form-data
      . (= (cut assq-ref <> 'boundary) (? identity boundary)))
     (parse-multipart/form-data boundary cant$body))

    (_
     '())))

(define (parts->alist parts)
  ;; This shouldn't be strongly relevant but we make later parts appear
  ;; earlier in the alist so that they take precedence when `assoc' is used.
  (s1:fold (lambda (part acc)
             `((,(form-data-part-name part) . ,part)
               . ,acc))
           '() parts))

(define-public parse-multipart/form-data*
  (compose parts->alist parse-multipart/form-data))

(define-public multipart/form-data*
  (compose parts->alist multipart/form-data))



;;;
;;; Several smaller helper utilities.
;;;

;; Operations on URI paths.

(define-public (normalize-path path)
  (define absolute?
    (eqv? #\/ (string-ref path 0)))

  (let loop ((parent-walks 0)
             (processed '())
             (to-process (string-split path #\/)))
    (match (list parent-walks processed to-process)
      ((0 () ())
       (if absolute? "/" "."))
      ((_ _ ())
       (string-join (append (map (const "..") (iota parent-walks))
                            (reverse processed))
                    "/"
                    (if absolute? 'prefix 'infix)))
      ((_ _ ((? (cut member <> '("" ".")))
             . rest))
       (loop parent-walks processed rest))
      ((_ () (".." . rest))
       (loop (1+ parent-walks) '() rest))
      ((_ (_ . processed-rest) (".." . rest))
       (loop parent-walks processed-rest rest))
      ((_ _ (segment . rest))
       (loop parent-walks (cons segment processed) rest)))))

(define %illegal-path-regex
  ;; Assume normalized path, forbid parent directory ref.
  (make-regexp "^/?[.][.](/.*)?$"))

(export find-resource-file)
(define* (find-resource-file file root-paths)
  (define normalized-file
    (normalize-path file))

  (when (regexp-exec %illegal-path-regex normalized-file)
    (raise (s35:condition
            (&forbidden)
            (s35:&message
             (message (format #f "Illegal resource path ~a" file))))))

  (let loop ((paths root-paths))
    (match paths
      (()
       (raise (s35:condition
               (&not-found)
               (s35:&message
                (message (format #f "Resource not found ~a" file))))))
      ((? string?)
       (loop (list root-paths)))
      (((= (cut format #f "~a/~a" <> normalized-file) file-path)
        . paths-rest)
       (or (and (stat file-path #f) file-path)
           (loop paths-rest))))))

;; Cookie parser.

(define %cookie-regex
  (make-regexp (string-append
                "^\\s*"
                "([^[:space:];=]([^;=]*[^[:space:];=])?)" ;; name
                "\\s*=\\s*"
                "(([^[:space:];]([^;]*[^[:space:];])?)?)" ;; value
                "\\s*(;|$)")))

(define-public (parse-cookies cookies)
  (let loop ((idx 0)
             (cookies-alist '()))
    (match (regexp-exec %cookie-regex cookies idx)

      (#f
       (when (string-index cookies char-set:graphic idx)
         (raise (s35:condition
                 (&bad-request)
                 (s35:&message
                  (message "Unparseable cookie")))))
       (reverse cookies-alist))

      ((and (= (cut reg:match:substring <> 1) name)
            (= (cut reg:match:substring <> 3) value)
            (= reg:match:end idx*))
       (loop idx* `((,name . ,value) . ,cookies-alist))))))

(export cookies)
($ define (cookies)
  (or (and=> (assq-ref (req:request-headers cant$request) 'cookie)
             parse-cookies)
      '()))

;; Sugar over `build-response', with support for easy redirects.

(export build-response*)
(define* (build-response* headers #:key (redirect-to #f) (code #f) #:rest args)
  (define code*
    (or code (if redirect-to 301 200)))

  (define redirect-to*
    (if (string? redirect-to)
        (uri:string->uri redirect-to)
        redirect-to))

  (define headers*
    (if redirect-to*
        `((location . ,redirect-to*) . ,headers)
        headers))

  (let loop ((to-filter args)
             (filtered '()))
    (match to-filter
      (()
       (apply res:build-response #:code code* #:headers headers*
              (reverse filtered)))
      (((? (cut memq <> '(#:code #:redirect-to))) _ . rest)
       (loop rest filtered))
      ((keyword keyword-arg . rest)
       (loop rest (cons* keyword-arg keyword filtered))))))

(export response)
($ define (response . rest)
  (apply build-response* cant$default-headers rest))



;;;
;;; Default values to use as monadic arguments.
;;;

(define %default-headers
  '((content-type . (text/plain (charset . "utf-8")))))

(define (make-code-handler-pair http-code default-message)
  (cons http-code
        ($ lambda (. rest)
          (values (build-response* %default-headers #:code http-code)
                  (or (s1:last rest) default-message)))))

(define %default-http-code-handlers
  (map (cut apply make-code-handler-pair <>)
       `((404 "There's no page with this address :(")
         (403 "You. Shall not. Pass. (forbidden)")
         (400 ,(format #f "Bad Request (you or your software seems to have ~
                           messed sth up).")))))

($ define (default-other-http-codes-handler . rest)
  (match-let (((message #:message http-code #:http-code . _) (reverse rest)))
    (values (build-response* %default-headers #:code http-code)
            (or message (format #f "HTTP code ~d" http-code)))))

($ define (get-code-handler code)
   (or (and=> (assv code cant$http-code-handlers) cdr)
       cant$other-http-codes-handler))



;;;
;;; Trees of endsets and endpoints.
;;;

(define (any->proc-list list-or-proc)
  (if (procedure? list-or-proc)
      (list list-or-proc)
      list-or-proc))

(deftype endpoint
  #:export? yes,of-course
  (handler #:default (lambda _ (raise (s35:condition (s35:&error)))))
  (path #:default '())
  (arg-converters #:default '())
  #:finalize (cut *endpoint
                  #:<- <>
                  (arg-converters #:=> any->proc-list)))

(deftype endpoint-tree-node
  (direct-match #:default #f)
  (any-match #:default #f)
  (children #:default (s146:mapping (make-default-comparator))))

(deftype endset
  #:export? sure,why-not
  (endpoints #:default (endpoint-tree-node))
  (arg-converters #:default '())
  #:finalize (cut *endset
                  #:<- <>
                  (arg-converters #:=> any->proc-list)))

(define (endpoint-tree-+ tree path what)
  (let recurse ((segments path)
                (tree-node tree))
    (define (processed-segments)
      (list-head path (- (length path) (length segments))))

    (match-let ((($* endpoint-tree-node direct-match any-match children)
                 tree-node))
      (when (endset? direct-match)
        (raise (s35:condition
                (s35:&error)
                (s35:&message
                 (message (format #f "Endset already registered for ~s"
                                  (processed-segments)))))))

      (match segments
        (()
         (cond (direct-match
                (raise (s35:condition
                        (s35:&error)
                        (s35:&message
                         (message (format #f "Endpoint already registered for ~
                                              ~s"
                                          path))))))
               ((and (endset? what) (not (s146:mapping-empty? children)))
                (raise (s35:condition
                        (s35:&error)
                        (s35:&message
                         (message (format #f "Endpoints already registered ~
                                              under ~s"
                                          path))))))
               (#t
                (endpoint-tree-node
                 #:<- tree-node
                 (direct-match what)))))

        ('any
         (cond (any-match
                (raise (s35:condition
                        (s35:&error)
                        (s35:&message
                         (message (format #f "\"any\" endpoint already ~
                                              registered for ~s"
                                          path))))))
               ((endset? what)
                (raise (s35:condition
                        (s35:&error)
                        (s35:&message
                         (message (format #f "endset should be registered at ~
                                              normal path, not ~s"
                                          path))))))
               (#t
                (endpoint-tree-node
                 #:<- tree-node
                 (any-match what)))))

        (((? string? current-segment) . segments-rest)
         (endpoint-tree-node
          #:<- tree-node
          (children (s146:mapping-update/default
                     children current-segment (cut recurse segments-rest <>)
                     %null-endpoint-tree-node))))))))

(define-public (endset-+ endset-obj path what)
  (endset
   #:<- endset-obj
   (endpoints (endpoint-tree-+ (endset-endpoints endset-obj) path what))))

(define-syntactic-monad $query
  qy$end qy$endpoint-matched-path qy$arg-converters)

(define %default-endpoint
  (endpoint (handler ($ lambda (. rest)
                       ($ (cut apply ($ get-code-handler () 404) <...>)
                          () (append rest '(#:http-code 404 #:message #f)))))
            (arg-converters ($ lambda (. rest)
                              ($ (cut apply values <...>)
                                 ((cant$redirect/normalize-path? #f)
                                  (cant$redirect/remove-query? #f)))))))

(define (query-endset endset path-segments)
  ($query let loop ((qy$end %default-endpoint)
                    (qy$endpoint-matched-path path-segments)
                    (qy$arg-converters (reverse (endset-arg-converters endset)))
                    (current-segments path-segments)
                    (current-place (endset-endpoints endset)))
    ($query define (descend-subnode)
      (match-let (((current-segment . segments-rest) current-segments)
                  (($* endpoint-tree-node children) current-place))
        (cond ((s146:mapping-ref/default children current-segment #f)
               => (cut $query loop () segments-rest <>))
              (else
               ($query values)))))

    (cond ((endset? current-place)
           (let ((new-converters (reverse (endset-arg-converters endset))))
             ($query loop ((qy$endpoint-matched-path current-segments)
                           (qy$arg-converters (append new-converters
                                                      qy$arg-converters)))
                     current-segments (endset-endpoints current-place))))

          ((null? current-segments)
           (cond ((endpoint-tree-node-direct-match current-place)
                  => (lambda (end)
                       ($query values ((qy$end end)))))
                 (else
                  ($query values))))

          ((endpoint-tree-node-any-match current-place)
           ;; Remember that `end' can only be an endpoint here — we don't allow
           ;; endsets with `any-match'.
           => (lambda (end)
                ($query descend-subnode ((qy$end end)))))

          (else
           ($query descend-subnode)))))



;;;
;;; Macro layer for endpoint definitions.
;;;

(eval-when (compile load eval)
  (deftype path-spec
    (path-stx)
    (path-rest-stx))

  (define (syntax->path-spec x)
    (let loop ((path-segments '())
               (path-stx x))
      (syntax-case path-stx (unquote unquote-splicing)
        (()
         (path-spec (path-stx #``(#,@(reverse path-segments)))
                    (path-rest-stx #f)))
        (rest-var
         (identifier? #'rest-var)
         (path-spec (path-stx #``(#,@(reverse path-segments)))
                    (path-rest-stx #'rest-var)))
        ((unquote list-expression)
         (path-spec (path-stx #``(#,@(reverse path-segments)
                                     . ,list-expression))
                    (path-rest-stx #f)))
        ((segment-arg . rest)
         (identifier? #'segment-arg)
         (raise (s35:condition
                 (s35:&error)
                 (s35:&message
                  (message (format #f "Binding identifiers to path segments ~
                                       not yet supported"))))))
        (((unquote segment-expression) . rest)
         (loop (cons #',segment-expression path-segments) #'rest))
        (((unquote-splicing segments-expression) . rest)
         (loop (cons #',@segments-expression path-segments) #'rest))
        ((segment . rest)
         (string? (syntax->datum #'segment))
         (loop (cons #'segment path-segments) #'rest))))))

(export define-endpoint)
(define-syntax define-endpoint
  (lambda (x)
    (syntax-case x ()
      ((source-stx endset-identifier (handler-identifier . handler-formals)
                   . rest)
       #'(source-stx endset-identifier #f (handler-identifier . handler-formals)
                     . rest))

      ((source-stx endset-identifier maybe-monad-identifier
                   (handler-identifier . handler-formals)
                   path arg-converters-list
                   handler-body handler-body-rest ...)
       (and (identifier? #'endset-identifier)
            (identifier? #'handler-identifier))

       (match-let* ((($* path-spec path-stx path-rest-stx)
                     (syntax->path-spec #'path))

                    (full-path-stx #``(,@#,path-stx . #,(or path-rest-stx '())))
                    (stx (cut datum->syntax #'source-stx <>))
                    (endpoint-matched-path (stx 'cant$endpoint-matched-path))

                    (lambda-stx
                     #`(lambda (. handler-formals)
                         (let (#,@(if path-rest-stx
                                      #`((#,path-rest-stx
                                          (s1:drop #,endpoint-matched-path
                                                   path-len)))
                                      '()))
                           handler-body handler-body-rest ...))))

         #`(begin
             (define evaluated-path
               #,path-stx)

             (define handler-identifier
               (let ((path-len (length evaluated-path)))
                 #,(if (syntax->datum #'maybe-monad-identifier)
                       #`(maybe-monad-identifier . #,lambda-stx)
                       ((macro-transformer
                        (module-ref (resolve-module '(cantius)) '$))
                        #`(source-stx . #,lambda-stx)))))

             (define endset-identifier
               (endset-+ endset-identifier
                         #,(if path-rest-stx
                               #``(,@evaluated-path . any)
                               #'evaluated-path)
                         (endpoint (arg-converters arg-converters-list)
                                   (handler handler-identifier))))))))))



;;;
;;; Dispatching of requests to handlers, with error handling.
;;;

(define %ugly-uri-path-regex
  (make-regexp ".+/(/|$)"))

(define (exception->msg ex)
  (format #f "~a~%~a~%"
          (if (exception-with-message? ex)
              (format #f "Error: ~a." (exception-message ex))
              "Error occured.")
          ex))

(define (bt-string)
  (call-with-output-string (lambda (port)
                             (terminal-width 10000)
                             (display-backtrace (make-stack #t 5 0) port))))

($ define (meta-handler/no-redirect . rest)
  (define (invoke-http-code-handler code ex)
    (let ((msg (and (s35:condition-has-type? ex s35:&message)
                    (s35:condition-message ex))))
      ($ (cut apply ($ get-code-handler () code) <...>)
         () (append rest `(#:http-code ,code #:message ,msg)))))

  (guard (ex ((s35:condition-has-type? ex &not-found)
              (invoke-http-code-handler 404 ex))
             ((s35:condition-has-type? ex &forbidden)
              (invoke-http-code-handler 403 ex))
             ((s35:condition-has-type? ex &bad-request)
              (invoke-http-code-handler 400 ex))
             ((s35:condition-has-type? ex &condition-with-http-code)
              (invoke-http-code-handler (condition-http-code ex) ex)))
    (match ($ (cut (compose list apply) (endpoint-handler cant$current-endpoint)
                   <...>)
              () rest)
      ((response-body)
       (values (build-response* cant$default-headers) response-body))
      ((response response-body)
       (values response response-body))
      (something-else
            (values (build-response* %default-headers #:code 500)
                    (format #f "Invalid return values from handler:~%~a"
                            something-else))))))

(define (web-server-handler request body root-endset)
  (let* ((uri (req:request-uri request))
         (path-string (uri:uri-path uri))
         (path-segments (uri:split-and-decode-uri-path path-string)))
    ($ define (meta-handler . rest)
      (define (normalized-uri-path-string)
        (format #f "/~a" (uri:encode-and-join-uri-path path-segments)))

      (define (redirect-path)
        (if cant$redirect/normalize-path?
            (normalized-uri-path-string)
            path-string))

      (define (redirect-query)
        (if cant$redirect/remove-query?
            #f
            (uri:uri-query uri)))

      (if (or (and cant$redirect/normalize-path?
                   (regexp-exec %ugly-uri-path-regex path-string))
              (and cant$redirect/remove-query?
                   (uri:uri-query uri)))

          (values (build-response* %default-headers
                                   #:code 307
                                   #:redirect-to (uri:build-relative-ref
                                                  #:path (redirect-path)
                                                  #:query (redirect-query)))
                  "307 Temporary Redirect...")

          ($ (cut apply meta-handler/no-redirect <...>) () rest)))

    (guard (ex ((and (getenv "CANTIUS_DEBUG")
                     (bt-string))
                => (lambda (backtrace)
                     (let* ((desc (format #f "~a~%~
                                              Backtrace:~%~%~a"
                                          (exception->msg ex) backtrace)))
                       (format (current-error-port) "In ~a request to `~a':~%~%"
                               (symbol->string (req:request-method request))
                               path-string)
                       (display desc (current-error-port))
                       (values (build-response* %default-headers #:code 500)
                               desc))))
               (else
                (display (exception->msg ex) (current-error-port))
                (values (build-response* %default-headers #:code 500)
                        "Internal server error.")))

      (start-stack 'cantius-request
        ($query let*-values ((() (query-endset root-endset path-segments)))
          ($ (apply compose meta-handler
                    (append (reverse (endpoint-arg-converters qy$end))
                            qy$arg-converters))
             ((cant$resource-roots '())
              (cant$redirect/normalize-path? #t)
              (cant$redirect/remove-query? #t)
              (cant$default-headers %default-headers)
              (cant$http-code-handlers %default-http-code-handlers)
              (cant$other-http-codes-handler default-other-http-codes-handler)
              (cant$request request)
              (cant$body body)
              (cant$current-path path-segments)
              (cant$current-endpoint qy$end)
              (cant$endpoint-matched-path qy$endpoint-matched-path))))))))

(define-public (run-cantius endset . server-args)
  (apply run-server (cut web-server-handler <> <> endset) server-args))