aboutsummaryrefslogtreecommitdiff
;;; SPDX-License-Identifier: CC0-1.0
;;;
;;; Copyright (C) 2023, 2024 Wojtek Kosior <koszko@koszko.org>

(define-module (cantius)
  #:use-module ((rnrs bytevectors) #:prefix bv:)
  #:use-module ((srfi srfi-1) #:select (filter-map))
  #:use-module ((srfi srfi-26) #:select (cut))
  #:use-module ((srfi srfi-34) #:select (guard raise))
  #:use-module ((srfi srfi-35) #:prefix s35:)
  #:use-module ((srfi srfi-39) #:select (with-parameters*))
  #:use-module ((srfi srfi-128) #:select (make-default-comparator))
  #:use-module ((srfi srfi-146) #:prefix s146:)
  #:use-module ((ice-9 format) #:select (format))
  #:use-module ((ice-9 control) #:select (let/ec))
  #:use-module ((ice-9 exceptions) #:select
                (exception-with-message? exception-message))
  #:use-module ((ice-9 regex) #:select
                (match:substring match:end regexp-substitute/global))
  #:use-module ((system repl debug) #:select (terminal-width))
  #:use-module ((web http) #:select (read-headers))
  #:use-module ((web request) #:select (request-uri))
  #:use-module ((web response) #:select (build-response))
  #:use-module ((web uri) #:prefix uri:)
  #:use-module ((fibers web server) #:select (run-server))
  #:use-module ((de-paul-records) #:select
                (define-immutable-record-type* match match-let match-let*
                 match-lambda)))



(export &not-found)
(export not-found-condition?)
(s35:define-condition-type &not-found s35:&condition
  not-found-condition?)

(export &forbidden)
(export forbidden-condition?)
(s35:define-condition-type &forbidden s35:&condition
  forbidden-condition?)

(export &bad-request)
(export bad-request-condition?)
(s35:define-condition-type &bad-request s35:&condition
  bad-request-condition?)

(export &condition-with-http-code)
(export condition-with-http-code?)
(export condition-http-code)
(s35:define-condition-type &condition-with-http-code s35:&condition
  condition-with-http-code?
  (http-code condition-http-code))

(define-public (normalize-path path)
  (define absolute?
    (eqv? #\/ (string-ref path 0)))

  (let loop ((parent-walks 0)
             (processed '())
             (to-process (string-split path #\/)))
    (match (list parent-walks processed to-process)
      ((0 () ())
       (if absolute? "/" "."))
      ((_ _ ())
       (string-join (append (map (const "..") (iota parent-walks))
                            (reverse processed))
                    "/"
                    (if absolute? 'prefix 'infix)))
      ((_ _ ((? (cut member <> '("" ".")))
             . rest))
       (loop parent-walks processed rest))
      ((_ () (".." . rest))
       (loop (1+ parent-walks) '() rest))
      ((_ (_ . processed-rest) (".." . rest))
       (loop parent-walks processed-rest rest))
      ((_ _ (segment . rest))
       (loop parent-walks (cons segment processed) rest)))))

(define %illegal-path-regex
  ;; Assume normalized path, forbid parent directory ref.
  (make-regexp "^/?[.][.](/.*)?$"))

(export find-resource-file)
(define* (find-resource-file file #:optional (root-path (%resource-root-path)))
  (define normalized-file
    (normalize-path file))

  (when (regexp-exec %illegal-path-regex normalized-file)
    (raise (s35:condition
            (&forbidden)
            (s35:&message
             (message (format #f "Illegal resource path ~a" file))))))

  (let loop ((paths root-path))
    (match paths
      (()
       (raise (s35:condition
               (&not-found)
               (s35:&message
                (message (format #f "Resource not found ~a" file))))))
      ((? string?)
       (loop (list root-path)))
      (((= (cut format #f "~a/~a" <> normalized-file) file-path)
        . paths-rest)
       (or (and (stat file-path #f) file-path)
           (loop paths-rest))))))



;;;
;;; Multipart form parser
;;;

(define-immutable-record-type* form-data-part
  #:export? #t
  (name)
  (headers)
  (binary?)
  (data))

(define (hex-escape bytestring)
  (with-output-to-string
    (lambda ()
      (let loop ((idx 0))
        (when (< idx (bv:bytevector-length bytestring))
          (let ((byte (bv:bytevector-u8-ref bytestring idx)))
            (cond ((or (> byte 127)
                       (= byte (char->integer #\\)))
                   (format #t "\\x~2,'0x" byte))
                  (else
                   (display (integer->char byte)))))
          (loop (1+ idx)))))))

(define (hex-unescape string)
  ;; We assume a backslash is always followed by `x' and 2 digits (this is
  ;; indeed the case if the string has been generated by `hex-escape' above).
  (define byte-length
    (- (string-length string)
       (* 3 (string-count string #\\))))

  (define bytestring
    (bv:make-bytevector byte-length))

  (let loop ((byte-idx 0)
             (str-idx 0))

    (define (write-string str)
      (string-fold (lambda (char char-idx)
                     (bv:bytevector-u8-set! bytestring char-idx
                                            (char->integer char))
                     (1+ char-idx))
                   byte-idx str))

    (match (string-contains string "\\x" str-idx)

      (#f
       (write-string (substring string str-idx))
       bytestring)

      ((and (= (cut substring string str-idx <>) text-until)
            escape-idx)
       (let* ((byte-idx* (write-string text-until))
              (str-idx* (+ 4 escape-idx))
              (escape-hex (substring string (+ 2 escape-idx) str-idx*))
              (byte (string->number escape-hex 16)))
         (bv:bytevector-u8-set! bytestring byte-idx* byte)
         (loop (1+ byte-idx*) str-idx*))))))

(define %part-regex
  ;; Headers, 2 new lines, actual data and one new line at the end.
  (make-regexp "\r\n(([^\n]+\r\n)+\r\n)(.*)\r\n$"))

(define %backslash-regex
  (make-regexp "\\\\"))

(define-public (parse-multipart/form-data boundary data)
  ;; `data' is a bytevector but Scheme lacks a `bytevector-contains' kind of
  ;; procedure so we instead convert our data to ASCII-only string (with
  ;; non-ASCII bytes expressed as hex escapes) and use regexes to split this
  ;; string around the occurences of boundary in O(n) time.

  (define boundary*
    ;; A backslash is theoretically legal in boundary, let's make sure it's also
    ;; escaped.
    (format #f "--~a"
            (regexp-substitute/global #f %backslash-regex boundary
                                      'pre "\\x5c" 'post)))

  (define data*
    (hex-escape data))

  (define parts
    (let loop ((idx 0)
               (parts '()))
      (match (string-contains data* boundary* idx)

        (#f
         (unless (string-prefix? "--\r\n" (substring data* idx))
           (raise (s35:condition
                   (&bad-request)
                   (s35:&message
                    (message "Last boundary in multipart/form-data should end with extra double hyphen followed by CRLF")))))
         ;; Discard the last "part" (actually, the epilogue).
         (reverse parts))

        ((and (= (cut substring data* idx <>) part)
              (= (cut + <> (string-length boundary*)) idx*))
         ;; Discard the first "part" (actually, the preamble).
         (loop idx* (if (= idx 0)
                        parts
                        (cons part parts)))))))

  (map (lambda (part)
         (match-let* (((and (= (cut match:substring <> 1) headers-string)
                            (= (cut match:substring <> 3) data))
                       (regexp-exec %part-regex part))

                      (headers
                       (call-with-input-string headers-string read-headers))

                      (('form-data . (= (cut assq-ref <> 'name)
                                        (? string? name)))
                       (assq-ref headers 'content-disposition))

                      (content-transfer-encoding
                       (assq-ref headers 'content-transfer-encoding)))

           (unless (member content-transfer-encoding '(#f "binary"))
             (raise (s35:condition
                     (&condition-with-http-code
                      (http-code 415)) ;; Unsupported Media Type
                     (s35:&message
                      (message (format #f "Content-Transfer-Encoding of type \"~a\" is not currently supported for multipart/form-data parts, please use \"binary\" instead"
                                       content-transfer-encoding))))))

           (form-data-part
            name
            headers
            (binary? (not (member (assq-ref headers 'content-type)
                                  '(#f (text/plain)))))
            (data (let ((data-binary (hex-unescape data)))
                    (if binary?
                        data-binary
                        (bv:utf8->string data-binary)))))))
       parts))



;;;
;;; Cookie parser
;;;

(define %cookie-regex
  (make-regexp (string-append
                "^\\s*"
                "([^[:space:];=]([^;=]*[^[:space:];=])?)" ;; name
                "\\s*=\\s*"
                "(([^[:space:];]([^;]*[^[:space:];])?)?)" ;; value
                "\\s*(;|$)")))

(define-public (parse-cookies cookies)
  (let loop ((idx 0)
             (cookies-alist '()))
    (match (regexp-exec %cookie-regex cookies idx)

      (#f
       (when (string-index cookies char-set:graphic idx)
         (raise (s35:condition
                 (&bad-request)
                 (s35:&message
                  (message "Unparseable cookie")))))
       (reverse cookies-alist))

      ((and (= (cut match:substring <> 1) name)
            (= (cut match:substring <> 3) value)
            (= match:end idx*))
       (loop idx* `((,name . ,value) . ,cookies-alist))))))



(define-public %current-path
  (make-parameter #f))

(define-public %current-path-string
  (make-parameter #f))

(define-public %resource-root-path
  (make-parameter '()))

(define-public %redirect/normalize-path?
  (make-parameter #t))

(define-public %redirect/remove-query?
  (make-parameter #t))

(define *%default-headers
  '((content-type . (text/plain (charset . "utf-8")))))

(define-public %default-headers
  (make-parameter *%default-headers))

(define (make-handler-parameter http-code default-message)
  (make-parameter (lambda* (request body #:key message)
                    (values (build-response* #:headers *%default-headers
                                             #:code http-code)
                            (or message default-message)))))

(define-public %not-found-handler
  (make-handler-parameter 404 "There's no page with this address :("))

(define-public %forbidden-handler
  (make-handler-parameter 403 "You. Shall not. Pass. (forbidden)"))

(define-public %bad-request-handler
  (make-handler-parameter 400 "Bad Request (you or your software seems to have messed sth up)."))

(define-public %default-handler
  (let ((not-found-handler (%not-found-handler)))
    (make-parameter (lambda (request body . _)
                      (not-found-handler request body)))))

(define-public %other-codes-handler
  (make-parameter (lambda* (request body http-code #:key message)
                    (values (build-response* #:headers *%default-headers
                                             #:code http-code)
                            (or message
                                (format #f "HTTP code ~d" http-code))))))



(define-immutable-record-type* endpoint
  (handler #:default (lambda _ (raise (s35:condition (s35:&error)))))
  (path #:default '())
  (parameters #:default '())
  #:export? yes,of-course)

(define-immutable-record-type* endpoint-tree-node
  (direct-match #:default #f)
  (any-match #:default #f)
  (children #:default (s146:mapping (make-default-comparator))))

(define-immutable-record-type* endset
  (endpoints #:default (endpoint-tree-node))
  (parameters #:default '())
  #:export? sure,why-not)

(define (endpoint-tree-+ tree path what)
  (let recurse ((segments path)
                (tree-node tree))
    (define (processed-segments)
      (list-head path (- (length path) (length segments))))

    (match-let ((($* endpoint-tree-node direct-match any-match children)
                 tree-node))
      (when (endset? direct-match)
        (raise (s35:condition
                (s35:&error)
                (s35:&message
                 (message (format #f "Endset already registered for ~s"
                                  (processed-segments)))))))

      (match segments
        (()
         (cond (direct-match
                (raise (s35:condition
                        (s35:&error)
                        (s35:&message
                         (message (format #f "Endpoint already registered for ~s"
                                          path))))))
               ((and (endset? what) (not (s146:mapping-empty? children)))
                (raise (s35:condition
                        (s35:&error)
                        (s35:&message
                         (message (format #f "Endpoints already registered under ~s"
                                          path))))))
               (#t
                (endpoint-tree-node
                 #:<- tree-node
                 (direct-match what)))))

        ('any
         (cond (any-match
                (raise (s35:condition
                        (s35:&error)
                        (s35:&message
                         (message (format #f "\"any\" endpoint already registered for ~s"
                                          path))))))
               ((endset? what)
                (raise (s35:condition
                        (s35:&error)
                        (s35:&message
                         (message (format #f "endset should be registered at normal path, not ~s"
                                          path))))))
               (#t
                (endpoint-tree-node
                 #:<- tree-node
                 (any-match what)))))

        (((? string? current-segment) . segments-rest)
         (endpoint-tree-node
          #:<- tree-node
          (children (s146:mapping-update/default
                     children current-segment (cut recurse segments-rest <>)
                     %null-endpoint-tree-node))))))))

(define-public (endset-+ endset-obj path what)
  (endset
   #:<- endset-obj
   (endpoints (endpoint-tree-+ (endset-endpoints endset-obj) path what))))

(eval-when (compile load eval)
  (define-immutable-record-type* path-spec
    (path-syntax)
    (extra-handler-args-syntax))

  (define (syntax->path-spec x)
    (let loop ((path-segments '())
               (path-syntax x))
      (syntax-case path-syntax (unquote unquote-splicing)
        (()
         (path-spec (path-syntax #``(#,@(reverse path-segments)))
                    (extra-handler-args-syntax #'())))
        (rest-arg
         (identifier? #'rest-arg)
         (path-spec (path-syntax #``(#,@(reverse path-segments) . any))
                    (extra-handler-args-syntax #'(rest-arg))))
        ((unquote list-expression)
         (path-spec (path-syntax #``(#,@(reverse path-segments)
                                     . ,list-expression))
                    (extra-handler-args-syntax #'())))
        ((segment-arg . rest)
         (identifier? #'segment-arg)
         (raise (s35:condition
                 (s35:&error)
                 (s35:&message
                  (message "Binding identifiers to path segments not yet supported")))))
        (((unquote segment-expression) . rest)
         (loop (cons #',segment-expression path-segments) #'rest))
        (((unquote-splicing segments-expression) . rest)
         (loop (cons #',@segments-expression path-segments) #'rest))
        ((segment . rest)
         (string? (syntax->datum #'segment))
         (loop (cons #'segment path-segments) #'rest))))))

(export define-endpoint)
(define-syntax define-endpoint
  (lambda (x)
    (syntax-case x ()
      ((_ endset-identifier handler-identifier path parameters-list
          handler-body handler-body-rest ...)
       (and (identifier? #'webservice-identifier)
            (identifier? #'handler-identifier))
       (match (syntax->path-spec #'path)
         (($* path-spec path-syntax extra-handler-args-syntax)
          #`(begin
              (define (handler-identifier
                       #,(datum->syntax #'handler-identifier 'request)
                       #,(datum->syntax #'handler-identifier 'body)
                       . #,extra-handler-args-syntax)
                handler-body handler-body-rest ...)

              (define endset-identifier
                (endset-+ endset-identifier #,path-syntax
                          (endpoint (parameters parameters-list)
                                    (handler handler-identifier)))))))))))

(define-immutable-record-type* endpoint-ref-result
  (path-tail)
  (endpoint)
  (parameters)
  #:finalize (match-lambda
               ((and result ($* endpoint-ref-result (parameters #f) endpoint))
                (let ((parameters (match endpoint
                                    (($* endpoint parameters)
                                     parameters)
                                    (($* endset parameters)
                                     parameters))))
                  (endpoint-ref-result #:<- result parameters)))
               (result
                result)))

(define (default-endpoint)
  (endpoint (handler (%default-handler))
            (parameters `((,%redirect/normalize-path? . #f)
                          (,%redirect/remove-query? . #f)))))

(define* (endpoint-tree-ref tree path #:key
          (default-endpoint (default-endpoint)))
  (let loop ((segments path)
             (tree-node tree)
             (best-any-match-endpoint default-endpoint)
             (best-any-match-path #f))
    (define (default-result)
      (endpoint-ref-result (path-tail best-any-match-path)
                           (endpoint best-any-match-endpoint)))

    (match segments
      (()
       (match tree-node
         (($* endpoint-tree-node (direct-match (? identity endpoint)))
          (endpoint-ref-result (path-tail #f) endpoint))
         (_
          (default-result))))

      ((current-segment . segments-rest)
       (match-let* (((best-any-match-endpoint best-any-match-path)
                     (or (and=> (endpoint-tree-node-any-match tree-node)
                                (cut list <> segments))
                         (list best-any-match-endpoint best-any-match-path)))
                    (children (endpoint-tree-node-children tree-node))
                    (subnode (s146:mapping-ref/default children current-segment
                                                       #f)))
         (cond (subnode
                (loop segments-rest subnode
                      best-any-match-endpoint best-any-match-path))
               (best-any-match-endpoint
                (endpoint-ref-result (path-tail best-any-match-path)
                                     (endpoint best-any-match-endpoint)))
               (#t
                (default-result))))))))

(define* (query-endset endset path #:optional (default #f))
  (match-let loop ((($* endset endpoints (parameters endset-parameters)) endset)
                   (path path)
                   (parameters-lists '()))
    (match (endpoint-tree-ref endpoints path)
      (($* endpoint-ref-result
           path-tail
           (endpoint (? endset? next-endset)))
       (loop next-endset path-tail (cons endset-parameters parameters-lists)))
      (result
       (endpoint-ref-result
        #:<- result
        (parameters #:-> (apply append (reverse (cons* parameters
                                                       endset-parameters
                                                       parameters-lists)))))))))



(export build-response*)
(define* (build-response* #:key (redirect-to #f) (headers (%default-headers))
          (code #f) #:rest args)
  (define code*
    (or code (if redirect-to 301 200)))

  (define redirect-to*
    (if (string? redirect-to)
        (uri:string->uri redirect-to)
        redirect-to))

  (define headers*
    (if redirect-to*
        `((location . ,redirect-to*) . ,headers)
        headers))

  (let loop ((to-filter args)
             (filtered '()))
    (match to-filter
      (()
       (apply build-response #:code code* #:headers headers*
              (reverse filtered)))
      (((? (cut memq <> '(#:code #:headers #:redirect-to))) _ . rest)
       (loop rest filtered))
      ((keyword keyword-arg . rest)
       (loop rest (cons* keyword-arg keyword filtered))))))

(define (%catchall-ref-result)
  (endpoint-ref-result (path-tail #f)
                       (endpoint (endpoint (handler (%default-handler))))))

(define %ugly-uri-path-regex
  (make-regexp ".+/(/|$)"))

(define (exception->msg ex)
  (format #f "~a~%~a~%"
          (if (exception-with-message? ex)
              (format #f "Error: ~a." (exception-message ex))
              "Error occured.")
          ex))

(define (handler request body root-endset)
  (define (bt-string)
    (call-with-output-string (lambda (port)
                               (terminal-width 10000)
                               (display-backtrace (make-stack #t 5 0) port))))

  (match-let* ((uri (request-uri request))
               (path-string (uri:uri-path uri))
               (path (uri:split-and-decode-uri-path path-string))
               (($* endpoint-ref-result path-tail endpoint parameters)
                (query-endset root-endset path (%catchall-ref-result))))
    (define (normalized-uri-path-string)
      (format #f "/~a" (uri:encode-and-join-uri-path path)))

    (define (redirect-path)
      (if (%redirect/normalize-path?)
          (normalized-uri-path-string)
          (uri:uri-path (request-uri request))))

    (define (redirect-query)
      (if (%redirect/remove-query?)
          #f
          (uri:uri-query uri)))

    (define (compute-response)
      (if (or (and (%redirect/normalize-path?)
                   (regexp-exec %ugly-uri-path-regex path-string))
              (and (%redirect/remove-query?)
                   (uri:uri-query uri)))
          (values (build-response* #:headers *%default-headers
                                   #:redirect-to (uri:build-relative-ref
                                                  #:path (redirect-path)
                                                  #:query (redirect-query)))
                  "Redirect...")
          (let/ec escape
            (with-exception-handler
                (lambda (ex)
                  (define msg
                    (exception->msg ex))

                  (display msg (current-error-port))

                  (escape (build-response* #:headers *%default-headers
                                           #:code 500)
                          (string-append msg "\nBacktrace:\n\n" (bt-string))))
              (lambda ()
                (parameterize ((%current-path path)
                               (%current-path-string path-string))
                  (start-stack 'cantius-request
                    (guard (ex ((s35:condition-has-type? ex &not-found)
                                ((%not-found-handler) request body))

                               ((s35:condition-has-type? ex &forbidden)
                                ((%forbidden-handler) request body))

                               ((s35:condition-has-type? ex &bad-request)
                                ((%bad-request-handler) request body))

                               ((s35:condition-has-type?
                                 ex &condition-with-http-code)
                                ((%other-codes-handler) request body
                                 (condition-http-code ex)
                                 #:message
                                 (and (s35:condition-has-type? ex s35:&message)
                                      (s35:condition-message ex)))))

                      (apply (endpoint-handler endpoint) request body
                             (or (and=> path-tail list) '()))))))))))

    (with-parameters* (map car parameters) (map cdr parameters)
      (lambda ()
        (match ((compose list compute-response))
          ((response-body)
           (values (build-response*) response-body))
          ((response response-body)
           (values response response-body))
          (something-else
           (values (build-response* #:headers *%default-headers #:code 500)
                   (format #f "Invalid return values from handler:~%~a"
                           something-else))))))))

(define-public (run-cantius endset . server-args)
  (apply run-server (cut handler <> <> endset) server-args))