;;; SPDX-License-Identifier: CC0-1.0
;;;
;;; Copyright (C) 2023-2025 Wojtek Kosior <koszko@koszko.org>
(define-module (cantius)
#:use-module (scheme base)
#:use-module ((srfi srfi-1) #:prefix s1:)
#:use-module ((srfi srfi-11) #:select (let*-values))
#:use-module ((srfi srfi-26) #:select (cut))
#:use-module ((srfi srfi-35) #:prefix s35:)
#:use-module ((srfi srfi-128) #:select (make-default-comparator))
#:use-module ((srfi srfi-146) #:prefix s146:)
#:use-module ((srfi srfi-247) #:select (define-syntactic-monad))
#:use-module ((ice-9 format) #:select (format))
#:use-module ((ice-9 exceptions) #:select
(exception-with-message? exception-message))
#:use-module ((ice-9 regex) #:prefix reg:)
#:use-module ((system repl debug) #:select (terminal-width))
#:use-module ((web http) #:select (read-headers))
#:use-module ((web request) #:prefix req:)
#:use-module ((web response) #:prefix res:)
#:use-module ((web uri) #:prefix uri:)
#:use-module ((fibers web server) #:select (run-server))
#:use-module ((de-paul-records) #:select
(deftype match match-let match-let*))
#:duplicates (last))
;;;
;;; Source of Cantius, a HTTP server library for Guile, based on Fibers.
;;;
;;;
;;; Condition types.
;;;
(export ¬-found)
(export not-found-condition?)
(s35:define-condition-type ¬-found s35:&condition
not-found-condition?)
(export &forbidden)
(export forbidden-condition?)
(s35:define-condition-type &forbidden s35:&condition
forbidden-condition?)
(export &bad-request)
(export bad-request-condition?)
(s35:define-condition-type &bad-request s35:&condition
bad-request-condition?)
(export &condition-with-http-code)
(export condition-with-http-code?)
(export condition-http-code)
(s35:define-condition-type &condition-with-http-code s35:&condition
condition-with-http-code?
(http-code condition-http-code))
;;;
;;; Cantius syntactic monad.
;;;
(eval-when (compile load eval)
(define-public $-args
'(cant$resource-roots
cant$redirect/normalize-path?
cant$redirect/remove-query?
cant$default-headers
cant$http-code-handlers
cant$other-http-codes-handler
cant$request
cant$body
cant$current-path
cant$current-endpoint
cant$endpoint-matched-path))
(export $)
(eval `(define-syntactic-monad $ . ,$-args) (current-module)))
;;;
;;; Multipart form parser.
;;;
(deftype form-data-part
#:export? #t
name
headers
binary?
data)
(define (hex-escape bytestring)
(with-output-to-string
(lambda ()
(let loop ((idx 0))
(when (< idx (bytevector-length bytestring))
(let ((byte (bytevector-u8-ref bytestring idx)))
(cond ((or (> byte 127)
(= byte (char->integer #\\)))
(format #t "\\x~2,'0x" byte))
(else
(display (integer->char byte)))))
(loop (1+ idx)))))))
(define (hex-unescape string)
;; We assume a backslash is always followed by `x' and 2 digits (this is
;; indeed the case if the string has been generated by `hex-escape' above).
(define byte-length
(- (string-length string)
(* 3 (string-count string #\\))))
(define bytestring
(make-bytevector byte-length))
(let loop ((byte-idx 0)
(str-idx 0))
(define (write-string str)
(string-fold (lambda (char char-idx)
(bytevector-u8-set! bytestring char-idx
(char->integer char))
(1+ char-idx))
byte-idx str))
(match (string-contains string "\\x" str-idx)
(#f
(write-string (substring string str-idx))
bytestring)
((and (= (cut substring string str-idx <>) text-until)
escape-idx)
(let* ((byte-idx* (write-string text-until))
(str-idx* (+ 4 escape-idx))
(escape-hex (substring string (+ 2 escape-idx) str-idx*))
(byte (string->number escape-hex 16)))
(bytevector-u8-set! bytestring byte-idx* byte)
(loop (1+ byte-idx*) str-idx*))))))
(define %part-regex
;; Headers, 2 new lines, actual data and one new line at the end.
(make-regexp "\r\n(([^\n]+\r\n)+\r\n)(.*)\r\n$"))
(define %backslash-regex
(make-regexp "\\\\"))
(define-public (parse-multipart/form-data boundary data)
;; `data' is a bytevector but Scheme lacks a `bytevector-contains' kind of
;; procedure so we instead convert our data to ASCII-only string (with
;; non-ASCII bytes expressed as hex escapes) and use regexes to split this
;; string around the occurences of boundary in O(n) time.
(define boundary*
;; A backslash is theoretically legal in boundary, let's make sure it's also
;; escaped.
(format #f "--~a"
(reg:regexp-substitute/global #f %backslash-regex boundary
'pre "\\x5c" 'post)))
(define data*
(hex-escape data))
(define parts
(let loop ((idx 0)
(parts '()))
(match (string-contains data* boundary* idx)
(#f
(unless (string-prefix? "--\r\n" (substring data* idx))
(raise (s35:condition
(&bad-request)
(s35:&message
(message (format #f "Last boundary in multipart/form-data ~
should end with extra double hyphen ~
followed by CRLF"))))))
;; Discard the last "part" (actually, the epilogue).
(reverse parts))
((and (= (cut substring data* idx <>) part)
(= (cut + <> (string-length boundary*)) idx*))
;; Discard the first "part" (actually, the preamble).
(loop idx* (if (= idx 0)
parts
(cons part parts)))))))
(map (lambda (part)
(match-let* (((and (= (cut reg:match:substring <> 1) headers-string)
(= (cut reg:match:substring <> 3) data))
(regexp-exec %part-regex part))
(headers
(call-with-input-string headers-string read-headers))
(('form-data . (= (cut assq-ref <> 'name)
(? string? name)))
(assq-ref headers 'content-disposition))
(content-transfer-encoding
(assq-ref headers 'content-transfer-encoding)))
(unless (member content-transfer-encoding '(#f "binary"))
(raise (s35:condition
(&condition-with-http-code
(http-code 415)) ;; Unsupported Media Type
(s35:&message
(message (format #f "Content-Transfer-Encoding of type ~
\"~a\" is not currently supported ~
for multipart/form-data parts, ~
please use \"binary\" instead"
content-transfer-encoding))))))
(form-data-part
name
headers
(binary? (not (member (assq-ref headers 'content-type)
'(#f (text/plain)))))
(data (let ((data-binary (hex-unescape data)))
(if binary?
data-binary
(utf8->string data-binary)))))))
parts))
(export multipart/form-data)
($ define (multipart/form-data)
(match (req:request-content-type cant$request)
(('multipart/form-data
. (= (cut assq-ref <> 'boundary) (? identity boundary)))
(parse-multipart/form-data boundary cant$body))
(_
'())))
(define (parts->alist parts)
;; This shouldn't be strongly relevant but we make later parts appear
;; earlier in the alist so that they take precedence when `assoc' is used.
(s1:fold (lambda (part acc)
`((,(form-data-part-name part) . ,part)
. ,acc))
'() parts))
(define-public parse-multipart/form-data*
(compose parts->alist parse-multipart/form-data))
(define-public multipart/form-data*
(compose parts->alist multipart/form-data))
;;;
;;; Several smaller helper utilities.
;;;
;; Operations on URI paths.
(define-public (normalize-path path)
(define absolute?
(eqv? #\/ (string-ref path 0)))
(let loop ((parent-walks 0)
(processed '())
(to-process (string-split path #\/)))
(match (list parent-walks processed to-process)
((0 () ())
(if absolute? "/" "."))
((_ _ ())
(string-join (append (map (const "..") (iota parent-walks))
(reverse processed))
"/"
(if absolute? 'prefix 'infix)))
((_ _ ((? (cut member <> '("" ".")))
. rest))
(loop parent-walks processed rest))
((_ () (".." . rest))
(loop (1+ parent-walks) '() rest))
((_ (_ . processed-rest) (".." . rest))
(loop parent-walks processed-rest rest))
((_ _ (segment . rest))
(loop parent-walks (cons segment processed) rest)))))
(define %illegal-path-regex
;; Assume normalized path, forbid parent directory ref.
(make-regexp "^/?[.][.](/.*)?$"))
(export find-resource-file)
(define* (find-resource-file file root-paths)
(define normalized-file
(normalize-path file))
(when (regexp-exec %illegal-path-regex normalized-file)
(raise (s35:condition
(&forbidden)
(s35:&message
(message (format #f "Illegal resource path ~a" file))))))
(let loop ((paths root-paths))
(match paths
(()
(raise (s35:condition
(¬-found)
(s35:&message
(message (format #f "Resource not found ~a" file))))))
((? string?)
(loop (list root-paths)))
(((= (cut format #f "~a/~a" <> normalized-file) file-path)
. paths-rest)
(or (and (stat file-path #f) file-path)
(loop paths-rest))))))
;; Cookie parser.
(define %cookie-regex
(make-regexp (string-append
"^\\s*"
"([^[:space:];=]([^;=]*[^[:space:];=])?)" ;; name
"\\s*=\\s*"
"(([^[:space:];]([^;]*[^[:space:];])?)?)" ;; value
"\\s*(;|$)")))
(define-public (parse-cookies cookies)
(let loop ((idx 0)
(cookies-alist '()))
(match (regexp-exec %cookie-regex cookies idx)
(#f
(when (string-index cookies char-set:graphic idx)
(raise (s35:condition
(&bad-request)
(s35:&message
(message "Unparseable cookie")))))
(reverse cookies-alist))
((and (= (cut reg:match:substring <> 1) name)
(= (cut reg:match:substring <> 3) value)
(= reg:match:end idx*))
(loop idx* `((,name . ,value) . ,cookies-alist))))))
(export cookies)
($ define (cookies)
(or (and=> (assq-ref (req:request-headers cant$request) 'cookie)
parse-cookies)
'()))
;; Sugar over `build-response', with support for easy redirects.
(export build-response*)
(define* (build-response* headers #:key (redirect-to #f) (code #f) #:rest args)
(define code*
(or code (if redirect-to 301 200)))
(define redirect-to*
(if (string? redirect-to)
(uri:string->uri redirect-to)
redirect-to))
(define headers*
(if redirect-to*
`((location . ,redirect-to*) . ,headers)
headers))
(let loop ((to-filter args)
(filtered '()))
(match to-filter
(()
(apply res:build-response #:code code* #:headers headers*
(reverse filtered)))
(((? (cut memq <> '(#:code #:redirect-to))) _ . rest)
(loop rest filtered))
((keyword keyword-arg . rest)
(loop rest (cons* keyword-arg keyword filtered))))))
(export response)
($ define (response . rest)
(apply build-response* cant$default-headers rest))
;;;
;;; Default values to use as monadic arguments.
;;;
(define %default-headers
'((content-type . (text/plain (charset . "utf-8")))))
(define (make-code-handler-pair http-code default-message)
(cons http-code
($ lambda (. rest)
(values (build-response* %default-headers #:code http-code)
(or (s1:last rest) default-message)))))
(define %default-http-code-handlers
(map (cut apply make-code-handler-pair <>)
`((404 "There's no page with this address :(")
(403 "You. Shall not. Pass. (forbidden)")
(400 ,(format #f "Bad Request (you or your software seems to have ~
messed sth up).")))))
($ define (default-other-http-codes-handler . rest)
(match-let (((message #:message http-code #:http-code . _) (reverse rest)))
(values (build-response* %default-headers #:code http-code)
(or message (format #f "HTTP code ~d" http-code)))))
($ define (get-code-handler code)
(or (and=> (assv code cant$http-code-handlers) cdr)
cant$other-http-codes-handler))
;;;
;;; Trees of endsets and endpoints.
;;;
(define (any->proc-list list-or-proc)
(if (procedure? list-or-proc)
(list list-or-proc)
list-or-proc))
(deftype endpoint
#:export? yes,of-course
(handler #:default (lambda _ (raise (s35:condition (s35:&error)))))
(path #:default '())
(arg-converters #:default '())
#:finalize (cut *endpoint
#:<- <>
(arg-converters #:=> any->proc-list)))
(deftype endpoint-tree-node
(direct-match #:default #f)
(any-match #:default #f)
(children #:default (s146:mapping (make-default-comparator))))
(deftype endset
#:export? sure,why-not
(endpoints #:default (endpoint-tree-node))
(arg-converters #:default '())
#:finalize (cut *endset
#:<- <>
(arg-converters #:=> any->proc-list)))
(define (endpoint-tree-+ tree path what)
(let recurse ((segments path)
(tree-node tree))
(define (processed-segments)
(list-head path (- (length path) (length segments))))
(match-let ((($* endpoint-tree-node direct-match any-match children)
tree-node))
(when (endset? direct-match)
(raise (s35:condition
(s35:&error)
(s35:&message
(message (format #f "Endset already registered for ~s"
(processed-segments)))))))
(match segments
(()
(cond (direct-match
(raise (s35:condition
(s35:&error)
(s35:&message
(message (format #f "Endpoint already registered for ~
~s"
path))))))
((and (endset? what) (not (s146:mapping-empty? children)))
(raise (s35:condition
(s35:&error)
(s35:&message
(message (format #f "Endpoints already registered ~
under ~s"
path))))))
(#t
(endpoint-tree-node
#:<- tree-node
(direct-match what)))))
('any
(cond (any-match
(raise (s35:condition
(s35:&error)
(s35:&message
(message (format #f "\"any\" endpoint already ~
registered for ~s"
path))))))
((endset? what)
(raise (s35:condition
(s35:&error)
(s35:&message
(message (format #f "endset should be registered at ~
normal path, not ~s"
path))))))
(#t
(endpoint-tree-node
#:<- tree-node
(any-match what)))))
(((? string? current-segment) . segments-rest)
(endpoint-tree-node
#:<- tree-node
(children (s146:mapping-update/default
children current-segment (cut recurse segments-rest <>)
%null-endpoint-tree-node))))))))
(define-public (endset-+ endset-obj path what)
(endset
#:<- endset-obj
(endpoints (endpoint-tree-+ (endset-endpoints endset-obj) path what))))
(define-syntactic-monad $query
qy$end qy$endpoint-matched-path qy$arg-converters)
(define %default-endpoint
(endpoint (handler ($ lambda (. rest)
($ (cut apply ($ get-code-handler () 404) <...>)
() (append rest '(#:http-code 404 #:message #f)))))
(arg-converters ($ lambda (. rest)
($ (cut apply values <...>)
((cant$redirect/normalize-path? #f)
(cant$redirect/remove-query? #f)))))))
(define (query-endset endset path-segments)
($query let loop ((qy$end %default-endpoint)
(qy$endpoint-matched-path path-segments)
(qy$arg-converters (reverse (endset-arg-converters endset)))
(current-segments path-segments)
(current-place (endset-endpoints endset)))
($query define (descend-subnode)
(match-let (((current-segment . segments-rest) current-segments)
(($* endpoint-tree-node children) current-place))
(cond ((s146:mapping-ref/default children current-segment #f)
=> (cut $query loop () segments-rest <>))
(else
($query values)))))
(cond ((endset? current-place)
(let ((new-converters (reverse (endset-arg-converters endset))))
($query loop ((qy$endpoint-matched-path current-segments)
(qy$arg-converters (append new-converters
qy$arg-converters)))
current-segments (endset-endpoints current-place))))
((null? current-segments)
(cond ((endpoint-tree-node-direct-match current-place)
=> (lambda (end)
($query values ((qy$end end)))))
(else
($query values))))
((endpoint-tree-node-any-match current-place)
;; Remember that `end' can only be an endpoint here — we don't allow
;; endsets with `any-match'.
=> (lambda (end)
($query descend-subnode ((qy$end end)))))
(else
($query descend-subnode)))))
;;;
;;; Macro layer for endpoint definitions.
;;;
(eval-when (compile load eval)
(deftype path-spec
(path-stx)
(path-rest-stx))
(define (syntax->path-spec x)
(let loop ((path-segments '())
(path-stx x))
(syntax-case path-stx (unquote unquote-splicing)
(()
(path-spec (path-stx #``(#,@(reverse path-segments)))
(path-rest-stx #f)))
(rest-var
(identifier? #'rest-var)
(path-spec (path-stx #``(#,@(reverse path-segments)))
(path-rest-stx #'rest-var)))
((unquote list-expression)
(path-spec (path-stx #``(#,@(reverse path-segments)
. ,list-expression))
(path-rest-stx #f)))
((segment-arg . rest)
(identifier? #'segment-arg)
(raise (s35:condition
(s35:&error)
(s35:&message
(message (format #f "Binding identifiers to path segments ~
not yet supported"))))))
(((unquote segment-expression) . rest)
(loop (cons #',segment-expression path-segments) #'rest))
(((unquote-splicing segments-expression) . rest)
(loop (cons #',@segments-expression path-segments) #'rest))
((segment . rest)
(string? (syntax->datum #'segment))
(loop (cons #'segment path-segments) #'rest))))))
(export define-endpoint)
(define-syntax define-endpoint
(lambda (x)
(syntax-case x ()
((source-stx endset-identifier (handler-identifier . handler-formals)
. rest)
#'(source-stx endset-identifier #f (handler-identifier . handler-formals)
. rest))
((source-stx endset-identifier maybe-monad-identifier
(handler-identifier . handler-formals)
path arg-converters-list
handler-body handler-body-rest ...)
(and (identifier? #'endset-identifier)
(identifier? #'handler-identifier))
(match-let* ((($* path-spec path-stx path-rest-stx)
(syntax->path-spec #'path))
(full-path-stx #``(,@#,path-stx . #,(or path-rest-stx '())))
(stx (cut datum->syntax #'source-stx <>))
(endpoint-matched-path (stx 'cant$endpoint-matched-path))
(lambda-stx
#`(lambda (. handler-formals)
(let (#,@(if path-rest-stx
#`((#,path-rest-stx
(s1:drop #,endpoint-matched-path
path-len)))
'()))
handler-body handler-body-rest ...))))
#`(begin
(define evaluated-path
#,path-stx)
(define handler-identifier
(let ((path-len (length evaluated-path)))
#,(if (syntax->datum #'maybe-monad-identifier)
#`(maybe-monad-identifier . #,lambda-stx)
((macro-transformer
(module-ref (resolve-module '(cantius)) '$))
#`(source-stx . #,lambda-stx)))))
(define endset-identifier
(endset-+ endset-identifier
#,(if path-rest-stx
#``(,@evaluated-path . any)
#'evaluated-path)
(endpoint (arg-converters arg-converters-list)
(handler handler-identifier))))))))))
;;;
;;; Dispatching of requests to handlers, with error handling.
;;;
(define %ugly-uri-path-regex
(make-regexp ".+/(/|$)"))
(define (exception->msg ex)
(format #f "~a~%~a~%"
(if (exception-with-message? ex)
(format #f "Error: ~a." (exception-message ex))
"Error occured.")
ex))
(define (bt-string)
(call-with-output-string (lambda (port)
(terminal-width 10000)
(display-backtrace (make-stack #t 5 0) port))))
($ define (meta-handler/no-redirect . rest)
(define (invoke-http-code-handler code ex)
(let ((msg (and (s35:condition-has-type? ex s35:&message)
(s35:condition-message ex))))
($ (cut apply ($ get-code-handler () code) <...>)
() (append rest `(#:http-code ,code #:message ,msg)))))
(guard (ex ((s35:condition-has-type? ex ¬-found)
(invoke-http-code-handler 404 ex))
((s35:condition-has-type? ex &forbidden)
(invoke-http-code-handler 403 ex))
((s35:condition-has-type? ex &bad-request)
(invoke-http-code-handler 400 ex))
((s35:condition-has-type? ex &condition-with-http-code)
(invoke-http-code-handler (condition-http-code ex) ex)))
(match ($ (cut (compose list apply) (endpoint-handler cant$current-endpoint)
<...>)
() rest)
((response-body)
(values (build-response* cant$default-headers) response-body))
((response response-body)
(values response response-body))
(something-else
(values (build-response* %default-headers #:code 500)
(format #f "Invalid return values from handler:~%~a"
something-else))))))
(define (web-server-handler request body root-endset)
(let* ((uri (req:request-uri request))
(path-string (uri:uri-path uri))
(path-segments (uri:split-and-decode-uri-path path-string)))
($ define (meta-handler . rest)
(define (normalized-uri-path-string)
(format #f "/~a" (uri:encode-and-join-uri-path path-segments)))
(define (redirect-path)
(if cant$redirect/normalize-path?
(normalized-uri-path-string)
path-string))
(define (redirect-query)
(if cant$redirect/remove-query?
#f
(uri:uri-query uri)))
(if (or (and cant$redirect/normalize-path?
(regexp-exec %ugly-uri-path-regex path-string))
(and cant$redirect/remove-query?
(uri:uri-query uri)))
(values (build-response* %default-headers
#:code 307
#:redirect-to (uri:build-relative-ref
#:path (redirect-path)
#:query (redirect-query)))
"307 Temporary Redirect...")
($ (cut apply meta-handler/no-redirect <...>) () rest)))
(guard (ex ((and (getenv "CANTIUS_DEBUG")
(bt-string))
=> (lambda (backtrace)
(let* ((desc (format #f "~a~%~
Backtrace:~%~%~a"
(exception->msg ex) backtrace)))
(format (current-error-port) "In ~a request to `~a':~%~%"
(symbol->string (req:request-method request))
path-string)
(display desc (current-error-port))
(values (build-response* %default-headers #:code 500)
desc))))
(else
(display (exception->msg ex) (current-error-port))
(values (build-response* %default-headers #:code 500)
"Internal server error.")))
(start-stack 'cantius-request
($query let*-values ((() (query-endset root-endset path-segments)))
($ (apply compose meta-handler
(append (reverse (endpoint-arg-converters qy$end))
qy$arg-converters))
((cant$resource-roots '())
(cant$redirect/normalize-path? #t)
(cant$redirect/remove-query? #t)
(cant$default-headers %default-headers)
(cant$http-code-handlers %default-http-code-handlers)
(cant$other-http-codes-handler default-other-http-codes-handler)
(cant$request request)
(cant$body body)
(cant$current-path path-segments)
(cant$current-endpoint qy$end)
(cant$endpoint-matched-path qy$endpoint-matched-path))))))))
(define-public (run-cantius endset . server-args)
(apply run-server (cut web-server-handler <> <> endset) server-args))