;;; SPDX-License-Identifier: CC0-1.0 ;;; ;;; Copyright (C) 2023-2025 Wojtek Kosior (define-module (cantius) #:use-module (scheme base) #:use-module ((srfi srfi-1) #:select (drop fold last)) #:use-module ((srfi srfi-11) #:select (let*-values)) #:use-module ((srfi srfi-26) #:select (cut)) #:use-module ((srfi srfi-35) #:prefix s35:) #:use-module ((srfi srfi-128) #:select (make-default-comparator)) #:use-module ((srfi srfi-146) #:prefix s146:) #:use-module ((srfi srfi-247) #:select (define-syntactic-monad)) #:use-module ((ice-9 format) #:select (format)) #:use-module ((ice-9 exceptions) #:select (exception-with-message? exception-message)) #:use-module ((ice-9 regex) #:select (match:substring match:end regexp-substitute/global)) #:use-module ((system repl debug) #:select (terminal-width)) #:use-module ((web http) #:select (read-headers)) #:use-module ((web request) #:prefix req:) #:use-module ((web response) #:prefix res:) #:use-module ((web uri) #:prefix uri:) #:use-module ((fibers web server) #:select (run-server)) #:use-module ((de-paul-records) #:select (deftype match match-let match-let*)) #:duplicates (last)) ;;; ;;; Source of Cantius, a HTTP server library for Guile, based on Fibers. ;;; ;;; ;;; Condition types. ;;; (export ¬-found) (export not-found-condition?) (s35:define-condition-type ¬-found s35:&condition not-found-condition?) (export &forbidden) (export forbidden-condition?) (s35:define-condition-type &forbidden s35:&condition forbidden-condition?) (export &bad-request) (export bad-request-condition?) (s35:define-condition-type &bad-request s35:&condition bad-request-condition?) (export &condition-with-http-code) (export condition-with-http-code?) (export condition-http-code) (s35:define-condition-type &condition-with-http-code s35:&condition condition-with-http-code? (http-code condition-http-code)) ;;; ;;; Multipart form parser. ;;; (deftype form-data-part #:export? #t name headers binary? data) (define (hex-escape bytestring) (with-output-to-string (lambda () (let loop ((idx 0)) (when (< idx (bytevector-length bytestring)) (let ((byte (bytevector-u8-ref bytestring idx))) (cond ((or (> byte 127) (= byte (char->integer #\\))) (format #t "\\x~2,'0x" byte)) (else (display (integer->char byte))))) (loop (1+ idx))))))) (define (hex-unescape string) ;; We assume a backslash is always followed by `x' and 2 digits (this is ;; indeed the case if the string has been generated by `hex-escape' above). (define byte-length (- (string-length string) (* 3 (string-count string #\\)))) (define bytestring (make-bytevector byte-length)) (let loop ((byte-idx 0) (str-idx 0)) (define (write-string str) (string-fold (lambda (char char-idx) (bytevector-u8-set! bytestring char-idx (char->integer char)) (1+ char-idx)) byte-idx str)) (match (string-contains string "\\x" str-idx) (#f (write-string (substring string str-idx)) bytestring) ((and (= (cut substring string str-idx <>) text-until) escape-idx) (let* ((byte-idx* (write-string text-until)) (str-idx* (+ 4 escape-idx)) (escape-hex (substring string (+ 2 escape-idx) str-idx*)) (byte (string->number escape-hex 16))) (bytevector-u8-set! bytestring byte-idx* byte) (loop (1+ byte-idx*) str-idx*)))))) (define %part-regex ;; Headers, 2 new lines, actual data and one new line at the end. (make-regexp "\r\n(([^\n]+\r\n)+\r\n)(.*)\r\n$")) (define %backslash-regex (make-regexp "\\\\")) (define-public (parse-multipart/form-data boundary data) ;; `data' is a bytevector but Scheme lacks a `bytevector-contains' kind of ;; procedure so we instead convert our data to ASCII-only string (with ;; non-ASCII bytes expressed as hex escapes) and use regexes to split this ;; string around the occurences of boundary in O(n) time. (define boundary* ;; A backslash is theoretically legal in boundary, let's make sure it's also ;; escaped. (format #f "--~a" (regexp-substitute/global #f %backslash-regex boundary 'pre "\\x5c" 'post))) (define data* (hex-escape data)) (define parts (let loop ((idx 0) (parts '())) (match (string-contains data* boundary* idx) (#f (unless (string-prefix? "--\r\n" (substring data* idx)) (raise (s35:condition (&bad-request) (s35:&message (message (format #f "Last boundary in multipart/form-data ~ should end with extra double hyphen ~ followed by CRLF")))))) ;; Discard the last "part" (actually, the epilogue). (reverse parts)) ((and (= (cut substring data* idx <>) part) (= (cut + <> (string-length boundary*)) idx*)) ;; Discard the first "part" (actually, the preamble). (loop idx* (if (= idx 0) parts (cons part parts))))))) (map (lambda (part) (match-let* (((and (= (cut match:substring <> 1) headers-string) (= (cut match:substring <> 3) data)) (regexp-exec %part-regex part)) (headers (call-with-input-string headers-string read-headers)) (('form-data . (= (cut assq-ref <> 'name) (? string? name))) (assq-ref headers 'content-disposition)) (content-transfer-encoding (assq-ref headers 'content-transfer-encoding))) (unless (member content-transfer-encoding '(#f "binary")) (raise (s35:condition (&condition-with-http-code (http-code 415)) ;; Unsupported Media Type (s35:&message (message (format #f "Content-Transfer-Encoding of type ~ \"~a\" is not currently supported ~ for multipart/form-data parts, ~ please use \"binary\" instead" content-transfer-encoding)))))) (form-data-part name headers (binary? (not (member (assq-ref headers 'content-type) '(#f (text/plain))))) (data (let ((data-binary (hex-unescape data))) (if binary? data-binary (utf8->string data-binary))))))) parts)) (define-public (parse-request-multipart/form-data request body) (match-let* ((content-type (assq-ref (req:request-headers request) 'content-type)) (('multipart/form-data . (= (cut assq-ref <> 'boundary) (? identity boundary))) content-type)) (parse-multipart/form-data boundary body))) (define (parts->alist parts) ;; This shouldn't be strongly relevant but we make later parts appear ;; earlier in the alist so that they take precedence when `assoc' is used. (fold (lambda (part acc) `((,(form-data-part-name part) . ,part) . ,acc)) '() parts)) (define-public parse-multipart/form-data* (compose parts->alist parse-multipart/form-data)) (define-public parse-request-multipart/form-data* (compose parts->alist parse-request-multipart/form-data)) ;;; ;;; Several smaller helper utilities. ;;; ;; Operations on URI paths. (define-public (normalize-path path) (define absolute? (eqv? #\/ (string-ref path 0))) (let loop ((parent-walks 0) (processed '()) (to-process (string-split path #\/))) (match (list parent-walks processed to-process) ((0 () ()) (if absolute? "/" ".")) ((_ _ ()) (string-join (append (map (const "..") (iota parent-walks)) (reverse processed)) "/" (if absolute? 'prefix 'infix))) ((_ _ ((? (cut member <> '("" "."))) . rest)) (loop parent-walks processed rest)) ((_ () (".." . rest)) (loop (1+ parent-walks) '() rest)) ((_ (_ . processed-rest) (".." . rest)) (loop parent-walks processed-rest rest)) ((_ _ (segment . rest)) (loop parent-walks (cons segment processed) rest))))) (define %illegal-path-regex ;; Assume normalized path, forbid parent directory ref. (make-regexp "^/?[.][.](/.*)?$")) (export find-resource-file) (define* (find-resource-file file root-paths) (define normalized-file (normalize-path file)) (when (regexp-exec %illegal-path-regex normalized-file) (raise (s35:condition (&forbidden) (s35:&message (message (format #f "Illegal resource path ~a" file)))))) (let loop ((paths root-paths)) (match paths (() (raise (s35:condition (¬-found) (s35:&message (message (format #f "Resource not found ~a" file)))))) ((? string?) (loop (list root-paths))) (((= (cut format #f "~a/~a" <> normalized-file) file-path) . paths-rest) (or (and (stat file-path #f) file-path) (loop paths-rest)))))) ;; Cookie parser. (define %cookie-regex (make-regexp (string-append "^\\s*" "([^[:space:];=]([^;=]*[^[:space:];=])?)" ;; name "\\s*=\\s*" "(([^[:space:];]([^;]*[^[:space:];])?)?)" ;; value "\\s*(;|$)"))) (define-public (parse-cookies cookies) (let loop ((idx 0) (cookies-alist '())) (match (regexp-exec %cookie-regex cookies idx) (#f (when (string-index cookies char-set:graphic idx) (raise (s35:condition (&bad-request) (s35:&message (message "Unparseable cookie"))))) (reverse cookies-alist)) ((and (= (cut match:substring <> 1) name) (= (cut match:substring <> 3) value) (= match:end idx*)) (loop idx* `((,name . ,value) . ,cookies-alist)))))) ;; Sugar over `build-response', with support for easy redirects. (export build-response*) (define* (build-response* headers #:key (redirect-to #f) (code #f) #:rest args) (define code* (or code (if redirect-to 301 200))) (define redirect-to* (if (string? redirect-to) (uri:string->uri redirect-to) redirect-to)) (define headers* (if redirect-to* `((location . ,redirect-to*) . ,headers) headers)) (let loop ((to-filter args) (filtered '())) (match to-filter (() (apply res:build-response #:code code* #:headers headers* (reverse filtered))) (((? (cut memq <> '(#:code #:redirect-to))) _ . rest) (loop rest filtered)) ((keyword keyword-arg . rest) (loop rest (cons* keyword-arg keyword filtered)))))) ;;; ;;; Cantius syntactic monad and default values to use as its arguments. ;;; (eval-when (compile load eval) (define-public $-args '(cant$resource-roots cant$redirect/normalize-path? cant$redirect/remove-query? cant$default-headers cant$http-code-handlers cant$other-http-codes-handler cant$request cant$body cant$current-path cant$current-endpoint cant$endpoint-matched-path)) (export $) (eval `(define-syntactic-monad $ . ,$-args) (current-module))) (define %default-headers '((content-type . (text/plain (charset . "utf-8"))))) (define (make-code-handler-pair http-code default-message) (cons http-code ($ lambda (. rest) (values (build-response* %default-headers #:code http-code) (or (last rest) default-message))))) (define %default-http-code-handlers (map (cut apply make-code-handler-pair <>) `((404 "There's no page with this address :(") (403 "You. Shall not. Pass. (forbidden)") (400 ,(format #f "Bad Request (you or your software seems to have ~ messed sth up)."))))) ($ define (default-other-http-codes-handler . rest) (match-let (((message #:message http-code #:http-code . _) (reverse rest))) (values (build-response* %default-headers #:code http-code) (or message (format #f "HTTP code ~d" http-code))))) ($ define (get-code-handler code) (or (and=> (assv code cant$http-code-handlers) cdr) cant$other-http-codes-handler)) ;;; ;;; Trees of endsets and endpoints. ;;; (define (any->proc-list list-or-proc) (if (procedure? list-or-proc) (list list-or-proc) list-or-proc)) (deftype endpoint #:export? yes,of-course (handler #:default (lambda _ (raise (s35:condition (s35:&error))))) (path #:default '()) (arg-converters #:default '()) #:finalize (cut *endpoint #:<- <> (arg-converters #:=> any->proc-list))) (deftype endpoint-tree-node (direct-match #:default #f) (any-match #:default #f) (children #:default (s146:mapping (make-default-comparator)))) (deftype endset #:export? sure,why-not (endpoints #:default (endpoint-tree-node)) (arg-converters #:default '()) #:finalize (cut *endset #:<- <> (arg-converters #:=> any->proc-list))) (define (endpoint-tree-+ tree path what) (let recurse ((segments path) (tree-node tree)) (define (processed-segments) (list-head path (- (length path) (length segments)))) (match-let ((($* endpoint-tree-node direct-match any-match children) tree-node)) (when (endset? direct-match) (raise (s35:condition (s35:&error) (s35:&message (message (format #f "Endset already registered for ~s" (processed-segments))))))) (match segments (() (cond (direct-match (raise (s35:condition (s35:&error) (s35:&message (message (format #f "Endpoint already registered for ~ ~s" path)))))) ((and (endset? what) (not (s146:mapping-empty? children))) (raise (s35:condition (s35:&error) (s35:&message (message (format #f "Endpoints already registered ~ under ~s" path)))))) (#t (endpoint-tree-node #:<- tree-node (direct-match what))))) ('any (cond (any-match (raise (s35:condition (s35:&error) (s35:&message (message (format #f "\"any\" endpoint already ~ registered for ~s" path)))))) ((endset? what) (raise (s35:condition (s35:&error) (s35:&message (message (format #f "endset should be registered at ~ normal path, not ~s" path)))))) (#t (endpoint-tree-node #:<- tree-node (any-match what))))) (((? string? current-segment) . segments-rest) (endpoint-tree-node #:<- tree-node (children (s146:mapping-update/default children current-segment (cut recurse segments-rest <>) %null-endpoint-tree-node)))))))) (define-public (endset-+ endset-obj path what) (endset #:<- endset-obj (endpoints (endpoint-tree-+ (endset-endpoints endset-obj) path what)))) (define-syntactic-monad $query qy$end qy$endpoint-matched-path qy$arg-converters) (define %default-endpoint (endpoint (handler ($ lambda (. rest) ($ (cut apply ($ get-code-handler () 404) <...>) () (append rest '(#:http-code 404 #:message #f))))) (arg-converters ($ lambda (. rest) ($ (cut apply values <...>) ((cant$redirect/normalize-path? #f) (cant$redirect/remove-query? #f))))))) (define (query-endset endset path-segments) ($query let loop ((qy$end %default-endpoint) (qy$endpoint-matched-path path-segments) (qy$arg-converters (reverse (endset-arg-converters endset))) (current-segments path-segments) (current-place (endset-endpoints endset))) ($query define (descend-subnode) (match-let (((current-segment . segments-rest) current-segments) (($* endpoint-tree-node children) current-place)) (cond ((s146:mapping-ref/default children current-segment #f) => (cut $query loop () segments-rest <>)) (else ($query values))))) (cond ((endset? current-place) (let ((new-converters (reverse (endset-arg-converters endset)))) ($query loop ((qy$endpoint-matched-path current-segments) (qy$arg-converters (append new-converters qy$arg-converters))) current-segments (endset-endpoints current-place)))) ((null? current-segments) (cond ((endpoint-tree-node-direct-match current-place) => (lambda (end) ($query values ((qy$end end))))) (else ($query values)))) ((endpoint-tree-node-any-match current-place) ;; Remember that `end' can only be an endpoint here — we don't allow ;; endsets with `any-match'. => (lambda (end) ($query descend-subnode ((qy$end end))))) (else ($query descend-subnode))))) ;;; ;;; Macro layer for endpoint definitions. ;;; (eval-when (compile load eval) (deftype path-spec (path-stx) (path-rest-stx)) (define (syntax->path-spec x) (let loop ((path-segments '()) (path-stx x)) (syntax-case path-stx (unquote unquote-splicing) (() (path-spec (path-stx #``(#,@(reverse path-segments))) (path-rest-stx #f))) (rest-var (identifier? #'rest-var) (path-spec (path-stx #``(#,@(reverse path-segments))) (path-rest-stx #'rest-var))) ((unquote list-expression) (path-spec (path-stx #``(#,@(reverse path-segments) . ,list-expression)) (path-rest-stx #f))) ((segment-arg . rest) (identifier? #'segment-arg) (raise (s35:condition (s35:&error) (s35:&message (message (format #f "Binding identifiers to path segments ~ not yet supported")))))) (((unquote segment-expression) . rest) (loop (cons #',segment-expression path-segments) #'rest)) (((unquote-splicing segments-expression) . rest) (loop (cons #',@segments-expression path-segments) #'rest)) ((segment . rest) (string? (syntax->datum #'segment)) (loop (cons #'segment path-segments) #'rest)))))) (export define-endpoint) (define-syntax define-endpoint (lambda (x) (syntax-case x () ((source-stx endset-identifier (handler-identifier . handler-formals) path arg-converters-list handler-body handler-body-rest ...) (and (identifier? #'endset-identifier) (identifier? #'handler-identifier)) (match-let* ((($* path-spec path-stx path-rest-stx) (syntax->path-spec #'path)) (full-path-stx #``(,@#,path-stx . #,(or path-rest-stx '()))) (stx (cut datum->syntax #'source-stx <>)) (endpoint-matched-path (stx 'cant$endpoint-matched-path))) #`(begin (define evaluated-path #,path-stx) (define handler-identifier (let ((path-len (length evaluated-path))) #,((macro-transformer (module-ref (resolve-module '(cantius)) '$)) #`(source-stx lambda (. handler-formals) (let (#,@(if path-rest-stx #`((#,path-rest-stx (drop #,endpoint-matched-path path-len))) '())) handler-body handler-body-rest ...))))) (define endset-identifier (endset-+ endset-identifier #,(if path-rest-stx #``(,@evaluated-path . any) #'evaluated-path) (endpoint (arg-converters arg-converters-list) (handler handler-identifier)))))))))) ;;; ;;; Dispatching of requests to handlers, with error handling. ;;; (define %ugly-uri-path-regex (make-regexp ".+/(/|$)")) (define (exception->msg ex) (format #f "~a~%~a~%" (if (exception-with-message? ex) (format #f "Error: ~a." (exception-message ex)) "Error occured.") ex)) (define (bt-string) (call-with-output-string (lambda (port) (terminal-width 10000) (display-backtrace (make-stack #t 5 0) port)))) ($ define (meta-handler/no-redirect . rest) (define (invoke-http-code-handler code ex) (let ((msg (and (s35:condition-has-type? ex s35:&message) (s35:condition-message ex)))) ($ (cut apply ($ get-code-handler () code) <...>) () (append rest `(#:http-code ,code #:message ,msg))))) (guard (ex ((s35:condition-has-type? ex ¬-found) (invoke-http-code-handler 404 ex)) ((s35:condition-has-type? ex &forbidden) (invoke-http-code-handler 403 ex)) ((s35:condition-has-type? ex &bad-request) (invoke-http-code-handler 400 ex)) ((s35:condition-has-type? ex &condition-with-http-code) (invoke-http-code-handler (condition-http-code ex) ex))) (match ($ (cut (compose list apply) (endpoint-handler cant$current-endpoint) <...>) () rest) ((response-body) (values (build-response* cant$default-headers) response-body)) ((response response-body) (values response response-body)) (something-else (values (build-response* %default-headers #:code 500) (format #f "Invalid return values from handler:~%~a" something-else)))))) (define (web-server-handler request body root-endset) (let* ((uri (req:request-uri request)) (path-string (uri:uri-path uri)) (path-segments (uri:split-and-decode-uri-path path-string))) ($ define (meta-handler . rest) (define (normalized-uri-path-string) (format #f "/~a" (uri:encode-and-join-uri-path path-segments))) (define (redirect-path) (if cant$redirect/normalize-path? (normalized-uri-path-string) path-string)) (define (redirect-query) (if cant$redirect/remove-query? #f (uri:uri-query uri))) (if (or (and cant$redirect/normalize-path? (regexp-exec %ugly-uri-path-regex path-string)) (and cant$redirect/remove-query? (uri:uri-query uri))) (values (build-response* cant$default-headers #:code 307 #:redirect-to (uri:build-relative-ref #:path (redirect-path) #:query (redirect-query))) "307 Temporary Redirect...") ($ (cut apply meta-handler/no-redirect <...>) () rest))) (guard (ex ((and (getenv "CANTIUS_DEBUG") (bt-string)) => (lambda (backtrace) (let* ((desc (format #f "~a~%~ Backtrace:~%~%~a" (exception->msg ex) backtrace))) (format (current-error-port) "In ~a request to `~a':~%~%" (symbol->string (req:request-method request)) path-string) (display desc (current-error-port)) (values (build-response* %default-headers #:code 500) desc)))) (else (display (exception->msg ex) (current-error-port)) (values (build-response* %default-headers #:code 500) "Internal server error."))) (start-stack 'cantius-request ($query let*-values ((() (query-endset root-endset path-segments))) ($ (apply compose meta-handler (append (reverse (endpoint-arg-converters qy$end)) qy$arg-converters)) ((cant$resource-roots '()) (cant$redirect/normalize-path? #t) (cant$redirect/remove-query? #t) (cant$default-headers %default-headers) (cant$http-code-handlers %default-http-code-handlers) (cant$other-http-codes-handler default-other-http-codes-handler) (cant$request request) (cant$body body) (cant$current-path path-segments) (cant$current-endpoint qy$end) (cant$endpoint-matched-path qy$endpoint-matched-path)))))))) (define-public (run-cantius endset . server-args) (apply run-server (cut web-server-handler <> <> endset) server-args))