aboutsummaryrefslogtreecommitdiff
path: root/src/guile
diff options
context:
space:
mode:
authorW. Kosior <koszko@koszko.org>2024-07-18 17:39:41 +0200
committerW. Kosior <koszko@koszko.org>2024-07-23 19:14:54 +0200
commit803bb2fbd71463a75dfed288a9edcfe3ca4a72c6 (patch)
tree3065d8c6aee0f9f7c0eabb19256b34df0a6ac871 /src/guile
parent9dfc29b7d23a4c8b23286531cce54ad148d737b8 (diff)
downloadcantius-803bb2fbd71463a75dfed288a9edcfe3ca4a72c6.tar.gz
cantius-803bb2fbd71463a75dfed288a9edcfe3ca4a72c6.zip
Add `parse-multipart/form-data' procedure.
Diffstat (limited to 'src/guile')
-rw-r--r--src/guile/cantius.scm146
1 files changed, 145 insertions, 1 deletions
diff --git a/src/guile/cantius.scm b/src/guile/cantius.scm
index e51a32e..76cc5fa 100644
--- a/src/guile/cantius.scm
+++ b/src/guile/cantius.scm
@@ -3,6 +3,8 @@
;;; Copyright (C) 2023, 2024 Wojtek Kosior <koszko@koszko.org>
(define-module (cantius)
+ #:use-module ((rnrs bytevectors) #:prefix bv:)
+ #:use-module ((srfi srfi-1) #:select (filter-map))
#:use-module ((srfi srfi-26) #:select (cut))
#:use-module ((srfi srfi-34) #:select (guard raise))
#:use-module ((srfi srfi-35) #:prefix s35:)
@@ -13,7 +15,10 @@
#:use-module ((ice-9 control) #:select (let/ec))
#:use-module ((ice-9 exceptions) #:select
(exception-with-message? exception-message))
+ #:use-module ((ice-9 regex) #:select
+ (match:substring match:end regexp-substitute/global))
#:use-module ((system repl debug) #:select (terminal-width))
+ #:use-module ((web http) #:select (read-headers))
#:use-module ((web request) #:select (request-uri))
#:use-module ((web response) #:select (build-response))
#:use-module ((web uri) #:prefix uri:)
@@ -41,7 +46,7 @@
(export &condition-with-http-code)
(export condition-with-http-code?)
-
+(export condition-http-code)
(s35:define-condition-type &condition-with-http-code s35:&condition
condition-with-http-code?
(http-code condition-http-code))
@@ -102,6 +107,145 @@
+;;;
+;;; Multipart form parser
+;;;
+
+(define-immutable-record-type* form-data-part
+ #:export? #t
+ (name)
+ (headers)
+ (binary?)
+ (data))
+
+(define (hex-escape bytestring)
+ (with-output-to-string
+ (lambda ()
+ (let loop ((idx 0))
+ (when (< idx (bv:bytevector-length bytestring))
+ (let ((byte (bv:bytevector-u8-ref bytestring idx)))
+ (cond ((or (> byte 127)
+ (= byte (char->integer #\\)))
+ (format #t "\\x~2,'0x" byte))
+ (else
+ (display (integer->char byte)))))
+ (loop (1+ idx)))))))
+
+(define (hex-unescape string)
+ ;; We assume a backslash is always followed by `x' and 2 digits (this is
+ ;; indeed the case if the string has been generated by `hex-escape' above).
+ (define byte-length
+ (- (string-length string)
+ (* 3 (string-count string #\\))))
+
+ (define bytestring
+ (bv:make-bytevector byte-length))
+
+ (let loop ((byte-idx 0)
+ (str-idx 0))
+
+ (define (write-string str)
+ (string-fold (lambda (char char-idx)
+ (bv:bytevector-u8-set! bytestring char-idx
+ (char->integer char))
+ (1+ char-idx))
+ byte-idx str))
+
+ (match (string-contains string "\\x" str-idx)
+
+ (#f
+ (write-string (substring string str-idx))
+ bytestring)
+
+ ((and (= (cut substring string str-idx <>) text-until)
+ escape-idx)
+ (let* ((byte-idx* (write-string text-until))
+ (str-idx* (+ 4 escape-idx))
+ (escape-hex (substring string (+ 2 escape-idx) str-idx*))
+ (byte (string->number escape-hex 16)))
+ (bv:bytevector-u8-set! bytestring byte-idx* byte)
+ (loop (1+ byte-idx*) str-idx*))))))
+
+(define %part-regex
+ ;; Headers, 2 new lines, actual data and one new line at the end.
+ (make-regexp "\r\n(([^\n]+\r\n)+\r\n)(.*)\r\n$"))
+
+(define %backslash-regex
+ (make-regexp "\\\\"))
+
+(define-public (parse-multipart/form-data boundary data)
+ ;; `data' is a bytevector but Scheme lacks a `bytevector-contains' kind of
+ ;; procedure so we instead convert our data to ASCII-only string (with
+ ;; non-ASCII bytes expressed as hex escapes) and use regexes to split this
+ ;; string around the occurences of boundary in O(n) time.
+
+ (define boundary*
+ ;; A backslash is theoretically legal in boundary, let's make sure it's also
+ ;; escaped.
+ (format #f "--~a"
+ (regexp-substitute/global #f %backslash-regex boundary
+ 'pre "\\x5c" 'post)))
+
+ (define data*
+ (hex-escape data))
+
+ (define parts
+ (let loop ((idx 0)
+ (parts '()))
+ (match (string-contains data* boundary* idx)
+
+ (#f
+ (unless (string-prefix? "--\r\n" (substring data* idx))
+ (raise (s35:condition
+ (&bad-request)
+ (s35:&message
+ (message "Last boundary in multipart/form-data should end with extra double hyphen followed by CRLF")))))
+ ;; Discard the last "part" (actually, the epilogue).
+ (reverse parts))
+
+ ((and (= (cut substring data* idx <>) part)
+ (= (cut + <> (string-length boundary*)) idx*))
+ ;; Discard the first "part" (actually, the preamble).
+ (loop idx* (if (= idx 0)
+ parts
+ (cons part parts)))))))
+
+ (map (lambda (part)
+ (match-let* (((and (= (cut match:substring <> 1) headers-string)
+ (= (cut match:substring <> 3) data))
+ (regexp-exec %part-regex part))
+
+ (headers
+ (call-with-input-string headers-string read-headers))
+
+ (('form-data . (= (cut assq-ref <> 'name)
+ (? string? name)))
+ (assq-ref headers 'content-disposition))
+
+ (content-transfer-encoding
+ (assq-ref headers 'content-transfer-encoding)))
+
+ (unless (member content-transfer-encoding '(#f "binary"))
+ (raise (s35:condition
+ (&condition-with-http-code
+ (http-code 415)) ;; Unsupported Media Type
+ (s35:&message
+ (message (format #f "Content-Transfer-Encoding of type \"~a\" is not currently supported for multipart/form-data parts, please use \"binary\" instead"
+ content-transfer-encoding))))))
+
+ (form-data-part
+ name
+ headers
+ (binary? (not (member (assq-ref headers 'content-type)
+ '(#f (text/plain)))))
+ (data (let ((data-binary (hex-unescape data)))
+ (if binary?
+ data-binary
+ (bv:utf8->string data-binary)))))))
+ parts))
+
+
+
(define-public %current-path
(make-parameter #f))