diff options
author | W. Kosior <koszko@koszko.org> | 2024-07-18 17:39:41 +0200 |
---|---|---|
committer | W. Kosior <koszko@koszko.org> | 2024-07-23 19:14:54 +0200 |
commit | 803bb2fbd71463a75dfed288a9edcfe3ca4a72c6 (patch) | |
tree | 3065d8c6aee0f9f7c0eabb19256b34df0a6ac871 /src/guile | |
parent | 9dfc29b7d23a4c8b23286531cce54ad148d737b8 (diff) | |
download | cantius-803bb2fbd71463a75dfed288a9edcfe3ca4a72c6.tar.gz cantius-803bb2fbd71463a75dfed288a9edcfe3ca4a72c6.zip |
Add `parse-multipart/form-data' procedure.
Diffstat (limited to 'src/guile')
-rw-r--r-- | src/guile/cantius.scm | 146 |
1 files changed, 145 insertions, 1 deletions
diff --git a/src/guile/cantius.scm b/src/guile/cantius.scm index e51a32e..76cc5fa 100644 --- a/src/guile/cantius.scm +++ b/src/guile/cantius.scm @@ -3,6 +3,8 @@ ;;; Copyright (C) 2023, 2024 Wojtek Kosior <koszko@koszko.org> (define-module (cantius) + #:use-module ((rnrs bytevectors) #:prefix bv:) + #:use-module ((srfi srfi-1) #:select (filter-map)) #:use-module ((srfi srfi-26) #:select (cut)) #:use-module ((srfi srfi-34) #:select (guard raise)) #:use-module ((srfi srfi-35) #:prefix s35:) @@ -13,7 +15,10 @@ #:use-module ((ice-9 control) #:select (let/ec)) #:use-module ((ice-9 exceptions) #:select (exception-with-message? exception-message)) + #:use-module ((ice-9 regex) #:select + (match:substring match:end regexp-substitute/global)) #:use-module ((system repl debug) #:select (terminal-width)) + #:use-module ((web http) #:select (read-headers)) #:use-module ((web request) #:select (request-uri)) #:use-module ((web response) #:select (build-response)) #:use-module ((web uri) #:prefix uri:) @@ -41,7 +46,7 @@ (export &condition-with-http-code) (export condition-with-http-code?) - +(export condition-http-code) (s35:define-condition-type &condition-with-http-code s35:&condition condition-with-http-code? (http-code condition-http-code)) @@ -102,6 +107,145 @@ +;;; +;;; Multipart form parser +;;; + +(define-immutable-record-type* form-data-part + #:export? #t + (name) + (headers) + (binary?) + (data)) + +(define (hex-escape bytestring) + (with-output-to-string + (lambda () + (let loop ((idx 0)) + (when (< idx (bv:bytevector-length bytestring)) + (let ((byte (bv:bytevector-u8-ref bytestring idx))) + (cond ((or (> byte 127) + (= byte (char->integer #\\))) + (format #t "\\x~2,'0x" byte)) + (else + (display (integer->char byte))))) + (loop (1+ idx))))))) + +(define (hex-unescape string) + ;; We assume a backslash is always followed by `x' and 2 digits (this is + ;; indeed the case if the string has been generated by `hex-escape' above). + (define byte-length + (- (string-length string) + (* 3 (string-count string #\\)))) + + (define bytestring + (bv:make-bytevector byte-length)) + + (let loop ((byte-idx 0) + (str-idx 0)) + + (define (write-string str) + (string-fold (lambda (char char-idx) + (bv:bytevector-u8-set! bytestring char-idx + (char->integer char)) + (1+ char-idx)) + byte-idx str)) + + (match (string-contains string "\\x" str-idx) + + (#f + (write-string (substring string str-idx)) + bytestring) + + ((and (= (cut substring string str-idx <>) text-until) + escape-idx) + (let* ((byte-idx* (write-string text-until)) + (str-idx* (+ 4 escape-idx)) + (escape-hex (substring string (+ 2 escape-idx) str-idx*)) + (byte (string->number escape-hex 16))) + (bv:bytevector-u8-set! bytestring byte-idx* byte) + (loop (1+ byte-idx*) str-idx*)))))) + +(define %part-regex + ;; Headers, 2 new lines, actual data and one new line at the end. + (make-regexp "\r\n(([^\n]+\r\n)+\r\n)(.*)\r\n$")) + +(define %backslash-regex + (make-regexp "\\\\")) + +(define-public (parse-multipart/form-data boundary data) + ;; `data' is a bytevector but Scheme lacks a `bytevector-contains' kind of + ;; procedure so we instead convert our data to ASCII-only string (with + ;; non-ASCII bytes expressed as hex escapes) and use regexes to split this + ;; string around the occurences of boundary in O(n) time. + + (define boundary* + ;; A backslash is theoretically legal in boundary, let's make sure it's also + ;; escaped. + (format #f "--~a" + (regexp-substitute/global #f %backslash-regex boundary + 'pre "\\x5c" 'post))) + + (define data* + (hex-escape data)) + + (define parts + (let loop ((idx 0) + (parts '())) + (match (string-contains data* boundary* idx) + + (#f + (unless (string-prefix? "--\r\n" (substring data* idx)) + (raise (s35:condition + (&bad-request) + (s35:&message + (message "Last boundary in multipart/form-data should end with extra double hyphen followed by CRLF"))))) + ;; Discard the last "part" (actually, the epilogue). + (reverse parts)) + + ((and (= (cut substring data* idx <>) part) + (= (cut + <> (string-length boundary*)) idx*)) + ;; Discard the first "part" (actually, the preamble). + (loop idx* (if (= idx 0) + parts + (cons part parts))))))) + + (map (lambda (part) + (match-let* (((and (= (cut match:substring <> 1) headers-string) + (= (cut match:substring <> 3) data)) + (regexp-exec %part-regex part)) + + (headers + (call-with-input-string headers-string read-headers)) + + (('form-data . (= (cut assq-ref <> 'name) + (? string? name))) + (assq-ref headers 'content-disposition)) + + (content-transfer-encoding + (assq-ref headers 'content-transfer-encoding))) + + (unless (member content-transfer-encoding '(#f "binary")) + (raise (s35:condition + (&condition-with-http-code + (http-code 415)) ;; Unsupported Media Type + (s35:&message + (message (format #f "Content-Transfer-Encoding of type \"~a\" is not currently supported for multipart/form-data parts, please use \"binary\" instead" + content-transfer-encoding)))))) + + (form-data-part + name + headers + (binary? (not (member (assq-ref headers 'content-type) + '(#f (text/plain))))) + (data (let ((data-binary (hex-unescape data))) + (if binary? + data-binary + (bv:utf8->string data-binary))))))) + parts)) + + + (define-public %current-path (make-parameter #f)) |