diff options
author | Ludovic Courtès <ludo@gnu.org> | 2024-11-28 23:20:00 +0100 |
---|---|---|
committer | Ludovic Courtès <ludo@gnu.org> | 2024-12-01 20:14:16 +0100 |
commit | 5d6691d33ec0b1225696d2545763ec28effc32e9 (patch) | |
tree | 9d8d06185bedc079ffbfb2672a0345fa7501644d | |
parent | db59509931d11501b552101c73e70206fea763bf (diff) | |
download | guix-5d6691d33ec0b1225696d2545763ec28effc32e9.tar.gz guix-5d6691d33ec0b1225696d2545763ec28effc32e9.zip |
gnu-maintenance: ‘generic-html’ update honors <base href="…">.
This fixes updates of ‘curl’: <https://curl.se/download/> includes
<base href="…"> in its head and ignoring it would lead to incorrect
download URLs.
* guix/gnu-maintenance.scm (html-links): Keep track of <base href="…">
in ‘loop’. Rewrite relative links at the end.
Change-Id: I989da78df3431034c9a584f8e10cad87ae6dc920
-rw-r--r-- | guix/gnu-maintenance.scm | 41 |
1 files changed, 28 insertions, 13 deletions
diff --git a/guix/gnu-maintenance.scm b/guix/gnu-maintenance.scm index b612b11c00..ee4882326f 100644 --- a/guix/gnu-maintenance.scm +++ b/guix/gnu-maintenance.scm @@ -39,6 +39,7 @@ #:use-module (guix utils) #:use-module (guix diagnostics) #:use-module (guix i18n) + #:autoload (guix combinators) (fold2) #:use-module (guix memoization) #:use-module (guix records) #:use-module (guix upstream) @@ -483,19 +484,33 @@ hosted on ftp.gnu.org, or not under that name (this is the case for (define (html-links sxml) "Return the list of links found in SXML, the SXML tree of an HTML page." - (let loop ((sxml sxml) - (links '())) - (match sxml - (('a ('@ attributes ...) body ...) - (match (assq 'href attributes) - (#f (fold loop links body)) - (('href url) (fold loop (cons url links) body)))) - ((tag ('@ _ ...) body ...) - (fold loop links body)) - ((tag body ...) - (fold loop links body)) - (_ - links)))) + (define-values (links base) + (let loop ((sxml sxml) + (links '()) + (base #f)) + (match sxml + (('a ('@ attributes ...) body ...) + (match (assq 'href attributes) + (#f (fold2 loop links base body)) + (('href url) (fold2 loop (cons url links) base body)))) + (('base ('@ ('href new-base))) + ;; The base against which relative URL paths must be resolved. + (values links new-base)) + ((tag ('@ _ ...) body ...) + (fold2 loop links base body)) + ((tag body ...) + (fold2 loop links base body)) + (_ + (values links base))))) + + (if base + (map (lambda (link) + (let ((uri (string->uri link))) + (if (or uri (string-prefix? "/" link)) + link + (in-vicinity base link)))) + links) + links)) (define (url->links url) "Return the unique links on the HTML page accessible at URL." |