aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLudovic Courtès <ludo@gnu.org>2024-11-28 23:20:00 +0100
committerLudovic Courtès <ludo@gnu.org>2024-12-01 20:14:16 +0100
commit5d6691d33ec0b1225696d2545763ec28effc32e9 (patch)
tree9d8d06185bedc079ffbfb2672a0345fa7501644d
parentdb59509931d11501b552101c73e70206fea763bf (diff)
downloadguix-5d6691d33ec0b1225696d2545763ec28effc32e9.tar.gz
guix-5d6691d33ec0b1225696d2545763ec28effc32e9.zip
gnu-maintenance: ‘generic-html’ update honors <base href="…">.
This fixes updates of ‘curl’: <https://curl.se/download/> includes <base href="…"> in its head and ignoring it would lead to incorrect download URLs. * guix/gnu-maintenance.scm (html-links): Keep track of <base href="…"> in ‘loop’. Rewrite relative links at the end. Change-Id: I989da78df3431034c9a584f8e10cad87ae6dc920
-rw-r--r--guix/gnu-maintenance.scm41
1 files changed, 28 insertions, 13 deletions
diff --git a/guix/gnu-maintenance.scm b/guix/gnu-maintenance.scm
index b612b11c00..ee4882326f 100644
--- a/guix/gnu-maintenance.scm
+++ b/guix/gnu-maintenance.scm
@@ -39,6 +39,7 @@
#:use-module (guix utils)
#:use-module (guix diagnostics)
#:use-module (guix i18n)
+ #:autoload (guix combinators) (fold2)
#:use-module (guix memoization)
#:use-module (guix records)
#:use-module (guix upstream)
@@ -483,19 +484,33 @@ hosted on ftp.gnu.org, or not under that name (this is the case for
(define (html-links sxml)
"Return the list of links found in SXML, the SXML tree of an HTML page."
- (let loop ((sxml sxml)
- (links '()))
- (match sxml
- (('a ('@ attributes ...) body ...)
- (match (assq 'href attributes)
- (#f (fold loop links body))
- (('href url) (fold loop (cons url links) body))))
- ((tag ('@ _ ...) body ...)
- (fold loop links body))
- ((tag body ...)
- (fold loop links body))
- (_
- links))))
+ (define-values (links base)
+ (let loop ((sxml sxml)
+ (links '())
+ (base #f))
+ (match sxml
+ (('a ('@ attributes ...) body ...)
+ (match (assq 'href attributes)
+ (#f (fold2 loop links base body))
+ (('href url) (fold2 loop (cons url links) base body))))
+ (('base ('@ ('href new-base)))
+ ;; The base against which relative URL paths must be resolved.
+ (values links new-base))
+ ((tag ('@ _ ...) body ...)
+ (fold2 loop links base body))
+ ((tag body ...)
+ (fold2 loop links base body))
+ (_
+ (values links base)))))
+
+ (if base
+ (map (lambda (link)
+ (let ((uri (string->uri link)))
+ (if (or uri (string-prefix? "/" link))
+ link
+ (in-vicinity base link))))
+ links)
+ links))
(define (url->links url)
"Return the unique links on the HTML page accessible at URL."