From c25fd8eeb136ee48e444b5cd91cee65426bb845f Mon Sep 17 00:00:00 2001 From: Ludovic Courtès Date: Sun, 1 Dec 2024 17:46:43 +0100 Subject: Revert "gnu: ucx: Update to 1.17.0." MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 7c36577cdec8185ea7df608eb9a68424e19ee9cb, which broke unit tests running in the chroot build environment. For instance, tests of ‘hdf5-parallel-openmpi’ would fail with: Testing ../../src/h5diff/ph5diff -c 1_vds.h5 2_vds.h5 *FAILED* ====Expected result (expect_sorted) differs from actual result (actual_sorted) *** expect_sorted 2024-12-01 16:41:03.795879595 +0000 --- actual_sorted 2024-12-01 16:41:03.791879542 +0000 *************** *** 1,3 **** --- 1,15 ---- Not comparable: has rank 3, dimensions [5x18x8], max dimensions [18446744073709551615x18x8] Warning: or is a virtual dataset + [1733071262.437780] [localhost:33263:0] tcp_iface.c:893 UCX ERROR scandir(/sys/class/net) failed: No such file or directory + [1733071262.468994] [localhost:33247:0] tcp_iface.c:893 UCX ERROR scandir(/sys/class/net) failed: No such file or directory + [1733071262.479911] [localhost:33242:0] tcp_iface.c:893 UCX ERROR scandir(/sys/class/net) failed: No such file or directory Change-Id: I9c08d3c5375a2abd7d78344dfbef9b407828d1e0 --- gnu/local.mk | 1 + gnu/packages/fabric-management.scm | 18 ++-- gnu/packages/patches/ucx-tcp-iface-ioctl.patch | 127 +++++++++++++++++++++++++ 3 files changed, 134 insertions(+), 12 deletions(-) create mode 100644 gnu/packages/patches/ucx-tcp-iface-ioctl.patch diff --git a/gnu/local.mk b/gnu/local.mk index 5f31741f3c..c89fd88282 100644 --- a/gnu/local.mk +++ b/gnu/local.mk @@ -2254,6 +2254,7 @@ dist_patch_DATA = \ %D%/packages/patches/u-boot-calloc-visibility.patch \ %D%/packages/patches/u-boot-nintendo-nes-serial.patch \ %D%/packages/patches/u-boot-rockchip-inno-usb.patch \ + %D%/packages/patches/ucx-tcp-iface-ioctl.patch \ %D%/packages/patches/ultrastar-deluxe-no-freesans.patch \ %D%/packages/patches/ungoogled-chromium-extension-search-path.patch \ %D%/packages/patches/ungoogled-chromium-ffmpeg-compat.patch \ diff --git a/gnu/packages/fabric-management.scm b/gnu/packages/fabric-management.scm index 9b37314e65..7e539406db 100644 --- a/gnu/packages/fabric-management.scm +++ b/gnu/packages/fabric-management.scm @@ -185,22 +185,17 @@ testing InfiniBand networks.") (define-public ucx (package (name "ucx") - (version "1.17.0") + (version "1.15.0") (source (origin (method git-fetch) (uri (git-reference (url "https://github.com/openucx/ucx") (commit (string-append "v" version)))) (file-name (git-file-name name version)) + (patches (search-patches "ucx-tcp-iface-ioctl.patch")) (sha256 (base32 - "09182kx60kq7iyjyz3mpcrgp1mm0lnpc0f4hd4hlw5yyabkxrpa1")) - (snippet - ;; As seen in commit b0a275a5492125a13020cd095fe9934e0b5e7c6a. - #~(begin (use-modules (guix build utils)) - (substitute* "src/ucs/time/time.h" - (("#include ") - "#include \n#include ")))))) + "1mk46vyfp8hsivk88s8gv0nf458jfs59fczpf66wwa3a9yp324jp")))) (build-system gnu-build-system) (arguments (list @@ -241,7 +236,6 @@ memory mechanisms for efficient intra-node communication.") (home-page "https://www.openucx.org/") (license bsd-3) - ;; lists only PowerPC64, AArch64, RISC-V - ;; and x86_64 as supported. - (supported-systems '("x86_64-linux" "aarch64-linux" "powerpc64le-linux" - "riscv64-linux")))) + ;; lists only PowerPC64, AArch64, and x86_64 as + ;; supported. + (supported-systems '("x86_64-linux" "aarch64-linux" "powerpc64le-linux")))) diff --git a/gnu/packages/patches/ucx-tcp-iface-ioctl.patch b/gnu/packages/patches/ucx-tcp-iface-ioctl.patch new file mode 100644 index 0000000000..2a0e4ce138 --- /dev/null +++ b/gnu/packages/patches/ucx-tcp-iface-ioctl.patch @@ -0,0 +1,127 @@ +Since /sys is unavailable in build environments, the list of available +TCP network interfaces cannot be obtained via /sys/class/net. This patch +provides alternative code that uses the SIOCGIFCONF ioctl to get the +names of the available TCP network interfaces. + +Initially submitted at . + +diff --git a/src/uct/tcp/tcp_iface.c b/src/uct/tcp/tcp_iface.c +index 6a6cd34fa..af32bb2e9 100644 +--- a/src/uct/tcp/tcp_iface.c ++++ b/src/uct/tcp/tcp_iface.c +@@ -18,6 +18,8 @@ + #include + #include + #include ++#include ++#include + + #define UCT_TCP_IFACE_NETDEV_DIR "/sys/class/net" + +@@ -875,6 +877,85 @@ static UCS_CLASS_DEFINE_NEW_FUNC(uct_tcp_iface_t, uct_iface_t, uct_md_h, + uct_worker_h, const uct_iface_params_t*, + const uct_iface_config_t*); + ++/* Fetch information about available network devices through an ioctl. */ ++static ucs_status_t uct_tcp_query_devices_ioctl(uct_md_h md, ++ uct_tl_device_resource_t **devices_p, ++ unsigned *num_devices_p) ++{ ++ int sock, err, i; ++ uct_tl_device_resource_t *devices, *tmp; ++ unsigned num_devices; ++ ucs_status_t status; ++ struct ifconf conf; ++ ++ conf.ifc_len = 0; ++ conf.ifc_req = NULL; ++ ++ status = ucs_socket_create(AF_INET, SOCK_STREAM, &sock); ++ if (status != UCS_OK) { ++ goto out; ++ } ++ ++ err = ioctl(sock, SIOCGIFCONF, &conf); ++ if (err < 0) { ++ ucs_error("ioctl(SIOCGIFCONF) failed: %m"); ++ status = UCS_ERR_IO_ERROR; ++ goto out; ++ } ++ ++ conf.ifc_req = ucs_calloc(1, conf.ifc_len, "ifreq"); ++ if (conf.ifc_req == NULL) { ++ ucs_error("memory alocation failed"); ++ status = UCS_ERR_NO_MEMORY; ++ goto out; ++ } ++ ++ err = ioctl(sock, SIOCGIFCONF, &conf); ++ if (err < 0) { ++ ucs_error("ioctl(SIOCGIFCONF) failed: %m"); ++ status = UCS_ERR_IO_ERROR; ++ goto out_free; ++ } ++ ++ devices = NULL; ++ num_devices = 0; ++ for (i = 0; i < (conf.ifc_len / sizeof(struct ifreq)); i++) { ++ const char *name = conf.ifc_req[i].ifr_name; ++ sa_family_t family = conf.ifc_req[i].ifr_addr.sa_family; ++ ++ if (!ucs_netif_is_active(name, family)) { ++ continue; ++ } ++ ++ tmp = ucs_realloc(devices, sizeof(*devices) * (num_devices + 1), ++ "tcp devices"); ++ if (tmp == NULL) { ++ ucs_free(devices); ++ status = UCS_ERR_NO_MEMORY; ++ goto out_free; ++ } ++ devices = tmp; ++ ++ ucs_snprintf_zero(devices[num_devices].name, ++ sizeof(devices[num_devices].name), ++ "%s", name); ++ devices[num_devices].type = UCT_DEVICE_TYPE_NET; ++ ++num_devices; ++ } ++ ++ *num_devices_p = num_devices; ++ *devices_p = devices; ++ status = UCS_OK; ++ ++out_free: ++ ucs_free(conf.ifc_req); ++out: ++ if (sock >= 0) { ++ close(sock); ++ } ++ return status; ++} ++ + ucs_status_t uct_tcp_query_devices(uct_md_h md, + uct_tl_device_resource_t **devices_p, + unsigned *num_devices_p) +@@ -893,9 +974,9 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md, + + dir = opendir(UCT_TCP_IFACE_NETDEV_DIR); + if (dir == NULL) { +- ucs_error("opendir(%s) failed: %m", UCT_TCP_IFACE_NETDEV_DIR); +- status = UCS_ERR_IO_ERROR; +- goto out; ++ /* When /sys is unavailable, as can be the case in a container, ++ * resort to a good old 'ioctl'. */ ++ return uct_tcp_query_devices_ioctl(md, devices_p, num_devices_p); + } + + devices = NULL; +@@ -963,7 +1044,6 @@ ucs_status_t uct_tcp_query_devices(uct_md_h md, + + out_closedir: + closedir(dir); +-out: + return status; + } + -- cgit v1.2.3