Merge remote-tracking branch 'origin/master' into core-updates

author: Efraim Flashner <efraim@flashner.co.il> 2021-03-24 15:28:33 +0200
committer: Efraim Flashner <efraim@flashner.co.il> 2021-03-24 20:50:44 +0200
commit: 2aab587f842908a886e3bd08b028885dddd650e0 (patch)
tree: 87c0723a9ae2c69ab6920d90b6e87ad8510492fe /gnu/packages/patches/unzip-alt-iconv-utf8-print.patch
parent: 5664bcdcb0e4c10dfe48dd5e4730fc3c746a21e2 (diff)
parent: 65c46e79e0495fe4d32f6f2725d7233fff10fd70 (diff)
download: guix-2aab587f842908a886e3bd08b028885dddd650e0.tar.gz
guix-2aab587f842908a886e3bd08b028885dddd650e0.zip
1 files changed, 381 insertions, 0 deletions
diff --git a/gnu/packages/patches/unzip-alt-iconv-utf8-print.patch b/gnu/packages/patches/unzip-alt-iconv-utf8-print.patch
new file mode 100644
index 0000000000..0b0153ba54
--- /dev/null
+++ b/gnu/packages/patches/unzip-alt-iconv-utf8-print.patch
@@ -0,0 +1,381 @@
+From ca0212ba19b64488b9e8459a762c11ecd6e7d0bd Mon Sep 17 00:00:00 2001
+From: Petr Stodulka <pstodulk@redhat.com>
+Date: Tue, 24 Nov 2015 17:56:11 +0100
+Subject: [PATCH] print correctly non-ascii filenames
+
+---
+ extract.c | 289 ++++++++++++++++++++++++++++++++++++++++++++++++--------------
+ unzpriv.h |   7 ++
+ 2 files changed, 233 insertions(+), 63 deletions(-)
+
+diff --git a/extract.c b/extract.c
+index 0ee4e93..741b7e0 100644
+--- a/extract.c
++++ b/extract.c
+@@ -2648,8 +2648,21 @@ static void set_deferred_symlink(__G__ slnk_entry)
+ } /* end function set_deferred_symlink() */
+ #endif /* SYMLINKS */
+ 
++/*
++ * If Unicode is supported, assume we have what we need to do this
++ * check using wide characters, avoiding MBCS issues.
++ */
+ 
+-
++#ifndef UZ_FNFILTER_REPLACECHAR
++        /* A convenient choice for the replacement of unprintable char codes is
++         * the "single char wildcard", as this character is quite unlikely to
++         * appear in filenames by itself.  The following default definition
++         * sets the replacement char to a question mark as the most common
++         * "single char wildcard"; this setting should be overridden in the
++         * appropiate system-specific configuration header when needed.
++         */
++# define UZ_FNFILTER_REPLACECHAR      '?'
++#endif
+ 
+ /*************************/
+ /*  Function fnfilter()  */        /* here instead of in list.c for SFX */
+@@ -2661,48 +2674,168 @@ char *fnfilter(raw, space, size)   /* convert name to safely printable form */
+     extent size;
+ {
+ #ifndef NATIVE   /* ASCII:  filter ANSI escape codes, etc. */
+-    ZCONST uch *r=(ZCONST uch *)raw;
++    ZCONST uch *r; // =(ZCONST uch *)raw;
+     uch *s=space;
+     uch *slim=NULL;
+     uch *se=NULL;
+     int have_overflow = FALSE;
+ 
+-    if (size > 0) {
+-        slim = space + size
+-#ifdef _MBCS
+-                     - (MB_CUR_MAX - 1)
+-#endif
+-                     - 4;
++# if defined( UNICODE_SUPPORT) && defined( _MBCS)
++/* If Unicode support is enabled, and we have multi-byte characters,
++ * then do the isprint() checks by first converting to wide characters
++ * and checking those.  This avoids our having to parse multi-byte
++ * characters for ourselves.  After the wide-char replacements have been
++ * made, the wide string is converted back to the local character set.
++ */
++    wchar_t *wstring;    /* wchar_t version of raw */
++    size_t wslen;        /* length of wstring */
++    wchar_t *wostring;   /* wchar_t version of output string */
++    size_t woslen;       /* length of wostring */
++    char *newraw;        /* new raw */
++
++    /* 2012-11-06 SMS.
++     * Changed to check the value returned by mbstowcs(), and bypass the
++     * Unicode processing if it fails.  This seems to fix a problem
++     * reported in the SourceForge forum, but it's not clear that we
++     * should be doing any Unicode processing without some evidence that
++     * the name actually is Unicode.  (Check bit 11 in the flags before
++     * coming here?)
++     * http://sourceforge.net/p/infozip/bugs/40/
++     */
++
++    if (MB_CUR_MAX <= 1)
++    {
++        /* There's no point to converting multi-byte chars if there are
++         * no multi-byte chars.
++         */
++        wslen = (size_t)-1;
+     }
+-    while (*r) {
+-        if (size > 0 && s >= slim && se == NULL) {
+-            se = s;
++    else
++    {
++        /* Get Unicode wide character count (for storage allocation). */
++        wslen = mbstowcs( NULL, raw, 0);
++    }
++
++    if (wslen != (size_t)-1)
++    {
++        /* Apparently valid Unicode.  Allocate wide-char storage. */
++        wstring = (wchar_t *)malloc((wslen + 1) * sizeof(wchar_t));
++        if (wstring == NULL) {
++            strcpy( (char *)space, raw);
++            return (char *)space;
+         }
+-#ifdef QDOS
+-        if (qlflag & 2) {
+-            if (*r == '/' || *r == '.') {
++        wostring = (wchar_t *)malloc(2 * (wslen + 1) * sizeof(wchar_t));
++        if (wostring == NULL) {
++            free(wstring);
++            strcpy( (char *)space, raw);
++            return (char *)space;
++        }
++
++        /* Convert the multi-byte Unicode to wide chars. */
++        wslen = mbstowcs(wstring, raw, wslen + 1);
++
++        /* Filter the wide-character string. */
++        fnfilterw( wstring, wostring, (2 * (wslen + 1) * sizeof(wchar_t)));
++
++        /* Convert filtered wide chars back to multi-byte. */
++        woslen = wcstombs( NULL, wostring, 0);
++        if ((newraw = malloc(woslen + 1)) == NULL) {
++            free(wstring);
++            free(wostring);
++            strcpy( (char *)space, raw);
++            return (char *)space;
++        }
++        woslen = wcstombs( newraw, wostring, (woslen * MB_CUR_MAX) + 1);
++
++        if (size > 0) {
++            slim = space + size - 4;
++        }
++        r = (ZCONST uch *)newraw;
++        while (*r) {
++            if (size > 0 && s >= slim && se == NULL) {
++                se = s;
++            }
++#  ifdef QDOS
++            if (qlflag & 2) {
++                if (*r == '/' || *r == '.') {
++                    if (se != NULL && (s > (space + (size-3)))) {
++                        have_overflow = TRUE;
++                        break;
++                    }
++                    ++r;
++                    *s++ = '_';
++                    continue;
++                }
++            } else
++#  endif
++            {
+                 if (se != NULL && (s > (space + (size-3)))) {
+                     have_overflow = TRUE;
+                     break;
+                 }
+-                ++r;
+-                *s++ = '_';
+-                continue;
++                *s++ = *r++;
+             }
+-        } else
++        }
++        if (have_overflow) {
++            strcpy((char *)se, "...");
++        } else {
++            *s = '\0';
++        }
++
++        free(wstring);
++        free(wostring);
++        free(newraw);
++    }
++    else
++# endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
++    {
++        /* No Unicode support, or apparently invalid Unicode. */
++        r = (ZCONST uch *)raw;
++
++        if (size > 0) {
++            slim = space + size
++#ifdef _MBCS
++                         - (MB_CUR_MAX - 1)
++#endif
++                         - 4;
++        }
++        while (*r) {
++            if (size > 0 && s >= slim && se == NULL) {
++                se = s;
++            }
++#ifdef QDOS
++            if (qlflag & 2) {
++                if (*r == '/' || *r == '.') {
++                    if (se != NULL && (s > (space + (size-3)))) {
++                        have_overflow = TRUE;
++                        break;
++                    }
++                    ++r;
++                    *s++ = '_';
++                    continue;
++                }
++            } else
+ #endif
+ #ifdef HAVE_WORKING_ISPRINT
+-# ifndef UZ_FNFILTER_REPLACECHAR
+-    /* A convenient choice for the replacement of unprintable char codes is
+-     * the "single char wildcard", as this character is quite unlikely to
+-     * appear in filenames by itself.  The following default definition
+-     * sets the replacement char to a question mark as the most common
+-     * "single char wildcard"; this setting should be overridden in the
+-     * appropiate system-specific configuration header when needed.
+-     */
+-#   define UZ_FNFILTER_REPLACECHAR      '?'
+-# endif
+-        if (!isprint(*r)) {
++            if (!isprint(*r)) {
++                if (*r < 32) {
++                    /* ASCII control codes are escaped as "^{letter}". */
++                    if (se != NULL && (s > (space + (size-4)))) {
++                        have_overflow = TRUE;
++                        break;
++                    }
++                    *s++ = '^', *s++ = (uch)(64 + *r++);
++                } else {
++                    /* Other unprintable codes are replaced by the
++                     * placeholder character. */
++                    if (se != NULL && (s > (space + (size-3)))) {
++                        have_overflow = TRUE;
++                        break;
++                    }
++                    *s++ = UZ_FNFILTER_REPLACECHAR;
++                    INCSTR(r);
++                }
++#else /* !HAVE_WORKING_ISPRINT */
+             if (*r < 32) {
+                 /* ASCII control codes are escaped as "^{letter}". */
+                 if (se != NULL && (s > (space + (size-4)))) {
+@@ -2710,47 +2843,30 @@ char *fnfilter(raw, space, size)   /* convert name to safely printable form */
+                     break;
+                 }
+                 *s++ = '^', *s++ = (uch)(64 + *r++);
++#endif /* ?HAVE_WORKING_ISPRINT */
+             } else {
+-                /* Other unprintable codes are replaced by the
+-                 * placeholder character. */
++#ifdef _MBCS
++                unsigned i = CLEN(r);
++                if (se != NULL && (s > (space + (size-i-2)))) {
++                    have_overflow = TRUE;
++                    break;
++                }
++                for (; i > 0; i--)
++                    *s++ = *r++;
++#else
+                 if (se != NULL && (s > (space + (size-3)))) {
+                     have_overflow = TRUE;
+                     break;
+                 }
+-                *s++ = UZ_FNFILTER_REPLACECHAR;
+-                INCSTR(r);
+-            }
+-#else /* !HAVE_WORKING_ISPRINT */
+-        if (*r < 32) {
+-            /* ASCII control codes are escaped as "^{letter}". */
+-            if (se != NULL && (s > (space + (size-4)))) {
+-                have_overflow = TRUE;
+-                break;
+-            }
+-            *s++ = '^', *s++ = (uch)(64 + *r++);
+-#endif /* ?HAVE_WORKING_ISPRINT */
+-        } else {
+-#ifdef _MBCS
+-            unsigned i = CLEN(r);
+-            if (se != NULL && (s > (space + (size-i-2)))) {
+-                have_overflow = TRUE;
+-                break;
+-            }
+-            for (; i > 0; i--)
+                 *s++ = *r++;
+-#else
+-            if (se != NULL && (s > (space + (size-3)))) {
+-                have_overflow = TRUE;
+-                break;
+-            }
+-            *s++ = *r++;
+ #endif
+-         }
+-    }
+-    if (have_overflow) {
+-        strcpy((char *)se, "...");
+-    } else {
+-        *s = '\0';
++             }
++        }
++        if (have_overflow) {
++            strcpy((char *)se, "...");
++        } else {
++            *s = '\0';
++        }
+     }
+ 
+ #ifdef WINDLL
+@@ -2772,6 +2888,53 @@ char *fnfilter(raw, space, size)   /* convert name to safely printable form */
+ } /* end function fnfilter() */
+ 
+ 
++#if defined( UNICODE_SUPPORT) && defined( _MBCS)
++
++/****************************/
++/*  Function fnfilter[w]()  */  /* (Here instead of in list.c for SFX.) */
++/****************************/
++
++/* fnfilterw() - Convert wide name to safely printable form. */
++
++/* fnfilterw() - Convert wide-character name to safely printable form. */
++
++wchar_t *fnfilterw( src, dst, siz)
++    ZCONST wchar_t *src;        /* Pointer to source char (string). */
++    wchar_t *dst;               /* Pointer to destination char (string). */
++    extent siz;                 /* Not used (!). */
++{
++    wchar_t *dsx = dst;
++
++    /* Filter the wide chars. */
++    while (*src)
++    {
++        if (iswprint( *src))
++        {
++            /* Printable code.  Copy it. */
++            *dst++ = *src;
++        }
++        else
++        {
++            /* Unprintable code.  Substitute something printable for it. */
++            if (*src < 32)
++            {
++                /* Replace ASCII control code with "^{letter}". */
++                *dst++ = (wchar_t)'^';
++                *dst++ = (wchar_t)(64 + *src);
++            }
++            else
++            {
++                /* Replace other unprintable code with the placeholder. */
++                *dst++ = (wchar_t)UZ_FNFILTER_REPLACECHAR;
++            }
++        }
++        src++;
++    }
++    *dst = (wchar_t)0;  /* NUL-terminate the destination string. */
++    return dsx;
++} /* fnfilterw(). */
++
++#endif /* defined( UNICODE_SUPPORT) && defined( _MBCS) */
+ 
+ 
+ #ifdef SET_DIR_ATTRIB
+diff --git a/unzpriv.h b/unzpriv.h
+index 22d3923..e48a652 100644
+--- a/unzpriv.h
++++ b/unzpriv.h
+@@ -1212,6 +1212,7 @@
+ # ifdef UNICODE_WCHAR
+ #  if !(defined(_WIN32_WCE) || defined(POCKET_UNZIP))
+ #   include <wchar.h>
++#   include <wctype.h>
+ #  endif
+ # endif
+ # ifndef _MBCS  /* no need to include <locale.h> twice, see below */
+@@ -2410,6 +2411,12 @@ int    memflush                  OF((__GPRO__ ZCONST uch *rawbuf, ulg size));
+ char  *fnfilter                  OF((ZCONST char *raw, uch *space,
+                                      extent size));
+ 
++# if defined( UNICODE_SUPPORT) && defined( _MBCS)
++wchar_t *fnfilterw               OF((ZCONST wchar_t *src, wchar_t *dst,
++                                     extent siz));
++#endif
++
++
+ /*---------------------------------------------------------------------------
+     Decompression functions:
+   ---------------------------------------------------------------------------*/
+-- 
+2.4.3
+
author	Efraim Flashner <efraim@flashner.co.il>	2021-03-24 15:28:33 +0200
committer	Efraim Flashner <efraim@flashner.co.il>	2021-03-24 20:50:44 +0200
commit	2aab587f842908a886e3bd08b028885dddd650e0 (patch)
tree	87c0723a9ae2c69ab6920d90b6e87ad8510492fe /gnu/packages/patches/unzip-alt-iconv-utf8-print.patch
parent	5664bcdcb0e4c10dfe48dd5e4730fc3c746a21e2 (diff)
parent	65c46e79e0495fe4d32f6f2725d7233fff10fd70 (diff)
download	guix-2aab587f842908a886e3bd08b028885dddd650e0.tar.gz guix-2aab587f842908a886e3bd08b028885dddd650e0.zip