diff options
author | Sam James <sam@gentoo.org> | 2022-08-16 03:28:55 +0100 |
---|---|---|
committer | Sam James <sam@gentoo.org> | 2022-08-16 03:29:57 +0100 |
commit | 253ca90f3f968a03ea6fff8f0011cf411764b22e (patch) | |
tree | 3a975b6071e8e4821673924289b1e5e1ab492247 /sys-apps/file | |
parent | sys-libs/timezone-data: add 2022c (diff) | |
download | gentoo-253ca90f3f968a03ea6fff8f0011cf411764b22e.tar.gz gentoo-253ca90f3f968a03ea6fff8f0011cf411764b22e.tar.bz2 gentoo-253ca90f3f968a03ea6fff8f0011cf411764b22e.zip |
sys-apps/file: backport unicode handling fixes to 5.42
Temporarily unkeyworded: I ran into a few issues before adding
some extra patches, so I want to test this myself for a day or so
before keywording.
Bug: https://bugs.gentoo.org/861089
Signed-off-by: Sam James <sam@gentoo.org>
Diffstat (limited to 'sys-apps/file')
-rw-r--r-- | sys-apps/file/file-5.42-r1.ebuild | 162 | ||||
-rw-r--r-- | sys-apps/file/files/file-5.42-unicode-fixes.patch | 414 |
2 files changed, 576 insertions, 0 deletions
diff --git a/sys-apps/file/file-5.42-r1.ebuild b/sys-apps/file/file-5.42-r1.ebuild new file mode 100644 index 000000000000..e74d71b49e84 --- /dev/null +++ b/sys-apps/file/file-5.42-r1.ebuild @@ -0,0 +1,162 @@ +# Copyright 1999-2022 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=8 + +DISTUTILS_USE_PEP517=setuptools +DISTUTILS_OPTIONAL=1 +PYTHON_COMPAT=( python3_{8..11} ) + +inherit distutils-r1 libtool toolchain-funcs multilib-minimal + +if [[ ${PV} == 9999 ]] ; then + EGIT_REPO_URI="https://github.com/glensc/file.git" + inherit autotools git-r3 +else + VERIFY_SIG_OPENPGP_KEY_PATH="${BROOT}"/usr/share/openpgp-keys/file.asc + inherit verify-sig + SRC_URI="ftp://ftp.astron.com/pub/file/${P}.tar.gz" + SRC_URI+=" verify-sig? ( ftp://ftp.astron.com/pub/file/${P}.tar.gz.asc )" + + #KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~loong ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86 ~x64-cygwin ~amd64-linux ~x86-linux ~ppc-macos ~x64-macos ~sparc-solaris ~sparc64-solaris ~x64-solaris ~x86-solaris" + + BDEPEND="verify-sig? ( sec-keys/openpgp-keys-file )" +fi + +DESCRIPTION="Identify a file's format by scanning binary data for patterns" +HOMEPAGE="https://www.darwinsys.com/file/" + +LICENSE="BSD-2" +SLOT="0" +IUSE="bzip2 lzma python seccomp static-libs zlib" +REQUIRED_USE="python? ( ${PYTHON_REQUIRED_USE} )" + +DEPEND=" + bzip2? ( app-arch/bzip2[${MULTILIB_USEDEP}] ) + lzma? ( app-arch/xz-utils[${MULTILIB_USEDEP}] ) + python? ( + ${PYTHON_DEPS} + dev-python/setuptools[${PYTHON_USEDEP}] + ) + zlib? ( >=sys-libs/zlib-1.2.8-r1[${MULTILIB_USEDEP}] )" +RDEPEND="${DEPEND} + python? ( !dev-python/python-magic ) + seccomp? ( sys-libs/libseccomp[${MULTILIB_USEDEP}] )" +BDEPEND+=" + python? 
( + ${PYTHON_DEPS} + ${DISTUTILS_DEPS} + )" + +PATCHES=( + "${FILESDIR}/file-5.39-portage-sandbox.patch" #713710 #728978 + "${FILESDIR}/file-5.40-seccomp-fstatat64-musl.patch" #789336, not upstream yet + "${FILESDIR}/${P}-unicode-fixes.patch" #861089 +) + +src_prepare() { + default + + if [[ ${PV} == 9999 ]] ; then + eautoreconf + else + elibtoolize + fi + + # don't let python README kill main README, bug ##60043 + mv python/README.md python/README.python.md || die + # bug #662090 + sed 's@README.md@README.python.md@' -i python/setup.py || die +} + +multilib_src_configure() { + local myeconfargs=( + --enable-fsect-man5 + $(use_enable bzip2 bzlib) + $(use_enable lzma xzlib) + $(use_enable seccomp libseccomp) + $(use_enable static-libs static) + $(use_enable zlib) + ) + econf "${myeconfargs[@]}" +} + +build_src_configure() { + local myeconfargs=( + --disable-shared + --disable-libseccomp + --disable-bzlib + --disable-xzlib + --disable-zlib + ) + + econf_build "${myeconfargs[@]}" +} + +need_build_file() { + # when cross-compiling, we need to build up our own file + # because people often don't keep matching host/target + # file versions, bug #362941 + tc-is-cross-compiler && ! 
has_version -b "~${CATEGORY}/${P}" +} + +src_configure() { + local ECONF_SOURCE="${S}" + + if need_build_file ; then + mkdir -p "${WORKDIR}"/build || die + cd "${WORKDIR}"/build || die + build_src_configure + fi + + multilib-minimal_src_configure +} + +multilib_src_compile() { + if multilib_is_native_abi ; then + emake + else + # bug #586444 + emake -C src magic.h + emake -C src libmagic.la + fi +} + +src_compile() { + if need_build_file ; then + # bug #586444 + emake -C "${WORKDIR}"/build/src magic.h + emake -C "${WORKDIR}"/build/src file + local -x PATH="${WORKDIR}/build/src:${PATH}" + fi + + multilib-minimal_src_compile + + if use python ; then + cd python || die + distutils-r1_src_compile + fi +} + +multilib_src_install() { + if multilib_is_native_abi ; then + default + else + emake -C src install-{nodist_includeHEADERS,libLTLIBRARIES} DESTDIR="${D}" + fi +} + +multilib_src_install_all() { + dodoc ChangeLog MAINT # README + + # Required for `file -C` + insinto /usr/share/misc/magic + doins -r magic/Magdir/* + + if use python ; then + cd python || die + distutils-r1_src_install + fi + + find "${ED}" -type f -name "*.la" -delete || die +} diff --git a/sys-apps/file/files/file-5.42-unicode-fixes.patch b/sys-apps/file/files/file-5.42-unicode-fixes.patch new file mode 100644 index 000000000000..91c46a358120 --- /dev/null +++ b/sys-apps/file/files/file-5.42-unicode-fixes.patch @@ -0,0 +1,414 @@ +https://bugs.gentoo.org/861089 +https://github.com/file/file/commit/19bf47777d0002ee884467e45e6ace702e40a4c1 +https://github.com/file/file/commit/c80065fe6900be5e794941e29b32440e9969b1c3 +https://github.com/file/file/commit/7e59d34206d7c962e093d4239e5367a2cd8b7623 +https://github.com/file/file/commit/f042050f59bfc037677871c4d1037c33273f5213 +https://github.com/file/file/commit/d471022b2772071877895759f209f2c346757a4c +https://github.com/file/file/commit/441ac2b15508909e82ad467960df4ac0adf9644c + +From 19bf47777d0002ee884467e45e6ace702e40a4c1 Mon Sep 17 00:00:00 2001 +From: 
Christos Zoulas <christos@zoulas.com> +Date: Mon, 4 Jul 2022 17:00:51 +0000 +Subject: [PATCH] PR/358: Fix width for -f - (jpalus) + +--- + src/file.c | 46 +++++++++++++++++++++++++++++----------------- + 2 files changed, 31 insertions(+), 18 deletions(-) + +diff --git a/src/file.c b/src/file.c +index 5300e5af8..bb058ce1e 100644 +--- a/src/file.c ++++ b/src/file.c +@@ -506,35 +506,47 @@ unwrap(struct magic_set *ms, const char *fn) + size_t llen = 0; + int wid = 0, cwid; + int e = 0; ++ size_t fi = 0, fimax = 100; ++ char **flist = malloc(sizeof(*flist) * fimax); + +- if (strcmp("-", fn) == 0) { ++ if (flist == NULL) ++out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list"); ++ ++ if (strcmp("-", fn) == 0) + f = stdin; +- wid = 1; +- } else { ++ else { + if ((f = fopen(fn, "r")) == NULL) { + file_warn("Cannot open `%s'", fn); + return 1; + } +- +- while ((len = getline(&line, &llen, f)) > 0) { +- if (line[len - 1] == '\n') +- line[len - 1] = '\0'; +- cwid = file_mbswidth(ms, line); +- if (cwid > wid) +- wid = cwid; +- } +- +- rewind(f); + } + + while ((len = getline(&line, &llen, f)) > 0) { + if (line[len - 1] == '\n') + line[len - 1] = '\0'; +- e |= process(ms, line, wid); ++ if (fi >= fimax) { ++ fimax += 100; ++ char **nf = realloc(flist, fimax * sizeof(*flist)); ++ if (nf == NULL) ++ goto out; ++ } ++ flist[fi++] = line; ++ cwid = file_mbswidth(ms, line); ++ if (cwid > wid) ++ wid = cwid; ++ line = NULL; ++ llen = 0; ++ } ++ ++ fimax = fi; ++ for (fi = 0; fi < fimax; fi++) { ++ e |= process(ms, flist[fi], wid); ++ free(flist[fi]); + } ++ free(flist); + +- free(line); +- (void)fclose(f); ++ if (f != stdin) ++ (void)fclose(f); + return e; + } + + +From c80065fe6900be5e794941e29b32440e9969b1c3 Mon Sep 17 00:00:00 2001 +From: Christos Zoulas <christos@zoulas.com> +Date: Mon, 4 Jul 2022 19:44:35 +0000 +Subject: [PATCH] PR/362: ro-ee: fix wide char printing + +--- a/src/file.c ++++ b/src/file.c +@@ -60,6 +60,12 @@ FILE_RCSID("@(#)$File: file.c,v 1.196 
2022/07/04 17:00:51 christos Exp $") + #ifdef HAVE_WCTYPE_H + #include <wctype.h> + #endif ++#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \ ++ defined(HAVE_WCTYPE_H) ++#define FILE_WIDE_SUPPORT ++#else ++#include <ctype.h> ++#endif + + #if defined(HAVE_GETOPT_H) && defined(HAVE_STRUCT_OPTION) + # include <getopt.h> +@@ -550,6 +556,55 @@ out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list"); + return e; + } + ++private void ++file_octal(unsigned char c) ++{ ++ putc('\\', stdout); ++ putc(((c >> 6) & 7) + '0', stdout); ++ putc(((c >> 3) & 7) + '0', stdout); ++ putc(((c >> 0) & 7) + '0', stdout); ++} ++ ++private void ++fname_print(const char *inname) ++{ ++ size_t n = strlen(inname); ++#ifdef FILE_WIDE_SUPPORT ++ mbstate_t state; ++ wchar_t nextchar; ++ size_t bytesconsumed; ++ ++ ++ (void)mbrlen(NULL, 0, &state); ++ while (n > 0) { ++ bytesconsumed = mbrtowc(&nextchar, inname, n, &state); ++ if (bytesconsumed == CAST(size_t, -1) || ++ bytesconsumed == CAST(size_t, -2)) { ++ nextchar = *inname; ++ bytesconsumed = 1; ++ } ++ inname += bytesconsumed; ++ n -= bytesconsumed; ++ if (iswprint(nextchar)) { ++ putwc(nextchar, stdout); ++ continue; ++ } ++ /* XXX: What if it is > 255? */ ++ file_octal(CAST(unsigned char, nextchar)); ++ } ++#else ++ size_t i; ++ for (i = 0; i < n; i++) { ++ unsigned char c = CAST(unsigned char, inname[i]); ++ if (isprint(c)) { ++ putc(c); ++ continue; ++ } ++ file_octal(c); ++ } ++#endif ++} ++ + /* + * Called for each input file on the command line (or in a list of files) + */ +@@ -559,15 +614,13 @@ process(struct magic_set *ms, const char *inname, int wid) + const char *type, c = nulsep > 1 ? 
'\0' : '\n'; + int std_in = strcmp(inname, "-") == 0; + int haderror = 0; +- size_t plen = 4 * wid + 1; +- char *pbuf, *pname; +- +- if ((pbuf = CAST(char *, malloc(plen))) == NULL) +- file_err(EXIT_FAILURE, "Can't allocate %zu bytes", plen); + + if (wid > 0 && !bflag) { +- pname = file_printable(ms, pbuf, plen, inname, wid); +- (void)printf("%s", std_in ? "/dev/stdin" : pname); ++ const char *pname = std_in ? "/dev/stdin" : inname; ++ if ((ms->flags & MAGIC_RAW) == 0) ++ fname_print(pname); ++ else ++ (void)printf("%s", pname); + if (nulsep) + (void)putc('\0', stdout); + if (nulsep < 2) { +@@ -586,7 +639,6 @@ process(struct magic_set *ms, const char *inname, int wid) + } + if (nobuffer) + haderror |= fflush(stdout) != 0; +- free(pbuf); + return haderror || type == NULL; + } + +@@ -594,35 +646,33 @@ protected size_t + file_mbswidth(struct magic_set *ms, const char *s) + { + size_t width = 0; +-#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \ +- defined(HAVE_WCTYPE_H) +- size_t bytesconsumed, old_n, n; ++#ifdef FILE_WIDE_SUPPORT ++ size_t bytesconsumed, n; + mbstate_t state; + wchar_t nextchar; +- (void)memset(&state, 0, sizeof(mbstate_t)); +- old_n = n = strlen(s); ++ ++ (void)mbrlen(NULL, 0, &state); ++ n = strlen(s); + + while (n > 0) { + bytesconsumed = mbrtowc(&nextchar, s, n, &state); + if (bytesconsumed == CAST(size_t, -1) || + bytesconsumed == CAST(size_t, -2)) { +- /* Something went wrong, return something reasonable */ +- return old_n; ++ nextchar = *s; ++ bytesconsumed = 1; + } + width += ((ms->flags & MAGIC_RAW) != 0 + || iswprint(nextchar)) ? wcwidth(nextchar) : 4; + + s += bytesconsumed, n -= bytesconsumed; + } +- return width; + #else + while (*s) { + width += (ms->flags & MAGIC_RAW) != 0 + || isprint(CAST(unsigned char, *s)) ? 
1 : 4; + } +- +- return strlen(s); + #endif ++ return width; + } + + private void +--- a/src/file.h ++++ b/src/file.h +@@ -575,7 +575,7 @@ protected size_t file_pstring_length_size(struct magic_set *, + const struct magic *); + protected size_t file_pstring_get_length(struct magic_set *, + const struct magic *, const char *); +-public char * file_printable(struct magic_set *, char *, size_t, ++protected char * file_printable(struct magic_set *, char *, size_t, + const char *, size_t); + #ifdef __EMX__ + protected int file_os2_apptype(struct magic_set *, const char *, const void *, +--- a/src/funcs.c ++++ b/src/funcs.c +@@ -763,7 +763,7 @@ file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb) + /* + * convert string to ascii printable format. + */ +-public char * ++protected char * + file_printable(struct magic_set *ms, char *buf, size_t bufsiz, + const char *str, size_t slen) + { + +From 7e59d34206d7c962e093d4239e5367a2cd8b7623 Mon Sep 17 00:00:00 2001 +From: Christos Zoulas <christos@zoulas.com> +Date: Mon, 4 Jul 2022 20:16:29 +0000 +Subject: [PATCH] Handle invalid characters as octal (idea from PR/363 by + dimich) + +--- + src/file.c | 16 +++++++++++----- + 1 file changed, 11 insertions(+), 5 deletions(-) + +diff --git a/src/file.c b/src/file.c +index 5e89137d7..af9be0f0c 100644 +--- a/src/file.c ++++ b/src/file.c +@@ -580,8 +580,11 @@ fname_print(const char *inname) + bytesconsumed = mbrtowc(&nextchar, inname, n, &state); + if (bytesconsumed == CAST(size_t, -1) || + bytesconsumed == CAST(size_t, -2)) { +- nextchar = *inname; +- bytesconsumed = 1; ++ nextchar = *inname++; ++ n--; ++ (void)mbrlen(NULL, 0, &state); ++ file_octal(CAST(unsigned char, nextchar)); ++ continue; + } + inname += bytesconsumed; + n -= bytesconsumed; +@@ -660,9 +663,12 @@ file_mbswidth(struct magic_set *ms, const char *s) + bytesconsumed == CAST(size_t, -2)) { + nextchar = *s; + bytesconsumed = 1; ++ (void)mbrlen(NULL, 0, &state); ++ width += 4; ++ } else { ++ width += ((ms->flags & 
MAGIC_RAW) != 0 ++ || iswprint(nextchar)) ? wcwidth(nextchar) : 4; + } +- width += ((ms->flags & MAGIC_RAW) != 0 +- || iswprint(nextchar)) ? wcwidth(nextchar) : 4; + + s += bytesconsumed, n -= bytesconsumed; + } + + +From f042050f59bfc037677871c4d1037c33273f5213 Mon Sep 17 00:00:00 2001 +From: Christos Zoulas <christos@zoulas.com> +Date: Mon, 4 Jul 2022 22:30:51 +0000 +Subject: [PATCH] mbrlen(NULL, is not portable; revert to using memset to + initialize the state. + +--- + src/file.c | 10 +++++----- + 1 file changed, 5 insertions(+), 5 deletions(-) + +diff --git a/src/file.c b/src/file.c +index af9be0f0c..8b4f14c2e 100644 +--- a/src/file.c ++++ b/src/file.c +@@ -575,14 +575,14 @@ fname_print(const char *inname) + size_t bytesconsumed; + + +- (void)mbrlen(NULL, 0, &state); ++ (void)memset(&state, 0, sizeof(state)); + while (n > 0) { + bytesconsumed = mbrtowc(&nextchar, inname, n, &state); + if (bytesconsumed == CAST(size_t, -1) || + bytesconsumed == CAST(size_t, -2)) { + nextchar = *inname++; + n--; +- (void)mbrlen(NULL, 0, &state); ++ (void)memset(&state, 0, sizeof(state)); + file_octal(CAST(unsigned char, nextchar)); + continue; + } +@@ -654,7 +654,7 @@ file_mbswidth(struct magic_set *ms, const char *s) + mbstate_t state; + wchar_t nextchar; + +- (void)mbrlen(NULL, 0, &state); ++ (void)memset(&state, 0, sizeof(state)); + n = strlen(s); + + while (n > 0) { +@@ -663,7 +663,7 @@ file_mbswidth(struct magic_set *ms, const char *s) + bytesconsumed == CAST(size_t, -2)) { + nextchar = *s; + bytesconsumed = 1; +- (void)mbrlen(NULL, 0, &state); ++ (void)memset(&state, 0, sizeof(state)); + width += 4; + } else { + width += ((ms->flags & MAGIC_RAW) != 0 + + +From d471022b2772071877895759f209f2c346757a4c Mon Sep 17 00:00:00 2001 +From: Christos Zoulas <christos@zoulas.com> +Date: Tue, 5 Jul 2022 19:53:42 +0000 +Subject: [PATCH] Use printf("%lc") instead of putwc(). Somehow mixing wide and + narrow stdio does not work on Linux? 
+ +--- a/src/file.c ++++ b/src/file.c +@@ -589,7 +589,7 @@ fname_print(const char *inname) + inname += bytesconsumed; + n -= bytesconsumed; + if (iswprint(nextchar)) { +- putwc(nextchar, stdout); ++ printf("%lc", nextchar); + continue; + } + /* XXX: What if it is > 255? */ + +From 441ac2b15508909e82ad467960df4ac0adf9644c Mon Sep 17 00:00:00 2001 +From: Christos Zoulas <christos@zoulas.com> +Date: Tue, 5 Jul 2022 20:05:23 +0000 +Subject: [PATCH] wcwidth is not supposed to return -1 if the character is + printable, but it does for 0xff... Prevent it from decreasing the width. + +--- a/src/file.c ++++ b/src/file.c +@@ -666,8 +666,9 @@ file_mbswidth(struct magic_set *ms, const char *s) + (void)memset(&state, 0, sizeof(state)); + width += 4; + } else { ++ int w = wcwidth(nextchar); + width += ((ms->flags & MAGIC_RAW) != 0 +- || iswprint(nextchar)) ? wcwidth(nextchar) : 4; ++ || iswprint(nextchar)) ? (w > 0 ? w : 1) : 4; + } + + s += bytesconsumed, n -= bytesconsumed; + +From be1ac8c0aa6d21921012f62582f51a9e546e4972 Mon Sep 17 00:00:00 2001 +From: Christos Zoulas <christos@zoulas.com> +Date: Tue, 26 Jul 2022 15:10:05 +0000 +Subject: [PATCH] Fix bug with large flist (Florian Weimer) + +--- + src/file.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/src/file.c b/src/file.c +index e169c08fc..c0b8aa197 100644 +--- a/src/file.c ++++ b/src/file.c +@@ -535,6 +535,7 @@ out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list"); + char **nf = realloc(flist, fimax * sizeof(*flist)); + if (nf == NULL) + goto out; ++ flist = nf; + } + flist[fi++] = line; + cwid = file_mbswidth(ms, line); + |