summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam James <sam@gentoo.org>2022-08-16 03:28:55 +0100
committerSam James <sam@gentoo.org>2022-08-16 03:29:57 +0100
commit253ca90f3f968a03ea6fff8f0011cf411764b22e (patch)
tree3a975b6071e8e4821673924289b1e5e1ab492247 /sys-apps/file
parentsys-libs/timezone-data: add 2022c (diff)
downloadgentoo-253ca90f3f968a03ea6fff8f0011cf411764b22e.tar.gz
gentoo-253ca90f3f968a03ea6fff8f0011cf411764b22e.tar.bz2
gentoo-253ca90f3f968a03ea6fff8f0011cf411764b22e.zip
sys-apps/file: backport unicode handling fixes to 5.42
Temporarily unkeyworded given I had a few issues before I threw in a few extra patches. Want to give it a test run for a day or so myself first before keywording. Bug: https://bugs.gentoo.org/861089 Signed-off-by: Sam James <sam@gentoo.org>
Diffstat (limited to 'sys-apps/file')
-rw-r--r--sys-apps/file/file-5.42-r1.ebuild162
-rw-r--r--sys-apps/file/files/file-5.42-unicode-fixes.patch414
2 files changed, 576 insertions, 0 deletions
diff --git a/sys-apps/file/file-5.42-r1.ebuild b/sys-apps/file/file-5.42-r1.ebuild
new file mode 100644
index 000000000000..e74d71b49e84
--- /dev/null
+++ b/sys-apps/file/file-5.42-r1.ebuild
@@ -0,0 +1,162 @@
+# Copyright 1999-2022 Gentoo Authors
+# Distributed under the terms of the GNU General Public License v2
+
+EAPI=8
+
+DISTUTILS_USE_PEP517=setuptools
+DISTUTILS_OPTIONAL=1
+PYTHON_COMPAT=( python3_{8..11} )
+
+inherit distutils-r1 libtool toolchain-funcs multilib-minimal
+
+if [[ ${PV} == 9999 ]] ; then
+ EGIT_REPO_URI="https://github.com/glensc/file.git"
+ inherit autotools git-r3
+else
+ VERIFY_SIG_OPENPGP_KEY_PATH="${BROOT}"/usr/share/openpgp-keys/file.asc
+ inherit verify-sig
+ SRC_URI="ftp://ftp.astron.com/pub/file/${P}.tar.gz"
+ SRC_URI+=" verify-sig? ( ftp://ftp.astron.com/pub/file/${P}.tar.gz.asc )"
+
+ #KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~loong ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86 ~x64-cygwin ~amd64-linux ~x86-linux ~ppc-macos ~x64-macos ~sparc-solaris ~sparc64-solaris ~x64-solaris ~x86-solaris"
+
+ BDEPEND="verify-sig? ( sec-keys/openpgp-keys-file )"
+fi
+
+DESCRIPTION="Identify a file's format by scanning binary data for patterns"
+HOMEPAGE="https://www.darwinsys.com/file/"
+
+LICENSE="BSD-2"
+SLOT="0"
+IUSE="bzip2 lzma python seccomp static-libs zlib"
+REQUIRED_USE="python? ( ${PYTHON_REQUIRED_USE} )"
+
+DEPEND="
+ bzip2? ( app-arch/bzip2[${MULTILIB_USEDEP}] )
+ lzma? ( app-arch/xz-utils[${MULTILIB_USEDEP}] )
+ python? (
+ ${PYTHON_DEPS}
+ dev-python/setuptools[${PYTHON_USEDEP}]
+ )
+ zlib? ( >=sys-libs/zlib-1.2.8-r1[${MULTILIB_USEDEP}] )"
+RDEPEND="${DEPEND}
+ python? ( !dev-python/python-magic )
+ seccomp? ( sys-libs/libseccomp[${MULTILIB_USEDEP}] )"
+BDEPEND+="
+ python? (
+ ${PYTHON_DEPS}
+ ${DISTUTILS_DEPS}
+ )"
+
+PATCHES=(
+ "${FILESDIR}/file-5.39-portage-sandbox.patch" #713710 #728978
+ "${FILESDIR}/file-5.40-seccomp-fstatat64-musl.patch" #789336, not upstream yet
+ "${FILESDIR}/${P}-unicode-fixes.patch" #861089
+)
+
+src_prepare() {
+ default
+
+ if [[ ${PV} == 9999 ]] ; then
+ eautoreconf
+ else
+ elibtoolize
+ fi
+
+ # don't let python README kill main README, bug #60043
+ mv python/README.md python/README.python.md || die
+ # bug #662090
+ sed 's@README.md@README.python.md@' -i python/setup.py || die
+}
+
+multilib_src_configure() {
+ local myeconfargs=(
+ --enable-fsect-man5
+ $(use_enable bzip2 bzlib)
+ $(use_enable lzma xzlib)
+ $(use_enable seccomp libseccomp)
+ $(use_enable static-libs static)
+ $(use_enable zlib)
+ )
+ econf "${myeconfargs[@]}"
+}
+
+build_src_configure() {
+ local myeconfargs=(
+ --disable-shared
+ --disable-libseccomp
+ --disable-bzlib
+ --disable-xzlib
+ --disable-zlib
+ )
+
+ econf_build "${myeconfargs[@]}"
+}
+
+need_build_file() {
+ # when cross-compiling, we need to build up our own file
+ # because people often don't keep matching host/target
+ # file versions, bug #362941
+ tc-is-cross-compiler && ! has_version -b "~${CATEGORY}/${P}"
+}
+
+src_configure() {
+ local ECONF_SOURCE="${S}"
+
+ if need_build_file ; then
+ mkdir -p "${WORKDIR}"/build || die
+ cd "${WORKDIR}"/build || die
+ build_src_configure
+ fi
+
+ multilib-minimal_src_configure
+}
+
+multilib_src_compile() {
+ if multilib_is_native_abi ; then
+ emake
+ else
+ # bug #586444
+ emake -C src magic.h
+ emake -C src libmagic.la
+ fi
+}
+
+src_compile() {
+ if need_build_file ; then
+ # bug #586444
+ emake -C "${WORKDIR}"/build/src magic.h
+ emake -C "${WORKDIR}"/build/src file
+ local -x PATH="${WORKDIR}/build/src:${PATH}"
+ fi
+
+ multilib-minimal_src_compile
+
+ if use python ; then
+ cd python || die
+ distutils-r1_src_compile
+ fi
+}
+
+multilib_src_install() {
+ if multilib_is_native_abi ; then
+ default
+ else
+ emake -C src install-{nodist_includeHEADERS,libLTLIBRARIES} DESTDIR="${D}"
+ fi
+}
+
+multilib_src_install_all() {
+ dodoc ChangeLog MAINT # README
+
+ # Required for `file -C`
+ insinto /usr/share/misc/magic
+ doins -r magic/Magdir/*
+
+ if use python ; then
+ cd python || die
+ distutils-r1_src_install
+ fi
+
+ find "${ED}" -type f -name "*.la" -delete || die
+}
diff --git a/sys-apps/file/files/file-5.42-unicode-fixes.patch b/sys-apps/file/files/file-5.42-unicode-fixes.patch
new file mode 100644
index 000000000000..91c46a358120
--- /dev/null
+++ b/sys-apps/file/files/file-5.42-unicode-fixes.patch
@@ -0,0 +1,414 @@
+https://bugs.gentoo.org/861089
+https://github.com/file/file/commit/19bf47777d0002ee884467e45e6ace702e40a4c1
+https://github.com/file/file/commit/c80065fe6900be5e794941e29b32440e9969b1c3
+https://github.com/file/file/commit/7e59d34206d7c962e093d4239e5367a2cd8b7623
+https://github.com/file/file/commit/f042050f59bfc037677871c4d1037c33273f5213
+https://github.com/file/file/commit/d471022b2772071877895759f209f2c346757a4c
+https://github.com/file/file/commit/441ac2b15508909e82ad467960df4ac0adf9644c
+
+From 19bf47777d0002ee884467e45e6ace702e40a4c1 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Mon, 4 Jul 2022 17:00:51 +0000
+Subject: [PATCH] PR/358: Fix width for -f - (jpalus)
+
+---
+ src/file.c | 46 +++++++++++++++++++++++++++++-----------------
+ 2 files changed, 31 insertions(+), 18 deletions(-)
+
+diff --git a/src/file.c b/src/file.c
+index 5300e5af8..bb058ce1e 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -506,35 +506,47 @@ unwrap(struct magic_set *ms, const char *fn)
+ size_t llen = 0;
+ int wid = 0, cwid;
+ int e = 0;
++ size_t fi = 0, fimax = 100;
++ char **flist = malloc(sizeof(*flist) * fimax);
+
+- if (strcmp("-", fn) == 0) {
++ if (flist == NULL)
++out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list");
++
++ if (strcmp("-", fn) == 0)
+ f = stdin;
+- wid = 1;
+- } else {
++ else {
+ if ((f = fopen(fn, "r")) == NULL) {
+ file_warn("Cannot open `%s'", fn);
+ return 1;
+ }
+-
+- while ((len = getline(&line, &llen, f)) > 0) {
+- if (line[len - 1] == '\n')
+- line[len - 1] = '\0';
+- cwid = file_mbswidth(ms, line);
+- if (cwid > wid)
+- wid = cwid;
+- }
+-
+- rewind(f);
+ }
+
+ while ((len = getline(&line, &llen, f)) > 0) {
+ if (line[len - 1] == '\n')
+ line[len - 1] = '\0';
+- e |= process(ms, line, wid);
++ if (fi >= fimax) {
++ fimax += 100;
++ char **nf = realloc(flist, fimax * sizeof(*flist));
++ if (nf == NULL)
++ goto out;
++ }
++ flist[fi++] = line;
++ cwid = file_mbswidth(ms, line);
++ if (cwid > wid)
++ wid = cwid;
++ line = NULL;
++ llen = 0;
++ }
++
++ fimax = fi;
++ for (fi = 0; fi < fimax; fi++) {
++ e |= process(ms, flist[fi], wid);
++ free(flist[fi]);
+ }
++ free(flist);
+
+- free(line);
+- (void)fclose(f);
++ if (f != stdin)
++ (void)fclose(f);
+ return e;
+ }
+
+
+From c80065fe6900be5e794941e29b32440e9969b1c3 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Mon, 4 Jul 2022 19:44:35 +0000
+Subject: [PATCH] PR/362: ro-ee: fix wide char printing
+
+--- a/src/file.c
++++ b/src/file.c
+@@ -60,6 +60,12 @@ FILE_RCSID("@(#)$File: file.c,v 1.196 2022/07/04 17:00:51 christos Exp $")
+ #ifdef HAVE_WCTYPE_H
+ #include <wctype.h>
+ #endif
++#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \
++ defined(HAVE_WCTYPE_H)
++#define FILE_WIDE_SUPPORT
++#else
++#include <ctype.h>
++#endif
+
+ #if defined(HAVE_GETOPT_H) && defined(HAVE_STRUCT_OPTION)
+ # include <getopt.h>
+@@ -550,6 +556,55 @@ out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list");
+ return e;
+ }
+
++private void
++file_octal(unsigned char c)
++{
++ putc('\\', stdout);
++ putc(((c >> 6) & 7) + '0', stdout);
++ putc(((c >> 3) & 7) + '0', stdout);
++ putc(((c >> 0) & 7) + '0', stdout);
++}
++
+private void
+fname_print(const char *inname)
+{
+ size_t n = strlen(inname);
+#ifdef FILE_WIDE_SUPPORT
+ mbstate_t state;
+ wchar_t nextchar;
+ size_t bytesconsumed;
+
+
+ (void)mbrlen(NULL, 0, &state);
+ while (n > 0) {
+ bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
+ if (bytesconsumed == CAST(size_t, -1) ||
+ bytesconsumed == CAST(size_t, -2)) {
+ nextchar = *inname;
+ bytesconsumed = 1;
+ }
+ inname += bytesconsumed;
+ n -= bytesconsumed;
+ if (iswprint(nextchar)) {
+ putwc(nextchar, stdout);
+ continue;
+ }
+ /* XXX: What if it is > 255? */
+ file_octal(CAST(unsigned char, nextchar));
+ }
+#else
+ size_t i;
+ for (i = 0; i < n; i++) {
+ unsigned char c = CAST(unsigned char, inname[i]);
+ if (isprint(c)) {
+ putc(c, stdout);
+ continue;
+ }
+ file_octal(c);
+ }
+#endif
+}
++
+ /*
+ * Called for each input file on the command line (or in a list of files)
+ */
+@@ -559,15 +614,13 @@ process(struct magic_set *ms, const char *inname, int wid)
+ const char *type, c = nulsep > 1 ? '\0' : '\n';
+ int std_in = strcmp(inname, "-") == 0;
+ int haderror = 0;
+- size_t plen = 4 * wid + 1;
+- char *pbuf, *pname;
+-
+- if ((pbuf = CAST(char *, malloc(plen))) == NULL)
+- file_err(EXIT_FAILURE, "Can't allocate %zu bytes", plen);
+
+ if (wid > 0 && !bflag) {
+- pname = file_printable(ms, pbuf, plen, inname, wid);
+- (void)printf("%s", std_in ? "/dev/stdin" : pname);
++ const char *pname = std_in ? "/dev/stdin" : inname;
++ if ((ms->flags & MAGIC_RAW) == 0)
++ fname_print(pname);
++ else
++ (void)printf("%s", pname);
+ if (nulsep)
+ (void)putc('\0', stdout);
+ if (nulsep < 2) {
+@@ -586,7 +639,6 @@ process(struct magic_set *ms, const char *inname, int wid)
+ }
+ if (nobuffer)
+ haderror |= fflush(stdout) != 0;
+- free(pbuf);
+ return haderror || type == NULL;
+ }
+
+@@ -594,35 +646,33 @@ protected size_t
+ file_mbswidth(struct magic_set *ms, const char *s)
+ {
+ size_t width = 0;
+-#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \
+- defined(HAVE_WCTYPE_H)
+- size_t bytesconsumed, old_n, n;
++#ifdef FILE_WIDE_SUPPORT
++ size_t bytesconsumed, n;
+ mbstate_t state;
+ wchar_t nextchar;
+- (void)memset(&state, 0, sizeof(mbstate_t));
+- old_n = n = strlen(s);
++
++ (void)mbrlen(NULL, 0, &state);
++ n = strlen(s);
+
+ while (n > 0) {
+ bytesconsumed = mbrtowc(&nextchar, s, n, &state);
+ if (bytesconsumed == CAST(size_t, -1) ||
+ bytesconsumed == CAST(size_t, -2)) {
+- /* Something went wrong, return something reasonable */
+- return old_n;
++ nextchar = *s;
++ bytesconsumed = 1;
+ }
+ width += ((ms->flags & MAGIC_RAW) != 0
+ || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
+
+ s += bytesconsumed, n -= bytesconsumed;
+ }
+- return width;
+ #else
+ while (*s) {
+ width += (ms->flags & MAGIC_RAW) != 0
+ || isprint(CAST(unsigned char, *s)) ? 1 : 4;
+ }
+-
+- return strlen(s);
+ #endif
++ return width;
+ }
+
+ private void
+--- a/src/file.h
++++ b/src/file.h
+@@ -575,7 +575,7 @@ protected size_t file_pstring_length_size(struct magic_set *,
+ const struct magic *);
+ protected size_t file_pstring_get_length(struct magic_set *,
+ const struct magic *, const char *);
+-public char * file_printable(struct magic_set *, char *, size_t,
++protected char * file_printable(struct magic_set *, char *, size_t,
+ const char *, size_t);
+ #ifdef __EMX__
+ protected int file_os2_apptype(struct magic_set *, const char *, const void *,
+--- a/src/funcs.c
++++ b/src/funcs.c
+@@ -763,7 +763,7 @@ file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb)
+ /*
+ * convert string to ascii printable format.
+ */
+-public char *
++protected char *
+ file_printable(struct magic_set *ms, char *buf, size_t bufsiz,
+ const char *str, size_t slen)
+ {
+
+From 7e59d34206d7c962e093d4239e5367a2cd8b7623 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Mon, 4 Jul 2022 20:16:29 +0000
+Subject: [PATCH] Handle invalid characters as octal (idea from PR/363 by
+ dimich)
+
+---
+ src/file.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/src/file.c b/src/file.c
+index 5e89137d7..af9be0f0c 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -580,8 +580,11 @@ fname_print(const char *inname)
+ bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
+ if (bytesconsumed == CAST(size_t, -1) ||
+ bytesconsumed == CAST(size_t, -2)) {
+- nextchar = *inname;
+- bytesconsumed = 1;
++ nextchar = *inname++;
++ n--;
++ (void)mbrlen(NULL, 0, &state);
++ file_octal(CAST(unsigned char, nextchar));
++ continue;
+ }
+ inname += bytesconsumed;
+ n -= bytesconsumed;
+@@ -660,9 +663,12 @@ file_mbswidth(struct magic_set *ms, const char *s)
+ bytesconsumed == CAST(size_t, -2)) {
+ nextchar = *s;
+ bytesconsumed = 1;
++ (void)mbrlen(NULL, 0, &state);
++ width += 4;
++ } else {
++ width += ((ms->flags & MAGIC_RAW) != 0
++ || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
+ }
+- width += ((ms->flags & MAGIC_RAW) != 0
+- || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
+
+ s += bytesconsumed, n -= bytesconsumed;
+ }
+
+
+From f042050f59bfc037677871c4d1037c33273f5213 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Mon, 4 Jul 2022 22:30:51 +0000
+Subject: [PATCH] mbrlen(NULL, is not portable; revert to using memset to
+ initialize the state.
+
+---
+ src/file.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/src/file.c b/src/file.c
+index af9be0f0c..8b4f14c2e 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -575,14 +575,14 @@ fname_print(const char *inname)
+ size_t bytesconsumed;
+
+
+- (void)mbrlen(NULL, 0, &state);
++ (void)memset(&state, 0, sizeof(state));
+ while (n > 0) {
+ bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
+ if (bytesconsumed == CAST(size_t, -1) ||
+ bytesconsumed == CAST(size_t, -2)) {
+ nextchar = *inname++;
+ n--;
+- (void)mbrlen(NULL, 0, &state);
++ (void)memset(&state, 0, sizeof(state));
+ file_octal(CAST(unsigned char, nextchar));
+ continue;
+ }
+@@ -654,7 +654,7 @@ file_mbswidth(struct magic_set *ms, const char *s)
+ mbstate_t state;
+ wchar_t nextchar;
+
+- (void)mbrlen(NULL, 0, &state);
++ (void)memset(&state, 0, sizeof(state));
+ n = strlen(s);
+
+ while (n > 0) {
+@@ -663,7 +663,7 @@ file_mbswidth(struct magic_set *ms, const char *s)
+ bytesconsumed == CAST(size_t, -2)) {
+ nextchar = *s;
+ bytesconsumed = 1;
+- (void)mbrlen(NULL, 0, &state);
++ (void)memset(&state, 0, sizeof(state));
+ width += 4;
+ } else {
+ width += ((ms->flags & MAGIC_RAW) != 0
+
+
+From d471022b2772071877895759f209f2c346757a4c Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Tue, 5 Jul 2022 19:53:42 +0000
+Subject: [PATCH] Use printf("%lc") instead of putwc(). Somehow mixing wide and
+ narrow stdio does not work on Linux?
+
+--- a/src/file.c
++++ b/src/file.c
+@@ -589,7 +589,7 @@ fname_print(const char *inname)
+ inname += bytesconsumed;
+ n -= bytesconsumed;
+ if (iswprint(nextchar)) {
+- putwc(nextchar, stdout);
++ printf("%lc", nextchar);
+ continue;
+ }
+ /* XXX: What if it is > 255? */
+
+From 441ac2b15508909e82ad467960df4ac0adf9644c Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Tue, 5 Jul 2022 20:05:23 +0000
+Subject: [PATCH] wcwidth is not supposed to return -1 if the character is
+ printable, but it does for 0xff... Prevent it from decreasing the width.
+
+--- a/src/file.c
++++ b/src/file.c
+@@ -666,8 +666,9 @@ file_mbswidth(struct magic_set *ms, const char *s)
+ (void)memset(&state, 0, sizeof(state));
+ width += 4;
+ } else {
++ int w = wcwidth(nextchar);
+ width += ((ms->flags & MAGIC_RAW) != 0
+- || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
++ || iswprint(nextchar)) ? (w > 0 ? w : 1) : 4;
+ }
+
+ s += bytesconsumed, n -= bytesconsumed;
+
+From be1ac8c0aa6d21921012f62582f51a9e546e4972 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Tue, 26 Jul 2022 15:10:05 +0000
+Subject: [PATCH] Fix bug with large flist (Florian Weimer)
+
+---
+ src/file.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/src/file.c b/src/file.c
+index e169c08fc..c0b8aa197 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -535,6 +535,7 @@ out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list");
+ char **nf = realloc(flist, fimax * sizeof(*flist));
+ if (nf == NULL)
+ goto out;
++ flist = nf;
+ }
+ flist[fi++] = line;
+ cwid = file_mbswidth(ms, line);
+