From 68e24256a0449b2a27e44879f95171780f21526b Mon Sep 17 00:00:00 2001 From: Ulrich Müller Date: Tue, 29 Jun 2021 10:09:10 +0200 Subject: sys-apps/file: Fix character count heuristic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This restores behaviour of file-5.39, i.e. correctly identifies small text files as text/plain rather than application/octet-stream again. Patch taken from upstream and (trivially) backported to 5.40. Closes: https://bugs.gentoo.org/799188 Bug: https://bugs.astron.com/view.php?id=261 Package-Manager: Portage-3.0.20, Repoman-3.0.3 Signed-off-by: Ulrich Müller Signed-off-by: Lars Wendler --- sys-apps/file/file-5.40-r3.ebuild | 147 +++++++++++++++++++++ .../file/files/file-5.40-revert-char-count.patch | 49 +++++++ 2 files changed, 196 insertions(+) create mode 100644 sys-apps/file/file-5.40-r3.ebuild create mode 100644 sys-apps/file/files/file-5.40-revert-char-count.patch (limited to 'sys-apps/file') diff --git a/sys-apps/file/file-5.40-r3.ebuild b/sys-apps/file/file-5.40-r3.ebuild new file mode 100644 index 000000000000..7fd5ac3b71b9 --- /dev/null +++ b/sys-apps/file/file-5.40-r3.ebuild @@ -0,0 +1,147 @@ +# Copyright 1999-2021 Gentoo Authors +# Distributed under the terms of the GNU General Public License v2 + +EAPI=7 + +PYTHON_COMPAT=( python3_{8..10} ) +DISTUTILS_OPTIONAL=1 + +inherit distutils-r1 libtool toolchain-funcs multilib-minimal + +if [[ ${PV} == "9999" ]] ; then + EGIT_REPO_URI="https://github.com/glensc/file.git" + inherit autotools git-r3 +else + SRC_URI="ftp://ftp.astron.com/pub/file/${P}.tar.gz" + KEYWORDS="~alpha ~amd64 ~arm ~arm64 ~hppa ~ia64 ~m68k ~mips ~ppc ~ppc64 ~riscv ~s390 ~sparc ~x86 ~x64-cygwin ~amd64-linux ~x86-linux ~ppc-macos ~x64-macos ~sparc-solaris ~sparc64-solaris ~x64-solaris ~x86-solaris" +fi + +DESCRIPTION="identify a file's format by scanning binary data for patterns" +HOMEPAGE="https://www.darwinsys.com/file/" + +LICENSE="BSD-2" +SLOT="0" +IUSE="bzip2 lzma 
python seccomp static-libs zlib" +REQUIRED_USE="python? ( ${PYTHON_REQUIRED_USE} )" + +DEPEND=" + bzip2? ( app-arch/bzip2[${MULTILIB_USEDEP}] ) + lzma? ( app-arch/xz-utils[${MULTILIB_USEDEP}] ) + python? ( + ${PYTHON_DEPS} + dev-python/setuptools[${PYTHON_USEDEP}] + ) + zlib? ( >=sys-libs/zlib-1.2.8-r1[${MULTILIB_USEDEP}] )" +RDEPEND="${DEPEND} + python? ( !dev-python/python-magic ) + seccomp? ( sys-libs/libseccomp[${MULTILIB_USEDEP}] )" + +PATCHES=( + "${FILESDIR}/file-5.39-portage-sandbox.patch" #713710 #728978 + "${FILESDIR}/file-5.40-xz_magic.patch" #784773 + "${FILESDIR}/file-5.40-seccomp-faccessat.patch" + "${FILESDIR}/file-5.40-seccomp-fstatat64.patch" #784857 + "${FILESDIR}/file-5.40-revert-char-count.patch" #799188 +) + +src_prepare() { + default + + if [[ ${PV} == 9999 ]] ; then + eautoreconf + fi + + elibtoolize + + # don't let python README kill main README #60043 + mv python/README.md python/README.python.md || die + sed 's@README.md@README.python.md@' -i python/setup.py || die #662090 +} + +multilib_src_configure() { + local myeconfargs=( + --enable-fsect-man5 + $(use_enable bzip2 bzlib) + $(use_enable lzma xzlib) + $(use_enable seccomp libseccomp) + $(use_enable static-libs static) + $(use_enable zlib) + ) + econf "${myeconfargs[@]}" +} + +build_src_configure() { + local myeconfargs=( + --disable-shared + --disable-libseccomp + --disable-bzlib + --disable-xzlib + --disable-zlib + ) + tc-env_build econf "${myeconfargs[@]}" +} + +need_build_file() { + # when cross-compiling, we need to build up our own file + # because people often don't keep matching host/target + # file versions #362941 + tc-is-cross-compiler && ! 
has_version -b "~${CATEGORY}/${P}" +} + +src_configure() { + local ECONF_SOURCE="${S}" + + if need_build_file ; then + mkdir -p "${WORKDIR}"/build || die + cd "${WORKDIR}"/build || die + build_src_configure + fi + + multilib-minimal_src_configure +} + +multilib_src_compile() { + if multilib_is_native_abi ; then + emake + else + cd src || die + emake magic.h #586444 + emake libmagic.la + fi +} + +src_compile() { + if need_build_file ; then + emake -C "${WORKDIR}"/build/src magic.h #586444 + emake -C "${WORKDIR}"/build/src file + local -x PATH="${WORKDIR}/build/src:${PATH}" + fi + multilib-minimal_src_compile + + if use python ; then + cd python || die + distutils-r1_src_compile + fi +} + +multilib_src_install() { + if multilib_is_native_abi ; then + default + else + emake -C src install-{nodist_includeHEADERS,libLTLIBRARIES} DESTDIR="${D}" + fi +} + +multilib_src_install_all() { + dodoc ChangeLog MAINT README + + # Required for `file -C` + insinto /usr/share/misc/magic + doins -r magic/Magdir/* + + if use python ; then + cd python || die + distutils-r1_src_install + fi + find "${ED}" -type f -name "*.la" -delete || die +} diff --git a/sys-apps/file/files/file-5.40-revert-char-count.patch b/sys-apps/file/files/file-5.40-revert-char-count.patch new file mode 100644 index 000000000000..9d6f5be60fac --- /dev/null +++ b/sys-apps/file/files/file-5.40-revert-char-count.patch @@ -0,0 +1,49 @@ +From c07e242e766242a44ff720c149b1bdd4924ec247 Mon Sep 17 00:00:00 2001 +From: Christos Zoulas +Date: Tue, 27 Apr 2021 19:37:14 +0000 +Subject: [PATCH] Revert the fix for PR/180. It led to PR/261. 
Using character + count heuristics ends up with confusing behavior, the following should not be + producing different results: echo -n xx | ./file - echo -n xy | + ./file - + +--- +[patch backported to 5.40 release -- ulm] + + src/encoding.c | 15 ++------------- + 1 file changed, 2 insertions(+), 13 deletions(-) + +diff --git a/src/encoding.c b/src/encoding.c +index 31d4d125..3647a481 100644 +--- a/src/encoding.c ++++ b/src/encoding.c +@@ -265,9 +265,7 @@ private int \ + looks_ ## NAME(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf, \ + size_t *ulen) \ + { \ +- size_t i, u; \ +- unsigned char dist[256]; \ +- memset(dist, 0, sizeof(dist)); \ ++ size_t i; \ + \ + *ulen = 0; \ + \ +@@ -278,16 +276,7 @@ looks_ ## NAME(const unsigned char *buf, size_t nbytes, file_unichar_t *ubuf, \ + return 0; \ + \ + ubuf[(*ulen)++] = buf[i]; \ +- dist[buf[i]]++; \ + } \ +- u = 0; \ +- for (i = 0; i < __arraycount(dist); i++) { \ +- if (dist[i]) \ +- u++; \ +- } \ +- if (u < 3) \ +- return 0; \ +-\ + return 1; \ + } + +-- +2.32.0 + -- cgit v1.2.3-65-gdbad