summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSam James <sam@gentoo.org>2022-08-16 03:28:55 +0100
committerSam James <sam@gentoo.org>2022-08-16 03:29:57 +0100
commit253ca90f3f968a03ea6fff8f0011cf411764b22e (patch)
tree3a975b6071e8e4821673924289b1e5e1ab492247 /sys-apps/file/files
parentsys-libs/timezone-data: add 2022c (diff)
downloadgentoo-253ca90f3f968a03ea6fff8f0011cf411764b22e.tar.gz
gentoo-253ca90f3f968a03ea6fff8f0011cf411764b22e.tar.bz2
gentoo-253ca90f3f968a03ea6fff8f0011cf411764b22e.zip
sys-apps/file: backport unicode handling fixes to 5.42
Temporarily unkeyworded given I had a few issues before I threw in a few extra patches. Want to give it a test run for a day or so myself first before keywording. Bug: https://bugs.gentoo.org/861089 Signed-off-by: Sam James <sam@gentoo.org>
Diffstat (limited to 'sys-apps/file/files')
-rw-r--r--sys-apps/file/files/file-5.42-unicode-fixes.patch414
1 files changed, 414 insertions, 0 deletions
diff --git a/sys-apps/file/files/file-5.42-unicode-fixes.patch b/sys-apps/file/files/file-5.42-unicode-fixes.patch
new file mode 100644
index 000000000000..91c46a358120
--- /dev/null
+++ b/sys-apps/file/files/file-5.42-unicode-fixes.patch
@@ -0,0 +1,414 @@
+https://bugs.gentoo.org/861089
+https://github.com/file/file/commit/19bf47777d0002ee884467e45e6ace702e40a4c1
+https://github.com/file/file/commit/c80065fe6900be5e794941e29b32440e9969b1c3
+https://github.com/file/file/commit/7e59d34206d7c962e093d4239e5367a2cd8b7623
+https://github.com/file/file/commit/f042050f59bfc037677871c4d1037c33273f5213
+https://github.com/file/file/commit/d471022b2772071877895759f209f2c346757a4c
+https://github.com/file/file/commit/441ac2b15508909e82ad467960df4ac0adf9644c
+
+From 19bf47777d0002ee884467e45e6ace702e40a4c1 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Mon, 4 Jul 2022 17:00:51 +0000
+Subject: [PATCH] PR/358: Fix width for -f - (jpalus)
+
+---
+ src/file.c | 46 +++++++++++++++++++++++++++++-----------------
+ 2 files changed, 31 insertions(+), 18 deletions(-)
+
+diff --git a/src/file.c b/src/file.c
+index 5300e5af8..bb058ce1e 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -506,35 +506,47 @@ unwrap(struct magic_set *ms, const char *fn)
+ size_t llen = 0;
+ int wid = 0, cwid;
+ int e = 0;
++ size_t fi = 0, fimax = 100;
++ char **flist = malloc(sizeof(*flist) * fimax);
+
+- if (strcmp("-", fn) == 0) {
++ if (flist == NULL)
++out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list");
++
++ if (strcmp("-", fn) == 0)
+ f = stdin;
+- wid = 1;
+- } else {
++ else {
+ if ((f = fopen(fn, "r")) == NULL) {
+ file_warn("Cannot open `%s'", fn);
+ return 1;
+ }
+-
+- while ((len = getline(&line, &llen, f)) > 0) {
+- if (line[len - 1] == '\n')
+- line[len - 1] = '\0';
+- cwid = file_mbswidth(ms, line);
+- if (cwid > wid)
+- wid = cwid;
+- }
+-
+- rewind(f);
+ }
+
+ while ((len = getline(&line, &llen, f)) > 0) {
+ if (line[len - 1] == '\n')
+ line[len - 1] = '\0';
+- e |= process(ms, line, wid);
++ if (fi >= fimax) {
++ fimax += 100;
++ char **nf = realloc(flist, fimax * sizeof(*flist));
++ if (nf == NULL)
++ goto out;
++ }
++ flist[fi++] = line;
++ cwid = file_mbswidth(ms, line);
++ if (cwid > wid)
++ wid = cwid;
++ line = NULL;
++ llen = 0;
++ }
++
++ fimax = fi;
++ for (fi = 0; fi < fimax; fi++) {
++ e |= process(ms, flist[fi], wid);
++ free(flist[fi]);
+ }
++ free(flist);
+
+- free(line);
+- (void)fclose(f);
++ if (f != stdin)
++ (void)fclose(f);
+ return e;
+ }
+
+
+From c80065fe6900be5e794941e29b32440e9969b1c3 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Mon, 4 Jul 2022 19:44:35 +0000
+Subject: [PATCH] PR/362: ro-ee: fix wide char printing
+
+--- a/src/file.c
++++ b/src/file.c
+@@ -60,6 +60,12 @@ FILE_RCSID("@(#)$File: file.c,v 1.196 2022/07/04 17:00:51 christos Exp $")
+ #ifdef HAVE_WCTYPE_H
+ #include <wctype.h>
+ #endif
++#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \
++ defined(HAVE_WCTYPE_H)
++#define FILE_WIDE_SUPPORT
++#else
++#include <ctype.h>
++#endif
+
+ #if defined(HAVE_GETOPT_H) && defined(HAVE_STRUCT_OPTION)
+ # include <getopt.h>
+@@ -550,6 +556,55 @@ out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list");
+ return e;
+ }
+
++private void
++file_octal(unsigned char c)
++{
++ putc('\\', stdout);
++ putc(((c >> 6) & 7) + '0', stdout);
++ putc(((c >> 3) & 7) + '0', stdout);
++ putc(((c >> 0) & 7) + '0', stdout);
++}
++
++private void
++fname_print(const char *inname)
++{
++ size_t n = strlen(inname);
++#ifdef FILE_WIDE_SUPPORT
++ mbstate_t state;
++ wchar_t nextchar;
++ size_t bytesconsumed;
++
++
++ (void)mbrlen(NULL, 0, &state);
++ while (n > 0) {
++ bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
++ if (bytesconsumed == CAST(size_t, -1) ||
++ bytesconsumed == CAST(size_t, -2)) {
++ nextchar = *inname;
++ bytesconsumed = 1;
++ }
++ inname += bytesconsumed;
++ n -= bytesconsumed;
++ if (iswprint(nextchar)) {
++ putwc(nextchar, stdout);
++ continue;
++ }
++ /* XXX: What if it is > 255? */
++ file_octal(CAST(unsigned char, nextchar));
++ }
++#else
++ size_t i;
++ for (i = 0; i < n; i++) {
++ unsigned char c = CAST(unsigned char, inname[i]);
++ if (isprint(c)) {
++ putc(c);
++ continue;
++ }
++ file_octal(c);
++ }
++#endif
++}
++
+ /*
+ * Called for each input file on the command line (or in a list of files)
+ */
+@@ -559,15 +614,13 @@ process(struct magic_set *ms, const char *inname, int wid)
+ const char *type, c = nulsep > 1 ? '\0' : '\n';
+ int std_in = strcmp(inname, "-") == 0;
+ int haderror = 0;
+- size_t plen = 4 * wid + 1;
+- char *pbuf, *pname;
+-
+- if ((pbuf = CAST(char *, malloc(plen))) == NULL)
+- file_err(EXIT_FAILURE, "Can't allocate %zu bytes", plen);
+
+ if (wid > 0 && !bflag) {
+- pname = file_printable(ms, pbuf, plen, inname, wid);
+- (void)printf("%s", std_in ? "/dev/stdin" : pname);
++ const char *pname = std_in ? "/dev/stdin" : inname;
++ if ((ms->flags & MAGIC_RAW) == 0)
++ fname_print(pname);
++ else
++ (void)printf("%s", pname);
+ if (nulsep)
+ (void)putc('\0', stdout);
+ if (nulsep < 2) {
+@@ -586,7 +639,6 @@ process(struct magic_set *ms, const char *inname, int wid)
+ }
+ if (nobuffer)
+ haderror |= fflush(stdout) != 0;
+- free(pbuf);
+ return haderror || type == NULL;
+ }
+
+@@ -594,35 +646,33 @@ protected size_t
+ file_mbswidth(struct magic_set *ms, const char *s)
+ {
+ size_t width = 0;
+-#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH) && \
+- defined(HAVE_WCTYPE_H)
+- size_t bytesconsumed, old_n, n;
++#ifdef FILE_WIDE_SUPPORT
++ size_t bytesconsumed, n;
+ mbstate_t state;
+ wchar_t nextchar;
+- (void)memset(&state, 0, sizeof(mbstate_t));
+- old_n = n = strlen(s);
++
++ (void)mbrlen(NULL, 0, &state);
++ n = strlen(s);
+
+ while (n > 0) {
+ bytesconsumed = mbrtowc(&nextchar, s, n, &state);
+ if (bytesconsumed == CAST(size_t, -1) ||
+ bytesconsumed == CAST(size_t, -2)) {
+- /* Something went wrong, return something reasonable */
+- return old_n;
++ nextchar = *s;
++ bytesconsumed = 1;
+ }
+ width += ((ms->flags & MAGIC_RAW) != 0
+ || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
+
+ s += bytesconsumed, n -= bytesconsumed;
+ }
+- return width;
+ #else
+ while (*s) {
+ width += (ms->flags & MAGIC_RAW) != 0
+ || isprint(CAST(unsigned char, *s)) ? 1 : 4;
+ }
+-
+- return strlen(s);
+ #endif
++ return width;
+ }
+
+ private void
+--- a/src/file.h
++++ b/src/file.h
+@@ -575,7 +575,7 @@ protected size_t file_pstring_length_size(struct magic_set *,
+ const struct magic *);
+ protected size_t file_pstring_get_length(struct magic_set *,
+ const struct magic *, const char *);
+-public char * file_printable(struct magic_set *, char *, size_t,
++protected char * file_printable(struct magic_set *, char *, size_t,
+ const char *, size_t);
+ #ifdef __EMX__
+ protected int file_os2_apptype(struct magic_set *, const char *, const void *,
+--- a/src/funcs.c
++++ b/src/funcs.c
+@@ -763,7 +763,7 @@ file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb)
+ /*
+ * convert string to ascii printable format.
+ */
+-public char *
++protected char *
+ file_printable(struct magic_set *ms, char *buf, size_t bufsiz,
+ const char *str, size_t slen)
+ {
+
+From 7e59d34206d7c962e093d4239e5367a2cd8b7623 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Mon, 4 Jul 2022 20:16:29 +0000
+Subject: [PATCH] Handle invalid characters as octal (idea from PR/363 by
+ dimich)
+
+---
+ src/file.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+diff --git a/src/file.c b/src/file.c
+index 5e89137d7..af9be0f0c 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -580,8 +580,11 @@ fname_print(const char *inname)
+ bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
+ if (bytesconsumed == CAST(size_t, -1) ||
+ bytesconsumed == CAST(size_t, -2)) {
+- nextchar = *inname;
+- bytesconsumed = 1;
++ nextchar = *inname++;
++ n--;
++ (void)mbrlen(NULL, 0, &state);
++ file_octal(CAST(unsigned char, nextchar));
++ continue;
+ }
+ inname += bytesconsumed;
+ n -= bytesconsumed;
+@@ -660,9 +663,12 @@ file_mbswidth(struct magic_set *ms, const char *s)
+ bytesconsumed == CAST(size_t, -2)) {
+ nextchar = *s;
+ bytesconsumed = 1;
++ (void)mbrlen(NULL, 0, &state);
++ width += 4;
++ } else {
++ width += ((ms->flags & MAGIC_RAW) != 0
++ || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
+ }
+- width += ((ms->flags & MAGIC_RAW) != 0
+- || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
+
+ s += bytesconsumed, n -= bytesconsumed;
+ }
+
+
+From f042050f59bfc037677871c4d1037c33273f5213 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Mon, 4 Jul 2022 22:30:51 +0000
+Subject: [PATCH] mbrlen(NULL, is not portable; revert to using memset to
+ initialize the state.
+
+---
+ src/file.c | 10 +++++-----
+ 1 file changed, 5 insertions(+), 5 deletions(-)
+
+diff --git a/src/file.c b/src/file.c
+index af9be0f0c..8b4f14c2e 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -575,14 +575,14 @@ fname_print(const char *inname)
+ size_t bytesconsumed;
+
+
+- (void)mbrlen(NULL, 0, &state);
++ (void)memset(&state, 0, sizeof(state));
+ while (n > 0) {
+ bytesconsumed = mbrtowc(&nextchar, inname, n, &state);
+ if (bytesconsumed == CAST(size_t, -1) ||
+ bytesconsumed == CAST(size_t, -2)) {
+ nextchar = *inname++;
+ n--;
+- (void)mbrlen(NULL, 0, &state);
++ (void)memset(&state, 0, sizeof(state));
+ file_octal(CAST(unsigned char, nextchar));
+ continue;
+ }
+@@ -654,7 +654,7 @@ file_mbswidth(struct magic_set *ms, const char *s)
+ mbstate_t state;
+ wchar_t nextchar;
+
+- (void)mbrlen(NULL, 0, &state);
++ (void)memset(&state, 0, sizeof(state));
+ n = strlen(s);
+
+ while (n > 0) {
+@@ -663,7 +663,7 @@ file_mbswidth(struct magic_set *ms, const char *s)
+ bytesconsumed == CAST(size_t, -2)) {
+ nextchar = *s;
+ bytesconsumed = 1;
+- (void)mbrlen(NULL, 0, &state);
++ (void)memset(&state, 0, sizeof(state));
+ width += 4;
+ } else {
+ width += ((ms->flags & MAGIC_RAW) != 0
+
+
+From d471022b2772071877895759f209f2c346757a4c Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Tue, 5 Jul 2022 19:53:42 +0000
+Subject: [PATCH] Use printf("%lc") instead of putwc(). Somehow mixing wide and
+ narrow stdio does not work on Linux?
+
+--- a/src/file.c
++++ b/src/file.c
+@@ -589,7 +589,7 @@ fname_print(const char *inname)
+ inname += bytesconsumed;
+ n -= bytesconsumed;
+ if (iswprint(nextchar)) {
+- putwc(nextchar, stdout);
++ printf("%lc", nextchar);
+ continue;
+ }
+ /* XXX: What if it is > 255? */
+
+From 441ac2b15508909e82ad467960df4ac0adf9644c Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Tue, 5 Jul 2022 20:05:23 +0000
+Subject: [PATCH] wcwidth is not supposed to return -1 if the character is
+ printable, but it does for 0xff... Prevent it from decreasing the width.
+
+--- a/src/file.c
++++ b/src/file.c
+@@ -666,8 +666,9 @@ file_mbswidth(struct magic_set *ms, const char *s)
+ (void)memset(&state, 0, sizeof(state));
+ width += 4;
+ } else {
++ int w = wcwidth(nextchar);
+ width += ((ms->flags & MAGIC_RAW) != 0
+- || iswprint(nextchar)) ? wcwidth(nextchar) : 4;
++ || iswprint(nextchar)) ? (w > 0 ? w : 1) : 4;
+ }
+
+ s += bytesconsumed, n -= bytesconsumed;
+
+From be1ac8c0aa6d21921012f62582f51a9e546e4972 Mon Sep 17 00:00:00 2001
+From: Christos Zoulas <christos@zoulas.com>
+Date: Tue, 26 Jul 2022 15:10:05 +0000
+Subject: [PATCH] Fix bug with large flist (Florian Weimer)
+
+---
+ src/file.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/src/file.c b/src/file.c
+index e169c08fc..c0b8aa197 100644
+--- a/src/file.c
++++ b/src/file.c
+@@ -535,6 +535,7 @@ out: file_err(EXIT_FAILURE, "Cannot allocate memory for file list");
+ char **nf = realloc(flist, fimax * sizeof(*flist));
+ if (nf == NULL)
+ goto out;
++ flist = nf;
+ }
+ flist[fi++] = line;
+ cwid = file_mbswidth(ms, line);
+