about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Łukasz Langa <lukasz@langa.pl> 2023-11-06 15:46:20 +0100
committer GitHub <noreply@github.com> 2023-11-06 15:46:20 +0100
commit 6c2f34fa77f884bd79801a9ab8a117cab7d9c7ed (patch)
tree 871156d13be2a398aa9347796def842c5bc51f6d
parent [3.10] gh-109991: Update Windows build to use OpenSSL 1.1.1w (GH-110090) (diff)
download cpython-6c2f34fa77f884bd79801a9ab8a117cab7d9c7ed.tar.gz
cpython-6c2f34fa77f884bd79801a9ab8a117cab7d9c7ed.tar.bz2
cpython-6c2f34fa77f884bd79801a9ab8a117cab7d9c7ed.zip
[3.10] gh-101180: Fix a bug where iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds (gh-111695) (gh-111779)
(cherry picked from commit c8faa3568afd255708096f6aa8df0afa80cf7697)

Co-authored-by: Masayuki Moriyama <masayuki.moriyama@miraclelinux.com>
-rw-r--r-- Lib/test/test_codecencodings_iso2022.py 46
-rw-r--r-- Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst 1
-rw-r--r-- Modules/cjkcodecs/_codecs_iso2022.c 9
3 files changed, 53 insertions, 3 deletions
diff --git a/Lib/test/test_codecencodings_iso2022.py b/Lib/test/test_codecencodings_iso2022.py
index 00ea1c39dd6..027dbecc613 100644
--- a/Lib/test/test_codecencodings_iso2022.py
+++ b/Lib/test/test_codecencodings_iso2022.py
@@ -24,6 +24,52 @@ class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase):
(b'ab\x1BNdef', 'replace', 'abdef'),
)
+class Test_ISO2022_JP3(multibytecodec_support.TestBase, unittest.TestCase):
+ encoding = 'iso2022_jp_3'
+ tstring = multibytecodec_support.load_teststring('iso2022_jp')
+ codectests = COMMON_CODEC_TESTS + (
+ (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
+ (b'\x1B$(O\x2E\x23\x1B(B', 'strict', '\u3402' ),
+ (b'\x1B$(O\x2E\x22\x1B(B', 'strict', '\U0002000B' ),
+ (b'\x1B$(O\x24\x77\x1B(B', 'strict', '\u304B\u309A'),
+ (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ),
+ (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ),
+ ('\u3402', 'strict', b'\x1B$(O\x2E\x23\x1B(B'),
+ ('\U0002000B', 'strict', b'\x1B$(O\x2E\x22\x1B(B'),
+ ('\u304B\u309A', 'strict', b'\x1B$(O\x24\x77\x1B(B'),
+ ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'),
+ ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'),
+ (b'ab\x1B$(O\x2E\x21\x1B(Bdef', 'replace', 'ab\uFFFDdef'),
+ ('ab\u4FF1def', 'replace', b'ab?def'),
+ )
+ xmlcharnametest = (
+ '\xAB\u211C\xBB = \u2329\u1234\u232A',
+ b'\x1B$(O\x29\x28\x1B(B&real;\x1B$(O\x29\x32\x1B(B = &lang;&#4660;&rang;'
+ )
+
+class Test_ISO2022_JP2004(multibytecodec_support.TestBase, unittest.TestCase):
+ encoding = 'iso2022_jp_2004'
+ tstring = multibytecodec_support.load_teststring('iso2022_jp')
+ codectests = COMMON_CODEC_TESTS + (
+ (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'),
+ (b'\x1B$(Q\x2E\x23\x1B(B', 'strict', '\u3402' ),
+ (b'\x1B$(Q\x2E\x22\x1B(B', 'strict', '\U0002000B' ),
+ (b'\x1B$(Q\x24\x77\x1B(B', 'strict', '\u304B\u309A'),
+ (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ),
+ (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ),
+ ('\u3402', 'strict', b'\x1B$(Q\x2E\x23\x1B(B'),
+ ('\U0002000B', 'strict', b'\x1B$(Q\x2E\x22\x1B(B'),
+ ('\u304B\u309A', 'strict', b'\x1B$(Q\x24\x77\x1B(B'),
+ ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'),
+ ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'),
+ (b'ab\x1B$(Q\x2E\x21\x1B(Bdef', 'replace', 'ab\u4FF1def'),
+ ('ab\u4FF1def', 'replace', b'ab\x1B$(Q\x2E\x21\x1B(Bdef'),
+ )
+ xmlcharnametest = (
+ '\xAB\u211C\xBB = \u2329\u1234\u232A',
+ b'\x1B$(Q\x29\x28\x1B(B&real;\x1B$(Q\x29\x32\x1B(B = &lang;&#4660;&rang;'
+ )
+
class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase):
encoding = 'iso2022_kr'
tstring = multibytecodec_support.load_teststring('iso2022_kr')
diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst
new file mode 100644
index 00000000000..268a3d310f2
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst
@@ -0,0 +1 @@
+Fix a bug where ``iso2022_jp_3`` and ``iso2022_jp_2004`` codecs read out of bounds
diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c
index 7394cf67e0e..6d906ecdd39 100644
--- a/Modules/cjkcodecs/_codecs_iso2022.c
+++ b/Modules/cjkcodecs/_codecs_iso2022.c
@@ -181,8 +181,9 @@ ENCODER(iso2022)
encoded = MAP_UNMAPPABLE;
for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) {
+ Py_UCS4 buf[2] = {c, 0};
Py_ssize_t length = 1;
- encoded = dsg->encoder(&c, &length);
+ encoded = dsg->encoder(buf, &length);
if (encoded == MAP_MULTIPLE_AVAIL) {
/* this implementation won't work for pair
* of non-bmp characters. */
@@ -191,9 +192,11 @@ ENCODER(iso2022)
return MBERR_TOOFEW;
length = -1;
}
- else
+ else {
+ buf[1] = INCHAR2;
length = 2;
- encoded = dsg->encoder(&c, &length);
+ }
+ encoded = dsg->encoder(buf, &length);
if (encoded != MAP_UNMAPPABLE) {
insize = length;
break;