diff options
author | Łukasz Langa <lukasz@langa.pl> | 2023-11-06 15:46:20 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-11-06 15:46:20 +0100 |
commit | 6c2f34fa77f884bd79801a9ab8a117cab7d9c7ed (patch) | |
tree | 871156d13be2a398aa9347796def842c5bc51f6d | |
parent | [3.10] gh-109991: Update Windows build to use OpenSSL 1.1.1w (GH-110090) (diff) | |
download | cpython-6c2f34fa77f884bd79801a9ab8a117cab7d9c7ed.tar.gz cpython-6c2f34fa77f884bd79801a9ab8a117cab7d9c7ed.tar.bz2 cpython-6c2f34fa77f884bd79801a9ab8a117cab7d9c7ed.zip |
[3.10] gh-101180: Fix a bug where iso2022_jp_3 and iso2022_jp_2004 codecs read out of bounds (gh-111695) (gh-111779)
(cherry picked from commit c8faa3568afd255708096f6aa8df0afa80cf7697)
Co-authored-by: Masayuki Moriyama <masayuki.moriyama@miraclelinux.com>
-rw-r--r-- | Lib/test/test_codecencodings_iso2022.py | 46 | ||||
-rw-r--r-- | Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst | 1 | ||||
-rw-r--r-- | Modules/cjkcodecs/_codecs_iso2022.c | 9 |
3 files changed, 53 insertions, 3 deletions
diff --git a/Lib/test/test_codecencodings_iso2022.py b/Lib/test/test_codecencodings_iso2022.py index 00ea1c39dd6..027dbecc613 100644 --- a/Lib/test/test_codecencodings_iso2022.py +++ b/Lib/test/test_codecencodings_iso2022.py @@ -24,6 +24,52 @@ class Test_ISO2022_JP2(multibytecodec_support.TestBase, unittest.TestCase): (b'ab\x1BNdef', 'replace', 'abdef'), ) +class Test_ISO2022_JP3(multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'iso2022_jp_3' + tstring = multibytecodec_support.load_teststring('iso2022_jp') + codectests = COMMON_CODEC_TESTS + ( + (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), + (b'\x1B$(O\x2E\x23\x1B(B', 'strict', '\u3402' ), + (b'\x1B$(O\x2E\x22\x1B(B', 'strict', '\U0002000B' ), + (b'\x1B$(O\x24\x77\x1B(B', 'strict', '\u304B\u309A'), + (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ), + (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ), + ('\u3402', 'strict', b'\x1B$(O\x2E\x23\x1B(B'), + ('\U0002000B', 'strict', b'\x1B$(O\x2E\x22\x1B(B'), + ('\u304B\u309A', 'strict', b'\x1B$(O\x24\x77\x1B(B'), + ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'), + ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'), + (b'ab\x1B$(O\x2E\x21\x1B(Bdef', 'replace', 'ab\uFFFDdef'), + ('ab\u4FF1def', 'replace', b'ab?def'), + ) + xmlcharnametest = ( + '\xAB\u211C\xBB = \u2329\u1234\u232A', + b'\x1B$(O\x29\x28\x1B(Bℜ\x1B$(O\x29\x32\x1B(B = ⟨ሴ⟩' + ) + +class Test_ISO2022_JP2004(multibytecodec_support.TestBase, unittest.TestCase): + encoding = 'iso2022_jp_2004' + tstring = multibytecodec_support.load_teststring('iso2022_jp') + codectests = COMMON_CODEC_TESTS + ( + (b'ab\x1BNdef', 'replace', 'ab\x1BNdef'), + (b'\x1B$(Q\x2E\x23\x1B(B', 'strict', '\u3402' ), + (b'\x1B$(Q\x2E\x22\x1B(B', 'strict', '\U0002000B' ), + (b'\x1B$(Q\x24\x77\x1B(B', 'strict', '\u304B\u309A'), + (b'\x1B$(P\x21\x22\x1B(B', 'strict', '\u4E02' ), + (b'\x1B$(P\x7E\x76\x1B(B', 'strict', '\U0002A6B2' ), + ('\u3402', 'strict', b'\x1B$(Q\x2E\x23\x1B(B'), + ('\U0002000B', 'strict', b'\x1B$(Q\x2E\x22\x1B(B'), + ('\u304B\u309A', 'strict', b'\x1B$(Q\x24\x77\x1B(B'), + ('\u4E02', 'strict', b'\x1B$(P\x21\x22\x1B(B'), + ('\U0002A6B2', 'strict', b'\x1B$(P\x7E\x76\x1B(B'), + (b'ab\x1B$(Q\x2E\x21\x1B(Bdef', 'replace', 'ab\u4FF1def'), + ('ab\u4FF1def', 'replace', b'ab\x1B$(Q\x2E\x21\x1B(Bdef'), + ) + xmlcharnametest = ( + '\xAB\u211C\xBB = \u2329\u1234\u232A', + b'\x1B$(Q\x29\x28\x1B(Bℜ\x1B$(Q\x29\x32\x1B(B = ⟨ሴ⟩' + ) + class Test_ISO2022_KR(multibytecodec_support.TestBase, unittest.TestCase): encoding = 'iso2022_kr' tstring = multibytecodec_support.load_teststring('iso2022_kr') diff --git a/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst new file mode 100644 index 00000000000..268a3d310f2 --- /dev/null +++ b/Misc/NEWS.d/next/Core and Builtins/2023-10-27-19-38-33.gh-issue-102388.vd5YUZ.rst @@ -0,0 +1 @@ +Fix a bug where ``iso2022_jp_3`` and ``iso2022_jp_2004`` codecs read out of bounds diff --git a/Modules/cjkcodecs/_codecs_iso2022.c b/Modules/cjkcodecs/_codecs_iso2022.c index 7394cf67e0e..6d906ecdd39 100644 --- a/Modules/cjkcodecs/_codecs_iso2022.c +++ b/Modules/cjkcodecs/_codecs_iso2022.c @@ -181,8 +181,9 @@ ENCODER(iso2022) encoded = MAP_UNMAPPABLE; for (dsg = CONFIG_DESIGNATIONS; dsg->mark; dsg++) { + Py_UCS4 buf[2] = {c, 0}; Py_ssize_t length = 1; - encoded = dsg->encoder(&c, &length); + encoded = dsg->encoder(buf, &length); if (encoded == MAP_MULTIPLE_AVAIL) { /* this implementation won't work for pair * of non-bmp characters. */ @@ -191,9 +192,11 @@ ENCODER(iso2022) return MBERR_TOOFEW; length = -1; } - else + else { + buf[1] = INCHAR2; length = 2; - encoded = dsg->encoder(&c, &length); + } + encoded = dsg->encoder(buf, &length); if (encoded != MAP_UNMAPPABLE) { insize = length; break; |