diff --git a/icu4c/source/data/mappings/gb18030.ucm b/icu4c/source/data/mappings/gb18030.ucm index 62b57a93fc24..d317540fb821 100644 --- a/icu4c/source/data/mappings/gb18030.ucm +++ b/icu4c/source/data/mappings/gb18030.ucm @@ -3,7 +3,7 @@ # Copyright (C) 2000-2012, International Business Machines Corporation and others. # All Rights Reserved. -# ICU codepage data for GB 18030 +# ICU codepage data for GB 18030-2022 "gb18030" "AXXXX" @@ -51,7 +51,7 @@ # GB 18030 BMP mappings that are not handled algorithmically are # generated using gbmake4 and gbtoucm tools. Please see charset/source/gb18030/gb18030.html -# or http://source.icu-project.org/repos/icu/data/trunk/charset/source/gb18030/gb18030.html +# or https://htmlpreview.github.io/?https://github.com/unicode-org/icu-data/blob/main/charset/source/gb18030/gb18030.html # for more information. CHARMAP @@ -28077,6 +28077,22 @@ CHARMAP \xFD\x99 |0 \xFD\x9A |0 \xFD\x9B |0 + \xFE\x59 |0 + \x82\x35\x90\x37 |3 + \xFE\x61 |0 + \x82\x35\x90\x38 |3 + \xFE\x66 |0 + \x82\x35\x90\x39 |3 + \xFE\x67 |0 + \x82\x35\x91\x30 |3 + \xFE\x6D |0 + \x82\x35\x91\x31 |3 + \xFE\x7E |0 + \x82\x35\x91\x32 |3 + \xFE\x90 |0 + \x82\x35\x91\x33 |3 + \xFE\xA0 |0 + \x82\x35\x91\x34 |3 \xAA\xA1 |0 \xAA\xA2 |0 \xAA\xA3 |0 @@ -30010,16 +30026,16 @@ CHARMAP \xA6\xBE |0 \xA6\xBF |0 \xA6\xC0 |0 - \xA6\xD9 |0 - \xA6\xDA |0 - \xA6\xDB |0 - \xA6\xDC |0 - \xA6\xDD |0 - \xA6\xDE |0 - \xA6\xDF |0 - \xA6\xEC |0 - \xA6\xED |0 - \xA6\xF3 |0 + \xA6\xD9 |1 + \xA6\xDA |1 + \xA6\xDB |1 + \xA6\xDC |1 + \xA6\xDD |1 + \xA6\xDE |1 + \xA6\xDF |1 + \xA6\xEC |1 + \xA6\xED |1 + \xA6\xF3 |1 \xA6\xF6 |0 \xA6\xF7 |0 \xA6\xF8 |0 @@ -30155,7 +30171,7 @@ CHARMAP \x83\x36\xC9\x37 |0 \x83\x36\xC9\x38 |0 \x83\x36\xC9\x39 |0 - \xFE\x59 |0 + \xFE\x59 |1 \x83\x36\xCA\x30 |0 \x83\x36\xCA\x31 |0 \x83\x36\xCA\x32 |0 @@ -30163,19 +30179,19 @@ CHARMAP \x83\x36\xCA\x34 |0 \x83\x36\xCA\x35 |0 \x83\x36\xCA\x36 |0 - \xFE\x61 |0 + \xFE\x61 |1 \x83\x36\xCA\x37 |0 \x83\x36\xCA\x38 |0 \x83\x36\xCA\x39 |0 \x83\x36\xCB\x30 |0 - \xFE\x66 |0 - \xFE\x67 |0 + \xFE\x66 |1 + \xFE\x67 |1 \x83\x36\xCB\x31 |0 \x83\x36\xCB\x32 |0 \x83\x36\xCB\x33 |0 \x83\x36\xCB\x34 |0 \xFE\x6C |0 - \xFE\x6D |0 + \xFE\x6D |1 \x83\x36\xCB\x35 |0 \x83\x36\xCB\x36 |0 \x83\x36\xCB\x37 |0 @@ -30192,7 +30208,7 @@ CHARMAP \x83\x36\xCC\x37 |0 \x83\x36\xCC\x38 |0 \x83\x36\xCC\x39 |0 - \xFE\x7E |0 + \xFE\x7E |1 \x83\x36\xCD\x30 |0 \x83\x36\xCD\x31 |0 \x83\x36\xCD\x32 |0 @@ -30209,7 +30225,7 @@ CHARMAP \x83\x36\xCE\x33 |0 \x83\x36\xCE\x34 |0 \x83\x36\xCE\x35 |0 - \xFE\x90 |0 + \xFE\x90 |1 \xFE\x91 |0 \x83\x36\xCE\x36 |0 \x83\x36\xCE\x37 |0 @@ -30225,7 +30241,7 @@ CHARMAP \x83\x36\xCF\x37 |0 \x83\x36\xCF\x38 |0 \x83\x36\xCF\x39 |0 - \xFE\xA0 |0 + \xFE\xA0 |1 \xFD\x9C |0 \x84\x30\x85\x35 |0 \x84\x30\x85\x36 |0 @@ -30480,6 +30496,26 @@ CHARMAP \xFE\x4D |0 \xFE\x4E |0 \xFE\x4F |0 + \xA6\xD9 |0 + \x84\x31\x82\x36 |3 + \xA6\xDB |0 + \x84\x31\x82\x37 |3 + \xA6\xDA |0 + \x84\x31\x82\x38 |3 + \xA6\xDC |0 + \x84\x31\x82\x39 |3 + \xA6\xDD |0 + \x84\x31\x83\x30 |3 + \xA6\xDE |0 + \x84\x31\x83\x31 |3 + \xA6\xDF |0 + \x84\x31\x83\x32 |3 + \xA6\xEC |0 + \x84\x31\x83\x33 |3 + \xA6\xED |0 + \x84\x31\x83\x34 |3 + \xA6\xF3 |0 + \x84\x31\x83\x35 |3 \xA9\x55 |0 \xA6\xF2 |0 \x84\x31\x85\x38 |0 diff --git a/icu4c/source/test/testdata/conversion.txt b/icu4c/source/test/testdata/conversion.txt index bc8402fe4a26..290925ef31fa 100644 --- a/icu4c/source/test/testdata/conversion.txt +++ b/icu4c/source/test/testdata/conversion.txt @@ -108,10 +108,10 @@ conversion:table(nofallback) { :intvector{ 0,1,4,4,4,4,5,5,5,5,6,7,7,7,7,8,9,11 }, :int{1}, :int{0}, "", "&C", :bin{""} } - { + { // gb18030-2022 changes mapping for 0xFE90 "gb18030", :bin{ 618130fc318130fc8181303c3e813cfc817afe90a8bc }, - "a\u05ed\\x810\u9f07\\x810<>\\x81<\u9f07z\ue854\u1e3f", + "a\u05ed\\x810\u9f07\\x810<>\\x81<\u9f07z\u9fba\u1e3f", :intvector{ 0,1,5,5,5,5,6,7,9,9,9,9,10,11,12,13,13,13,13,14,15,17,18,20 }, :int{1}, :int{0}, "", "&C", :bin{""} } @@ -826,6 +826,27 @@ conversion:table(nofallback) { :intvector{ 0, 4, 8, 12 }, :int{1}, :int{0}, "", "?", :bin{""} } + { // gb18030->U 2005 vs 2022 part 1 (gb18030 2-byte) + "gb18030", + :bin{ A6D9 A6DA A6DB A6DC A6DF A6EC A6ED A6F3 FE59 FE61 FE66 FE67 FE6D FE7E FE90 FEA0 }, + "\uFE10\uFE12\uFE11\uFE13\uFE16\uFE17\uFE18\uFE19\u9FB4\u9FB5\u9FB6\u9FB7\u9FB8\u9FB9\u9FBA\u9FBB", // -2005: "\uE78D\uE78E\uE78F\uE790\uE793\uE794\uE795\uE796\uE81E\uE826\uE82B\uE82C\uE832\uE843\uE854\uE864" + :intvector{ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30 }, + :int{1}, :int{0}, "", "?", :bin{""} + } + { // gb18030->U 2005 vs 2022 part 2 (gb18030 4-byte) + "gb18030", + :bin{ 82359037 82359038 82359039 82359130 82359131 82359132 82359133 82359134 84318236 84318239 84318332 84318335 }, + "\u9FB4\u9FB5\u9FB6\u9FB7\u9FB8\u9FB9\u9FBA\u9FBB\uFE10\uFE13\uFE16\uFE19", // unchanged from 2005 mapping + :intvector{ 0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44 }, + :int{1}, :int{0}, "", "?", :bin{""} + } + { // gb18030->U 2005 vs 2022 part 3 (gb18030 4-byte), non-changing mappings next to or in linear ranges partially overridden by new explicit maps + "gb18030", + :bin{ 82358F33823590368235913584318235843183368431843684318537 }, + "\u9FA6\u9FB3\u9FBC\uFE0F\uFE1A\uFE24\uFE2F", + :intvector{ 0, 4, 8, 12, 16, 20, 24 }, + :int{1}, :int{0}, "", "?", :bin{""} + } { "x11-compound-text", :bin{ 1b242944b5ac1b2d41a5e31b2d43d5f51b2d4dd01b2d41411b2d43bc1b2d42ff1b2d54df1b2d44c0b31b2d46b41b2d47b01b2d48e01b2d4ca1 }, @@ -1817,13 +1838,34 @@ conversion:table(nofallback) { :intvector{}, :int{1}, :int{0}, "", "0", "" } - { + { // gb18030-2022 changes mappings for 0xA6DC,0xA6DB "gb18030", - "\U00020087\ue790\ue78f\u1e3f", + "\U00020087\ufe13\ufe11\u1e3f", :bin{ 95329031a6dca6dba8bc }, :intvector{ 0,0,0,0,2,2,3,3,4,4 }, :int{1}, :int{0}, "", "0", "" } + { // U->gb18030 2005 vs 2022 part 1 (gb18030 2-byte) + "gb18030", + "\uFE10\uFE12\uFE11\uFE13\uFE16\uFE17\uFE18\uFE19\u9FB4\u9FB5\u9FB6\u9FB7\u9FB8\u9FB9\u9FBA\u9FBB", // -2005: "\uE78D\uE78E\uE78F\uE790\uE793\uE794\uE795\uE796\uE81E\uE826\uE82B\uE82C\uE832\uE843\uE854\uE864" + :bin{ A6D9A6DAA6DBA6DCA6DFA6ECA6EDA6F3FE59FE61FE66FE67FE6DFE7EFE90FEA0 }, + :intvector{ 0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15 }, + :int{1}, :int{0}, "", "0", "" + } + { // U->gb18030 2005 vs 2022 part 2 (gb18030 fallback mappings from Unicode PUA) + "gb18030", + "\uE78D\uE793\uE794\uE795\uE796\uE81E\uE826\uE82B\uE82C\uE832\uE843\uE854\uE864", + :bin{ A6D9A6DFA6ECA6EDA6F3FE59FE61FE66FE67FE6DFE7EFE90FEA0 }, + :intvector{ 0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12 }, + :int{1}, :int{0}, "", "0", "" + } + { // U->gb18030 2005 vs 2022 part 3 (gb18030 4-byte), non-changing mappings next to or in linear ranges partially overridden by new explicit maps + "gb18030", + "\u9FA6\u9FB3\u9FBC\uFE0F\uFE1A\uFE24\uFE2F", + :bin{ 82358F33823590368235913584318235843183368431843684318537 }, + :intvector{ 0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,6,6,6,6 }, + :int{1}, :int{0}, "", "0", "" + } { "UTF-7", "\u00a3I\u00a3\u00a4", diff --git a/icu4j/main/shared/data/icudata.jar b/icu4j/main/shared/data/icudata.jar index d2989f6501e3..a2e0daf50910 100644 --- a/icu4j/main/shared/data/icudata.jar +++ b/icu4j/main/shared/data/icudata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:149ffc15b2c8445d90afefbbdaffdd446a6d78181e1693d3da178a54811bce8f -size 14330624 +oid sha256:49f886698feb647c2dcfb10d6f872601280bf8783483456a706dd45549308875 +size 14330800 diff --git a/icu4j/main/shared/data/icutzdata.jar b/icu4j/main/shared/data/icutzdata.jar index 89153f23c85f..04f3b213463f 100644 --- a/icu4j/main/shared/data/icutzdata.jar +++ b/icu4j/main/shared/data/icutzdata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5d47b16bcbb1d55cde6c82234159dd11daeae9fbc7a639ffa837b7abf43afc17 +oid sha256:2903ecb7a300f5d52e7636addfe2756cbdb00262835a787379d7aa6456c0349a size 94829 diff --git a/icu4j/main/shared/data/testdata.jar b/icu4j/main/shared/data/testdata.jar index 148a56dc59a5..f85b1e6fd6ce 100644 --- a/icu4j/main/shared/data/testdata.jar +++ b/icu4j/main/shared/data/testdata.jar @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6478e18e5392984bcb40946bf4ca076e0f9fd04fc18fabf47b8b01f4295b740d -size 831605 +oid sha256:1cda304595c1fa919460b28de344a541938d5ed91af9f3089ccff9ce2effd440 +size 831983 diff --git a/icu4j/maven-build/maven-icu4j-datafiles/src/main/resources/com/ibm/icu/impl/data/icudt73b/gb18030.cnv b/icu4j/maven-build/maven-icu4j-datafiles/src/main/resources/com/ibm/icu/impl/data/icudt73b/gb18030.cnv index d27d6fefb9b8..ecd0888e40ed 100644 Binary files a/icu4j/maven-build/maven-icu4j-datafiles/src/main/resources/com/ibm/icu/impl/data/icudt73b/gb18030.cnv and b/icu4j/maven-build/maven-icu4j-datafiles/src/main/resources/com/ibm/icu/impl/data/icudt73b/gb18030.cnv differ diff --git a/icu4j/maven-build/maven-icu4j-test-datafiles/src/main/resources/com/ibm/icu/dev/data/testdata/conversion.res b/icu4j/maven-build/maven-icu4j-test-datafiles/src/main/resources/com/ibm/icu/dev/data/testdata/conversion.res index fa88f8faa09a..a61b4b8d676c 100644 Binary files a/icu4j/maven-build/maven-icu4j-test-datafiles/src/main/resources/com/ibm/icu/dev/data/testdata/conversion.res and b/icu4j/maven-build/maven-icu4j-test-datafiles/src/main/resources/com/ibm/icu/dev/data/testdata/conversion.res differ