Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 5bc429a

Browse files
committed
Extend collection of Unicode combining characters to beyond the BMP
The former limit was perhaps a carryover from an older hand-coded table. Since commit bab9821 we have enough space in mbinterval to store larger codepoints, so collect all combining characters. Discussion: https://www.postgresql.org/message-id/49ad1fa0-174e-c901-b14c-c484b60907f1%40enterprisedb.com
1 parent bab9821 commit 5bc429a

File tree

2 files changed

+102
-2
lines changed

2 files changed

+102
-2
lines changed

src/common/unicode/generate-unicode_combining_table.pl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@
2525
my @fields = split ';', $line;
2626
$codepoint = hex $fields[0];
2727

28-
next if $codepoint > 0xFFFF;
29-
3028
if ($fields[2] eq 'Me' || $fields[2] eq 'Mn')
3129
{
3230
# combining character, save for start of range

src/include/common/unicode_combining_table.h

Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,4 +193,106 @@ static const struct mbinterval combining[] = {
193193
{0xFB1E, 0xFB1E},
194194
{0xFE00, 0xFE0F},
195195
{0xFE20, 0xFE2F},
196+
{0x101FD, 0x101FD},
197+
{0x102E0, 0x102E0},
198+
{0x10376, 0x1037A},
199+
{0x10A01, 0x10A0F},
200+
{0x10A38, 0x10A3F},
201+
{0x10AE5, 0x10AE6},
202+
{0x10D24, 0x10D27},
203+
{0x10EAB, 0x10EAC},
204+
{0x10F46, 0x10F50},
205+
{0x11001, 0x11001},
206+
{0x11038, 0x11046},
207+
{0x1107F, 0x11081},
208+
{0x110B3, 0x110B6},
209+
{0x110B9, 0x110BA},
210+
{0x11100, 0x11102},
211+
{0x11127, 0x1112B},
212+
{0x1112D, 0x11134},
213+
{0x11173, 0x11173},
214+
{0x11180, 0x11181},
215+
{0x111B6, 0x111BE},
216+
{0x111C9, 0x111CC},
217+
{0x111CF, 0x111CF},
218+
{0x1122F, 0x11231},
219+
{0x11234, 0x11234},
220+
{0x11236, 0x11237},
221+
{0x1123E, 0x1123E},
222+
{0x112DF, 0x112DF},
223+
{0x112E3, 0x112EA},
224+
{0x11300, 0x11301},
225+
{0x1133B, 0x1133C},
226+
{0x11340, 0x11340},
227+
{0x11366, 0x11374},
228+
{0x11438, 0x1143F},
229+
{0x11442, 0x11444},
230+
{0x11446, 0x11446},
231+
{0x1145E, 0x1145E},
232+
{0x114B3, 0x114B8},
233+
{0x114BA, 0x114BA},
234+
{0x114BF, 0x114C0},
235+
{0x114C2, 0x114C3},
236+
{0x115B2, 0x115B5},
237+
{0x115BC, 0x115BD},
238+
{0x115BF, 0x115C0},
239+
{0x115DC, 0x115DD},
240+
{0x11633, 0x1163A},
241+
{0x1163D, 0x1163D},
242+
{0x1163F, 0x11640},
243+
{0x116AB, 0x116AB},
244+
{0x116AD, 0x116AD},
245+
{0x116B0, 0x116B5},
246+
{0x116B7, 0x116B7},
247+
{0x1171D, 0x1171F},
248+
{0x11722, 0x11725},
249+
{0x11727, 0x1172B},
250+
{0x1182F, 0x11837},
251+
{0x11839, 0x1183A},
252+
{0x1193B, 0x1193C},
253+
{0x1193E, 0x1193E},
254+
{0x11943, 0x11943},
255+
{0x119D4, 0x119DB},
256+
{0x119E0, 0x119E0},
257+
{0x11A01, 0x11A0A},
258+
{0x11A33, 0x11A38},
259+
{0x11A3B, 0x11A3E},
260+
{0x11A47, 0x11A47},
261+
{0x11A51, 0x11A56},
262+
{0x11A59, 0x11A5B},
263+
{0x11A8A, 0x11A96},
264+
{0x11A98, 0x11A99},
265+
{0x11C30, 0x11C3D},
266+
{0x11C3F, 0x11C3F},
267+
{0x11C92, 0x11CA7},
268+
{0x11CAA, 0x11CB0},
269+
{0x11CB2, 0x11CB3},
270+
{0x11CB5, 0x11CB6},
271+
{0x11D31, 0x11D45},
272+
{0x11D47, 0x11D47},
273+
{0x11D90, 0x11D91},
274+
{0x11D95, 0x11D95},
275+
{0x11D97, 0x11D97},
276+
{0x11EF3, 0x11EF4},
277+
{0x16AF0, 0x16AF4},
278+
{0x16B30, 0x16B36},
279+
{0x16F4F, 0x16F4F},
280+
{0x16F8F, 0x16F92},
281+
{0x16FE4, 0x16FE4},
282+
{0x1BC9D, 0x1BC9E},
283+
{0x1D167, 0x1D169},
284+
{0x1D17B, 0x1D182},
285+
{0x1D185, 0x1D18B},
286+
{0x1D1AA, 0x1D1AD},
287+
{0x1D242, 0x1D244},
288+
{0x1DA00, 0x1DA36},
289+
{0x1DA3B, 0x1DA6C},
290+
{0x1DA75, 0x1DA75},
291+
{0x1DA84, 0x1DA84},
292+
{0x1DA9B, 0x1E02A},
293+
{0x1E130, 0x1E136},
294+
{0x1E2EC, 0x1E2EF},
295+
{0x1E8D0, 0x1E8D6},
296+
{0x1E944, 0x1E94A},
297+
{0xE0100, 0xE01EF},
196298
};

0 commit comments

Comments
 (0)