@@ -49,14 +49,24 @@ gettyperecord(Py_UNICODE code)
4949 return & _PyUnicode_TypeRecords [index ];
5050}
5151
52- /* Returns 1 for Unicode characters having the category 'Zl' or type
53- 'B', 0 otherwise. */
52+ /* Returns 1 for Unicode characters having the category 'Zl', 'Zp' or
53+ type 'B', 0 otherwise. */
5454
55- int _PyUnicode_IsLinebreak (Py_UNICODE ch )
55+ int _PyUnicode_IsLinebreak (register const Py_UNICODE ch )
5656{
57- const _PyUnicode_TypeRecord * ctype = gettyperecord (ch );
58-
59- return (ctype -> flags & LINEBREAK_MASK ) != 0 ;
57+ switch (ch ) { /* compare ch directly against a fixed list of code points; this version does not call gettyperecord() */
58+ case 0x000A : /* LINE FEED */
59+ case 0x000D : /* CARRIAGE RETURN */
60+ case 0x001C : /* FILE SEPARATOR */
61+ case 0x001D : /* GROUP SEPARATOR */
62+ case 0x001E : /* RECORD SEPARATOR */
63+ case 0x0085 : /* NEXT LINE */
64+ case 0x2028 : /* LINE SEPARATOR */
65+ case 0x2029 : /* PARAGRAPH SEPARATOR */
66+ return 1 ; /* all case labels above share this return: ch is treated as a line break */
67+ default :
68+ return 0 ; /* every code point not listed above is not a line break */
69+ }
6070}
6171
6272/* Returns the titlecase Unicode characters corresponding to ch or just
@@ -327,11 +337,43 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)
327337/* Returns 1 for Unicode characters having the bidirectional type
328338 'WS', 'B' or 'S' or the category 'Zs', 0 otherwise. */
329339
330- int _PyUnicode_IsWhitespace (Py_UNICODE ch )
340+ int _PyUnicode_IsWhitespace (register const Py_UNICODE ch )
331341{
332- const _PyUnicode_TypeRecord * ctype = gettyperecord (ch );
333-
334- return (ctype -> flags & SPACE_MASK ) != 0 ;
342+ switch (ch ) { /* compare ch directly against a fixed list of code points; this version does not call gettyperecord() */
343+ case 0x0009 : /* HORIZONTAL TABULATION */
344+ case 0x000A : /* LINE FEED */
345+ case 0x000B : /* VERTICAL TABULATION */
346+ case 0x000C : /* FORM FEED */
347+ case 0x000D : /* CARRIAGE RETURN */
348+ case 0x001C : /* FILE SEPARATOR */
349+ case 0x001D : /* GROUP SEPARATOR */
350+ case 0x001E : /* RECORD SEPARATOR */
351+ case 0x001F : /* UNIT SEPARATOR */
352+ case 0x0020 : /* SPACE */
353+ case 0x0085 : /* NEXT LINE */
354+ case 0x00A0 : /* NO-BREAK SPACE */
355+ case 0x1680 : /* OGHAM SPACE MARK */ /* NOTE(review): U+180E MONGOLIAN VOWEL SEPARATOR is not listed; it was category Zs in Unicode 4.0-6.2 -- confirm against the UCD version this file targets */
356+ case 0x2000 : /* EN QUAD */
357+ case 0x2001 : /* EM QUAD */
358+ case 0x2002 : /* EN SPACE */
359+ case 0x2003 : /* EM SPACE */
360+ case 0x2004 : /* THREE-PER-EM SPACE */
361+ case 0x2005 : /* FOUR-PER-EM SPACE */
362+ case 0x2006 : /* SIX-PER-EM SPACE */
363+ case 0x2007 : /* FIGURE SPACE */
364+ case 0x2008 : /* PUNCTUATION SPACE */
365+ case 0x2009 : /* THIN SPACE */
366+ case 0x200A : /* HAIR SPACE */
367+ case 0x200B : /* ZERO WIDTH SPACE */ /* NOTE(review): general category is Cf, not Zs, since Unicode 4.0.1 -- confirm this entry matches the UCD version this file targets */
368+ case 0x2028 : /* LINE SEPARATOR */
369+ case 0x2029 : /* PARAGRAPH SEPARATOR */
370+ case 0x202F : /* NARROW NO-BREAK SPACE */
371+ case 0x205F : /* MEDIUM MATHEMATICAL SPACE */
372+ case 0x3000 : /* IDEOGRAPHIC SPACE */
373+ return 1 ; /* all case labels above share this return: ch is treated as whitespace */
374+ default :
375+ return 0 ; /* every code point not listed above is not whitespace */
376+ }
335377}
336378
337379/* Returns 1 for Unicode characters having the category 'Ll', 0
0 commit comments