File tree Expand file tree Collapse file tree 1 file changed +16
-0
lines changed Expand file tree Collapse file tree 1 file changed +16
-0
lines changed Original file line number Diff line number Diff line change @@ -501,19 +501,35 @@ valid_utf8(const unsigned char* s)
501
501
/* single-byte code */
502
502
return 1 ;
503
503
} else if (* s < 0xE0 ) {
504
+ /* \xC2\x80-\xDF\xBF -- 0080-07FF */
504
505
if (* s < 0xC2 ) {
506
+ /* invalid sequence
507
+ \x80-\xBF -- continuation byte
508
+ \xC0-\xC1 -- fake 0000-007F */
505
509
return 0 ;
506
510
}
507
511
expected = 1 ;
508
512
} else if (* s < 0xF0 ) {
513
+ /* \xE0\xA0\x80-\xEF\xBF\xBF -- 0800-FFFF */
509
514
if (* s == 0xE0 && * (s + 1 ) < 0xA0 ) {
515
+ /* invalid sequence
516
+ \xE0\x80\x80-\xE0\x9F\xBF -- fake 0000-07FF */
510
517
return 0 ;
511
518
} else if (* s == 0xED && * (s + 1 ) >= 0xA0 ) {
519
+ /* Decoding UTF-8 sequences in range \xED\xA0\x80-\xED\xBF\xBF
520
+ will result in surrogates in range D800-DFFF. Surrogates are
521
+ not valid UTF-8 so they are rejected.
522
+ See https://www.unicode.org/versions/Unicode5.2.0/ch03.pdf
523
+ (table 3-7) and http://www.rfc-editor.org/rfc/rfc3629.txt */
512
524
return 0 ;
513
525
}
514
526
expected = 2 ;
515
527
} else if (* s < 0xF5 ) {
528
+ /* \xF0\x90\x80\x80-\xF4\x8F\xBF\xBF -- 10000-10FFFF */
516
529
if (* (s + 1 ) < 0x90 ? * s == 0xF0 : * s == 0xF4 ) {
530
+ /* invalid sequence -- one of:
531
+ \xF0\x80\x80\x80-\xF0\x8F\xBF\xBF -- fake 0000-FFFF
532
+ \xF4\x90\x80\x80- -- 110000- overflow */
517
533
return 0 ;
518
534
}
519
535
expected = 3 ;
You can’t perform that action at this time.
0 commit comments