@@ -130,15 +130,24 @@ static const unsigned char table_a2b_hqx[256] = {
/* 64-character output alphabet for the binary-to-ASCII direction of the
** hqx codec.  The string holds exactly 64 characters plus the implicit
** terminating NUL.  NOTE(review): presumably indexed by a 6-bit value;
** the code that performs the lookup is outside this view -- confirm.
*/
static const unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
132132
/* ASCII-to-binary lookup table for base64 decoding, indexed by the raw
** input byte (0..255).
**
** Entries 0..63 are the 6-bit value of a base64 alphabet character.
** Every other entry is written as -1, which converts to
** (unsigned char)-1 (255 with 8-bit chars) because the element type is
** unsigned; any entry >= 64 therefore marks a byte that is not part of
** the alphabet.  The table covers all 256 byte values so that bytes
** above 0x7f can be looked up directly without masking.
**
** The pad character '=' deliberately maps to 0 rather than to the
** invalid marker (see the "Note PAD->0" row).
*/
static const unsigned char table_a2b_base64[] = {
    /* 0x00 - 0x7f: 7-bit ASCII */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1,

    /* 0x80 - 0xff: never valid base64 input */
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
};

/* Padding character of the base64 encoding. */
#define BASE64_PAD '='
@@ -413,32 +422,6 @@ binascii_b2a_uu_impl(PyObject *module, Py_buffer *data, int backtick)
413422 return _PyBytesWriter_Finish (& writer , ascii_data );
414423}
415424
416-
417- static int
418- binascii_find_valid (const unsigned char * s , Py_ssize_t slen , int num )
419- {
420- /* Finds & returns the (num+1)th
421- ** valid character for base64, or -1 if none.
422- */
423-
424- int ret = -1 ;
425- unsigned char c , b64val ;
426-
427- while ((slen > 0 ) && (ret == -1 )) {
428- c = * s ;
429- b64val = table_a2b_base64 [c & 0x7f ];
430- if ( ((c <= 0x7f ) && (b64val != (unsigned char )-1 )) ) {
431- if (num == 0 )
432- ret = * s ;
433- num -- ;
434- }
435-
436- s ++ ;
437- slen -- ;
438- }
439- return ret ;
440- }
441-
442425/*[clinic input]
443426binascii.a2b_base64
444427
@@ -452,88 +435,74 @@ static PyObject *
452435binascii_a2b_base64_impl (PyObject * module , Py_buffer * data )
453436/*[clinic end generated code: output=0628223f19fd3f9b input=5872acf6e1cac243]*/
454437{
455- const unsigned char * ascii_data ;
456- unsigned char * bin_data ;
457- unsigned char * bin_data_start ;
458- int leftbits = 0 ;
459- unsigned char this_ch ;
460- unsigned int leftchar = 0 ;
461- Py_ssize_t ascii_len , bin_len ;
462- int quad_pos = 0 ;
463- _PyBytesWriter writer ;
464- binascii_state * state ;
465-
466- ascii_data = data -> buf ;
467- ascii_len = data -> len ;
438+ assert (data -> len >= 0 );
468439
469- assert (ascii_len >= 0 );
470-
471- if (ascii_len > PY_SSIZE_T_MAX - 3 )
472- return PyErr_NoMemory ();
473-
474- bin_len = ((ascii_len + 3 )/4 )* 3 ; /* Upper bound, corrected later */
475-
476- _PyBytesWriter_Init (& writer );
440+ const unsigned char * ascii_data = data -> buf ;
441+ size_t ascii_len = data -> len ;
477442
478443 /* Allocate the buffer */
479- bin_data = _PyBytesWriter_Alloc (& writer , bin_len );
444+ Py_ssize_t bin_len = ((ascii_len + 3 )/4 )* 3 ; /* Upper bound, corrected later */
445+ _PyBytesWriter writer ;
446+ _PyBytesWriter_Init (& writer );
447+ unsigned char * bin_data = _PyBytesWriter_Alloc (& writer , bin_len );
480448 if (bin_data == NULL )
481449 return NULL ;
482- bin_data_start = bin_data ;
483-
484- for ( ; ascii_len > 0 ; ascii_len -- , ascii_data ++ ) {
485- this_ch = * ascii_data ;
450+ unsigned char * bin_data_start = bin_data ;
486451
487- if (this_ch > 0x7f ||
488- this_ch == '\r' || this_ch == '\n' || this_ch == ' ' )
489- continue ;
452+ int quad_pos = 0 ;
453+ unsigned char leftchar = 0 ;
454+ int pads = 0 ;
455+ for (size_t i = 0 ; i < ascii_len ; i ++ ) {
456+ unsigned char this_ch = ascii_data [i ];
490457
491458 /* Check for pad sequences and ignore
492459 ** the invalid ones.
493460 */
494461 if (this_ch == BASE64_PAD ) {
495- if ( (quad_pos < 2 ) ||
496- ((quad_pos == 2 ) &&
497- (binascii_find_valid (ascii_data , ascii_len , 1 )
498- != BASE64_PAD )) )
499- {
500- continue ;
501- }
502- else {
462+ if (quad_pos >= 2 && quad_pos + ++ pads >= 4 ) {
503463 /* A pad sequence means no more input.
504464 ** We've already interpreted the data
505465 ** from the quad at this point.
506466 */
507- leftbits = 0 ;
508- break ;
467+ goto done ;
509468 }
469+ continue ;
510470 }
511471
512- this_ch = table_a2b_base64 [* ascii_data ];
513- if ( this_ch == ( unsigned char ) -1 )
472+ this_ch = table_a2b_base64 [this_ch ];
473+ if (this_ch >= 64 ) {
514474 continue ;
475+ }
476+ pads = 0 ;
515477
516- /*
517- ** Shift it in on the low end, and see if there's
518- ** a byte ready for output.
519- */
520- quad_pos = (quad_pos + 1 ) & 0x03 ;
521- leftchar = (leftchar << 6 ) | (this_ch );
522- leftbits += 6 ;
523-
524- if ( leftbits >= 8 ) {
525- leftbits -= 8 ;
526- * bin_data ++ = (leftchar >> leftbits ) & 0xff ;
527- leftchar &= ((1 << leftbits ) - 1 );
478+ switch (quad_pos ) {
479+ case 0 :
480+ quad_pos = 1 ;
481+ leftchar = this_ch ;
482+ break ;
483+ case 1 :
484+ quad_pos = 2 ;
485+ * bin_data ++ = (leftchar << 2 ) | (this_ch >> 4 );
486+ leftchar = this_ch & 0x0f ;
487+ break ;
488+ case 2 :
489+ quad_pos = 3 ;
490+ * bin_data ++ = (leftchar << 4 ) | (this_ch >> 2 );
491+ leftchar = this_ch & 0x03 ;
492+ break ;
493+ case 3 :
494+ quad_pos = 0 ;
495+ * bin_data ++ = (leftchar << 6 ) | (this_ch );
496+ leftchar = 0 ;
497+ break ;
528498 }
529499 }
530500
531- if (leftbits != 0 ) {
532- state = PyModule_GetState (module );
501+ if (quad_pos != 0 ) {
502+ binascii_state * state = PyModule_GetState (module );
533503 if (state == NULL ) {
534- return NULL ;
535- }
536- if (leftbits == 6 ) {
504+ /* error already set, from PyModule_GetState */
505+ } else if (quad_pos == 1 ) {
537506 /*
538507 ** There is exactly one extra valid, non-padding, base64 character.
539508 ** This is an invalid length, as there is no possible input that
@@ -551,6 +520,7 @@ binascii_a2b_base64_impl(PyObject *module, Py_buffer *data)
551520 return NULL ;
552521 }
553522
523+ done :
554524 return _PyBytesWriter_Finish (& writer , bin_data );
555525}
556526
0 commit comments