@@ -1637,6 +1637,51 @@ unicode_putchar(PyObject **p_unicode, Py_ssize_t *pos,
16371637 return 0 ;
16381638}
16391639
1640+ /* Copy a ASCII or latin1 char* string into a Python Unicode string.
1641+ Return the length of the input string.
1642+
1643+ WARNING: Don't copy the terminating null character and don't check the
1644+ maximum character (may write a latin1 character in an ASCII string). */
1645+ static Py_ssize_t
1646+ unicode_write_cstr (PyObject * unicode , Py_ssize_t index , const char * str )
1647+ {
1648+ enum PyUnicode_Kind kind = PyUnicode_KIND (unicode );
1649+ void * data = PyUnicode_DATA (unicode );
1650+
1651+ switch (kind ) {
1652+ case PyUnicode_1BYTE_KIND : {
1653+ Py_ssize_t len = strlen (str );
1654+ assert (index + len <= PyUnicode_GET_LENGTH (unicode ));
1655+ memcpy (data + index , str , len );
1656+ return len ;
1657+ }
1658+ case PyUnicode_2BYTE_KIND : {
1659+ Py_UCS2 * start = (Py_UCS2 * )data + index ;
1660+ Py_UCS2 * ucs2 = start ;
1661+ assert (index <= PyUnicode_GET_LENGTH (unicode ));
1662+
1663+ for (; * str ; ++ ucs2 , ++ str )
1664+ * ucs2 = (Py_UCS2 )* str ;
1665+
1666+ assert ((ucs2 - start ) <= PyUnicode_GET_LENGTH (unicode ));
1667+ return ucs2 - start ;
1668+ }
1669+ default : {
1670+ Py_UCS4 * start = (Py_UCS4 * )data + index ;
1671+ Py_UCS4 * ucs4 = start ;
1672+ assert (kind == PyUnicode_4BYTE_KIND );
1673+ assert (index <= PyUnicode_GET_LENGTH (unicode ));
1674+
1675+ for (; * str ; ++ ucs4 , ++ str )
1676+ * ucs4 = (Py_UCS4 )* str ;
1677+
1678+ assert ((ucs4 - start ) <= PyUnicode_GET_LENGTH (unicode ));
1679+ return ucs4 - start ;
1680+ }
1681+ }
1682+ }
1683+
1684+
16401685static PyObject *
16411686get_latin1_char (unsigned char ch )
16421687{
@@ -2590,19 +2635,23 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
25902635 case 'u' :
25912636 case 'x' :
25922637 case 'p' :
2638+ {
2639+ Py_ssize_t written ;
25932640 /* unused, since we already have the result */
25942641 if (* f == 'p' )
25952642 (void ) va_arg (vargs , void * );
25962643 else
25972644 (void ) va_arg (vargs , int );
25982645 /* extract the result from numberresults and append. */
2599- for (; * numberresult ; ++ i , ++ numberresult )
2600- PyUnicode_WRITE (kind , data , i , * numberresult );
2646+ written = unicode_write_cstr (string , i , numberresult );
26012647 /* skip over the separating '\0' */
2648+ i += written ;
2649+ numberresult += written ;
26022650 assert (* numberresult == '\0' );
26032651 numberresult ++ ;
26042652 assert (numberresult <= numberresults + numbersize );
26052653 break ;
2654+ }
26062655 case 's' :
26072656 {
26082657 /* unused, since we already have the result */
@@ -2669,8 +2718,7 @@ PyUnicode_FromFormatV(const char *format, va_list vargs)
26692718 PyUnicode_WRITE (kind , data , i ++ , '%' );
26702719 break ;
26712720 default :
2672- for (; * p ; ++ p , ++ i )
2673- PyUnicode_WRITE (kind , data , i , * p );
2721+ i += unicode_write_cstr (string , i , p );
26742722 assert (i == PyUnicode_GET_LENGTH (string ));
26752723 goto end ;
26762724 }
0 commit comments