@@ -119,6 +119,7 @@ tok_new(void)
119119 tok -> buf = tok -> cur = tok -> end = tok -> inp = tok -> start = NULL ;
120120 tok -> done = E_OK ;
121121 tok -> fp = NULL ;
122+ tok -> input = NULL ;
122123 tok -> tabsize = TABSIZE ;
123124 tok -> indent = 0 ;
124125 tok -> indstack [0 ] = 0 ;
@@ -145,6 +146,17 @@ tok_new(void)
145146 return tok ;
146147}
147148
149+ static char *
150+ new_string (const char * s , Py_ssize_t len )
151+ {
152+ char * result = (char * )PyMem_MALLOC (len + 1 );
153+ if (result != NULL ) {
154+ memcpy (result , s , len );
155+ result [len ] = '\0' ;
156+ }
157+ return result ;
158+ }
159+
148160#ifdef PGEN
149161
150162static char *
@@ -159,10 +171,10 @@ decoding_feof(struct tok_state *tok)
159171 return feof (tok -> fp );
160172}
161173
162- static const char *
163- decode_str (const char * str , struct tok_state * tok )
174+ static char *
175+ decode_str (const char * str , int exec_input , struct tok_state * tok )
164176{
165- return str ;
177+ return new_string ( str , strlen ( str )) ;
166178}
167179
168180#else /* PGEN */
@@ -177,16 +189,6 @@ error_ret(struct tok_state *tok) /* XXX */
177189 return NULL ; /* as if it were EOF */
178190}
179191
180- static char *
181- new_string (const char * s , Py_ssize_t len )
182- {
183- char * result = (char * )PyMem_MALLOC (len + 1 );
184- if (result != NULL ) {
185- memcpy (result , s , len );
186- result [len ] = '\0' ;
187- }
188- return result ;
189- }
190192
191193static char *
192194get_normal_name (char * s ) /* for utf-8 and latin-1 */
@@ -635,17 +637,63 @@ translate_into_utf8(const char* str, const char* enc) {
635637 return utf8 ;
636638}
637639
640+
641+ static char *
642+ translate_newlines (const char * s , int exec_input , struct tok_state * tok ) {
643+ int skip_next_lf = 0 , length = strlen (s ), final_length ;
644+ char * buf , * current ;
645+ char c ;
646+ buf = PyMem_MALLOC (length + 2 );
647+ if (buf == NULL ) {
648+ tok -> done = E_NOMEM ;
649+ return NULL ;
650+ }
651+ for (current = buf ; (c = * s ++ );) {
652+ if (skip_next_lf ) {
653+ skip_next_lf = 0 ;
654+ if (c == '\n' ) {
655+ c = * s ;
656+ s ++ ;
657+ if (!c )
658+ break ;
659+ }
660+ }
661+ if (c == '\r' ) {
662+ skip_next_lf = 1 ;
663+ c = '\n' ;
664+ }
665+ * current = c ;
666+ current ++ ;
667+ }
668+ /* If this is exec input, add a newline to the end of the file if
669+ there isn't one already. */
670+ if (exec_input && * current != '\n' ) {
671+ * current = '\n' ;
672+ current ++ ;
673+ }
674+ * current = '\0' ;
675+ final_length = current - buf ;
676+ if (final_length < length && final_length )
677+ /* should never fail */
678+ buf = PyMem_REALLOC (buf , final_length + 1 );
679+ return buf ;
680+ }
681+
/* Decode the byte string INPUT for use as the buffer of TOK.
   The newlines of INPUT are first normalised into a private copy, which
   is stored in tok->input.  Encoding declarations found inside the text
   are recorded inside TOK. */
641685
642686static const char *
643- decode_str (const char * str , struct tok_state * tok )
687+ decode_str (const char * input , int single , struct tok_state * tok )
644688{
645689 PyObject * utf8 = NULL ;
690+ const char * str ;
646691 const char * s ;
647692 const char * newl [2 ] = {NULL , NULL };
648693 int lineno = 0 ;
694+ tok -> input = str = translate_newlines (input , single , tok );
695+ if (str == NULL )
696+ return NULL ;
649697 tok -> enc = NULL ;
650698 tok -> str = str ;
651699 if (!check_bom (buf_getc , buf_ungetc , buf_setreadl , tok ))
@@ -696,12 +744,12 @@ decode_str(const char *str, struct tok_state *tok)
696744/* Set up tokenizer for string */
697745
698746struct tok_state *
699- PyTokenizer_FromString (const char * str )
747+ PyTokenizer_FromString (const char * str , int exec_input )
700748{
701749 struct tok_state * tok = tok_new ();
702750 if (tok == NULL )
703751 return NULL ;
704- str = (char * )decode_str (str , tok );
752+ str = (char * )decode_str (str , exec_input , tok );
705753 if (str == NULL ) {
706754 PyTokenizer_Free (tok );
707755 return NULL ;
@@ -713,11 +761,18 @@ PyTokenizer_FromString(const char *str)
713761}
714762
715763struct tok_state *
716- PyTokenizer_FromUTF8 (const char * str )
764+ PyTokenizer_FromUTF8 (const char * str , int exec_input )
717765{
718766 struct tok_state * tok = tok_new ();
719767 if (tok == NULL )
720768 return NULL ;
769+ #ifndef PGEN
770+ tok -> input = str = translate_newlines (str , exec_input , tok );
771+ #endif
772+ if (str == NULL ) {
773+ PyTokenizer_Free (tok );
774+ return NULL ;
775+ }
721776 tok -> decoding_state = STATE_RAW ;
722777 tok -> read_coding_spec = 1 ;
723778 tok -> enc = NULL ;
@@ -734,7 +789,6 @@ PyTokenizer_FromUTF8(const char *str)
734789 return tok ;
735790}
736791
737-
738792/* Set up tokenizer for file */
739793
740794struct tok_state *
@@ -780,6 +834,8 @@ PyTokenizer_Free(struct tok_state *tok)
780834#endif
781835 if (tok -> fp != NULL && tok -> buf != NULL )
782836 PyMem_FREE (tok -> buf );
837+ if (tok -> input )
838+ PyMem_FREE ((char * )tok -> input );
783839 PyMem_FREE (tok );
784840}
785841
0 commit comments