Commit b3bae11

WIP
1 parent 1668b41 commit b3bae11

File tree

6 files changed: +55 -25 lines

Lib/test/test_tokenize.py (+2 -1)

@@ -1827,9 +1827,10 @@ class CTokenizeTest(TestCase):
     def check_tokenize(self, s, expected):
         # Format the tokens in s in a table format.
         # The ENDMARKER and final NEWLINE are omitted.
+        f = StringIO(s)
         with self.subTest(source=s):
             result = stringify_tokens_from_source(
-                _generate_tokens_from_c_tokenizer(s), s
+                _generate_tokens_from_c_tokenizer(f.readline), s
             )
             self.assertEqual(result, expected.rstrip().splitlines())
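For orientation, a minimal sketch of the calling convention the test now exercises: `_generate_tokens_from_c_tokenizer` is a private helper in Lib/tokenize.py, and after this change it takes a readline-style callable rather than a source string. Names follow the diff above; this is not a public API and may differ between versions.

```python
# Sketch only: _generate_tokens_from_c_tokenizer is private to
# Lib/tokenize.py; its readline-based signature is taken from the
# test diff above.
from io import StringIO
from tokenize import _generate_tokens_from_c_tokenizer

source = "a = 1\n"
readline = StringIO(source).readline   # returns one str line per call
for tok in _generate_tokens_from_c_tokenizer(readline):
    print(tok)
```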

Lib/tokenize.py (+8 -2)

@@ -446,8 +446,14 @@ def tokenize(readline):
     yield from _tokenize(rl_gen, encoding)

 def _tokenize(rl_gen, encoding):
-    source = b"".join(rl_gen).decode(encoding)
-    for token in _generate_tokens_from_c_tokenizer(source, extra_tokens=True):
+    def gen(rl_gen):
+        while True:
+            try:
+                yield next(rl_gen).decode(encoding)
+            except StopIteration:
+                return
+    g = gen(rl_gen)
+    for token in _generate_tokens_from_c_tokenizer(g.__next__, extra_tokens=True):
         yield token

 def generate_tokens(readline):
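A standalone sketch of the adaptation `_tokenize()` now performs: a generator of bytes lines is wrapped so the C tokenizer sees a callable yielding already-decoded str lines, and the wrapper stops cleanly when the underlying iterator is exhausted. The helper name here is hypothetical; only the `gen()`/`g.__next__` pattern comes from the diff.

```python
# Hypothetical helper mirroring the gen()/g.__next__ pattern above.
def decoding_readline(rl_gen, encoding):
    def gen():
        for line in rl_gen:             # next(rl_gen) until StopIteration
            yield line.decode(encoding)
    return gen().__next__               # a readline-style callable

readline = decoding_readline(iter([b"x = 1\n", b"print(x)\n"]), "utf-8")
print(readline())   # -> 'x = 1\n'
print(readline())   # -> 'print(x)\n'
```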

Parser/tokenizer.c (+36 -2)

@@ -542,7 +542,13 @@ tok_readline_recode(struct tok_state *tok) {
     if (line == NULL) {
         line = PyObject_CallNoArgs(tok->decoding_readline);
         if (line == NULL) {
-            error_ret(tok);
+            if (!PyErr_ExceptionMatches(PyExc_StopIteration)) {
+                error_ret(tok);
+            } else {
+                PyErr_Clear();
+                tok->inp = tok->cur;
+                tok->done = E_EOF;
+            }
             goto error;
         }
     }
@@ -569,6 +575,7 @@ tok_readline_recode(struct tok_state *tok) {
         goto error;
     }
     Py_DECREF(line);
+exit:
     return 1;
 error:
     Py_XDECREF(line);
@@ -900,6 +907,30 @@ _PyTokenizer_FromString(const char *str, int exec_input, int preserve_crlf)
     return tok;
 }

+struct tok_state *
+_PyTokenizer_FromUTF8Readline(PyObject* readline, int exec_input, int preserve_crlf)
+{
+    struct tok_state *tok = tok_new();
+    if (tok == NULL)
+        return NULL;
+    if ((tok->buf = (char *)PyMem_Malloc(BUFSIZ)) == NULL) {
+        _PyTokenizer_Free(tok);
+        return NULL;
+    }
+    tok->cur = tok->inp = tok->buf;
+    tok->end = tok->buf + BUFSIZ;
+    tok->fp = NULL;
+
+    tok->enc = NULL;
+    tok->encoding = new_string("utf-8", 5, tok);
+    Py_INCREF(readline);
+    tok->decoding_readline = readline;
+    tok->decoding_state = STATE_NORMAL;
+    return tok;
+}
+
+
 /* Set up tokenizer for UTF-8 string */

 struct tok_state *
@@ -1238,7 +1269,10 @@ tok_nextc(struct tok_state *tok)
         if (tok->done != E_OK) {
             return EOF;
         }
-        if (tok->fp == NULL) {
+        if (tok->decoding_readline != NULL) {
+            rc = tok_underflow_file(tok);
+        }
+        else if (tok->fp == NULL) {
+            rc = tok_underflow_string(tok);
         }
         else if (tok->prompt != NULL) {
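The `StopIteration` handling above means the C tokenizer now treats exhaustion of the readline object as end-of-input (`E_EOF`) rather than an error, so a bare generator's `__next__` works as a line source. A small Python-level check, assuming the readline-based path is wired through `generate_tokens` as in this branch:

```python
import tokenize

def lines():
    yield "a = 1\n"
    yield "b = a + 1\n"
    # exhaustion raises StopIteration on the next call -> treated as EOF

for tok in tokenize.generate_tokens(lines().__next__):
    print(tokenize.tok_name[tok.type], repr(tok.string))
```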

Parser/tokenizer.h (+1)

@@ -137,6 +137,7 @@ struct tok_state {

 extern struct tok_state *_PyTokenizer_FromString(const char *, int, int);
 extern struct tok_state *_PyTokenizer_FromUTF8(const char *, int, int);
+extern struct tok_state *_PyTokenizer_FromUTF8Readline(PyObject*, int, int);
 extern struct tok_state *_PyTokenizer_FromFile(FILE *, const char*,
                                                const char *, const char *);
 extern void _PyTokenizer_Free(struct tok_state *);

Python/Python-tokenize.c (+4 -4)

@@ -37,15 +37,15 @@ typedef struct
 @classmethod
 _tokenizer.tokenizeriter.__new__ as tokenizeriter_new

-    source: str
+    source: object
     *
     extra_tokens: bool
 [clinic start generated code]*/

 static PyObject *
-tokenizeriter_new_impl(PyTypeObject *type, const char *source,
+tokenizeriter_new_impl(PyTypeObject *type, PyObject *source,
                        int extra_tokens)
-/*[clinic end generated code: output=f6f9d8b4beec8106 input=90dc5b6a5df180c2]*/
+/*[clinic end generated code: output=f174f61e34b2c306 input=32ddfe6d52575938]*/
 {
     tokenizeriterobject *self = (tokenizeriterobject *)type->tp_alloc(type, 0);
     if (self == NULL) {
@@ -55,7 +55,7 @@ tokenizeriter_new_impl(PyTypeObject *type, const char *source,
     if (filename == NULL) {
         return NULL;
     }
-    self->tok = _PyTokenizer_FromUTF8(source, 1, 1);
+    self->tok = _PyTokenizer_FromUTF8Readline(source, 1, 1);
     if (self->tok == NULL) {
         Py_DECREF(filename);
         return NULL;
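With `source` now declared as `object` and routed to `_PyTokenizer_FromUTF8Readline`, the public entry point can tokenize incrementally instead of joining the whole source up front (the old `b"".join(rl_gen)` in `_tokenize()`). A quick end-to-end check using only public APIs:

```python
import io
import tokenize

reader = io.BytesIO(b"def f():\n    return 42\n")
for tok in tokenize.tokenize(reader.readline):   # readline yields bytes
    print(tok)
```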

Python/clinic/Python-tokenize.c.h (+4 -16)

Generated file (Argument Clinic output); not rendered in the diff view.
