@@ -1333,7 +1333,7 @@ successive matches::
13331333
13341334 Token = collections.namedtuple('Token', ['typ', 'value', 'line', 'column'])
13351335
1336- def tokenize(s ):
1336+ def tokenize(code ):
13371337 keywords = {'IF', 'THEN', 'ENDIF', 'FOR', 'NEXT', 'GOSUB', 'RETURN'}
13381338 token_specification = [
13391339 ('NUMBER', r'\d+(\.\d*)?'), # Integer or decimal number
@@ -1343,26 +1343,27 @@ successive matches::
13431343 ('OP', r'[+\-*/]'), # Arithmetic operators
13441344 ('NEWLINE', r'\n'), # Line endings
13451345 ('SKIP', r'[ \t]+'), # Skip over spaces and tabs
1346+ ('MISMATCH',r'.'), # Any other character
13461347 ]
13471348 tok_regex = '|'.join('(?P<%s>%s)' % pair for pair in token_specification)
13481349 get_token = re.compile(tok_regex).match
1349- line = 1
1350- pos = line_start = 0
1351- mo = get_token(s)
1352- while mo is not None:
1353- typ = mo.lastgroup
1354- if typ == 'NEWLINE':
1355- line_start = pos
1356- line += 1
1357- elif typ != 'SKIP':
1358- val = mo.group(typ)
1359- if typ == 'ID' and val in keywords :
1360- typ = val
1361- yield Token(typ, val, line, mo.start()-line_start)
1362- pos = mo.end()
1363- mo = get_token(s, pos)
1364- if pos != len(s):
1365- raise RuntimeError('Unexpected character %r on line %d' %(s[pos], line) )
1350+ line_num = 1
1351+ line_start = 0
1352+ for mo in re.finditer(tok_regex, code):
1353+ kind = mo.lastgroup
1354+ value = mo.group(kind)
1355+ if kind == 'NEWLINE':
1356+ line_start = mo.end()
1357+ line_num += 1
1358+ elif kind == 'SKIP':
1359+ pass
1360+ elif kind == 'MISMATCH' :
1361+ raise RuntimeError('%r unexpected on line %d' % (value, line_num))
1362+ else:
1363+ if kind == 'ID' and value in keywords:
1364+ kind = value
1365+ column = mo.start() - line_start
1366+ yield Token(kind, value, line_num, column )
13661367
13671368 statements = '''
13681369 IF quantity THEN
@@ -1376,22 +1377,22 @@ successive matches::
13761377
13771378The tokenizer produces the following output::
13781379
1379- Token(typ='IF', value='IF', line=2, column=5 )
1380- Token(typ='ID', value='quantity', line=2, column=8 )
1381- Token(typ='THEN', value='THEN', line=2, column=17 )
1382- Token(typ='ID', value='total', line=3, column=9 )
1383- Token(typ='ASSIGN', value=':=', line=3, column=15 )
1384- Token(typ='ID', value='total', line=3, column=18 )
1385- Token(typ='OP', value='+', line=3, column=24 )
1386- Token(typ='ID', value='price', line=3, column=26 )
1387- Token(typ='OP', value='*', line=3, column=32 )
1388- Token(typ='ID', value='quantity', line=3, column=34 )
1389- Token(typ='END', value=';', line=3, column=42 )
1390- Token(typ='ID', value='tax', line=4, column=9 )
1391- Token(typ='ASSIGN', value=':=', line=4, column=13 )
1392- Token(typ='ID', value='price', line=4, column=16 )
1393- Token(typ='OP', value='*', line=4, column=22 )
1394- Token(typ='NUMBER', value='0.05', line=4, column=24 )
1395- Token(typ='END', value=';', line=4, column=28 )
1396- Token(typ='ENDIF', value='ENDIF', line=5, column=5 )
1397- Token(typ='END', value=';', line=5, column=10 )
1380+ Token(typ='IF', value='IF', line=2, column=4 )
1381+ Token(typ='ID', value='quantity', line=2, column=7 )
1382+ Token(typ='THEN', value='THEN', line=2, column=16 )
1383+ Token(typ='ID', value='total', line=3, column=8 )
1384+ Token(typ='ASSIGN', value=':=', line=3, column=14 )
1385+ Token(typ='ID', value='total', line=3, column=17 )
1386+ Token(typ='OP', value='+', line=3, column=23 )
1387+ Token(typ='ID', value='price', line=3, column=25 )
1388+ Token(typ='OP', value='*', line=3, column=31 )
1389+ Token(typ='ID', value='quantity', line=3, column=33 )
1390+ Token(typ='END', value=';', line=3, column=41 )
1391+ Token(typ='ID', value='tax', line=4, column=8 )
1392+ Token(typ='ASSIGN', value=':=', line=4, column=12 )
1393+ Token(typ='ID', value='price', line=4, column=15 )
1394+ Token(typ='OP', value='*', line=4, column=21 )
1395+ Token(typ='NUMBER', value='0.05', line=4, column=23 )
1396+ Token(typ='END', value=';', line=4, column=27 )
1397+ Token(typ='ENDIF', value='ENDIF', line=5, column=4 )
1398+ Token(typ='END', value=';', line=5, column=9 )
0 commit comments