|
578 | 578 | >>> tempdir = os.path.dirname(f) or os.curdir |
579 | 579 | >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py")) |
580 | 580 |
|
581 | | -tokenize is broken on test_pep3131.py because regular expressions are broken on |
582 | | -the obscure unicode identifiers in it. *sigh* |
| 581 | +Tokenize is broken on test_pep3131.py because regular expressions are |
| 582 | +broken on the obscure unicode identifiers in it. *sigh* |
| 583 | +With roundtrip extended to test the 5-tuple mode of untokenize, |
| 584 | +7 more testfiles fail. Remove them also until the failure is diagnosed. |
| 585 | +
|
583 | 586 | >>> testfiles.remove(os.path.join(tempdir, "test_pep3131.py")) |
| 587 | + >>> for f in ('buffer', 'builtin', 'fileio', 'inspect', 'os', 'platform', 'sys'): |
| 588 | + ... testfiles.remove(os.path.join(tempdir, "test_%s.py") % f) |
| 589 | + ... |
584 | 590 | >>> if not support.is_resource_enabled("cpu"): |
585 | 591 | ... testfiles = random.sample(testfiles, 10) |
586 | 592 | ... |
@@ -659,21 +665,39 @@ def dump_tokens(s): |
659 | 665 | def roundtrip(f): |
660 | 666 | """ |
661 | 667 | Test roundtrip for `untokenize`. `f` is an open file or a string. |
662 | | - The source code in f is tokenized, converted back to source code via |
663 | | - tokenize.untokenize(), and tokenized again from the latter. The test |
664 | | - fails if the second tokenization doesn't match the first. |
| 668 | + The source code in f is tokenized to both 5- and 2-tuples. |
| 669 | + Both sequences are converted back to source code via |
| 670 | + tokenize.untokenize(), and the latter tokenized again to 2-tuples. |
| 671 | + The test fails if the 3 pair tokenizations do not match. |
| 672 | +
|
| 673 | + When untokenize bugs are fixed, untokenize with 5-tuples should |
| 674 | + reproduce code that does not contain a backslash continuation |
| 675 | + following spaces. A proper test should test this. |
| 676 | +
|
| 677 | + This function would be more useful for correcting bugs if it reported |
| 678 | + the first point of failure, like assertEqual, rather than just |
| 679 | + returning False -- or if it were only used in unittests and not |
| 680 | + doctest and actually used assertEqual. |
665 | 681 | """ |
| 682 | + # Get source code and original tokenizations |
666 | 683 | if isinstance(f, str): |
667 | | - f = BytesIO(f.encode('utf-8')) |
668 | | - try: |
669 | | - token_list = list(tokenize(f.readline)) |
670 | | - finally: |
| 684 | + code = f.encode('utf-8') |
| 685 | + else: |
| 686 | + code = f.read() |
671 | 687 | f.close() |
672 | | - tokens1 = [tok[:2] for tok in token_list] |
673 | | - new_bytes = untokenize(tokens1) |
674 | | - readline = (line for line in new_bytes.splitlines(keepends=True)).__next__ |
675 | | - tokens2 = [tok[:2] for tok in tokenize(readline)] |
676 | | - return tokens1 == tokens2 |
| 688 | + readline = iter(code.splitlines(keepends=True)).__next__ |
| 689 | + tokens5 = list(tokenize(readline)) |
| 690 | + tokens2 = [tok[:2] for tok in tokens5] |
| 691 | + # Reproduce tokens2 from pairs |
| 692 | + bytes_from2 = untokenize(tokens2) |
| 693 | + readline2 = iter(bytes_from2.splitlines(keepends=True)).__next__ |
| 694 | + tokens2_from2 = [tok[:2] for tok in tokenize(readline2)] |
| 695 | + # Reproduce tokens2 from 5-tuples |
| 696 | + bytes_from5 = untokenize(tokens5) |
| 697 | + readline5 = iter(bytes_from5.splitlines(keepends=True)).__next__ |
| 698 | + tokens2_from5 = [tok[:2] for tok in tokenize(readline5)] |
| 699 | + # Compare 3 versions |
| 700 | + return tokens2 == tokens2_from2 == tokens2_from5 |
677 | 701 |
|
678 | 702 | # This is an example from the docs, set up as a doctest. |
679 | 703 | def decistmt(s): |
|
0 commit comments