Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit 753e908

Browse files
Paul Sowdenempty
Paul Sowden
authored and committed
implement a far far faster diff parser
The old diff parser in list_from_string took a large amount of time to parse long diffs; on one of my repositories it took over 3 minutes to parse the initial commit. The new parser uses a single regexp to match the header of a diff, and iterates over each individual diff by splitting the entire string on the diff separator and attempting to match the header for each individual diff. With the new parser, parsing the same repository is almost instant, woohoo! (cherry picked from commit 5b6b27f153bdc30380bea12a528ef483571dd57a)
1 parent 9e14356 commit 753e908

File tree

2 files changed

+20
-43
lines changed

2 files changed

+20
-43
lines changed

lib/git/diff.py

Lines changed: 19 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -34,52 +34,29 @@ def __init__(self, repo, a_path, b_path, a_commit, b_commit, a_mode, b_mode, new
3434

3535
@classmethod
3636
def list_from_string(cls, repo, text):
37-
lines = text.splitlines()
38-
a_mode = None
39-
b_mode = None
4037
diffs = []
41-
while lines:
42-
m = re.search(r'^diff --git a/(\S+) b/(\S+)$', lines.pop(0))
43-
if m:
44-
a_path, b_path = m.groups()
45-
if re.search(r'^old mode', lines[0]):
46-
m = re.search(r'^old mode (\d+)', lines.pop(0))
47-
if m:
48-
a_mode, = m.groups()
49-
m = re.search(r'^new mode (\d+)', lines.pop(0))
50-
if m:
51-
b_mode, = m.groups()
52-
if re.search(r'^diff --git', lines[0]):
53-
diffs.append(Diff(repo, a_path, b_path, None, None, a_mode, b_mode, False, False, None))
54-
continue
5538

56-
new_file = False
57-
deleted_file = False
39+
diff_header = re.compile(r"""
40+
#^diff[ ]--git
41+
[ ]a/(?P<a_path>\S+)[ ]b/(?P<b_path>\S+)\n
42+
(?:^old[ ]mode[ ](?P<old_mode>\d+)(?:\n|$))?
43+
(?:^new[ ]mode[ ](?P<new_mode>\d+)(?:\n|$))?
44+
(?:^new[ ]file[ ]mode[ ](?P<new_file_mode>.+)(?:\n|$))?
45+
(?:^deleted[ ]file[ ]mode[ ](?P<deleted_file_mode>.+)(?:\n|$))?
46+
(?:^index[ ](?P<a_commit>[0-9A-Fa-f]+)
47+
\.\.(?P<b_commit>[0-9A-Fa-f]+)[ ]?(?P<b_mode>.+)?(?:\n|$))?
48+
""", re.VERBOSE | re.MULTILINE).match
5849

59-
if re.search(r'^new file', lines[0]):
60-
m = re.search(r'^new file mode (.+)', lines.pop(0))
61-
if m:
62-
b_mode, = m.groups()
63-
a_mode = None
64-
new_file = True
65-
elif re.search(r'^deleted file', lines[0]):
66-
m = re.search(r'^deleted file mode (.+)$', lines.pop(0))
67-
if m:
68-
a_mode, = m.groups()
69-
b_mode = None
70-
deleted_file = True
50+
for diff in ('\n' + text).split('\ndiff --git')[1:]:
51+
header = diff_header(diff)
7152

72-
m = re.search(r'^index ([0-9A-Fa-f]+)\.\.([0-9A-Fa-f]+) ?(.+)?$', lines.pop(0))
73-
if m:
74-
a_commit, b_commit, b_mode = m.groups()
75-
if b_mode:
76-
b_mode = b_mode.strip()
53+
a_path, b_path, old_mode, new_mode, new_file_mode, deleted_file_mode, \
54+
a_commit, b_commit, b_mode = header.groups()
55+
new_file, deleted_file = bool(new_file_mode), bool(deleted_file_mode)
7756

78-
diff_lines = []
79-
while lines and not re.search(r'^diff', lines[0]):
80-
diff_lines.append(lines.pop(0))
81-
82-
diff = "\n".join(diff_lines)
83-
diffs.append(Diff(repo, a_path, b_path, a_commit, b_commit, a_mode, b_mode, new_file, deleted_file, diff))
57+
diffs.append(Diff(repo, a_path, b_path, a_commit, b_commit,
58+
old_mode or deleted_file_mode, new_mode or new_file_mode or b_mode,
59+
new_file, deleted_file, diff[header.end():]))
8460

8561
return diffs
62+

test/git/test_commit.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -130,7 +130,7 @@ def test_diffs_on_initial_import(self, git):
130130
assert_equal('History.txt', diffs[0].a_path)
131131
assert_equal('History.txt', diffs[0].b_path)
132132
assert_equal(None, diffs[0].a_commit)
133-
assert_equal(None, diffs[0].b_mode)
133+
assert_equal('100644', diffs[0].b_mode)
134134
assert_equal('81d2c27608b352814cbe979a6acd678d30219678', diffs[0].b_commit.id)
135135
assert_equal(True, diffs[0].new_file)
136136
assert_equal(False, diffs[0].deleted_file)

0 commit comments

Comments
 (0)