Thanks to visit codestin.com
Credit goes to github.com

Skip to content

Commit a5497c4

Browse files
committed
Streams returned by the git cmd db now contain all the data right away. This could cause several copies of the data to exist, and makes the cmd implementation a bad choice if big files are involved.
1 parent ce79835 commit a5497c4

File tree

2 files changed

+14
-4
lines changed

2 files changed

+14
-4
lines changed

git/db/cmd/base.py

Lines changed: 10 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
TagReference
3232
)
3333
from git.objects.commit import Commit
34+
from cStringIO import StringIO
3435
import re
3536
import os
3637
import sys
@@ -305,9 +306,15 @@ def info(self, sha):
305306
return OInfo(hex_to_bin(hexsha), typename, size)
306307

307308
def stream(self, sha):
308-
"""For now, all lookup is done by git itself"""
309-
hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
310-
return OStream(hex_to_bin(hexsha), typename, size, stream)
309+
"""For now, all lookup is done by git itself
310+
:note: As we don't know when the stream is actually read (and if it is
311+
stored for later use) we read the data right away and cache it.
312+
This has HUGE performance implications, both for memory and for
313+
reading/deserializing objects, but we have no other choice in order
314+
to make the database behaviour consistent with other implementations !"""
315+
316+
hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
317+
return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
311318

312319
def partial_to_complete_sha_hex(self, partial_hexsha):
313320
""":return: Full binary 20 byte sha from the given partial hexsha

git/db/complex.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,10 @@
88

99
class CmdGitDB(CmdPartialGitDB, PurePartialGitDB):
1010
"""A database which uses primarily the git command implementation, but falls back
11-
to pure python where it is more feasible"""
11+
to pure python where it is more feasible
12+
:note: To ensure consistent behaviour across implementations, when calling the
13+
``stream()`` method a cache is created. This makes this implementation a bad
14+
choice when reading big files as these are streamed from memory in all cases."""
1215

1316
class CmdCompatibilityGitDB(RepoCompatibilityInterface, CmdGitDB):
1417
"""A database which fills in its missing implementation using the pure python

0 commit comments

Comments
 (0)