Streams returned by the git cmd db now contain all the data right away. This could cause several copies of the data to exist, and makes the cmd implementation a bad choice if big files are involved

Byron · Byron · commit a5497c432fe8 · 2011-06-06T20:32:38.000+02:00
diff --git a/git/db/cmd/base.py b/git/db/cmd/base.py
@@ -31,6 +31,7 @@
 						TagReference
 					)
 from git.objects.commit import Commit
+from cStringIO import StringIO
 import re
 import os
 import sys
@@ -305,9 +306,15 @@ def info(self, sha):
 		return OInfo(hex_to_bin(hexsha), typename, size)
 		
 	def stream(self, sha):
-		"""For now, all lookup is done by git itself"""
-		hexsha, typename, size, stream = self._git.stream_object_data(bin_to_hex(sha))
-		return OStream(hex_to_bin(hexsha), typename, size, stream)
+		"""For now, all lookup is done by git itself
+		:note: As we don't know when the stream is actually read (and if it is 
+			stored for later use) we read the data right away and cache it.
+			This has HUGE performance implications, both for memory and for 
+			reading/deserializing objects, but we have no other choice in order
+			to make the database behaviour consistent with other implementations !"""
+		
+		hexsha, typename, size, data = self._git.get_object_data(bin_to_hex(sha))
+		return OStream(hex_to_bin(hexsha), typename, size, StringIO(data))
 		
 	def partial_to_complete_sha_hex(self, partial_hexsha):
 		""":return: Full binary 20 byte sha from the given partial hexsha
diff --git a/git/db/complex.py b/git/db/complex.py
@@ -8,7 +8,10 @@
 
 class CmdGitDB(CmdPartialGitDB, PurePartialGitDB):
 	"""A database which uses primarily the git command implementation, but falls back
-	to pure python where it is more feasible"""
+	to pure python where it is more feasible
+	:note: To ensure consistent behaviour across implementations, when calling the 
+		``stream()`` method a cache is created. This makes this implementation a bad
+		choice when reading big files as these are streamed from memory in all cases."""
 
 class CmdCompatibilityGitDB(RepoCompatibilityInterface, CmdGitDB):
 	"""A database which fills in its missing implementation using the pure python