Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 0019d7d

Browse files
committed
Added unicode handling for author names. They will now be properly encoded into the byte stream, as well as decoded from it
1 parent 0f88fb9 commit 0019d7d

File tree

3 files changed

+47
-2
lines changed

3 files changed

+47
-2
lines changed

doc/source/changes.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,10 @@
22
Changelog
33
=========
44

5+
0.3.0 Beta 3
6+
============
7+
* Added unicode support for author names. Commit.author.name is now unicode instead of string.
8+
59
0.3.0 Beta 2
610
============
711
* Added python 2.4 support

lib/git/objects/commit.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -368,9 +368,14 @@ def _serialize(self, stream):
368368
write("parent %s\n" % p)
369369

370370
a = self.author
371+
aname = a.name
372+
if isinstance(aname, unicode):
373+
aname = aname.encode(self.encoding)
374+
# END handle unicode in name
375+
371376
c = self.committer
372377
fmt = "%s %s <%s> %s %s\n"
373-
write(fmt % ("author", a.name, a.email,
378+
write(fmt % ("author", aname, a.email,
374379
self.authored_date,
375380
altz_to_utctz_str(self.author_tz_offset)))
376381

@@ -425,12 +430,19 @@ def _deserialize(self, stream):
425430
readline()
426431
# END handle encoding
427432

433+
# decode the author's name
434+
try:
435+
self.author.name = self.author.name.decode(self.encoding)
436+
except UnicodeDecodeError:
437+
print >> sys.stderr, "Failed to decode author name: %s" % self.author.name
438+
# END handle author's encoding
439+
428440
# a stream from our data simply gives us the plain message
429441
# The end of our message stream is marked with a newline that we strip
430442
self.message = stream.read()
431443
try:
432444
self.message = self.message.decode(self.encoding)
433-
except Exception:
445+
except UnicodeDecodeError:
434446
print >> sys.stderr, "Failed to decode message: %s" % self.message
435447
# END exception handling
436448
return self

test/git/test_commit.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,3 +242,32 @@ def test_serialization(self, rwrepo):
242242
# create all commits of our repo
243243
assert_commit_serialization(rwrepo, '0.1.6')
244244

245+
def test_serialization_unicode_support(self):
246+
assert Commit.default_encoding.lower() == 'utf-8'
247+
248+
# create a commit with unicode in the message, and the author's name
249+
# Verify its serialization and deserialization
250+
cmt = self.rorepo.commit('0.1.6')
251+
assert isinstance(cmt.message, unicode) # it automatically decodes it as such
252+
assert isinstance(cmt.author.name, unicode) # same here
253+
254+
cmt.message = "üäêèß".decode("utf-8")
255+
assert len(cmt.message) == 5
256+
257+
cmt.author.name = "äüß".decode("utf-8")
258+
assert len(cmt.author.name) == 3
259+
260+
cstream = StringIO()
261+
cmt._serialize(cstream)
262+
cstream.seek(0)
263+
assert len(cstream.getvalue())
264+
265+
ncmt = Commit(self.rorepo, cmt.binsha)
266+
ncmt._deserialize(cstream)
267+
268+
assert cmt.author.name == ncmt.author.name
269+
assert cmt.message == ncmt.message
270+
# actually, it can't be printed in a shell as repr wants to have ascii only
271+
# it appears
272+
cmt.author.__repr__()
273+

0 commit comments

Comments
 (0)