Thanks for visiting codestin.com
Credit goes to github.com

Skip to content

Commit 102029d

Browse files
committed
Issue #2052: Add charset parameter to HtmlDiff.make_file().
1 parent fbd011d commit 102029d

6 files changed

Lines changed: 63 additions & 10 deletions

File tree

Doc/library/difflib.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,8 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module.
104104

105105
The following methods are public:
106106

107-
.. method:: make_file(fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5)
107+
.. method:: make_file(fromlines, tolines, fromdesc='', todesc='', context=False, \
108+
numlines=5, *, charset='utf-8')
108109
109110
Compares *fromlines* and *tolines* (lists of strings) and returns a string which
110111
is a complete HTML file containing a table showing line by line differences with
@@ -123,6 +124,10 @@ diffs. For comparing directories and files, see also, the :mod:`filecmp` module.
123124
the next difference highlight at the top of the browser without any leading
124125
context).
125126

127+
.. versionchanged:: 3.5
128+
The *charset* keyword-only argument was added. The default charset of the
129+
HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``.
130+
126131
.. method:: make_table(fromlines, tolines, fromdesc='', todesc='', context=False, numlines=5)
127132

128133
Compares *fromlines* and *tolines* (lists of strings) and returns a string which

Doc/whatsnew/3.5.rst

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -225,6 +225,14 @@ contextlib
225225
don't provide any options to redirect it.
226226
(Contributed by Berker Peksag in :issue:`22389`.)
227227

228+
difflib
229+
-------
230+
231+
* The charset of the HTML document generated by :meth:`difflib.HtmlDiff.make_file`
232+
can now be customized by using the *charset* keyword-only parameter. The default
233+
charset of the HTML document changed from ``'ISO-8859-1'`` to ``'utf-8'``.
234+
(Contributed by Berker Peksag in :issue:`2052`.)
235+
228236
distutils
229237
---------
230238

Lib/difflib.py

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1598,7 +1598,7 @@ def _line_pair_iterator():
15981598
15991599
<head>
16001600
<meta http-equiv="Content-Type"
1601-
content="text/html; charset=ISO-8859-1" />
1601+
content="text/html; charset=%(charset)s" />
16021602
<title></title>
16031603
<style type="text/css">%(styles)s
16041604
</style>
@@ -1685,8 +1685,8 @@ def __init__(self,tabsize=8,wrapcolumn=None,linejunk=None,
16851685
self._linejunk = linejunk
16861686
self._charjunk = charjunk
16871687

1688-
def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False,
1689-
numlines=5):
1688+
def make_file(self, fromlines, tolines, fromdesc='', todesc='',
1689+
context=False, numlines=5, *, charset='utf-8'):
16901690
"""Returns HTML file of side by side comparison with change highlights
16911691
16921692
Arguments:
@@ -1701,13 +1701,16 @@ def make_file(self,fromlines,tolines,fromdesc='',todesc='',context=False,
17011701
When context is False, controls the number of lines to place
17021702
the "next" link anchors before the next change (so click of
17031703
"next" link jumps to just before the change).
1704+
charset -- charset of the HTML document
17041705
"""
17051706

1706-
return self._file_template % dict(
1707-
styles = self._styles,
1708-
legend = self._legend,
1709-
table = self.make_table(fromlines,tolines,fromdesc,todesc,
1710-
context=context,numlines=numlines))
1707+
return (self._file_template % dict(
1708+
styles=self._styles,
1709+
legend=self._legend,
1710+
table=self.make_table(fromlines, tolines, fromdesc, todesc,
1711+
context=context, numlines=numlines),
1712+
charset=charset
1713+
)).encode(charset, 'xmlcharrefreplace').decode(charset)
17111714

17121715
def _tab_newline_replace(self,fromlines,tolines):
17131716
"""Returns from/to line lists with tabs expanded and newlines removed.

Lib/test/test_difflib.py

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,20 @@ def test_added_tab_hint(self):
107107
5. Flat is better than nested.
108108
"""
109109

110+
patch914575_nonascii_from1 = """
111+
1. Beautiful is beTTer than ugly.
112+
2. Explicit is better than ımplıcıt.
113+
3. Simple is better than complex.
114+
4. Complex is better than complicated.
115+
"""
116+
117+
patch914575_nonascii_to1 = """
118+
1. Beautiful is better than ügly.
119+
3. Sımple is better than complex.
120+
4. Complicated is better than cömplex.
121+
5. Flat is better than nested.
122+
"""
123+
110124
patch914575_from2 = """
111125
\t\tLine 1: preceeded by from:[tt] to:[ssss]
112126
\t\tLine 2: preceeded by from:[sstt] to:[sssst]
@@ -223,6 +237,27 @@ def test_recursion_limit(self):
223237
new = [(i%2 and "K:%d" or "V:B:%d") % i for i in range(limit*2)]
224238
difflib.SequenceMatcher(None, old, new).get_opcodes()
225239

240+
def test_make_file_default_charset(self):
241+
html_diff = difflib.HtmlDiff()
242+
output = html_diff.make_file(patch914575_from1.splitlines(),
243+
patch914575_to1.splitlines())
244+
self.assertIn('content="text/html; charset=utf-8"', output)
245+
246+
def test_make_file_iso88591_charset(self):
247+
html_diff = difflib.HtmlDiff()
248+
output = html_diff.make_file(patch914575_from1.splitlines(),
249+
patch914575_to1.splitlines(),
250+
charset='iso-8859-1')
251+
self.assertIn('content="text/html; charset=iso-8859-1"', output)
252+
253+
def test_make_file_usascii_charset_with_nonascii_input(self):
254+
html_diff = difflib.HtmlDiff()
255+
output = html_diff.make_file(patch914575_nonascii_from1.splitlines(),
256+
patch914575_nonascii_to1.splitlines(),
257+
charset='us-ascii')
258+
self.assertIn('content="text/html; charset=us-ascii"', output)
259+
self.assertIn('&#305;mpl&#305;c&#305;t', output)
260+
226261

227262
class TestOutputFormat(unittest.TestCase):
228263
def test_tab_delimiter(self):

Lib/test/test_difflib_expect.html

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
<head>
88
<meta http-equiv="Content-Type"
9-
content="text/html; charset=ISO-8859-1" />
9+
content="text/html; charset=utf-8" />
1010
<title></title>
1111
<style type="text/css">
1212
table.diff {font-family:Courier; border:medium;}

Misc/NEWS

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,8 @@ Core and Builtins
1818
Library
1919
-------
2020

21+
- Issue #2052: Add charset parameter to HtmlDiff.make_file().
22+
2123
- Issue #23138: Fixed parsing cookies with absent keys or values in cookiejar.
2224
Patch by Demian Brecht.
2325

0 commit comments

Comments
 (0)