diff --git a/Lib/test/test_tools/msgfmt_data/.gitattributes b/Lib/test/test_tools/msgfmt_data/.gitattributes new file mode 100644 index 00000000000000..a3712e3a1f9dc2 --- /dev/null +++ b/Lib/test/test_tools/msgfmt_data/.gitattributes @@ -0,0 +1,2 @@ +file1_fr_crlf.po eol=crlf +file2_fr_lf.po eol=lf diff --git a/Lib/test/test_tools/msgfmt_data/file12_fr.json b/Lib/test/test_tools/msgfmt_data/file12_fr.json new file mode 100644 index 00000000000000..7df2f0c4c468d8 --- /dev/null +++ b/Lib/test/test_tools/msgfmt_data/file12_fr.json @@ -0,0 +1,32 @@ +[ + [ + "", + "Project-Id-Version: PACKAGE VERSION\nReport-Msgid-Bugs-To: \nPOT-Creation-Date: 2018-11-30 23:57+0100\nPO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\nLast-Translator: FULL NAME \nLanguage-Team: French\nLanguage: fr\nMIME-Version: 1.0\nContent-Type: text/plain; charset=UTF-8\nContent-Transfer-Encoding: 8bit\nPlural-Forms: nplurals=2; plural=(n > 1);\n" + ], + [ + "Bye...", + "Au revoir ..." + ], + [ + "Hello!", + "Bonjour !" + ], + [ + "It's over.", + "C'est termin\u00e9." 
+ ], + [ + [ + "{n} horse", + 0 + ], + "{n} cheval" + ], + [ + [ + "{n} horse", + 1 + ], + "{n} chevaux" + ] +] diff --git a/Lib/test/test_tools/msgfmt_data/file12_fr.mo b/Lib/test/test_tools/msgfmt_data/file12_fr.mo new file mode 100644 index 00000000000000..2a4c558b0ee639 Binary files /dev/null and b/Lib/test/test_tools/msgfmt_data/file12_fr.mo differ diff --git a/Lib/test/test_tools/msgfmt_data/file1_fr_crlf.json b/Lib/test/test_tools/msgfmt_data/file1_fr_crlf.json new file mode 100644 index 00000000000000..1cb8fccc66535a --- /dev/null +++ b/Lib/test/test_tools/msgfmt_data/file1_fr_crlf.json @@ -0,0 +1,24 @@ +[ + [ + "", + "Project-Id-Version: PACKAGE VERSION\nReport-Msgid-Bugs-To: \nPOT-Creation-Date: 2018-11-30 23:46+0100\nPO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\nLast-Translator: FULL NAME \nLanguage-Team: French\nLanguage: fr\nMIME-Version: 1.0\nContent-Type: text/plain; charset=UTF-8\nContent-Transfer-Encoding: 8bit\nPlural-Forms: nplurals=2; plural=(n > 1);\n" + ], + [ + "Hello!", + "Bonjour !" 
+ ], + [ + [ + "{n} horse", + 0 + ], + "{n} cheval" + ], + [ + [ + "{n} horse", + 1 + ], + "{n} chevaux" + ] +] diff --git a/Lib/test/test_tools/msgfmt_data/file1_fr_crlf.mo b/Lib/test/test_tools/msgfmt_data/file1_fr_crlf.mo new file mode 100644 index 00000000000000..8dd521283f1fa7 Binary files /dev/null and b/Lib/test/test_tools/msgfmt_data/file1_fr_crlf.mo differ diff --git a/Lib/test/test_tools/msgfmt_data/file1_fr_crlf.po b/Lib/test/test_tools/msgfmt_data/file1_fr_crlf.po new file mode 100644 index 00000000000000..148ace19db4bb2 --- /dev/null +++ b/Lib/test/test_tools/msgfmt_data/file1_fr_crlf.po @@ -0,0 +1,26 @@ +# Example of French translations, crlf end of lines +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2018-11-30 23:46+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: French\n" +"Language: fr\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; plural=(n > 1);\n" + +#: file1.py:6 +msgid "Hello!" +msgstr "Bonjour !" + +#: file1.py:7 +#, python-brace-format +msgid "{n} horse" +msgid_plural "{n} horses" +msgstr[0] "{n} cheval" +msgstr[1] "{n} chevaux" diff --git a/Lib/test/test_tools/msgfmt_data/file2_fr_lf.json b/Lib/test/test_tools/msgfmt_data/file2_fr_lf.json new file mode 100644 index 00000000000000..e306d3f968825f --- /dev/null +++ b/Lib/test/test_tools/msgfmt_data/file2_fr_lf.json @@ -0,0 +1,14 @@ +[ + [ + "", + "Project-Id-Version: PACKAGE VERSION\nReport-Msgid-Bugs-To: \nPOT-Creation-Date: 2018-11-30 23:57+0100\nPO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\nLast-Translator: FULL NAME \nLanguage-Team: French\nLanguage: fr\nMIME-Version: 1.0\nContent-Type: text/plain; charset=UTF-8\nContent-Transfer-Encoding: 8bit\nPlural-Forms: nplurals=2; plural=(n > 1);\n" + ], + [ + "Bye...", + "Au revoir ..." + ], + [ + "It's over.", + "C'est termin\u00e9." 
+ ] +] diff --git a/Lib/test/test_tools/msgfmt_data/file2_fr_lf.mo b/Lib/test/test_tools/msgfmt_data/file2_fr_lf.mo new file mode 100644 index 00000000000000..f85465b0d045e2 Binary files /dev/null and b/Lib/test/test_tools/msgfmt_data/file2_fr_lf.mo differ diff --git a/Lib/test/test_tools/msgfmt_data/file2_fr_lf.po b/Lib/test/test_tools/msgfmt_data/file2_fr_lf.po new file mode 100644 index 00000000000000..af8db8b3335f50 --- /dev/null +++ b/Lib/test/test_tools/msgfmt_data/file2_fr_lf.po @@ -0,0 +1,23 @@ +# Example of French translations, lf end of lines +# +msgid "" +msgstr "" +"Project-Id-Version: PACKAGE VERSION\n" +"Report-Msgid-Bugs-To: \n" +"POT-Creation-Date: 2018-11-30 23:57+0100\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: French\n" +"Language: fr\n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=UTF-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Plural-Forms: nplurals=2; plural=(n > 1);\n" + +#: file2.py:6 +msgid "It's over." +msgstr "C'est terminé." + +#: file2.py:7 +msgid "Bye..." +msgstr "Au revoir ..." diff --git a/Lib/test/test_tools/test_msgfmt.py b/Lib/test/test_tools/test_msgfmt.py index 7be606bbff606a..407779ca110b78 100644 --- a/Lib/test/test_tools/test_msgfmt.py +++ b/Lib/test/test_tools/test_msgfmt.py @@ -1,6 +1,17 @@ -"""Tests for the Tools/i18n/msgfmt.py tool.""" +"""Tests for the Tools/i18n/msgfmt.py tool. +These tests use data files (po and mo) in the msgfmt_data folder. 
+The mo files can be generated (if the po file changes, or if msgfmt.py +slightly changes its output format) by using the --snapshot-update flag +with this script: + + python test_msgfmt.py --snapshot-update +""" + +import filecmp import json +import os.path +import shutil import struct import sys import unittest @@ -11,7 +22,6 @@ from test.support.script_helper import assert_python_failure, assert_python_ok from test.test_tools import imports_under_tool, skip_if_missing, toolsdir - skip_if_missing('i18n') data_dir = (Path(__file__).parent / 'msgfmt_data').resolve() @@ -22,6 +32,10 @@ import msgfmt +def compile_many_messages(mo_file, *po_files): + assert_python_ok(msgfmt_py, '-o', mo_file, *po_files) + + def compile_messages(po_file, mo_file): assert_python_ok(msgfmt_py, '-o', mo_file, po_file) @@ -145,12 +159,6 @@ def test_generic_syntax_error(self): class POParserTest(unittest.TestCase): - @classmethod - def tearDownClass(cls): - # msgfmt uses a global variable to store messages, - # clear it after the tests. - msgfmt.MESSAGES.clear() - def test_strings(self): # Test that the PO parser correctly handles and unescape # strings in the PO file. @@ -202,9 +210,7 @@ def test_strings(self): # check the result. 
po = f'msgid {po_string}\nmsgstr "translation"' Path('messages.po').write_text(po) - # Reset the global MESSAGES dictionary - msgfmt.MESSAGES.clear() - msgfmt.make('messages.po', 'messages.mo') + msgfmt.make(('messages.po',), 'messages.mo') with open('messages.mo', 'rb') as f: actual = GNUTranslations(f) @@ -238,7 +244,7 @@ def test_strings(self): # Reset the global MESSAGES dictionary msgfmt.MESSAGES.clear() with self.assertRaises(Exception): - msgfmt.make('messages.po', 'messages.mo') + msgfmt.make(('messages.po',), 'messages.mo') class CLITest(unittest.TestCase): @@ -271,20 +277,83 @@ def test_nonexistent_file(self): assert_python_failure(msgfmt_py, 'nonexistent.po') +class MultiInputTest(unittest.TestCase): + """Tests for multiple input files + """ + + def test_both_with_outputfile(self): + """Test script with -o option and 2 input files + + The current behaviour is to merge entries having distinct ids + and keep last one if the same id occurs in multiple files. + + Here the first file has Windows endings (crlf) while second has + Unix endings (lf) + """ + with temp_cwd(None): + assert_python_ok(msgfmt_py, '-o', 'file12.mo', + data_dir / 'file1_fr_crlf.po', + data_dir / 'file2_fr_lf.po') + self.assertTrue(filecmp.cmp(data_dir / 'file12_fr.mo', + 'file12.mo')) + + def test_both_without_outputfile(self): + """Test script without -o option and 2 input files""" + + with temp_cwd(None): + shutil.copy(data_dir / 'file1_fr_crlf.po', '.') + shutil.copy(data_dir / 'file2_fr_lf.po', '.') + assert_python_ok(msgfmt_py, 'file1_fr_crlf.po', 'file2_fr_lf.po') + self.assertTrue(filecmp.cmp(data_dir / 'file1_fr_crlf.mo', + 'file1_fr_crlf.mo')) + self.assertTrue(filecmp.cmp(data_dir / 'file2_fr_lf.mo', + 'file2_fr_lf.mo')) + + +class PONamesTest(unittest.TestCase): + def test_no_extension(self): + with temp_cwd(None): + shutil.copy(data_dir / 'file1_fr_crlf.po', 'file1.fr.po') + assert_python_ok(msgfmt_py, 'file1.fr') + self.assertTrue(os.path.exists('file1.fr.mo')) + + def 
test_wrong_extension(self): + with temp_cwd(None): + shutil.copy(data_dir / 'file1_fr_crlf.po', 'file1_fr.pox') + assert_python_failure(msgfmt_py, 'file1_fr.pox') + self.assertFalse(os.path.exists('file1_fr.mo')) + self.assertFalse(os.path.exists('file1_fr.pox.mo')) + + @unittest.skipUnless(sys.platform.startswith("win"), "uppercase on Windows") + def test_MAJ_on_Windows(self): + with temp_cwd(None): + shutil.copy(data_dir / 'file1_fr_crlf.po', 'File1.PO') + assert_python_ok(msgfmt_py, 'FIle1.Po') + self.assertTrue(os.path.exists('file1.mo')) + + +def make_message_files(mo_file, *po_files): + compile_many_messages(mo_file, *po_files) + # Create a human-readable JSON file which is + # easier to review than the binary .mo file. + with open(mo_file, 'rb') as f: + translations = GNUTranslations(f) + catalog_file = mo_file.with_suffix('.json') + with open(catalog_file, 'w') as f: + data = translations._catalog.items() + data = sorted(data, key=lambda x: (isinstance(x[0], tuple), x[0])) + json.dump(data, f, indent=4) + f.write('\n') + + def update_catalog_snapshots(): for po_file in data_dir.glob('*.po'): mo_file = po_file.with_suffix('.mo') - compile_messages(po_file, mo_file) - # Create a human-readable JSON file which is - # easier to review than the binary .mo file. 
- with open(mo_file, 'rb') as f: - translations = GNUTranslations(f) - catalog_file = po_file.with_suffix('.json') - with open(catalog_file, 'w') as f: - data = translations._catalog.items() - data = sorted(data, key=lambda x: (isinstance(x[0], tuple), x[0])) - json.dump(data, f, indent=4) - f.write('\n') + make_message_files(mo_file, po_file) + # special processing for file12_fr.mo which results from 2 input files + make_message_files(data_dir / 'file12_fr.mo', + data_dir / 'file1_fr_crlf.po', + data_dir / 'file2_fr_lf.po') if __name__ == '__main__': diff --git a/Misc/NEWS.d/next/Tools-Demos/2018-12-05-20-46-10.bpo-35335.qtIUBx.rst b/Misc/NEWS.d/next/Tools-Demos/2018-12-05-20-46-10.bpo-35335.qtIUBx.rst new file mode 100644 index 00000000000000..8d7cabef9064c3 --- /dev/null +++ b/Misc/NEWS.d/next/Tools-Demos/2018-12-05-20-46-10.bpo-35335.qtIUBx.rst @@ -0,0 +1,2 @@ +:program:`msgfmt.py` is now able to merge more than one po file into a compiled mo +file. When an entry exists in more than one input file, the last file wins. diff --git a/Tools/i18n/msgfmt.py b/Tools/i18n/msgfmt.py index cd5f1ed9f3e268..475caccdf598d6 100755 --- a/Tools/i18n/msgfmt.py +++ b/Tools/i18n/msgfmt.py @@ -6,15 +6,21 @@ This program converts a textual Uniforum-style message catalog (.po file) into a binary GNU catalog (.mo file). This is essentially the same function as the GNU msgfmt program, however, it is a simpler implementation. Currently it -does not handle plural forms but it does handle message contexts. +handles plural forms and message contexts, but does not generate a hash table. -Usage: msgfmt.py [OPTIONS] filename.po +Usage: msgfmt.py [OPTIONS] filename.po ... Options: -o file --output-file=file Specify the output file to write to. If omitted, output will go to a - file named filename.mo (based off the input file name). + file named filename.mo (based off the input file name(s)). 
+ If more than one input file is given, and if an output file is passed + with -o option, then all the input files are merged. If keys are + repeated (common for "" key for the header) the one from the last file is used. + If more than one input file is given, and no -o option is present, then + every input file is compiled to its corresponding mo file (same name + with mo replacing po) -h --help @@ -47,29 +53,27 @@ def usage(code, msg=''): sys.exit(code) -def add(ctxt, id, str, fuzzy): +def add(ctxt, id, str, fuzzy, messages): "Add a non-fuzzy translation to the dictionary." - global MESSAGES if not fuzzy and str: if ctxt is None: - MESSAGES[id] = str + messages[id] = str else: - MESSAGES[b"%b\x04%b" % (ctxt, id)] = str + messages[b"%b\x04%b" % (ctxt, id)] = str -def generate(): +def generate(messages): "Return the generated output." - global MESSAGES # the keys are sorted in the .mo file - keys = sorted(MESSAGES.keys()) + keys = sorted(messages.keys()) offsets = [] ids = strs = b'' for id in keys: # For each string, we need size and file offset. Each string is NUL # terminated; the NUL does not count into the size. - offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id]))) + offsets.append((len(ids), len(id), len(strs), len(messages[id]))) ids += id + b'\0' - strs += MESSAGES[id] + b'\0' + strs += messages[id] + b'\0' output = '' # The header is 7 32-bit unsigned integers. We don't use hash tables, so # the keys start right after the index tables. @@ -98,18 +102,48 @@ def generate(): return output -def make(filename, outfile): +def make(filenames, outfile): + """ Compiles one or more po file(s). + + filenames is an iterable of strings representing the input file(s). + outfile is a string for the name of an output file or None. + + If it is not None, the output file receives a merge of the input files. + If it is None, then for each file from filenames the name of the output + file is obtained by replacing the po extension with mo. 
+ """ + if outfile is None: + # each PO file generates its corresponding MO file + for filename in filenames: + messages = {} + infile = process(filename, messages) + outfile = os.path.splitext(infile)[0] + '.mo' + output = generate(messages) + writefile(outfile, output) + else: + # all PO files are combined into one single output file + messages = {} + for filename in filenames: + process(filename, messages) + output = generate(messages) + writefile(outfile, output) + + +def process(infile, messages): + """Extracts the translations from a PO file into a dict + + Params: + infile: the path to a PO file - the .po extension is inferred if absent + messages: a dict that will be fed with the translations + + Returns: the actual input file path with a .po extension + """ ID = 1 STR = 2 CTXT = 3 - # Compute .mo name from .po name and arguments - if filename.endswith('.po'): - infile = filename - else: - infile = filename + '.po' - if outfile is None: - outfile = os.path.splitext(infile)[0] + '.mo' + if not os.path.normcase(infile).endswith('.po'): + infile += '.po' try: with open(infile, 'rb') as f: @@ -140,7 +174,7 @@ def make(filename, outfile): lno += 1 # If we get a comment line after a msgstr, this is a new entry if l[0] == '#' and section == STR: - add(msgctxt, msgid, msgstr, fuzzy) + add(msgctxt, msgid, msgstr, fuzzy, messages) section = msgctxt = None fuzzy = 0 # Record a fuzzy mark @@ -152,7 +186,7 @@ def make(filename, outfile): # Now we are in a msgid or msgctxt section, output previous section if l.startswith('msgctxt'): if section == STR: - add(msgctxt, msgid, msgstr, fuzzy) + add(msgctxt, msgid, msgstr, fuzzy, messages) section = CTXT l = l[7:] msgctxt = b'' @@ -169,7 +203,7 @@ def make(filename, outfile): charset = p.parsestr(msgstr.decode(encoding)).get_content_charset() if charset: encoding = charset - add(msgctxt, msgid, msgstr, fuzzy) + add(msgctxt, msgid, msgstr, fuzzy, messages) msgctxt = None section = ID l = l[5:] @@ -219,11 +253,10 @@ def 
make(filename, outfile): sys.exit(1) # Add last entry if section == STR: - add(msgctxt, msgid, msgstr, fuzzy) - - # Compute output - output = generate() + add(msgctxt, msgid, msgstr, fuzzy, messages) + return infile +def writefile(outfile, output): try: with open(outfile,"wb") as f: f.write(output) @@ -253,9 +286,7 @@ def main(): print('No input file given', file=sys.stderr) print("Try `msgfmt --help' for more information.", file=sys.stderr) return - - for filename in args: - make(filename, outfile) + make(args, outfile) if __name__ == '__main__':