From d4eac5339362d66edce895dc8f5230fd3d412e0d Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:09:33 +0300 Subject: [PATCH 01/25] Add definitions for MessageConcatenation and MessageMerge classes in pybabel * Define the MessageConcatenation class to mimic the functionality of GNU gettext's msgcat * Define the MessageMerge class to mimic the functionality of GNU gettext's msgmerge * Implement placeholders for the main interface functions --- babel/messages/frontend.py | 198 +++++++++++++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index f63dd9ded..a92697673 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -887,6 +887,200 @@ def run(self): return +class MessageConcatenation(CommandMixin): + description = 'concatenates and merges the specified PO files' + user_options = [ + ('input-files', None, ''), + ('files-from=', 'f', ''), + ('directory=', 'D', ''), + ('output-file=', 'o', ''), + ('less-than=', '<', ''), + ('more-than=', '>', ''), + ('unique', 'u', ''), + ('properties-input', 'P', ''), + ('stringtable-input', None, ''), + ('to-code=','t', ''), + ('use-first', None, ''), + ('lang=', None, ''), + ('color=', None, ''), + ('style=', None, ''), + ('no-escape', 'e', ''), + ('escape', 'E', ''), + ('force-po', None, ''), + ('indent', 'i', ''), + ('no-location', None, ''), + ('add-location', 'n', ''), + ('strict', None, ''), + ('properties-output', None, ''), + ('stringtable-output', None, ''), + ('width=', 'w', ''), + ('no-wrap', None, ''), + ('sort-output', 's', ''), + ('sort-by-file', 'F', ''), + ] + + as_args='input-files' + + boolean_options = [ + 'unique', + 'properties-input', + 'stringtable-input', + 'use-first', + 'no-escape', + 'escape', + 'force-po', + 'indent', + 'no-location', + 'add-location', + 'strict', + 'properties-output', + 'stringtable-output', + 'no-wrap', + 'sort-output', + 'sort-by-file', + ] + + option_choices = { + 
'color': ('always', 'never', 'auto', 'html'), + } + + def initialize_options(self): + self.input_files = None + self.files_from = None + self.directory = None + self.output_file = None + self.less_than = None + self.more_than = None + self.unique = None + self.properties_input = None + self.stringtable_input = None + self.to_code = None + self.use_first = None + self.lang = None + self.color = None + self.color = None + self.style = None + self.no_escape = None + self.escape = None + self.force_po = None + self.indent = None + self.no_location = None + self.add_location = None + self.strict = None + self.properties_output = None + self.stringtable_output = None + self.width = None + self.no_wrap = None + self.sort_output = None + self.sort_by_file = None + + def finalize_options(self): + pass + + def run(self): + pass + + +class MessageMerge(CommandMixin): + description='combines two Uniforum-style PO files into one' + user_options=[ + ('input-files', None, ''), + ('directory=', 'D', ''), + ('compendium=', 'C', ''), + ('update', 'U', ''), + ('output-file=', 'o', ''), + ('backup=', None, ''), + ('suffix=', None, ''), + ('multi-domain', 'm', ''), + ('for-msgfmt', None, ''), + ('no-fuzzy-matching', 'N', ''), + ('previous', None, ''), + ('properties-input', 'P', ''), + ('stringtable-input', None, ''), + ('lang=', None, ''), + ('color=', None, ''), + ('style=', None, ''), + ('no-escape', 'e', ''), + ('escape', 'E', ''), + ('force-po', None, ''), + ('indent', 'i', ''), + ('no-location', None, ''), + ('add-location', 'n', ''), + ('strict', None, ''), + ('properties-output', None, ''), + ('stringtable-output', None, ''), + ('width=', 'w', ''), + ('no-wrap', None, ''), + ('sort-output', 's', ''), + ('sort-by-file', 'F', ''), + ] + + as_args='input-files' + + boolean_options = [ + 'update', + 'multi-domain', + 'for-msgfmt', + 'no-fuzzy-matching', + 'previous' + 'properties-input', + 'stringtable-input', + 'no-escape', + 'escape', + 'force-po', + 'indent', + 'no-location', + 
'add-location', + 'strict', + 'properties-output', + 'stringtable-output', + 'no-wrap', + 'sort-output', + 'sort-by-file', + ] + + option_choices = { + 'color': ('always', 'never', 'auto', 'html'), + } + + def initialize_options(self): + self.input_files = None + self.directory = None + self.compendium = None + self.update = None + self.output_file = None + self.backup = None + self.suffix = None + self.multi_domain = None + self.for_msgfmt = None + self.no_fuzzy_matching = None + self.previous = None + self.properties_input = None + self.stringtable_input = None + self.lang = None + self.color = None + self.style = None + self.no_escape = None + self.escape = None + self.force_po = None + self.indent = None + self.no_location = None + self.add_location = None + self.strict = None + self.properties_output = None + self.stringtable_output = None + self.width = None + self.no_wrap = None + self.sort_output = None + self.sort_by_file = None + + def finalize_options(self): + pass + + def run(self): + pass + + class CommandLineInterface: """Command-line interface. 
@@ -901,6 +1095,8 @@ class CommandLineInterface: 'extract': 'extract messages from source files and generate a POT file', 'init': 'create new message catalogs from a POT file', 'update': 'update existing message catalogs from a POT file', + 'msgcat': 'concatenates and merges the specified PO files', + 'msgmerge': 'combines two Uniforum-style PO files into one', } command_classes = { @@ -908,6 +1104,8 @@ class CommandLineInterface: 'extract': ExtractMessages, 'init': InitCatalog, 'update': UpdateCatalog, + 'msgcat': MessageConcatenation, + 'msgmerge': MessageMerge, } log = None # Replaced on instance level From a5f6295ff9ce3319e0a71bce695273a5a647e5e7 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:18:25 +0300 Subject: [PATCH 02/25] Implement basic logic for concatenating catalogs * Add validation for main msgcat options - input_files, output_file * Temporarily set use_first option to true to avoid handling cases with different translations for the same messages --- babel/messages/frontend.py | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index a92697673..ff6bf3b43 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -975,10 +975,37 @@ def initialize_options(self): self.sort_by_file = None def finalize_options(self): - pass + if not self.input_files: + raise OptionError('you must specify the input files') + if not self.output_file: + raise OptionError('you must specify the output file') + + # временно всегда используется первый перевод + if self.use_first is None: + self.use_first = True def run(self): - pass + catalog = Catalog(fuzzy=False) + + for filenum, filename in enumerate(self.input_files): + with open(filename, 'r') as pofile: + template = read_po(pofile) + + if filenum == 0: + catalog.update(template) + continue + + for message in template: + if not message.id: + continue + + if message.id in catalog and 
catalog[message.id].string != message.string and not self.use_first: + raise NotImplementedError() + + catalog[message.id] = message + + with open(self.output_file, 'wb') as outfile: + write_po(outfile, catalog) class MessageMerge(CommandMixin): From 6d3212b0203e419ec91aeb2ce83f30b2fe5ae150 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:32:07 +0300 Subject: [PATCH 03/25] Add options: unique, less-than, more-than, no-wrap, and width * Implement options unique, less-than, and more-than, and validate their dependencies with each other. * These options specify which messages to include in the output file. * Implement and validate options no-wrap and width. * Create a helper function _prepare that collects data on message occurrences across different catalogs. * Mark options that are already implemented # --- babel/messages/frontend.py | 64 ++++++++++++++++++++++++++++---------- 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index ff6bf3b43..8483bfd95 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -21,6 +21,7 @@ import sys import tempfile import warnings +from collections import OrderedDict, defaultdict from configparser import RawConfigParser from io import StringIO from typing import Any, BinaryIO, Iterable, Literal @@ -945,20 +946,20 @@ class MessageConcatenation(CommandMixin): } def initialize_options(self): - self.input_files = None + self.input_files = None # self.files_from = None self.directory = None - self.output_file = None - self.less_than = None - self.more_than = None - self.unique = None + self.output_file = None # + self.less_than = None # + self.more_than = 0 # + self.unique = False # self.properties_input = None self.stringtable_input = None self.to_code = None - self.use_first = None + # временно всегда используется первый перевод + self.use_first = True #~ self.lang = None self.color = None - self.color = None self.style = None 
self.no_escape = None self.escape = None @@ -969,8 +970,8 @@ def initialize_options(self): self.strict = None self.properties_output = None self.stringtable_output = None - self.width = None - self.no_wrap = None + self.width = None # + self.no_wrap = None # self.sort_output = None self.sort_by_file = None @@ -980,21 +981,44 @@ def finalize_options(self): if not self.output_file: raise OptionError('you must specify the output file') - # временно всегда используется первый перевод + if self.unique is None: + self.unique = False if self.use_first is None: self.use_first = True + if self.no_wrap and self.width: + raise OptionError("'--no-wrap' and '--width' are mutually exclusive") + if not self.no_wrap and not self.width: + self.width = 76 + elif self.width is not None: + self.width = int(self.width) + + if self.more_than is None: + self.more_than = 0 + else: + self.more_than = int(self.more_than) + if self.less_than is not None: + self.less_than = int(self.less_than) + if self.unique: + self.less_than = 2 + + def _prepare(self): + self.message_count = defaultdict(int) + + for filename in self.input_files: + with open(filename, 'r') as pofile: + template = read_po(pofile) + for message in template: + self.message_count[message.id] += 1 + def run(self): catalog = Catalog(fuzzy=False) + self._prepare() - for filenum, filename in enumerate(self.input_files): + for filename in self.input_files: with open(filename, 'r') as pofile: template = read_po(pofile) - if filenum == 0: - catalog.update(template) - continue - for message in template: if not message.id: continue @@ -1002,10 +1026,16 @@ def run(self): if message.id in catalog and catalog[message.id].string != message.string and not self.use_first: raise NotImplementedError() - catalog[message.id] = message + message_count = self.message_count[message.id] + if message_count > self.more_than and (self.less_than is None or message_count < self.less_than): + catalog[message.id] = message with open(self.output_file, 'wb') 
as outfile: - write_po(outfile, catalog) + write_po( + outfile, + catalog, + width=self.width + ) class MessageMerge(CommandMixin): From 869576946d4f19dea7efea50065c613b8350beaa Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:32:34 +0300 Subject: [PATCH 04/25] Implement basic msgmerge logic for working with a compendium * Implement basic functionality of msgmerge * Use and validate the main options: input-files and output-file * Use and validate options: no-wrap and width * Use and validate options: sort-output and sort-by-file, both in msgmerge and msgcat * In the basic version of working with a compendium, a translation for a message is taken from the compendium only if the resulting catalog lacks a translation. --- babel/messages/frontend.py | 79 ++++++++++++++++++++++++++++++++------ 1 file changed, 67 insertions(+), 12 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 8483bfd95..4d60f5e27 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -972,8 +972,8 @@ def initialize_options(self): self.stringtable_output = None self.width = None # self.no_wrap = None # - self.sort_output = None - self.sort_by_file = None + self.sort_output = False # + self.sort_by_file = False # def finalize_options(self): if not self.input_files: @@ -1002,6 +1002,11 @@ def finalize_options(self): if self.unique: self.less_than = 2 + if self.sort_output is None: + self.sort_output = False + if self.sort_by_file is None: + self.sort_by_file = True + def _prepare(self): self.message_count = defaultdict(int) @@ -1034,7 +1039,9 @@ def run(self): write_po( outfile, catalog, - width=self.width + width=self.width, + sort_by_file=self.sort_by_file, + sort_output=self.sort_output, ) @@ -1101,11 +1108,11 @@ class MessageMerge(CommandMixin): } def initialize_options(self): - self.input_files = None + self.input_files = None # self.directory = None - self.compendium = None + self.compendium = None #~ self.update = None 
- self.output_file = None + self.output_file = None # self.backup = None self.suffix = None self.multi_domain = None @@ -1126,16 +1133,64 @@ def initialize_options(self): self.strict = None self.properties_output = None self.stringtable_output = None - self.width = None - self.no_wrap = None - self.sort_output = None - self.sort_by_file = None + self.width = None # + self.no_wrap = None # + self.sort_output = False # + self.sort_by_file = False # def finalize_options(self): - pass + if len(self.input_files) != 2: + raise OptionError('must be two po files') + if not self.output_file: + raise OptionError('you must specify the output file') + + if self.no_wrap and self.width: + raise OptionError("'--no-wrap' and '--width' are mutually exclusive") + if not self.no_wrap and not self.width: + self.width = 76 + elif self.width is not None: + self.width = int(self.width) + + if self.sort_output is None: + self.sort_output = False + if self.sort_by_file is None: + self.sort_by_file = True def run(self): - pass + def_file, ref_file = self.input_files + with open(def_file, 'r') as pofile: + def_catalog = read_po(pofile) + + with open(ref_file, 'r') as pofile: + ref_catalog = read_po(pofile) + + ref_catalog.mime_headers = def_catalog.mime_headers + ref_catalog.header_comment = def_catalog.header_comment + + for message in def_catalog: + if not message.id: + continue + if message.id in ref_catalog: + ref_catalog[message.id].string = message.string + else: + ref_catalog.obsolete[message.id] = message + + if self.compendium: + with open(self.compendium, 'r') as pofile: + compendium_catalog = read_po(pofile) + for message in compendium_catalog: + if message.id in ref_catalog and not ref_catalog[message.id].string: + ref_catalog[message.id].string = message.string + + ref_catalog.fuzzy = False + with open(self.output_file, 'wb') as outfile: + write_po( + outfile, + ref_catalog, + width=self.width, + sort_by_file=self.sort_by_file, + sort_output=self.sort_output, + ) class 
CommandLineInterface: From 47748893167501c3863f6736fdb61e2750c3ac06 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:33:10 +0300 Subject: [PATCH 05/25] Write tests for msgcat * Create basic tests to verify the functionality of msgcat, specifically the concatenation of catalogs, merging of message flags, locations, etc. * Remove the validation of options sort-output, sort-by-file, unique, use-first, as they are initialized in the function initialize_options. --- babel/messages/frontend.py | 13 +- tests/messages/frontend/test_concat_merge.py | 239 +++++++++++++++++++ 2 files changed, 240 insertions(+), 12 deletions(-) create mode 100644 tests/messages/frontend/test_concat_merge.py diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 4d60f5e27..c3ee491e0 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -926,7 +926,6 @@ class MessageConcatenation(CommandMixin): 'unique', 'properties-input', 'stringtable-input', - 'use-first', 'no-escape', 'escape', 'force-po', @@ -981,11 +980,6 @@ def finalize_options(self): if not self.output_file: raise OptionError('you must specify the output file') - if self.unique is None: - self.unique = False - if self.use_first is None: - self.use_first = True - if self.no_wrap and self.width: raise OptionError("'--no-wrap' and '--width' are mutually exclusive") if not self.no_wrap and not self.width: @@ -1002,11 +996,6 @@ def finalize_options(self): if self.unique: self.less_than = 2 - if self.sort_output is None: - self.sort_output = False - if self.sort_by_file is None: - self.sort_by_file = True - def _prepare(self): self.message_count = defaultdict(int) @@ -1017,7 +1006,7 @@ def _prepare(self): self.message_count[message.id] += 1 def run(self): - catalog = Catalog(fuzzy=False) + catalog = Catalog() self._prepare() for filename in self.input_files: diff --git a/tests/messages/frontend/test_concat_merge.py b/tests/messages/frontend/test_concat_merge.py new file mode 100644 
index 000000000..e8f8355ae --- /dev/null +++ b/tests/messages/frontend/test_concat_merge.py @@ -0,0 +1,239 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. + +from __future__ import annotations + +import os +import unittest +from datetime import datetime + +import pytest +from freezegun import freeze_time + +from babel import __version__ as VERSION +from babel.dates import format_datetime +from babel.messages import Catalog, frontend, pofile +from babel.messages.frontend import OptionError +from babel.util import LOCALTZ +from tests.messages.consts import TEST_PROJECT_DISTRIBUTION_DATA, data_dir, i18n_dir +from tests.messages.utils import Distribution + + +class ConcatanationMessagesTestCase(unittest.TestCase): + + def setUp(self): + self.olddir = os.getcwd() + os.chdir(data_dir) + + self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + self.cmd = frontend.MessageConcatenation(self.dist) + self.cmd.initialize_options() + + self.temp1 = f'{i18n_dir}/msgcat_temp1.po' + self.temp2 = f'{i18n_dir}/msgcat_temp2.po' + self.output_file = f'{i18n_dir}/msgcat.po' + + with open(self.temp1, 'wb') as file: + catalog = Catalog() + catalog.add('other1', string='Other 1', locations=[('simple.py', 1)], flags=['flag1000']) + catalog.add('other2', string='Other 2', locations=[('simple.py', 10)]) + catalog.add('same', string='Same', locations=[('simple.py', 100)], flags=['flag1', 'flag1.2']) + catalog.add('almost_same', string='Almost same', locations=[('simple.py', 1000)], 
flags=['flag2']) + pofile.write_po(file, catalog) + + with open(self.temp2, 'wb') as file: + catalog = Catalog() + catalog.add('other3', string='Other 3', locations=[('hard.py', 1)]) + catalog.add('other4', string='Other 4', locations=[('hard.py', 10)]) + catalog.add('almost_same', string='A bit same', locations=[('hard.py', 1000)], flags=['flag3']) + catalog.add('same', string='Same', locations=[('hard.py', 100)], flags=['flag4']) + pofile.write_po(file, catalog) + + def tearDown(self): + for file in [self.temp1, self.temp2, self.output_file]: + if os.path.isfile(file): + os.unlink(file) + + def test_no_input_files(self): + with pytest.raises(OptionError): + self.cmd.finalize_options() + + def test_no_output_file(self): + self.cmd.input_files = ['project/i18n/messages.pot'] + with pytest.raises(OptionError): + self.cmd.finalize_options() + + @freeze_time("1994-11-11") + def test_default(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#: simple.py:1 +#, flag1000 +msgid "other1" +msgstr "Other 1" + +#: simple.py:10 +msgid "other2" +msgstr "Other 2" + +#: hard.py:100 simple.py:100 +#, flag1, flag1.2, flag4 +msgid "same" +msgstr "Same" + +#: hard.py:1000 simple.py:1000 +#, flag2, flag3 +msgid "almost_same" +msgstr "Almost same" + +#: hard.py:1 +msgid "other3" +msgstr "Other 3" + +#: hard.py:10 +msgid "other4" +msgstr "Other 4" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + @freeze_time("1994-11-11") + def test_unique(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + self.cmd.unique = True + + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#: simple.py:1 +#, flag1000 +msgid "other1" +msgstr "Other 1" + +#: simple.py:10 +msgid "other2" +msgstr "Other 2" + +#: hard.py:1 +msgid "other3" +msgstr "Other 3" + +#: hard.py:10 +msgid "other4" +msgstr "Other 4" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + self.cmd.less_than = 2 + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + @freeze_time("1994-11-11") + def test_more_than(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + self.cmd.more_than = 1 + + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. 
+# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +#: hard.py:100 simple.py:100 +#, flag1, flag1.2, flag4 +msgid "same" +msgstr "Same" + +#: hard.py:1000 simple.py:1000 +#, flag2, flag3 +msgid "almost_same" +msgstr "Almost same" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content From 5a445c4823f0f61d747b0d75ee0f16110690a58f Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:33:34 +0300 Subject: [PATCH 06/25] Write tests for msgmerge * Create basic tests to verify the functionality of msgmerge, specifically the merging of messages and their integration with a compendium. * Remove the definition of sort-output and sort-by-file, and add an additional check for input-files. 
--- babel/messages/frontend.py | 8 +- tests/messages/frontend/test_concat_merge.py | 148 +++++++++++++++++++ 2 files changed, 150 insertions(+), 6 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index c3ee491e0..5ef12334b 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1128,7 +1128,7 @@ def initialize_options(self): self.sort_by_file = False # def finalize_options(self): - if len(self.input_files) != 2: + if not self.input_files or len(self.input_files) != 2: raise OptionError('must be two po files') if not self.output_file: raise OptionError('you must specify the output file') @@ -1140,11 +1140,6 @@ def finalize_options(self): elif self.width is not None: self.width = int(self.width) - if self.sort_output is None: - self.sort_output = False - if self.sort_by_file is None: - self.sort_by_file = True - def run(self): def_file, ref_file = self.input_files with open(def_file, 'r') as pofile: @@ -1167,6 +1162,7 @@ def run(self): if self.compendium: with open(self.compendium, 'r') as pofile: compendium_catalog = read_po(pofile) + for message in compendium_catalog: if message.id in ref_catalog and not ref_catalog[message.id].string: ref_catalog[message.id].string = message.string diff --git a/tests/messages/frontend/test_concat_merge.py b/tests/messages/frontend/test_concat_merge.py index e8f8355ae..c163ddb62 100644 --- a/tests/messages/frontend/test_concat_merge.py +++ b/tests/messages/frontend/test_concat_merge.py @@ -237,3 +237,151 @@ def test_more_than(self): with open(self.output_file, 'r') as f: actual_content = f.read() assert expected_content == actual_content + + +class MergeMessagesTestCase(unittest.TestCase): + + @freeze_time("1994-11-11") + def setUp(self): + self.olddir = os.getcwd() + os.chdir(data_dir) + + self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + self.cmd = frontend.MessageMerge(self.dist) + self.cmd.initialize_options() + + self.temp_def = f'{i18n_dir}/msgmerge_def.po' + 
self.temp_ref = f'{i18n_dir}/msgmerge_ref.pot' + self.compendium = f'{i18n_dir}/compenidum.po' + self.output_file = f'{i18n_dir}/msgmerge.po' + + with open(self.temp_ref, 'wb') as file: + catalog = Catalog() + for word in ['word1', 'word2', 'word3', 'word4']: + catalog.add(word) + pofile.write_po(file, catalog) + + with open(self.temp_def, 'wb') as file: + catalog = Catalog() + catalog.add('word1', string='Word 1') + catalog.add('word2', string='Word 2') + catalog.add('word3') + pofile.write_po(file, catalog) + + with open(self.compendium, 'wb') as file: + catalog = Catalog() + catalog.add('word4', string='Word 4') + catalog.add('word5', string='Word 5') + pofile.write_po(file, catalog) + + def tearDown(self): + for file in [self.temp_def, self.temp_ref, self.compendium, self.output_file]: + if os.path.isfile(file): + os.unlink(file) + + def test_no_input_files(self): + with pytest.raises(OptionError): + self.cmd.finalize_options() + + with pytest.raises(OptionError): + self.cmd.input_files = ['1'] + self.cmd.finalize_options() + + with pytest.raises(OptionError): + self.cmd.input_files = ['1', '2', '3'] + self.cmd.finalize_options() + + def test_no_output_file(self): + self.cmd.input_files = ['1', '2'] + with pytest.raises(OptionError): + self.cmd.finalize_options() + + @freeze_time("1994-11-11") + def test_default(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. 
+# +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +msgid "word1" +msgstr "Word 1" + +msgid "word2" +msgstr "Word 2" + +msgid "word3" +msgstr "" + +#, fuzzy +msgid "word4" +msgstr "Word 2" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + @freeze_time("1994-11-11") + def test_compenidum(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.compendium = self.compendium + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. 
+# +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +msgid "word1" +msgstr "Word 1" + +msgid "word2" +msgstr "Word 2" + +msgid "word3" +msgstr "" + +msgid "word4" +msgstr "Word 4" + +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content From f7ddd856a8d118f7ca0e42d5b68603b1f1fa9b66 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:34:05 +0300 Subject: [PATCH 07/25] Add options update, backup, and c_overwrite for a different compendium handling logic * Implement `update` to update the source file instead of writing to the current output file * Implement `backup` to save a backup of the source file before making any updates * Implement `c_overwrite` to use a new mode of handling the compendium, where translations from the compendium overwrite messages in the output file --- babel/messages/frontend.py | 56 +++++++++++++++++++++----------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 5ef12334b..e7385fa7e 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1040,9 +1040,10 @@ class MessageMerge(CommandMixin): ('input-files', None, ''), ('directory=', 'D', ''), ('compendium=', 'C', ''), + ('c-overwrite', '', ''), ('update', 'U', ''), ('output-file=', 'o', ''), - ('backup=', None, ''), + ('backup', None, ''), ('suffix=', None, ''), ('multi-domain', 'm', ''), ('for-msgfmt', None, ''), @@ -1090,6 +1091,8 @@ class MessageMerge(CommandMixin): 'no-wrap', 'sort-output', 'sort-by-file', + 'c-overwrite', + 'backup', ] option_choices = { @@ -1100,13 +1103,14 @@ 
def initialize_options(self): self.input_files = None # self.directory = None self.compendium = None #~ - self.update = None + self.c_overwrite = False # + self.update = None # self.output_file = None # - self.backup = None - self.suffix = None + self.backup = False # + self.suffix = '~' # self.multi_domain = None self.for_msgfmt = None - self.no_fuzzy_matching = None + self.no_fuzzy_matching = None # self.previous = None self.properties_input = None self.stringtable_input = None @@ -1130,8 +1134,8 @@ def initialize_options(self): def finalize_options(self): if not self.input_files or len(self.input_files) != 2: raise OptionError('must be two po files') - if not self.output_file: - raise OptionError('you must specify the output file') + if not self.output_file and not self.update: + raise OptionError('you must specify the output file or update existing') if self.no_wrap and self.width: raise OptionError("'--no-wrap' and '--width' are mutually exclusive") @@ -1142,36 +1146,38 @@ def finalize_options(self): def run(self): def_file, ref_file = self.input_files - with open(def_file, 'r') as pofile: - def_catalog = read_po(pofile) + if self.update and self.backup: + shutil.copy(def_file, def_file + self.suffix) + + with open(def_file, 'r') as pofile: + catalog = read_po(pofile) with open(ref_file, 'r') as pofile: ref_catalog = read_po(pofile) - - ref_catalog.mime_headers = def_catalog.mime_headers - ref_catalog.header_comment = def_catalog.header_comment - - for message in def_catalog: - if not message.id: - continue - if message.id in ref_catalog: - ref_catalog[message.id].string = message.string - else: - ref_catalog.obsolete[message.id] = message + catalog.update( + ref_catalog, + no_fuzzy_matching=self.no_fuzzy_matching + ) if self.compendium: with open(self.compendium, 'r') as pofile: compendium_catalog = read_po(pofile) for message in compendium_catalog: - if message.id in ref_catalog and not ref_catalog[message.id].string: - ref_catalog[message.id].string = 
message.string + current = catalog[message.id] + if message.id in catalog and (not current.string or current.fuzzy or self.c_overwrite): + if self.c_overwrite and not current.fuzzy and current.string: + catalog.obsolete[message.id] = current.clone() - ref_catalog.fuzzy = False - with open(self.output_file, 'wb') as outfile: + current.string = message.string + current.flags = [flag for flag in current.flags if flag != 'fuzzy'] + current.auto_comments.append(self.compendium) + + output_path = def_file if self.update else self.output_file + with open(output_path, 'wb') as outfile: write_po( outfile, - ref_catalog, + catalog, width=self.width, sort_by_file=self.sort_by_file, sort_output=self.sort_output, From b00f215a5e628c8101343c245504c39a030d0e2e Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:34:20 +0300 Subject: [PATCH 08/25] Add test for msgmerge compendium overwrite mode with no comments * Implement a test for `msgmerge` that validates the new mode where compendium entries overwrite messages in the output PO file. * Include the `no_compendium_comment` option to ensure comments about translations sourced from the compendium are not included. * Utilize the `no-location` option to exclude location comments from the output. 
--- babel/messages/frontend.py | 12 +++- tests/messages/frontend/test_concat_merge.py | 72 +++++++++++++++++++- 2 files changed, 80 insertions(+), 4 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index e7385fa7e..148cab252 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1041,6 +1041,7 @@ class MessageMerge(CommandMixin): ('directory=', 'D', ''), ('compendium=', 'C', ''), ('c-overwrite', '', ''), + ('no-compendium-comment', '', ''), ('update', 'U', ''), ('output-file=', 'o', ''), ('backup', None, ''), @@ -1093,6 +1094,7 @@ class MessageMerge(CommandMixin): 'sort-by-file', 'c-overwrite', 'backup', + 'no-compendium-comment', ] option_choices = { @@ -1102,8 +1104,11 @@ class MessageMerge(CommandMixin): def initialize_options(self): self.input_files = None # self.directory = None + self.compendium = None #~ self.c_overwrite = False # + self.no_compendium_comment = None # + self.update = None # self.output_file = None # self.backup = False # @@ -1121,7 +1126,7 @@ def initialize_options(self): self.escape = None self.force_po = None self.indent = None - self.no_location = None + self.no_location = None # self.add_location = None self.strict = None self.properties_output = None @@ -1171,13 +1176,16 @@ def run(self): current.string = message.string current.flags = [flag for flag in current.flags if flag != 'fuzzy'] - current.auto_comments.append(self.compendium) + + if not self.no_compendium_comment: + current.auto_comments.append(self.compendium) output_path = def_file if self.update else self.output_file with open(output_path, 'wb') as outfile: write_po( outfile, catalog, + no_location=self.no_location, width=self.width, sort_by_file=self.sort_by_file, sort_output=self.sort_output, diff --git a/tests/messages/frontend/test_concat_merge.py b/tests/messages/frontend/test_concat_merge.py index c163ddb62..4b51b00e4 100644 --- a/tests/messages/frontend/test_concat_merge.py +++ 
b/tests/messages/frontend/test_concat_merge.py @@ -270,6 +270,8 @@ def setUp(self): with open(self.compendium, 'wb') as file: catalog = Catalog() + catalog.add('word1', string='Comp Word 1') + catalog.add('word2', string='Comp Word 2') catalog.add('word4', string='Word 4') catalog.add('word5', string='Word 5') pofile.write_po(file, catalog) @@ -296,10 +298,18 @@ def test_no_output_file(self): with pytest.raises(OptionError): self.cmd.finalize_options() + self.cmd.output_file = '2' + self.cmd.finalize_options() + + self.cmd.output_file = None + self.cmd.update = True + self.cmd.finalize_options() + @freeze_time("1994-11-11") def test_default(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file + self.cmd.no_fuzzy_matching = True self.cmd.finalize_options() self.cmd.run() @@ -309,6 +319,7 @@ def test_default(self): # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , 1994. # +#, fuzzy msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" @@ -331,9 +342,8 @@ def test_default(self): msgid "word3" msgstr "" -#, fuzzy msgid "word4" -msgstr "Word 2" +msgstr "" """ @@ -346,6 +356,8 @@ def test_compenidum(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file self.cmd.compendium = self.compendium + self.cmd.no_fuzzy_matching = True + self.cmd.no_compendium_comment = True self.cmd.finalize_options() self.cmd.run() @@ -355,6 +367,7 @@ def test_compenidum(self): # This file is distributed under the same license as the PROJECT project. # FIRST AUTHOR , 1994. 
# +#, fuzzy msgid "" msgstr "" "Project-Id-Version: PROJECT VERSION\n" @@ -380,6 +393,61 @@ def test_compenidum(self): msgid "word4" msgstr "Word 4" +""" + + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + @freeze_time("1994-11-11") + def test_compendium_overwrite(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.compendium = self.compendium + self.cmd.no_fuzzy_matching = True + self.cmd.no_compendium_comment = True + self.cmd.c_overwrite = True + self.cmd.finalize_options() + self.cmd.run() + + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + expected_content = fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +msgid "word1" +msgstr "Comp Word 1" + +msgid "word2" +msgstr "Comp Word 2" + +msgid "word3" +msgstr "" + +msgid "word4" +msgstr "Word 4" + +#~ msgid "word1" +#~ msgstr "Word 1" + +#~ msgid "word2" +#~ msgstr "Word 2" + """ with open(self.output_file, 'r') as f: From 0eab37d1b44d6f6e47fd0e5eb693e56080d110db Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:34:50 +0300 Subject: [PATCH 09/25] Refactor test for msgmerge with compendium-overwrite option * Implemented a helper function `_get_expected` to standardize the expected PO file structure. 
* Renamed the option `c-overwrite` to `compendium-overwrite` --- babel/messages/frontend.py | 10 +- tests/messages/frontend/test_concat_merge.py | 379 +++++++++++-------- 2 files changed, 226 insertions(+), 163 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 148cab252..2781d190c 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1040,7 +1040,7 @@ class MessageMerge(CommandMixin): ('input-files', None, ''), ('directory=', 'D', ''), ('compendium=', 'C', ''), - ('c-overwrite', '', ''), + ('compendium-overwrite', '', ''), ('no-compendium-comment', '', ''), ('update', 'U', ''), ('output-file=', 'o', ''), @@ -1092,7 +1092,7 @@ class MessageMerge(CommandMixin): 'no-wrap', 'sort-output', 'sort-by-file', - 'c-overwrite', + 'compendium-overwrite', 'backup', 'no-compendium-comment', ] @@ -1106,7 +1106,7 @@ def initialize_options(self): self.directory = None self.compendium = None #~ - self.c_overwrite = False # + self.compendium_overwrite = False # self.no_compendium_comment = None # self.update = None # @@ -1170,8 +1170,8 @@ def run(self): for message in compendium_catalog: current = catalog[message.id] - if message.id in catalog and (not current.string or current.fuzzy or self.c_overwrite): - if self.c_overwrite and not current.fuzzy and current.string: + if message.id in catalog and (not current.string or current.fuzzy or self.compendium_overwrite): + if self.compendium_overwrite and not current.fuzzy and current.string: catalog.obsolete[message.id] = current.clone() current.string = message.string diff --git a/tests/messages/frontend/test_concat_merge.py b/tests/messages/frontend/test_concat_merge.py index 4b51b00e4..c4fcd4a0f 100644 --- a/tests/messages/frontend/test_concat_merge.py +++ b/tests/messages/frontend/test_concat_merge.py @@ -13,7 +13,7 @@ from __future__ import annotations import os -import unittest +import shutil from datetime import datetime import pytest @@ -28,14 +28,20 @@ from 
tests.messages.utils import Distribution -class ConcatanationMessagesTestCase(unittest.TestCase): +@pytest.fixture(autouse=True) +def frozen_time(): + with freeze_time("1994-11-11"): + yield - def setUp(self): + +class TestConcatanateCatalog: + + def setup_method(self): self.olddir = os.getcwd() os.chdir(data_dir) self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) - self.cmd = frontend.MessageConcatenation(self.dist) + self.cmd = frontend.ConcatenateCatalog(self.dist) self.cmd.initialize_options() self.temp1 = f'{i18n_dir}/msgcat_temp1.po' @@ -48,6 +54,7 @@ def setUp(self): catalog.add('other2', string='Other 2', locations=[('simple.py', 10)]) catalog.add('same', string='Same', locations=[('simple.py', 100)], flags=['flag1', 'flag1.2']) catalog.add('almost_same', string='Almost same', locations=[('simple.py', 1000)], flags=['flag2']) + catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals'), locations=[('simple.py', 2000)]) pofile.write_po(file, catalog) with open(self.temp2, 'wb') as file: @@ -56,13 +63,36 @@ def setUp(self): catalog.add('other4', string='Other 4', locations=[('hard.py', 10)]) catalog.add('almost_same', string='A bit same', locations=[('hard.py', 1000)], flags=['flag3']) catalog.add('same', string='Same', locations=[('hard.py', 100)], flags=['flag4']) + catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals other'), locations=[('hard.py', 2000)]) pofile.write_po(file, catalog) - def tearDown(self): + def teardown_method(self): for file in [self.temp1, self.temp2, self.output_file]: if os.path.isfile(file): os.unlink(file) + def _get_expected(self, messages, fuzzy=False): + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + return fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. 
+#{'\n#, fuzzy' if fuzzy else ''} +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +""" + messages + def test_no_input_files(self): with pytest.raises(OptionError): self.cmd.finalize_options() @@ -72,7 +102,6 @@ def test_no_output_file(self): with pytest.raises(OptionError): self.cmd.finalize_options() - @freeze_time("1994-11-11") def test_default(self): self.cmd.input_files = [self.temp1, self.temp2] self.cmd.output_file = self.output_file @@ -80,27 +109,64 @@ def test_default(self): self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. -# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. 
-# + expected_content = self._get_expected(fr"""#: simple.py:1 +#, flag1000 +msgid "other1" +msgstr "Other 1" + +#: simple.py:10 +msgid "other2" +msgstr "Other 2" + +#: hard.py:100 simple.py:100 +#, flag1, flag1.2, flag4 +msgid "same" +msgstr "Same" + +#: hard.py:1000 simple.py:1000 +#, flag2, flag3, fuzzy +msgid "almost_same" +msgstr "" +"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" +"Almost same" +"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" +"A bit same" + +#: hard.py:2000 simple.py:2000 #, fuzzy -msgid "" +msgid "plural" +msgid_plural "plurals" msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" +"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" +msgstr[0] "Plural" +msgstr[1] "Plurals" +"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" +msgstr[0] "Plural" +msgstr[1] "Plurals other" + +#: hard.py:1 +msgid "other3" +msgstr "Other 3" + +#: hard.py:10 +msgid "other4" +msgstr "Other 4" + +""", fuzzy=True) -#: simple.py:1 + with open(self.output_file, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + def test_use_first(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + self.cmd.use_first = True + + self.cmd.finalize_options() + self.cmd.run() + + expected_content = self._get_expected(fr"""#: simple.py:1 #, flag1000 msgid "other1" msgstr "Other 1" @@ -119,6 +185,12 @@ def test_default(self): msgid "almost_same" msgstr "Almost same" +#: hard.py:2000 simple.py:2000 +msgid "plural" +msgid_plural "plurals" +msgstr[0] "Plural" +msgstr[1] "Plurals" + #: hard.py:1 msgid "other3" msgstr "Other 3" @@ -127,13 +199,12 @@ def test_default(self): 
msgid "other4" msgstr "Other 4" -""" +""") with open(self.output_file, 'r') as f: - actual_content = f.read() + actual_content = f.read() assert expected_content == actual_content - @freeze_time("1994-11-11") def test_unique(self): self.cmd.input_files = [self.temp1, self.temp2] self.cmd.output_file = self.output_file @@ -142,27 +213,7 @@ def test_unique(self): self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. -# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. -# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#: simple.py:1 + expected_content = self._get_expected(fr"""#: simple.py:1 #, flag1000 msgid "other1" msgstr "Other 1" @@ -179,7 +230,7 @@ def test_unique(self): msgid "other4" msgstr "Other 4" -""" +""") with open(self.output_file, 'r') as f: actual_content = f.read() @@ -193,7 +244,6 @@ def test_unique(self): actual_content = f.read() assert expected_content == actual_content - @freeze_time("1994-11-11") def test_more_than(self): self.cmd.input_files = [self.temp1, self.temp2] self.cmd.output_file = self.output_file @@ -202,52 +252,47 @@ def test_more_than(self): self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. 
-# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. -# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -#: hard.py:100 simple.py:100 + expected_content = self._get_expected(fr"""#: hard.py:100 simple.py:100 #, flag1, flag1.2, flag4 msgid "same" msgstr "Same" #: hard.py:1000 simple.py:1000 -#, flag2, flag3 +#, flag2, flag3, fuzzy msgid "almost_same" -msgstr "Almost same" +msgstr "" +"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" +"Almost same" +"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" +"A bit same" -""" +#: hard.py:2000 simple.py:2000 +#, fuzzy +msgid "plural" +msgid_plural "plurals" +msgstr "" +"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" +msgstr[0] "Plural" +msgstr[1] "Plurals" +"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" +msgstr[0] "Plural" +msgstr[1] "Plurals other" + +""", fuzzy=True) with open(self.output_file, 'r') as f: - actual_content = f.read() + actual_content = f.read() assert expected_content == actual_content -class MergeMessagesTestCase(unittest.TestCase): +class TestMergeCatalog: - @freeze_time("1994-11-11") - def setUp(self): + def setup_method(self): self.olddir = os.getcwd() os.chdir(data_dir) self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) - self.cmd = frontend.MessageMerge(self.dist) + self.cmd = frontend.MergeCatalog(self.dist) self.cmd.initialize_options() self.temp_def = f'{i18n_dir}/msgmerge_def.po' @@ -276,11 +321,41 @@ def setUp(self): catalog.add('word5', string='Word 5') pofile.write_po(file, catalog) - def tearDown(self): - for file in [self.temp_def, 
self.temp_ref, self.compendium, self.output_file]: - if os.path.isfile(file): + def teardown_method(self): + for file in [ + self.temp_def, + self.temp_def + '~', + self.temp_def + '.bac', + self.temp_ref, + self.compendium, + self.output_file + ]: + if os.path.exists(file) and os.path.isfile(file): os.unlink(file) + def _get_expected(self, messages): + date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') + return fr"""# Translations template for PROJECT. +# Copyright (C) 1994 ORGANIZATION +# This file is distributed under the same license as the PROJECT project. +# FIRST AUTHOR , 1994. +# +#, fuzzy +msgid "" +msgstr "" +"Project-Id-Version: PROJECT VERSION\n" +"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" +"POT-Creation-Date: {date}\n" +"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" +"Last-Translator: FULL NAME \n" +"Language-Team: LANGUAGE \n" +"MIME-Version: 1.0\n" +"Content-Type: text/plain; charset=utf-8\n" +"Content-Transfer-Encoding: 8bit\n" +"Generated-By: Babel {VERSION}\n" + +""" + messages + def test_no_input_files(self): with pytest.raises(OptionError): self.cmd.finalize_options() @@ -305,7 +380,6 @@ def test_no_output_file(self): self.cmd.update = True self.cmd.finalize_options() - @freeze_time("1994-11-11") def test_default(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file @@ -313,27 +387,7 @@ def test_default(self): self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. -# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -msgid "word1" + expected_content = self._get_expected(fr"""msgid "word1" msgstr "Word 1" msgid "word2" @@ -345,43 +399,22 @@ def test_default(self): msgid "word4" msgstr "" -""" +""") with open(self.output_file, 'r') as f: - actual_content = f.read() + actual_content = f.read() assert expected_content == actual_content - @freeze_time("1994-11-11") def test_compenidum(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file - self.cmd.compendium = self.compendium + self.cmd.compendium = [self.compendium,] self.cmd.no_fuzzy_matching = True self.cmd.no_compendium_comment = True self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. -# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -msgid "word1" + expected_content = self._get_expected(fr"""msgid "word1" msgstr "Word 1" msgid "word2" @@ -393,44 +426,23 @@ def test_compenidum(self): msgid "word4" msgstr "Word 4" -""" +""") with open(self.output_file, 'r') as f: - actual_content = f.read() + actual_content = f.read() assert expected_content == actual_content - @freeze_time("1994-11-11") - def test_compendium_overwrite(self): + def test_compenidum_overwrite(self): self.cmd.input_files = [self.temp_def, self.temp_ref] self.cmd.output_file = self.output_file - self.cmd.compendium = self.compendium + self.cmd.compendium = [self.compendium,] self.cmd.no_fuzzy_matching = True self.cmd.no_compendium_comment = True - self.cmd.c_overwrite = True + self.cmd.compendium_overwrite = True self.cmd.finalize_options() self.cmd.run() - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - expected_content = fr"""# Translations template for PROJECT. -# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -msgid "word1" + expected_content = self._get_expected(fr"""msgid "word1" msgstr "Comp Word 1" msgid "word2" @@ -448,8 +460,59 @@ def test_compendium_overwrite(self): #~ msgid "word2" #~ msgstr "Word 2" -""" +""") with open(self.output_file, 'r') as f: - actual_content = f.read() + actual_content = f.read() assert expected_content == actual_content + + def test_update(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.update = True + self.cmd.no_fuzzy_matching = True + self.cmd.finalize_options() + self.cmd.run() + + expected_content = self._get_expected(fr"""msgid "word1" +msgstr "Word 1" + +msgid "word2" +msgstr "Word 2" + +msgid "word3" +msgstr "" + +msgid "word4" +msgstr "" + +""") + + with open(self.temp_def, 'r') as f: + actual_content = f.read() + assert expected_content == actual_content + + def test_update_backup(self): + with open(self.temp_def, 'r') as f: + before_content = f.read() + + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.update = True + self.cmd.backup = True + self.cmd.no_fuzzy_matching = True + self.cmd.finalize_options() + self.cmd.run() + + assert os.path.exists(self.temp_def + '~') + with open(self.temp_def + '~', 'r') as f: + actual_content = f.read() + assert before_content == actual_content + + os.unlink(self.temp_def) + shutil.move(self.temp_def + '~', self.temp_def) + self.cmd.suffix = '.bac' + self.cmd.run() + + assert os.path.exists(self.temp_def + '.bac') + with open(self.temp_def + '.bac', 'r') as f: + actual_content = f.read() + assert before_content == actual_content From 
7fb19e31f943f00973535b849c93a8f9cbec1ced Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:35:04 +0300 Subject: [PATCH 10/25] Create a catalog without fuzzy by default, remove add-location * Mark the catalog as fuzzy after msgcat and msgmerge if there is at least one fuzzy message * Remove add-location as it's unnecessary --- babel/messages/frontend.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 2781d190c..2595c3ccc 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -910,7 +910,6 @@ class MessageConcatenation(CommandMixin): ('force-po', None, ''), ('indent', 'i', ''), ('no-location', None, ''), - ('add-location', 'n', ''), ('strict', None, ''), ('properties-output', None, ''), ('stringtable-output', None, ''), @@ -931,7 +930,6 @@ class MessageConcatenation(CommandMixin): 'force-po', 'indent', 'no-location', - 'add-location', 'strict', 'properties-output', 'stringtable-output', @@ -964,8 +962,7 @@ def initialize_options(self): self.escape = None self.force_po = None self.indent = None - self.no_location = None - self.add_location = None + self.no_location = None # self.strict = None self.properties_output = None self.stringtable_output = None @@ -1006,12 +1003,14 @@ def _prepare(self): self.message_count[message.id] += 1 def run(self): - catalog = Catalog() + catalog = Catalog(fuzzy=False) self._prepare() for filename in self.input_files: with open(filename, 'r') as pofile: template = read_po(pofile) + if catalog.locale is None: + catalog.locale = template.locale for message in template: if not message.id: @@ -1024,6 +1023,7 @@ def run(self): if message_count > self.more_than and (self.less_than is None or message_count < self.less_than): catalog[message.id] = message + catalog.fuzzy = any(message.fuzzy for message in catalog) with open(self.output_file, 'wb') as outfile: write_po( outfile, @@ -1031,6 +1031,7 @@ def 
run(self): width=self.width, sort_by_file=self.sort_by_file, sort_output=self.sort_output, + no_location=self.no_location, ) @@ -1060,7 +1061,6 @@ class MessageMerge(CommandMixin): ('force-po', None, ''), ('indent', 'i', ''), ('no-location', None, ''), - ('add-location', 'n', ''), ('strict', None, ''), ('properties-output', None, ''), ('stringtable-output', None, ''), @@ -1085,7 +1085,6 @@ class MessageMerge(CommandMixin): 'force-po', 'indent', 'no-location', - 'add-location', 'strict', 'properties-output', 'stringtable-output', @@ -1127,7 +1126,6 @@ def initialize_options(self): self.force_po = None self.indent = None self.no_location = None # - self.add_location = None self.strict = None self.properties_output = None self.stringtable_output = None @@ -1175,11 +1173,13 @@ def run(self): catalog.obsolete[message.id] = current.clone() current.string = message.string - current.flags = [flag for flag in current.flags if flag != 'fuzzy'] + if current.fuzzy: + current.flags.remove('fuzzy') if not self.no_compendium_comment: current.auto_comments.append(self.compendium) + catalog.fuzzy = any(message.fuzzy for message in catalog) output_path = def_file if self.update else self.output_file with open(output_path, 'wb') as outfile: write_po( From 4709cb6e8ae4871e5dccb5f60f03a9089eacc0e9 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 20:35:28 +0300 Subject: [PATCH 11/25] Rename msgmerge to merge and msgcat to concat --- babel/messages/frontend.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 2595c3ccc..4ab431407 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -888,7 +888,7 @@ def run(self): return -class MessageConcatenation(CommandMixin): +class ConcatenationCatalog(CommandMixin): description = 'concatenates and merges the specified PO files' user_options = [ ('input-files', None, ''), @@ -1035,7 +1035,7 @@ def run(self): ) -class 
MessageMerge(CommandMixin): +class MergeCatalog(CommandMixin): description='combines two Uniforum-style PO files into one' user_options=[ ('input-files', None, ''), @@ -1206,8 +1206,8 @@ class CommandLineInterface: 'extract': 'extract messages from source files and generate a POT file', 'init': 'create new message catalogs from a POT file', 'update': 'update existing message catalogs from a POT file', - 'msgcat': 'concatenates and merges the specified PO files', - 'msgmerge': 'combines two Uniforum-style PO files into one', + 'concat': 'concatenates and merges the specified PO files', + 'merge': 'combines two Uniforum-style PO files into one', } command_classes = { @@ -1215,8 +1215,8 @@ class CommandLineInterface: 'extract': ExtractMessages, 'init': InitCatalog, 'update': UpdateCatalog, - 'msgcat': MessageConcatenation, - 'msgmerge': MessageMerge, + 'concat': ConcatenationCatalog, + 'merge': MergeCatalog, } log = None # Replaced on instance level From 8b780d4479aedc44c9272b01029c661e0960dbd0 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 9 Dec 2024 22:00:00 +0300 Subject: [PATCH 12/25] Add discription to all options --- babel/messages/frontend.py | 120 ++++++++++++++++++++----------------- 1 file changed, 64 insertions(+), 56 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 4ab431407..fadba0fba 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -891,32 +891,38 @@ def run(self): class ConcatenationCatalog(CommandMixin): description = 'concatenates and merges the specified PO files' user_options = [ - ('input-files', None, ''), - ('files-from=', 'f', ''), - ('directory=', 'D', ''), - ('output-file=', 'o', ''), - ('less-than=', '<', ''), - ('more-than=', '>', ''), - ('unique', 'u', ''), - ('properties-input', 'P', ''), - ('stringtable-input', None, ''), - ('to-code=','t', ''), - ('use-first', None, ''), - ('lang=', None, ''), - ('color=', None, ''), - ('style=', None, ''), - ('no-escape', 'e', ''), 
- ('escape', 'E', ''), - ('force-po', None, ''), - ('indent', 'i', ''), - ('no-location', None, ''), - ('strict', None, ''), - ('properties-output', None, ''), - ('stringtable-output', None, ''), - ('width=', 'w', ''), - ('no-wrap', None, ''), - ('sort-output', 's', ''), - ('sort-by-file', 'F', ''), + ('input-files', None, 'input files'), + ('files-from=', 'f', 'get list of input files from FILE'), + ('directory=', 'D', 'add DIRECTORY to list for input files search' + 'If input file is -, standard input is read.'), + ('output-file=', 'o', 'write output to specified file'), + ('less-than=', '<', 'print messages with less than this many' + 'definitions, defaults to infinite if not set'), + ('more-than=', '>', 'print messages with more than this many' + 'definitions, defaults to 0 if not set'), + ('unique', 'u', 'shorthand for --less-than=2, requests' + 'that only unique messages be printed'), + ('properties-input', 'P', 'input files are in Java .properties syntax'), + ('stringtable-input', None, 'input files are in NeXTstep/GNUstep .strings syntax'), + ('to-code=','t', 'encoding for output'), + ('use-first', None, 'use first available translation for each' + 'message, don\'t merge several translations'), + ('lang=', None, 'set 'Language' field in the header entry'), + ('color=', None, 'use colors and other text attributes always'), + ('style=', None, 'specify CSS style rule file for --color'), + ('no-escape', 'e', 'do not use C escapes in output (default)'), + ('escape', 'E', 'use C escapes in output, no extended chars'), + ('force-po', None, 'write PO file even if empty'), + ('indent', 'i', 'write the .po file using indented style'), + ('no-location', None, 'do not write \'#: filename:line\' lines'), + ('strict', None, 'write out strict Uniforum conforming .po file'), + ('properties-output', None, 'write out a Java .properties file'), + ('stringtable-output', None, 'write out a NeXTstep/GNUstep .strings file'), + ('width=', 'w', 'set output page width'), + 
('no-wrap', None, 'do not break long message lines, longer than' + 'the output page width, into several lines'), + ('sort-output', 's', 'generate sorted output'), + ('sort-by-file', 'F', 'sort output by file location'), ] as_args='input-files' @@ -953,7 +959,7 @@ def initialize_options(self): self.properties_input = None self.stringtable_input = None self.to_code = None - # временно всегда используется первый перевод + # the first translation is always used temporarily self.use_first = True #~ self.lang = None self.color = None @@ -1038,36 +1044,38 @@ def run(self): class MergeCatalog(CommandMixin): description='combines two Uniforum-style PO files into one' user_options=[ - ('input-files', None, ''), - ('directory=', 'D', ''), - ('compendium=', 'C', ''), - ('compendium-overwrite', '', ''), + ('input-files', None, 'def.po ref.pot'), + ('directory=', 'D', 'add DIRECTORY to list for input files search'), + ('compendium=', 'C', 'additional library of message translations, may be specified more than once'), + ('compendium-overwrite', '', 'overwrite mode of compendium'), ('no-compendium-comment', '', ''), - ('update', 'U', ''), - ('output-file=', 'o', ''), - ('backup', None, ''), - ('suffix=', None, ''), - ('multi-domain', 'm', ''), - ('for-msgfmt', None, ''), - ('no-fuzzy-matching', 'N', ''), - ('previous', None, ''), - ('properties-input', 'P', ''), - ('stringtable-input', None, ''), - ('lang=', None, ''), - ('color=', None, ''), - ('style=', None, ''), - ('no-escape', 'e', ''), - ('escape', 'E', ''), - ('force-po', None, ''), - ('indent', 'i', ''), - ('no-location', None, ''), - ('strict', None, ''), - ('properties-output', None, ''), - ('stringtable-output', None, ''), - ('width=', 'w', ''), - ('no-wrap', None, ''), - ('sort-output', 's', ''), - ('sort-by-file', 'F', ''), + ('update', 'U', 'pdate def.po, do nothing if def.po already up to date'), + ('output-file=', 'o', 'write output to specified file, the results are written' + 'to standard output if no output file 
is specified'), + ('backup', None, 'make a backup of def.po'), + ('suffix=', None, 'override the usual backup suffix'), + ('multi-domain', 'm', 'apply ref.pot to each of the domains in def.po'), + ('for-msgfmt', None, 'produce output for 'msgfmt', not for a translator'), + ('no-fuzzy-matching', 'N', 'do not use fuzzy matching'), + ('previous', None, 'keep previous msgids of translated messages'), + ('properties-input', 'P', 'input files are in Java .properties syntax'), + ('stringtable-input', None, 'input files are in NeXTstep/GNUstep .strings syntax'), + ('lang=', None, 'set 'Language' field in the header entry'), + ('color=', None, 'use colors and other text attributes always'), + ('style=', None, 'specify CSS style rule file for --color'), + ('no-escape', 'e', 'do not use C escapes in output (default)'), + ('escape', 'E', 'use C escapes in output, no extended chars'), + ('force-po', None, 'write PO file even if empty'), + ('indent', 'i', 'indented output style'), + ('no-location', None, 'suppress \'#: filename:line\' lines'), + ('strict', None, 'strict Uniforum output style'), + ('properties-output', None, 'write out a Java .properties file'), + ('stringtable-output', None, 'write out a NeXTstep/GNUstep .strings file'), + ('width=', 'w', 'set output page width'), + ('no-wrap', None, 'do not break long message lines, longer' + 'than the output page width, into several lines'), + ('sort-output', 's', 'generate sorted output'), + ('sort-by-file', 'F', 'sort output by file location'), ] as_args='input-files' From 80ab44a5b378eb9c1d0c7c93294fc865decf24f4 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 2 Mar 2025 17:33:37 +0300 Subject: [PATCH 13/25] Ability to specify multiple compendiums --- babel/messages/frontend.py | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index fadba0fba..dc99442a3 100644 --- a/babel/messages/frontend.py +++ 
b/babel/messages/frontend.py @@ -907,7 +907,7 @@ class ConcatenationCatalog(CommandMixin): ('to-code=','t', 'encoding for output'), ('use-first', None, 'use first available translation for each' 'message, don\'t merge several translations'), - ('lang=', None, 'set 'Language' field in the header entry'), + ('lang=', None, 'set \'Language\' field in the header entry'), ('color=', None, 'use colors and other text attributes always'), ('style=', None, 'specify CSS style rule file for --color'), ('no-escape', 'e', 'do not use C escapes in output (default)'), @@ -1055,12 +1055,12 @@ class MergeCatalog(CommandMixin): ('backup', None, 'make a backup of def.po'), ('suffix=', None, 'override the usual backup suffix'), ('multi-domain', 'm', 'apply ref.pot to each of the domains in def.po'), - ('for-msgfmt', None, 'produce output for 'msgfmt', not for a translator'), + ('for-msgfmt', None, 'produce output for \'msgfmt\', not for a translator'), ('no-fuzzy-matching', 'N', 'do not use fuzzy matching'), ('previous', None, 'keep previous msgids of translated messages'), ('properties-input', 'P', 'input files are in Java .properties syntax'), ('stringtable-input', None, 'input files are in NeXTstep/GNUstep .strings syntax'), - ('lang=', None, 'set 'Language' field in the header entry'), + ('lang=', None, 'set \'Language\' field in the header entry'), ('color=', None, 'use colors and other text attributes always'), ('style=', None, 'specify CSS style rule file for --color'), ('no-escape', 'e', 'do not use C escapes in output (default)'), @@ -1078,7 +1078,11 @@ class MergeCatalog(CommandMixin): ('sort-by-file', 'F', 'sort output by file location'), ] - as_args='input-files' + as_args = 'input-files' + + multiple_value_options = ( + 'compendium' + ) boolean_options = [ 'update', @@ -1155,6 +1159,13 @@ def finalize_options(self): elif self.width is not None: self.width = int(self.width) + def _get_message_from_compendium(self, compendium): + for file_path in compendium: + with 
open(file_path, 'r') as pofile: + catalog = read_po(pofile) + for message in catalog: + yield message, file_path + def run(self): def_file, ref_file = self.input_files @@ -1171,10 +1182,7 @@ def run(self): ) if self.compendium: - with open(self.compendium, 'r') as pofile: - compendium_catalog = read_po(pofile) - - for message in compendium_catalog: + for message, compendium_path in self._get_message_from_compendium(self.compendium): current = catalog[message.id] if message.id in catalog and (not current.string or current.fuzzy or self.compendium_overwrite): if self.compendium_overwrite and not current.fuzzy and current.string: @@ -1185,7 +1193,7 @@ def run(self): current.flags.remove('fuzzy') if not self.no_compendium_comment: - current.auto_comments.append(self.compendium) + current.auto_comments.append(compendium_path) catalog.fuzzy = any(message.fuzzy for message in catalog) output_path = def_file if self.update else self.output_file From 8b00b029abd91dcd276ba2d0d0e3dd1c59348cc6 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 2 Mar 2025 20:00:25 +0300 Subject: [PATCH 14/25] Marking conflicting messages * Update _prepare function in ConcatenateCatalog to check conflicting messages and to not parse po-files twice * Add _conflicts field in Catalog to mark conflicts * Update tests --- babel/messages/catalog.py | 24 +++++++++++++++++- babel/messages/frontend.py | 51 ++++++++++++++++++++++++-------------- babel/messages/pofile.py | 47 +++++++++++++++++++++++++++++++---- 3 files changed, 97 insertions(+), 25 deletions(-) diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index 9a9739a72..bd8621804 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -12,13 +12,14 @@ import datetime import re +import os from collections.abc import Iterable, Iterator from copy import copy from difflib import SequenceMatcher from email import message_from_string from heapq import nlargest from string import Formatter -from typing import 
TYPE_CHECKING +from typing import TYPE_CHECKING, TypedDict from babel import __version__ as VERSION from babel.core import Locale, UnknownLocaleError @@ -357,6 +358,13 @@ def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') return str(s) +class ConflictInfo(TypedDict): + message: Message + file_name: str + project: str + version: str + + class Catalog: """Representation of a message catalog.""" @@ -400,6 +408,7 @@ def __init__( self.locale = locale self._header_comment = header_comment self._messages: dict[str | tuple[str, str], Message] = {} + self._conflicts: dict[str | tuple[str, str], list[ConflictInfo]] = {} self.project = project or 'PROJECT' self.version = version or 'VERSION' @@ -780,6 +789,19 @@ def __setitem__(self, id: _MessageID, message: Message) -> None: ) self._messages[key] = message + def add_conflict(self, message: Message, file_name: str, project: str, version: str): + key = message.id + if key not in self._conflicts: + self._conflicts[key] = [] + + self._conflicts[key].append({ + 'message': message, + 'file_name': file_name, + 'project': project, + 'version': version, + }) + message.flags |= {'fuzzy'} + def add( self, id: _MessageID, diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index dc99442a3..7a26ab5c4 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -21,7 +21,7 @@ import sys import tempfile import warnings -from collections import OrderedDict, defaultdict +from collections import defaultdict from configparser import RawConfigParser from io import StringIO from typing import Any, BinaryIO, Iterable, Literal @@ -29,7 +29,7 @@ from babel import Locale, localedata from babel import __version__ as VERSION from babel.core import UnknownLocaleError -from babel.messages.catalog import DEFAULT_HEADER, Catalog +from babel.messages.catalog import DEFAULT_HEADER, Catalog, ConflictInfo from babel.messages.extract import ( DEFAULT_KEYWORDS, DEFAULT_MAPPING, @@ -960,7 +960,7 @@ 
def initialize_options(self): self.stringtable_input = None self.to_code = None # the first translation is always used temporarily - self.use_first = True #~ + self.use_first = False #~ self.lang = None self.color = None self.style = None @@ -1000,36 +1000,49 @@ def finalize_options(self): self.less_than = 2 def _prepare(self): - self.message_count = defaultdict(int) + templates: list[tuple[str, Catalog]] = [] + message_info = {} for filename in self.input_files: with open(filename, 'r') as pofile: template = read_po(pofile) for message in template: - self.message_count[message.id] += 1 + if message.id not in message_info: + message_info[message.id] = { + 'count': 0, + 'strings': set(), + } + message_info[message.id]['count'] += 1 + message_info[message.id]['strings'].add(message.string if isinstance(message.string, str) else tuple(message.string)) + templates.append((filename, template, )) + + return templates, message_info def run(self): catalog = Catalog(fuzzy=False) - self._prepare() + templates, message_info = self._prepare() - for filename in self.input_files: - with open(filename, 'r') as pofile: - template = read_po(pofile) - if catalog.locale is None: - catalog.locale = template.locale + for path, template in templates: + if catalog.locale is None: + catalog.locale = template.locale - for message in template: - if not message.id: - continue + for message in template: + if not message.id: + continue + + count = message_info[message.id]['count'] + diff_string_count = len(message_info[message.id]['strings']) + if count <= self.more_than or (self.less_than is not None and count >= self.less_than): + continue - if message.id in catalog and catalog[message.id].string != message.string and not self.use_first: - raise NotImplementedError() + if count > 1 and not self.use_first and diff_string_count > 1: + file_name = os.path.basename(path) + catalog.add_conflict(message, file_name, template.project, template.version) - message_count = 
self.message_count[message.id] - if message_count > self.more_than and (self.less_than is None or message_count < self.less_than): - catalog[message.id] = message + catalog[message.id] = message catalog.fuzzy = any(message.fuzzy for message in catalog) + with open(self.output_file, 'wb') as outfile: write_po( outfile, diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index b9678a924..f6e922d99 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -17,8 +17,8 @@ from typing import TYPE_CHECKING, Literal from babel.core import Locale -from babel.messages.catalog import Catalog, Message -from babel.util import TextWrapper +from babel.messages.catalog import Catalog, Message, ConflictInfo +from babel.util import TextWrapper, _cmp if TYPE_CHECKING: from typing import IO, AnyStr @@ -351,8 +351,11 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None: continue if needs_decode: line = line.decode(self.catalog.charset) - if line[0] == '#': - if line[:2] == '#~': + if line.startswith('#'): + if line[1:].startswith('-'): + self._invalid_pofile(line, lineno, 'cannot parse po file with conflicts') + + if line[1:].startswith('~'): self._process_message_line(lineno, line[2:].lstrip(), obsolete=True) else: try: @@ -646,6 +649,37 @@ def _format_comment(comment, prefix=''): for line in comment_wrapper.wrap(comment): yield f"#{prefix} {line.strip()}\n" + def _format_conflict_comment(file, project, version, prefix=''): + comment = f"#-#-#-#-# {file} ({project} {version}) #-#-#-#-#" + yield f"{normalize(comment, prefix=prefix, width=width)}\n" + + def _format_conflict(key: str | tuple[str, str], conflicts: list[ConflictInfo], prefix=''): + for conflict in conflicts: + message = conflict['message'] + if message.context: + yield from _format_conflict_comment(conflict['file_name'], conflict['project'], conflict['version'], prefix=prefix) + yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n" + + if 
isinstance(key, (list, tuple)): + yield f"{prefix}msgid {normalize(key[0], prefix=prefix, width=width)}\n" + yield f"{prefix}msgid_plural {normalize(key[1], prefix=prefix, width=width)}\n" + else: + yield f"{prefix}msgid {normalize(key, prefix=prefix, width=width)}\n" + yield f"{prefix}msgstr {normalize('', prefix=prefix, width=width)}\n" + + for conflict in conflicts: + message = conflict['message'] + yield from _format_conflict_comment(conflict['file_name'], conflict['project'], conflict['version'], prefix=prefix) + if isinstance(key, (list, tuple)): + for idx in range(catalog.num_plurals): + try: + string = message.string[idx] + except IndexError: + string = '' + yield f"{prefix}msgstr[{idx:d}] {normalize(string, prefix=prefix, width=width)}\n" + else: + yield f"{normalize(message.string, prefix=prefix, width=width)}\n" + def _format_message(message, prefix=''): if isinstance(message.id, (list, tuple)): if message.context: @@ -717,7 +751,10 @@ def _format_message(message, prefix=''): norm_previous_id = normalize(message.previous_id[1], width=width) yield from _format_comment(f'msgid_plural {norm_previous_id}', prefix='|') - yield from _format_message(message) + if len(conflicts := catalog._conflicts.get(message.id, [])) > 0: + yield from _format_conflict(message.id, conflicts) + else: + yield from _format_message(message) yield '\n' if not ignore_obsolete: From a28b263eed144925f8f6b73e5617d924018ed6d7 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 23 Mar 2025 18:43:23 +0300 Subject: [PATCH 15/25] Fix PR issues * Delete unused options * Fix multiline options comments * Replace backup logic in MergeCatalog * Rename to ConcatenateCatalog --- babel/messages/frontend.py | 182 +++++++++---------------------------- 1 file changed, 45 insertions(+), 137 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 7a26ab5c4..1c9ca8e4f 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -888,94 +888,51 @@ def 
run(self): return -class ConcatenationCatalog(CommandMixin): +class ConcatenateCatalog(CommandMixin): description = 'concatenates and merges the specified PO files' user_options = [ ('input-files', None, 'input files'), - ('files-from=', 'f', 'get list of input files from FILE'), - ('directory=', 'D', 'add DIRECTORY to list for input files search' - 'If input file is -, standard input is read.'), ('output-file=', 'o', 'write output to specified file'), ('less-than=', '<', 'print messages with less than this many' - 'definitions, defaults to infinite if not set'), - ('more-than=', '>', 'print messages with more than this many' + 'definitions, defaults to infinite if not set '), + ('more-than=', '>', 'print messages with more than this many ' 'definitions, defaults to 0 if not set'), - ('unique', 'u', 'shorthand for --less-than=2, requests' + ('unique', 'u', 'shorthand for --less-than=2, requests ' 'that only unique messages be printed'), - ('properties-input', 'P', 'input files are in Java .properties syntax'), - ('stringtable-input', None, 'input files are in NeXTstep/GNUstep .strings syntax'), - ('to-code=','t', 'encoding for output'), - ('use-first', None, 'use first available translation for each' + ('use-first', None, 'use first available translation for each ' 'message, don\'t merge several translations'), - ('lang=', None, 'set \'Language\' field in the header entry'), - ('color=', None, 'use colors and other text attributes always'), - ('style=', None, 'specify CSS style rule file for --color'), - ('no-escape', 'e', 'do not use C escapes in output (default)'), - ('escape', 'E', 'use C escapes in output, no extended chars'), - ('force-po', None, 'write PO file even if empty'), - ('indent', 'i', 'write the .po file using indented style'), ('no-location', None, 'do not write \'#: filename:line\' lines'), - ('strict', None, 'write out strict Uniforum conforming .po file'), - ('properties-output', None, 'write out a Java .properties file'), - 
('stringtable-output', None, 'write out a NeXTstep/GNUstep .strings file'), ('width=', 'w', 'set output page width'), - ('no-wrap', None, 'do not break long message lines, longer than' + ('no-wrap', None, 'do not break long message lines, longer than ' 'the output page width, into several lines'), ('sort-output', 's', 'generate sorted output'), ('sort-by-file', 'F', 'sort output by file location'), ] - as_args='input-files' + as_args = 'input-files' boolean_options = [ 'unique', - 'properties-input', - 'stringtable-input', - 'no-escape', - 'escape', - 'force-po', - 'indent', + 'use-first', 'no-location', 'strict', - 'properties-output', - 'stringtable-output', 'no-wrap', 'sort-output', 'sort-by-file', ] - option_choices = { - 'color': ('always', 'never', 'auto', 'html'), - } - def initialize_options(self): - self.input_files = None # - self.files_from = None - self.directory = None - self.output_file = None # - self.less_than = None # - self.more_than = 0 # - self.unique = False # - self.properties_input = None - self.stringtable_input = None - self.to_code = None - # the first translation is always used temporarily - self.use_first = False #~ - self.lang = None - self.color = None - self.style = None - self.no_escape = None - self.escape = None - self.force_po = None - self.indent = None - self.no_location = None # - self.strict = None - self.properties_output = None - self.stringtable_output = None - self.width = None # - self.no_wrap = None # - self.sort_output = False # - self.sort_by_file = False # + self.input_files = None + self.output_file = None + self.less_than = None + self.more_than = 0 + self.unique = False + self.use_first = False + self.no_location = None + self.width = None + self.no_wrap = False + self.sort_output = False + self.sort_by_file = False def finalize_options(self): if not self.input_files: @@ -1055,37 +1012,21 @@ def run(self): class MergeCatalog(CommandMixin): - description='combines two Uniforum-style PO files into one' + 
description='updates translation PO file by merging them with updated template POT file with using compendium' user_options=[ - ('input-files', None, 'def.po ref.pot'), - ('directory=', 'D', 'add DIRECTORY to list for input files search'), + ('input-files', None, 'def.po (obsolete translations) ref.pot (actual template)'), ('compendium=', 'C', 'additional library of message translations, may be specified more than once'), ('compendium-overwrite', '', 'overwrite mode of compendium'), ('no-compendium-comment', '', ''), ('update', 'U', 'pdate def.po, do nothing if def.po already up to date'), - ('output-file=', 'o', 'write output to specified file, the results are written' + ('output-file=', 'o', 'write output to specified file, the results are written ' 'to standard output if no output file is specified'), ('backup', None, 'make a backup of def.po'), ('suffix=', None, 'override the usual backup suffix'), - ('multi-domain', 'm', 'apply ref.pot to each of the domains in def.po'), - ('for-msgfmt', None, 'produce output for \'msgfmt\', not for a translator'), ('no-fuzzy-matching', 'N', 'do not use fuzzy matching'), - ('previous', None, 'keep previous msgids of translated messages'), - ('properties-input', 'P', 'input files are in Java .properties syntax'), - ('stringtable-input', None, 'input files are in NeXTstep/GNUstep .strings syntax'), - ('lang=', None, 'set \'Language\' field in the header entry'), - ('color=', None, 'use colors and other text attributes always'), - ('style=', None, 'specify CSS style rule file for --color'), - ('no-escape', 'e', 'do not use C escapes in output (default)'), - ('escape', 'E', 'use C escapes in output, no extended chars'), - ('force-po', None, 'write PO file even if empty'), - ('indent', 'i', 'indented output style'), ('no-location', None, 'suppress \'#: filename:line\' lines'), - ('strict', None, 'strict Uniforum output style'), - ('properties-output', None, 'write out a Java .properties file'), - ('stringtable-output', None, 'write 
out a NeXTstep/GNUstep .strings file'), ('width=', 'w', 'set output page width'), - ('no-wrap', None, 'do not break long message lines, longer' + ('no-wrap', None, 'do not break long message lines, longer ' 'than the output page width, into several lines'), ('sort-output', 's', 'generate sorted output'), ('sort-by-file', 'F', 'sort output by file location'), @@ -1098,66 +1039,32 @@ class MergeCatalog(CommandMixin): ) boolean_options = [ + 'compendium-overwrite', + 'no-compendium-comment', 'update', - 'multi-domain', - 'for-msgfmt', + 'backup', 'no-fuzzy-matching', - 'previous' - 'properties-input', - 'stringtable-input', - 'no-escape', - 'escape', - 'force-po', - 'indent', 'no-location', - 'strict', - 'properties-output', - 'stringtable-output', 'no-wrap', 'sort-output', 'sort-by-file', - 'compendium-overwrite', - 'backup', - 'no-compendium-comment', ] - option_choices = { - 'color': ('always', 'never', 'auto', 'html'), - } - def initialize_options(self): - self.input_files = None # - self.directory = None - - self.compendium = None #~ - self.compendium_overwrite = False # - self.no_compendium_comment = None # - - self.update = None # - self.output_file = None # - self.backup = False # - self.suffix = '~' # - self.multi_domain = None - self.for_msgfmt = None - self.no_fuzzy_matching = None # - self.previous = None - self.properties_input = None - self.stringtable_input = None - self.lang = None - self.color = None - self.style = None - self.no_escape = None - self.escape = None - self.force_po = None - self.indent = None - self.no_location = None # - self.strict = None - self.properties_output = None - self.stringtable_output = None - self.width = None # - self.no_wrap = None # - self.sort_output = False # - self.sort_by_file = False # + self.input_files = None + self.compendium = None + self.compendium_overwrite = False + self.no_compendium_comment = False + self.update = False + self.output_file = None + self.backup = False + self.suffix = '~' + 
self.no_fuzzy_matching = False + self.no_location = False + self.width = None + self.no_wrap = False + self.sort_output = False + self.sort_by_file = False def finalize_options(self): if not self.input_files or len(self.input_files) != 2: @@ -1182,9 +1089,6 @@ def _get_message_from_compendium(self, compendium): def run(self): def_file, ref_file = self.input_files - if self.update and self.backup: - shutil.copy(def_file, def_file + self.suffix) - with open(def_file, 'r') as pofile: catalog = read_po(pofile) with open(ref_file, 'r') as pofile: @@ -1210,6 +1114,10 @@ def run(self): catalog.fuzzy = any(message.fuzzy for message in catalog) output_path = def_file if self.update else self.output_file + + if self.update and self.backup: + shutil.copy(def_file, def_file + self.suffix) + with open(output_path, 'wb') as outfile: write_po( outfile, @@ -1244,7 +1152,7 @@ class CommandLineInterface: 'extract': ExtractMessages, 'init': InitCatalog, 'update': UpdateCatalog, - 'concat': ConcatenationCatalog, + 'concat': ConcatenateCatalog, 'merge': MergeCatalog, } From debe95078d64773c0a419e8cb3d08b8c62be746d Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 7 Apr 2025 14:08:04 +0300 Subject: [PATCH 16/25] Add info about pybabel concat and pybabel merge into docs --- babel/messages/frontend.py | 2 +- docs/cmdline.rst | 89 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 90 insertions(+), 1 deletion(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 1c9ca8e4f..1d71ed50d 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -889,7 +889,7 @@ def run(self): class ConcatenateCatalog(CommandMixin): - description = 'concatenates the specified PO files into single one' + description = 'concatenates the specified PO files into single one' user_options = [ ('input-files', None, 'input files'), ('output-file=', 'o', 'write output to specified file'), diff --git a/docs/cmdline.rst b/docs/cmdline.rst index e1328fe8f..e8221dd0e 100644 --- 
a/docs/cmdline.rst +++ b/docs/cmdline.rst @@ -247,3 +247,92 @@ filename of the output file will be:: If neither the ``output_file`` nor the ``locale`` option is set, this command looks for all catalog files in the base directory that match the given domain, and updates each of them. + +concat +====== + +The `concat` command merges multiple PO files into a single one. If a message has +different translations in different PO files, the conflicting translations are +marked with a conflict comment:: + #-#-#-#-# (PROJECT VERSION) #-#-#-#-# +and the message itself is marked with a `fuzzy` flag:: + + $ pybabel concat --help + Usage: pybabel concat [options] + + concatenates the specified PO files into single one + + Options: + -h, --help show this help message and exit + -o OUTPUT_FILE, --output-file=OUTPUT_FILE + write output to specified file + --less-than=NUMBER print messages with less than this many + definitions, defaults to infinite if not set + --more-than=NUMBER print messages with more than this many + definitions, defaults to 0 if not set + -u, unique shorthand for --less-than=2, requests + that only unique messages be printed + --use-first use first available translation for each + message, don't merge several translations + --no-location do not write '#: filename:line' lines + -w WIDTH, --width=WIDTH + set output page width + --no-wrap do not break long message lines, longer than + the output page width, into several lines + -s, --sort-output generate sorted output + -F, --sort-by-file sort output by file location + +merge +====== + +The `merge` command allows updating files using a compendium as a translation memory:: + + $ pybabel concat --help + Usage: pybabel merge [options] + + updates translation PO file by merging them with updated template + POT file with using compendium + + Options: + -C COMPENDIUM_FILE, --compendium=COMPENDIUM_FILE + additional library of message translations, may + be specified more than once + --compendium-overwrite + overwrite 
mode of compendium + --no-compendium-comment + do not add a comment indicating that the message is + taken from the compendium + -U, --update update def.po, do nothing if def.po already up to date, + -o OUTPUT_FILE, --output-file=OUTPUT_FILE + write output to specified file, the results are written + to standard output if no output file is specified + --backup make a backup of def.po + --suffix=SUFFIX override the usual backup suffix (default '~') + -N, --no-fuzzy-matching + do not use fuzzy matching + --no-location suppress '#: filename:line' lines' + -w WIDTH, --width=WIDTH + set output page width + --no-wrap do not break long message lines, longer + than the output page width, into several lines + -s, --sort-output generate sorted output + -F --sort-by-file sort output by file location + +The compendium can be used in two modes: +- Default mode: the translations from the compendium are used + only if they are missing in the output file. + +- Compendium overwrite mode: when using the ``compendium-overwrite`` option, translations + from the compendium take priority and replace those in the output file. If a translation + is used from the compendium, a comment noting the source is added + +The ``input-files`` option includes def.po, a file with obsolete translations, and ref.pot, +the current template file for updating translations. + +The ``compendium`` option can be specified multiple times to use several compendiums. 
+ +The ``backup`` option is used to create a backup copy of the def.po file, which contains +obsolete translations + +The ``suffix`` option allows you to specify a custom suffix for the backup file +By default, a standard suffix ``~`` is appended to the backup file's name, From 13e33309da6183fc8c94ff0143b05d2746500375 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Mon, 21 Apr 2025 16:00:27 +0700 Subject: [PATCH 17/25] Add usage documentation for pybabel concat and merge commands * Includes .rst file with detailed use cases and practical examples for pybabel's concat and merge utilities, outlining common scenarios, options, and best practices for managing PO files. --- docs/concat_merge_usage.rst | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) create mode 100644 docs/concat_merge_usage.rst diff --git a/docs/concat_merge_usage.rst b/docs/concat_merge_usage.rst new file mode 100644 index 000000000..03c06d0ea --- /dev/null +++ b/docs/concat_merge_usage.rst @@ -0,0 +1,52 @@ +Usage scenarios +--------------- + +1. Merging Multiple PO Files (`concat`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Usage:** +`pybabel concat [options] ` +Suppose you manage a project with several PO files for the same language (for example, modules or plugins have their own translations), and you want to combine them into a single file for further work or for delivery to translators. + +**Example:** + +.. code-block:: shell + + pybabel concat -o merged.po module1.po module2.po module3.po + +**Features:** + +- If the same string has different translations in different files, the resulting file for that string will include a special comment ``#-#-#-#-# (PROJECT VERSION) #-#-#-#-#`` and the message will be marked with the ``fuzzy`` flag—this is useful for later manual conflict resolution. +- You can keep only unique strings using the ``-u`` (`--less-than=2`) option. 
+- Use `--use-first` to take only the first encountered translation for each string, skipping automatic merging of multiple options. +- Output can be sorted alphabetically or by source file (options `-s`, `-F`). + +**Typical Use Case:** + + A project has translations from different teams. Before releasing, you need to gather all translations into one file, resolve possible conflicts, and provide the finalized version to translators for review. + + +2. Updating Translations with a Template and Compendium (`merge`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Usage:** +`pybabel merge [options] def.po ref.pot` +You need to update an existing translation file (`def.po`) based on a new template (`ref.pot`), reusing translations from an additional translation memory (compendium). + +**Example:** + +.. code-block:: shell + + pybabel merge -C my-compendium.po --backup def.po ref.pot + +**Features:** + +- The compendium (`-C`) allows you to pull translations from a shared translation memory. Multiple compendiums can be used. +- By default, translations from the compendium are used only for new or missing entries in `def.po`. +- The `--compendium-overwrite` option allows overwriting existing translations with those found in the compendium (helpful for terminology standardization). +- When a translation from the compendium is used, a comment is automatically added (this can be disabled with `--no-compendium-comment`). +- The `--backup` flag saves a backup copy of your file before updating (`~` suffix by default, configurable with `--suffix`). + +**Typical Use Case:** + + After a release, a new translation template is provided. The team decides to enrich the translation by leveraging a common compendium in order to improve quality and unify terms. The merge command is run with the compendium and backup options enabled. 
From 49e60036622c7da5484e5658732550472f42801c Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 27 Apr 2025 15:08:30 +0300 Subject: [PATCH 18/25] Fix PR issues * Rename file_name to filename * Parameterize adding the fuzzy flag to the message in 'add_conflict' * Move usage scenarios to cmdline.rst * Rename to ConcatenateCatalog --- babel/messages/catalog.py | 16 +++++----- babel/messages/frontend.py | 6 ++-- babel/messages/pofile.py | 4 +-- docs/cmdline.rst | 60 ++++++++++++++++++++++++++++++++++--- docs/concat_merge_usage.rst | 52 -------------------------------- 5 files changed, 69 insertions(+), 69 deletions(-) delete mode 100644 docs/concat_merge_usage.rst diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index bd8621804..f47fc7adc 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -14,6 +14,7 @@ import re import os from collections.abc import Iterable, Iterator +from collections import defaultdict from copy import copy from difflib import SequenceMatcher from email import message_from_string @@ -360,7 +361,7 @@ def _force_text(s: str | bytes, encoding: str = 'utf-8', errors: str = 'strict') class ConflictInfo(TypedDict): message: Message - file_name: str + filename: str project: str version: str @@ -408,7 +409,7 @@ def __init__( self.locale = locale self._header_comment = header_comment self._messages: dict[str | tuple[str, str], Message] = {} - self._conflicts: dict[str | tuple[str, str], list[ConflictInfo]] = {} + self._conflicts: dict[str | tuple[str, str], list[ConflictInfo]] = defaultdict(list) self.project = project or 'PROJECT' self.version = version or 'VERSION' @@ -789,18 +790,17 @@ def __setitem__(self, id: _MessageID, message: Message) -> None: ) self._messages[key] = message - def add_conflict(self, message: Message, file_name: str, project: str, version: str): + def add_conflict(self, message: Message, filename: str, project: str, version: str, fuzzy: bool = True): key = message.id - if key not in 
self._conflicts: - self._conflicts[key] = [] - self._conflicts[key].append({ 'message': message, - 'file_name': file_name, + 'filename': filename, 'project': project, 'version': version, }) - message.flags |= {'fuzzy'} + + if fuzzy: + message.flags |= {'fuzzy'} def add( self, diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 1d71ed50d..33371849e 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -993,8 +993,8 @@ def run(self): continue if count > 1 and not self.use_first and diff_string_count > 1: - file_name = os.path.basename(path) - catalog.add_conflict(message, file_name, template.project, template.version) + filename = os.path.basename(path) + catalog.add_conflict(message, filename, template.project, template.version) catalog[message.id] = message @@ -1144,7 +1144,7 @@ class CommandLineInterface: 'init': 'create new message catalogs from a POT file', 'update': 'update existing message catalogs from a POT file', 'concat': 'concatenates and merges the specified PO files', - 'merge': 'combines two Uniforum-style PO files into one', + 'merge': 'combines two PO files into one', } command_classes = { diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index f6e922d99..bb4de1b8f 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -657,7 +657,7 @@ def _format_conflict(key: str | tuple[str, str], conflicts: list[ConflictInfo], for conflict in conflicts: message = conflict['message'] if message.context: - yield from _format_conflict_comment(conflict['file_name'], conflict['project'], conflict['version'], prefix=prefix) + yield from _format_conflict_comment(conflict['filename'], conflict['project'], conflict['version'], prefix=prefix) yield f"{prefix}msgctxt {normalize(message.context, prefix=prefix, width=width)}\n" if isinstance(key, (list, tuple)): @@ -669,7 +669,7 @@ def _format_conflict(key: str | tuple[str, str], conflicts: list[ConflictInfo], for conflict in conflicts: message = 
conflict['message'] - yield from _format_conflict_comment(conflict['file_name'], conflict['project'], conflict['version'], prefix=prefix) + yield from _format_conflict_comment(conflict['filename'], conflict['project'], conflict['version'], prefix=prefix) if isinstance(key, (list, tuple)): for idx in range(catalog.num_plurals): try: diff --git a/docs/cmdline.rst b/docs/cmdline.rst index e8221dd0e..de28e1ddb 100644 --- a/docs/cmdline.rst +++ b/docs/cmdline.rst @@ -326,13 +326,65 @@ The compendium can be used in two modes: from the compendium take priority and replace those in the output file. If a translation is used from the compendium, a comment noting the source is added -The ``input-files`` option includes def.po, a file with obsolete translations, and ref.pot, +The ``input-files`` option accepts exactly two arguments: a file with obsolete translations, and the current template file for updating translations. The ``compendium`` option can be specified multiple times to use several compendiums. The ``backup`` option is used to create a backup copy of the def.po file, which contains -obsolete translations +obsolete translations. -The ``suffix`` option allows you to specify a custom suffix for the backup file -By default, a standard suffix ``~`` is appended to the backup file's name, +The ``suffix`` option allows you to specify a custom suffix for the backup file (defaulting to ``~``). + +pybable concat and merge usage scenarios +====== + +1. Merging Multiple PO Files (`concat`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Usage:** +`pybabel concat [options] ` +Suppose you manage a project with several PO files for the same language (for example, modules or plugins have their own translations), and you want to combine them into a single file for further work or for delivery to translators. + +**Example:** + +.. 
code-block:: shell + + pybabel concat -o merged.po module1.po module2.po module3.po + +**Features:** + +- If the same string has different translations in different files, the resulting file for that string will include a special comment ``#-#-#-#-# (PROJECT VERSION) #-#-#-#-#`` and the message will be marked with the ``fuzzy`` flag—this is useful for later manual conflict resolution. +- You can keep only unique strings using the ``-u`` (`--less-than=2`) option. +- Use `--use-first` to take only the first encountered translation for each string, skipping automatic merging of multiple options. +- Output can be sorted alphabetically or by source file (options `-s`, `-F`). + +**Typical Use Case:** + + A project has translations from different teams. Before releasing, you need to gather all translations into one file, resolve possible conflicts, and provide the finalized version to translators for review. + + +2. Updating Translations with a Template and Compendium (`merge`) +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +**Usage:** +`pybabel merge [options] def.po ref.pot` +You need to update an existing translation file (`def.po`) based on a new template (`ref.pot`), reusing translations from an additional translation memory (compendium). + +**Example:** + +.. code-block:: shell + + pybabel merge -C my-compendium.po --backup def.po ref.pot + +**Features:** + +- The compendium (`-C`) allows you to pull translations from a shared translation memory. Multiple compendiums can be used. +- By default, translations from the compendium are used only for new or missing entries in `def.po`. +- The `--compendium-overwrite` option allows overwriting existing translations with those found in the compendium (helpful for terminology standardization). +- When a translation from the compendium is used, a comment is automatically added (this can be disabled with `--no-compendium-comment`). 
+- The `--backup` flag saves a backup copy of your file before updating (`~` suffix by default, configurable with `--suffix`). + +**Typical Use Case:** + + After a release, a new translation template is provided. The team decides to enrich the translation by leveraging a common compendium in order to improve quality and unify terms. The merge command is run with the compendium and backup options enabled. diff --git a/docs/concat_merge_usage.rst b/docs/concat_merge_usage.rst deleted file mode 100644 index 03c06d0ea..000000000 --- a/docs/concat_merge_usage.rst +++ /dev/null @@ -1,52 +0,0 @@ -Usage scenarios ---------------- - -1. Merging Multiple PO Files (`concat`) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Usage:** -`pybabel concat [options] ` -Suppose you manage a project with several PO files for the same language (for example, modules or plugins have their own translations), and you want to combine them into a single file for further work or for delivery to translators. - -**Example:** - -.. code-block:: shell - - pybabel concat -o merged.po module1.po module2.po module3.po - -**Features:** - -- If the same string has different translations in different files, the resulting file for that string will include a special comment ``#-#-#-#-# (PROJECT VERSION) #-#-#-#-#`` and the message will be marked with the ``fuzzy`` flag—this is useful for later manual conflict resolution. -- You can keep only unique strings using the ``-u`` (`--less-than=2`) option. -- Use `--use-first` to take only the first encountered translation for each string, skipping automatic merging of multiple options. -- Output can be sorted alphabetically or by source file (options `-s`, `-F`). - -**Typical Use Case:** - - A project has translations from different teams. Before releasing, you need to gather all translations into one file, resolve possible conflicts, and provide the finalized version to translators for review. - - -2. 
Updating Translations with a Template and Compendium (`merge`) -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -**Usage:** -`pybabel merge [options] def.po ref.pot` -You need to update an existing translation file (`def.po`) based on a new template (`ref.pot`), reusing translations from an additional translation memory (compendium). - -**Example:** - -.. code-block:: shell - - pybabel merge -C my-compendium.po --backup def.po ref.pot - -**Features:** - -- The compendium (`-C`) allows you to pull translations from a shared translation memory. Multiple compendiums can be used. -- By default, translations from the compendium are used only for new or missing entries in `def.po`. -- The `--compendium-overwrite` option allows overwriting existing translations with those found in the compendium (helpful for terminology standardization). -- When a translation from the compendium is used, a comment is automatically added (this can be disabled with `--no-compendium-comment`). -- The `--backup` flag saves a backup copy of your file before updating (`~` suffix by default, configurable with `--suffix`). - -**Typical Use Case:** - - After a release, a new translation template is provided. The team decides to enrich the translation by leveraging a common compendium in order to improve quality and unify terms. The merge command is run with the compendium and backup options enabled. 
From f1618e5590c68d0ef325e7f11fa4fd8fac152f97 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Sun, 27 Apr 2025 15:19:12 +0300 Subject: [PATCH 19/25] Add '_conflicts' getter in catalog --- babel/messages/catalog.py | 3 +++ babel/messages/pofile.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index f47fc7adc..089b18b85 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -802,6 +802,9 @@ def add_conflict(self, message: Message, filename: str, project: str, version: s if fuzzy: message.flags |= {'fuzzy'} + def get_conflicts(self, id: _MessageID) -> list[ConflictInfo]: + return self._conflicts.get(id, []) + def add( self, id: _MessageID, diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index bb4de1b8f..edcb733bb 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -751,7 +751,7 @@ def _format_message(message, prefix=''): norm_previous_id = normalize(message.previous_id[1], width=width) yield from _format_comment(f'msgid_plural {norm_previous_id}', prefix='|') - if len(conflicts := catalog._conflicts.get(message.id, [])) > 0: + if len(conflicts := catalog.get_conflicts(message.id)) > 0: yield from _format_conflict(message.id, conflicts) else: yield from _format_message(message) From 6dd98e63b68d27df974b39ecc21603a38b9cb99f Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Wed, 8 Apr 2026 19:23:57 +0300 Subject: [PATCH 20/25] Remove fuzzy flag mutation from add_conflict The add_conflict method was mutating the message's fuzzy flag as a side effect, which is unexpected. The caller (ConcatenateCatalog.run) now explicitly sets the fuzzy flag when adding a conflicting message. 
--- babel/messages/catalog.py | 5 +---- babel/messages/frontend.py | 1 + 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index 089b18b85..9168eb993 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -790,7 +790,7 @@ def __setitem__(self, id: _MessageID, message: Message) -> None: ) self._messages[key] = message - def add_conflict(self, message: Message, filename: str, project: str, version: str, fuzzy: bool = True): + def add_conflict(self, message: Message, filename: str, project: str, version: str): key = message.id self._conflicts[key].append({ 'message': message, @@ -799,9 +799,6 @@ def add_conflict(self, message: Message, filename: str, project: str, version: s 'version': version, }) - if fuzzy: - message.flags |= {'fuzzy'} - def get_conflicts(self, id: _MessageID) -> list[ConflictInfo]: return self._conflicts.get(id, []) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 33371849e..f8dc41574 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -995,6 +995,7 @@ def run(self): if count > 1 and not self.use_first and diff_string_count > 1: filename = os.path.basename(path) catalog.add_conflict(message, filename, template.project, template.version) + message.flags |= {'fuzzy'} catalog[message.id] = message From ad322bdce77824918824e8580cc58e782f8f89c5 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Wed, 8 Apr 2026 19:36:28 +0300 Subject: [PATCH 21/25] Refactor ConcatenateCatalog: stdout output, mutual exclusivity, better help texts --- babel/messages/frontend.py | 64 ++++++++++++-------- tests/messages/frontend/test_concat_merge.py | 54 +++++++++++------ 2 files changed, 73 insertions(+), 45 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index f8dc41574..1dab83d3e 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -21,7 +21,7 @@ import sys import tempfile import 
warnings -from collections import defaultdict +from collections import Counter, defaultdict from configparser import RawConfigParser from io import StringIO from typing import Any, BinaryIO, Iterable, Literal @@ -892,19 +892,20 @@ class ConcatenateCatalog(CommandMixin): description = 'concatenates the specified PO files into single one' user_options = [ ('input-files', None, 'input files'), - ('output-file=', 'o', 'write output to specified file'), - ('less-than=', '<', 'print messages with less than this many' - 'definitions, defaults to infinite if not set '), + ('output-file=', 'o', 'write output to specified file, the results are written ' + 'to standard output if no output file is specified or if it is \'-\''), + ('less-than=', '<', 'print messages with less than this many ' + 'definitions, defaults to infinite if not set'), ('more-than=', '>', 'print messages with more than this many ' 'definitions, defaults to 0 if not set'), ('unique', 'u', 'shorthand for --less-than=2, requests ' 'that only unique messages be printed'), ('use-first', None, 'use first available translation for each ' 'message, don\'t merge several translations'), - ('no-location', None, 'do not write \'#: filename:line\' lines'), - ('width=', 'w', 'set output page width'), + ('no-location', None, 'do not include location comments with filename and line number'), + ('width=', 'w', 'set output line width (default 76)'), ('no-wrap', None, 'do not break long message lines, longer than ' - 'the output page width, into several lines'), + 'the output line width, into several lines'), ('sort-output', 's', 'generate sorted output'), ('sort-by-file', 'F', 'sort output by file location'), ] @@ -937,8 +938,6 @@ def initialize_options(self): def finalize_options(self): if not self.input_files: raise OptionError('you must specify the input files') - if not self.output_file: - raise OptionError('you must specify the output file') if self.no_wrap and self.width: raise OptionError("'--no-wrap' and '--width' 
are mutually exclusive") @@ -953,31 +952,34 @@ def finalize_options(self): self.more_than = int(self.more_than) if self.less_than is not None: self.less_than = int(self.less_than) + if self.unique: + if self.less_than is not None or self.more_than: + raise OptionError("'--unique' is mutually exclusive with '--less-than' and '--more-than'") self.less_than = 2 - def _prepare(self): + def _collect_message_info(self): templates: list[tuple[str, Catalog]] = [] - message_info = {} + message_counts: Counter = Counter() + message_strings: dict[object, set] = defaultdict(set) for filename in self.input_files: with open(filename, 'r') as pofile: template = read_po(pofile) for message in template: - if message.id not in message_info: - message_info[message.id] = { - 'count': 0, - 'strings': set(), - } - message_info[message.id]['count'] += 1 - message_info[message.id]['strings'].add(message.string if isinstance(message.string, str) else tuple(message.string)) - templates.append((filename, template, )) + if not message.id: + continue + message_counts[message.id] += 1 + message_strings[message.id].add( + message.string if isinstance(message.string, str) else tuple(message.string) + ) + templates.append((filename, template)) - return templates, message_info + return templates, message_counts, message_strings def run(self): catalog = Catalog(fuzzy=False) - templates, message_info = self._prepare() + templates, message_counts, message_strings = self._collect_message_info() for path, template in templates: if catalog.locale is None: @@ -987,12 +989,11 @@ def run(self): if not message.id: continue - count = message_info[message.id]['count'] - diff_string_count = len(message_info[message.id]['strings']) + count = message_counts[message.id] if count <= self.more_than or (self.less_than is not None and count >= self.less_than): continue - if count > 1 and not self.use_first and diff_string_count > 1: + if count > 1 and not self.use_first and len(message_strings[message.id]) > 1: 
filename = os.path.basename(path) catalog.add_conflict(message, filename, template.project, template.version) message.flags |= {'fuzzy'} @@ -1001,15 +1002,26 @@ def run(self): catalog.fuzzy = any(message.fuzzy for message in catalog) - with open(self.output_file, 'wb') as outfile: + output_file = self.output_file + if not output_file or output_file == '-': write_po( - outfile, + sys.stdout.buffer, catalog, width=self.width, sort_by_file=self.sort_by_file, sort_output=self.sort_output, no_location=self.no_location, ) + else: + with open(output_file, 'wb') as outfile: + write_po( + outfile, + catalog, + width=self.width, + sort_by_file=self.sort_by_file, + sort_output=self.sort_output, + no_location=self.no_location, + ) class MergeCatalog(CommandMixin): diff --git a/tests/messages/frontend/test_concat_merge.py b/tests/messages/frontend/test_concat_merge.py index c4fcd4a0f..31845f502 100644 --- a/tests/messages/frontend/test_concat_merge.py +++ b/tests/messages/frontend/test_concat_merge.py @@ -73,25 +73,27 @@ def teardown_method(self): def _get_expected(self, messages, fuzzy=False): date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - return fr"""# Translations template for PROJECT. -# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. 
-#{'\n#, fuzzy' if fuzzy else ''} -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -""" + messages + fuzzy_header = '\n#, fuzzy' if fuzzy else '' + return ( + "# Translations template for PROJECT.\n" + "# Copyright (C) 1994 ORGANIZATION\n" + "# This file is distributed under the same license as the PROJECT project.\n" + "# FIRST AUTHOR , 1994.\n" + "#" + fuzzy_header + "\n" + 'msgid ""\n' + 'msgstr ""\n' + '"Project-Id-Version: PROJECT VERSION\\n"\n' + '"Report-Msgid-Bugs-To: EMAIL@ADDRESS\\n"\n' + f'"POT-Creation-Date: {date}\\n"\n' + '"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"\n' + '"Last-Translator: FULL NAME \\n"\n' + '"Language-Team: LANGUAGE \\n"\n' + '"MIME-Version: 1.0\\n"\n' + '"Content-Type: text/plain; charset=utf-8\\n"\n' + '"Content-Transfer-Encoding: 8bit\\n"\n' + f'"Generated-By: Babel {VERSION}\\n"\n' + "\n" + ) + messages def test_no_input_files(self): with pytest.raises(OptionError): @@ -99,6 +101,19 @@ def test_no_input_files(self): def test_no_output_file(self): self.cmd.input_files = ['project/i18n/messages.pot'] + self.cmd.finalize_options() # output_file not required; defaults to stdout + + def test_unique_exclusive_with_less_than(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.unique = True + self.cmd.less_than = 3 + with pytest.raises(OptionError): + self.cmd.finalize_options() + + def test_unique_exclusive_with_more_than(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.unique = True + self.cmd.more_than = 1 with pytest.raises(OptionError): self.cmd.finalize_options() @@ -236,6 +251,7 @@ def test_unique(self): actual_content = f.read() assert expected_content == 
actual_content + self.cmd.unique = False self.cmd.less_than = 2 self.cmd.finalize_options() self.cmd.run() From 9644a86d47e5f4d449a7cd9b70a71d5f54c2970a Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Wed, 8 Apr 2026 19:41:36 +0300 Subject: [PATCH 22/25] Fix MergeCatalog options and update docs --- babel/messages/frontend.py | 54 +++++++++++++++++---------------- docs/cmdline.rst | 62 +++++++++++++++++--------------------- 2 files changed, 55 insertions(+), 61 deletions(-) diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 1dab83d3e..e225d2a90 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1025,22 +1025,22 @@ def run(self): class MergeCatalog(CommandMixin): - description='updates translation PO file by merging them with updated template POT file with using compendium' - user_options=[ - ('input-files', None, 'def.po (obsolete translations) ref.pot (actual template)'), + description = 'update a PO file by merging it with a newer POT template, optionally using a compendium' + user_options = [ + ('input-files', None, 'exactly two input files: def.po (obsolete translations); ref.pot (current template)'), ('compendium=', 'C', 'additional library of message translations, may be specified more than once'), - ('compendium-overwrite', '', 'overwrite mode of compendium'), - ('no-compendium-comment', '', ''), - ('update', 'U', 'pdate def.po, do nothing if def.po already up to date'), + ('compendium-overwrite', None, 'overwrite existing translations with compendium entries'), + ('no-compendium-comment', None, 'do not add a comment for translations taken from a compendium'), + ('update', 'U', 'update def.po, do nothing if def.po already up to date'), ('output-file=', 'o', 'write output to specified file, the results are written ' 'to standard output if no output file is specified'), ('backup', None, 'make a backup of def.po'), - ('suffix=', None, 'override the usual backup suffix'), + ('suffix=', None, 'use SUFFIX as backup 
suffix instead of ~ (tilde)'), ('no-fuzzy-matching', 'N', 'do not use fuzzy matching'), - ('no-location', None, 'suppress \'#: filename:line\' lines'), - ('width=', 'w', 'set output page width'), + ('no-location', None, 'do not include location comments with filename and line number'), + ('width=', 'w', 'set output line width (default 76)'), ('no-wrap', None, 'do not break long message lines, longer ' - 'than the output page width, into several lines'), + 'than the output line width, into several lines'), ('sort-output', 's', 'generate sorted output'), ('sort-by-file', 'F', 'sort output by file location'), ] @@ -1048,7 +1048,7 @@ class MergeCatalog(CommandMixin): as_args = 'input-files' multiple_value_options = ( - 'compendium' + 'compendium', ) boolean_options = [ @@ -1081,9 +1081,11 @@ def initialize_options(self): def finalize_options(self): if not self.input_files or len(self.input_files) != 2: - raise OptionError('must be two po files') + raise OptionError( + f'exactly two input files are required (def.po and ref.pot), got: {self.input_files!r}' + ) if not self.output_file and not self.update: - raise OptionError('you must specify the output file or update existing') + raise OptionError('you must specify the output file or use --update') if self.no_wrap and self.width: raise OptionError("'--no-wrap' and '--width' are mutually exclusive") @@ -1092,8 +1094,10 @@ def finalize_options(self): elif self.width is not None: self.width = int(self.width) - def _get_message_from_compendium(self, compendium): - for file_path in compendium: + def _get_messages_from_compendiums(self, compendium_paths): + if not compendium_paths: + return + for file_path in compendium_paths: with open(file_path, 'r') as pofile: catalog = read_po(pofile) for message in catalog: @@ -1111,19 +1115,17 @@ def run(self): no_fuzzy_matching=self.no_fuzzy_matching ) - if self.compendium: - for message, compendium_path in self._get_message_from_compendium(self.compendium): - current = 
catalog[message.id] - if message.id in catalog and (not current.string or current.fuzzy or self.compendium_overwrite): - if self.compendium_overwrite and not current.fuzzy and current.string: - catalog.obsolete[message.id] = current.clone() + for message, compendium_path in self._get_messages_from_compendiums(self.compendium): + if (current := catalog.get(message.id)) and (not current.string or current.fuzzy or self.compendium_overwrite): + if self.compendium_overwrite and not current.fuzzy and current.string: + catalog.obsolete[message.id] = current.clone() - current.string = message.string - if current.fuzzy: - current.flags.remove('fuzzy') + current.string = message.string + if current.fuzzy: + current.flags.remove('fuzzy') - if not self.no_compendium_comment: - current.auto_comments.append(compendium_path) + if not self.no_compendium_comment: + current.auto_comments.append(compendium_path) catalog.fuzzy = any(message.fuzzy for message in catalog) output_path = def_file if self.update else self.output_file diff --git a/docs/cmdline.rst b/docs/cmdline.rst index de28e1ddb..672bbfe70 100644 --- a/docs/cmdline.rst +++ b/docs/cmdline.rst @@ -251,11 +251,9 @@ and updates each of them. concat ====== -The `concat` command merges multiple PO files into a single one. If a message has +The ``concat`` command merges multiple PO files into one. 
If a message has different translations in different PO files, the conflicting translations are -marked with a conflict comment:: - #-#-#-#-# (PROJECT VERSION) #-#-#-#-# -and the message itself is marked with a `fuzzy` flag:: +marked with a conflict comment and the message itself is marked with a ``fuzzy`` flag:: $ pybabel concat --help Usage: pybabel concat [options] @@ -265,79 +263,73 @@ and the message itself is marked with a `fuzzy` flag:: Options: -h, --help show this help message and exit -o OUTPUT_FILE, --output-file=OUTPUT_FILE - write output to specified file + write output to specified file, the results are written + to standard output if no output file is specified or if it is '-' --less-than=NUMBER print messages with less than this many definitions, defaults to infinite if not set --more-than=NUMBER print messages with more than this many definitions, defaults to 0 if not set - -u, unique shorthand for --less-than=2, requests + -u, --unique shorthand for --less-than=2, requests that only unique messages be printed --use-first use first available translation for each message, don't merge several translations - --no-location do not write '#: filename:line' lines + --no-location do not include location comments with filename and line number -w WIDTH, --width=WIDTH - set output page width + set output line width (default 76) --no-wrap do not break long message lines, longer than - the output page width, into several lines + the output line width, into several lines -s, --sort-output generate sorted output -F, --sort-by-file sort output by file location merge -====== +===== -The `merge` command allows updating files using a compendium as a translation memory:: +The ``merge`` command allows updating files, optionally using a compendium as a translation memory:: - $ pybabel concat --help + $ pybabel merge --help Usage: pybabel merge [options] - updates translation PO file by merging them with updated template - POT file with using compendium + update a PO 
file by merging it with a newer POT template, optionally using a compendium Options: -C COMPENDIUM_FILE, --compendium=COMPENDIUM_FILE additional library of message translations, may be specified more than once --compendium-overwrite - overwrite mode of compendium + overwrite existing translations with compendium entries --no-compendium-comment - do not add a comment indicating that the message is - taken from the compendium - -U, --update update def.po, do nothing if def.po already up to date, + do not add a comment for translations taken from a compendium + -U, --update update def.po, do nothing if def.po already up to date -o OUTPUT_FILE, --output-file=OUTPUT_FILE write output to specified file, the results are written to standard output if no output file is specified --backup make a backup of def.po - --suffix=SUFFIX override the usual backup suffix (default '~') + --suffix=SUFFIX use SUFFIX as backup suffix instead of ~ (tilde) -N, --no-fuzzy-matching do not use fuzzy matching - --no-location suppress '#: filename:line' lines' + --no-location do not include location comments with filename and line number -w WIDTH, --width=WIDTH - set output page width + set output line width (default 76) --no-wrap do not break long message lines, longer - than the output page width, into several lines + than the output line width, into several lines -s, --sort-output generate sorted output - -F --sort-by-file sort output by file location - -The compendium can be used in two modes: -- Default mode: the translations from the compendium are used - only if they are missing in the output file. - -- Compendium overwrite mode: when using the ``compendium-overwrite`` option, translations - from the compendium take priority and replace those in the output file. 
If a translation - is used from the compendium, a comment noting the source is added + -F, --sort-by-file sort output by file location The ``input-files`` option accepts exactly two arguments: a file with obsolete translations, and the current template file for updating translations. The ``compendium`` option can be specified multiple times to use several compendiums. +The compendium can be used in two modes: -The ``backup`` option is used to create a backup copy of the def.po file, which contains -obsolete translations. +- Default mode: translations from the compendium are used only if they are missing in the output file. +- Compendium overwrite mode: when using the ``--compendium-overwrite`` option, translations + from the compendium take priority and replace those in the output file. +The ``backup`` option is used to create a backup copy of the def.po file before updating it. The ``suffix`` option allows you to specify a custom suffix for the backup file (defaulting to ``~``). -pybable concat and merge usage scenarios -====== +concat and merge usage scenarios +================================= 1. 
Merging Multiple PO Files (`concat`) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ From 97f1e088186b84ae4d08d145b2ca3fd6aec8a124 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Wed, 8 Apr 2026 19:49:40 +0300 Subject: [PATCH 23/25] Add tests for ConcatenateCatalog and MergeCatalog --- tests/messages/frontend/test_concat_merge.py | 272 +++++++++++++++++++ 1 file changed, 272 insertions(+) diff --git a/tests/messages/frontend/test_concat_merge.py b/tests/messages/frontend/test_concat_merge.py index 31845f502..e42c0c966 100644 --- a/tests/messages/frontend/test_concat_merge.py +++ b/tests/messages/frontend/test_concat_merge.py @@ -12,9 +12,12 @@ from __future__ import annotations +import io import os import shutil +import sys from datetime import datetime +from unittest.mock import patch import pytest from freezegun import freeze_time @@ -300,6 +303,137 @@ def test_more_than(self): actual_content = f.read() assert expected_content == actual_content + def test_no_wrap_width_exclusive(self): + self.cmd.input_files = [self.temp1] + self.cmd.no_wrap = True + self.cmd.width = 80 + with pytest.raises(OptionError): + self.cmd.finalize_options() + + def _capture_stdout(self): + buf = io.BytesIO() + + class FakeStdout: + buffer = buf + + return FakeStdout(), buf + + def test_stdout_output(self): + self.cmd.input_files = [self.temp1] + self.cmd.finalize_options() + + fake_stdout, buf = self._capture_stdout() + with patch('sys.stdout', fake_stdout): + self.cmd.run() + + content = buf.getvalue().decode('utf-8') + assert 'msgid "other1"' in content + assert 'msgstr "Other 1"' in content + assert 'msgid "same"' in content + + def test_stdout_dash(self): + self.cmd.input_files = [self.temp1] + self.cmd.output_file = '-' + self.cmd.finalize_options() + + fake_stdout, buf = self._capture_stdout() + with patch('sys.stdout', fake_stdout): + self.cmd.run() + + content = buf.getvalue().decode('utf-8') + assert 'msgid "other1"' in content + + def test_same_string_no_conflict(self): + 
self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + content = f.read() + + same_block = [line for line in content.split('\n\n') if 'msgid "same"' in line] + assert same_block + block = same_block[0] + assert 'fuzzy' not in block + assert '#-#-#-#-#' not in block + assert 'msgstr "Same"' in block + + def test_no_location(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + self.cmd.no_location = True + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + content = f.read() + + assert '#: ' not in content + assert 'msgid "other1"' in content + + def test_sort_output(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + self.cmd.sort_output = True + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + content = f.read() + + msgid_positions = { + 'almost_same': content.index('msgid "almost_same"'), + 'other1': content.index('msgid "other1"'), + 'other2': content.index('msgid "other2"'), + 'other3': content.index('msgid "other3"'), + 'other4': content.index('msgid "other4"'), + 'same': content.index('msgid "same"'), + } + ordered = sorted(msgid_positions, key=msgid_positions.get) + assert ordered == ['almost_same', 'other1', 'other2', 'other3', 'other4', 'same'] + + def test_single_input_file(self): + self.cmd.input_files = [self.temp1] + self.cmd.output_file = self.output_file + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + content = f.read() + + assert 'msgid "other1"' in content + assert 'msgid "other2"' in content + assert 'msgid "same"' in content + assert '#-#-#-#-#' not in content + assert 'fuzzy' not in content + + def test_unique_exclusive_with_more_than_nonzero(self): + self.cmd.input_files = [self.temp1, self.temp2] + 
self.cmd.unique = True + self.cmd.more_than = 0 + self.cmd.finalize_options() + + def test_less_than_equivalent_to_unique(self): + self.cmd.input_files = [self.temp1, self.temp2] + self.cmd.output_file = self.output_file + self.cmd.less_than = 2 + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + less_than_content = f.read() + + self.cmd.less_than = None + self.cmd.unique = True + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + unique_content = f.read() + + assert less_than_content == unique_content + class TestMergeCatalog: @@ -532,3 +666,141 @@ def test_update_backup(self): with open(self.temp_def + '.bac', 'r') as f: actual_content = f.read() assert before_content == actual_content + + def test_no_wrap_width_exclusive(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.no_wrap = True + self.cmd.width = 80 + with pytest.raises(OptionError): + self.cmd.finalize_options() + + def test_compendium_with_comment(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.compendium = [self.compendium] + self.cmd.no_fuzzy_matching = True + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + content = f.read() + + assert f'#. 
{self.compendium}' in content + assert 'msgid "word4"' in content + assert 'msgstr "Word 4"' in content + + def test_compendium_does_not_overwrite_existing(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.compendium = [self.compendium] + self.cmd.no_fuzzy_matching = True + self.cmd.no_compendium_comment = True + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + content = f.read() + + blocks = content.split('\n\n') + word1_block = next((b for b in blocks if 'msgid "word1"' in b), None) + assert word1_block is not None + assert 'msgstr "Word 1"' in word1_block + assert 'Comp Word 1' not in word1_block + + def test_multiple_compendiums(self): + compendium2 = f'{i18n_dir}/compendium2.po' + try: + with open(compendium2, 'wb') as f: + cat = Catalog() + cat.add('word3', string='Word 3 from comp2') + pofile.write_po(f, cat) + + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.compendium = [self.compendium, compendium2] + self.cmd.no_fuzzy_matching = True + self.cmd.no_compendium_comment = True + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + content = f.read() + + assert 'msgstr "Word 4"' in content + assert 'msgstr "Word 3 from comp2"' in content + finally: + if os.path.exists(compendium2): + os.unlink(compendium2) + + def test_compendium_fills_empty_translation(self): + compendium_with_word3 = f'{i18n_dir}/comp_word3.po' + try: + with open(compendium_with_word3, 'wb') as f: + cat = Catalog() + cat.add('word3', string='Word 3 comp') + pofile.write_po(f, cat) + + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.compendium = [compendium_with_word3] + self.cmd.no_fuzzy_matching = True + self.cmd.no_compendium_comment = True + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + 
content = f.read() + + assert 'msgstr "Word 3 comp"' in content + finally: + if os.path.exists(compendium_with_word3): + os.unlink(compendium_with_word3) + + def test_obsolete_messages(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.no_fuzzy_matching = True + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + content = f.read() + + assert '#~ msgid' not in content + + extra_def = f'{i18n_dir}/extra_def.po' + try: + with open(extra_def, 'wb') as f: + cat = Catalog() + cat.add('word1', string='Word 1') + cat.add('old_word', string='Old Word') + pofile.write_po(f, cat) + + self.cmd.input_files = [extra_def, self.temp_ref] + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + content = f.read() + + assert '#~ msgid "old_word"' in content + assert '#~ msgstr "Old Word"' in content + finally: + if os.path.exists(extra_def): + os.unlink(extra_def) + + def test_compendium_not_applied_for_absent_messages(self): + self.cmd.input_files = [self.temp_def, self.temp_ref] + self.cmd.output_file = self.output_file + self.cmd.compendium = [self.compendium] + self.cmd.no_fuzzy_matching = True + self.cmd.no_compendium_comment = True + self.cmd.finalize_options() + self.cmd.run() + + with open(self.output_file, 'r') as f: + content = f.read() + + active_blocks = content.split('#~')[0] + assert 'word5' not in active_blocks From 39648462c625c8af0933d0174b09406fbaf604c8 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Fri, 10 Apr 2026 15:41:29 +0300 Subject: [PATCH 24/25] Address review comments in catalog, frontend and pofile - catalog: use _key_for(id, context) in add_conflict/get_conflicts to correctly handle messages with msgctxt; add docstrings to both methods - frontend: tighten type hints (Counter[_MessageID], dict[_MessageID, ...]); remove redundant early-return guard in _get_messages_from_compendiums; initialize self.compendium as empty 
list - pofile: replace .startswith() calls with slice comparisons for consistency --- babel/messages/catalog.py | 26 ++++++++++++++++++++++---- babel/messages/frontend.py | 11 ++++++----- babel/messages/pofile.py | 6 +++--- 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/babel/messages/catalog.py b/babel/messages/catalog.py index 9168eb993..63baf285d 100644 --- a/babel/messages/catalog.py +++ b/babel/messages/catalog.py @@ -790,8 +790,19 @@ def __setitem__(self, id: _MessageID, message: Message) -> None: ) self._messages[key] = message - def add_conflict(self, message: Message, filename: str, project: str, version: str): - key = message.id + def add_conflict(self, message: Message, filename: str, project: str, version: str) -> None: + """Record a conflicting translation for a message. + + When the same message ID has different translations across input files, + the conflicting entry is stored and the message is marked as fuzzy in + the output catalog. + + :param message: the conflicting :class:`Message` object + :param filename: the basename of the file where the conflict originates + :param project: the project name of the conflicting file + :param version: the project version of the conflicting file + """ + key = self._key_for(message.id, message.context) self._conflicts[key].append({ 'message': message, 'filename': filename, @@ -799,8 +810,15 @@ def add_conflict(self, message: Message, filename: str, project: str, version: s 'version': version, }) - def get_conflicts(self, id: _MessageID) -> list[ConflictInfo]: - return self._conflicts.get(id, []) + def get_conflicts(self, id: _MessageID, context: str | None = None) -> list[ConflictInfo]: + """Return all recorded conflicts for a message ID. 
+ + :param id: the message ID to look up conflicts for + :param context: optional message context (msgctxt) + :return: list of :class:`ConflictInfo` dicts, or an empty list if none + """ + key = self._key_for(id, context) + return self._conflicts.get(key, []) def add( self, diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index e225d2a90..27315affe 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -24,7 +24,10 @@ from collections import Counter, defaultdict from configparser import RawConfigParser from io import StringIO -from typing import Any, BinaryIO, Iterable, Literal +from typing import TYPE_CHECKING, Any, BinaryIO, Iterable, Literal + +if TYPE_CHECKING: + from babel.messages.catalog import _MessageID from babel import Locale, localedata from babel import __version__ as VERSION @@ -960,8 +963,8 @@ def finalize_options(self): def _collect_message_info(self): templates: list[tuple[str, Catalog]] = [] - message_counts: Counter = Counter() - message_strings: dict[object, set] = defaultdict(set) + message_counts: Counter[_MessageID] = Counter() + message_strings: dict[_MessageID, set[str | tuple[str, ...]]] = defaultdict(set) for filename in self.input_files: with open(filename, 'r') as pofile: @@ -1095,8 +1098,6 @@ def finalize_options(self): self.width = int(self.width) def _get_messages_from_compendiums(self, compendium_paths): - if not compendium_paths: - return for file_path in compendium_paths: with open(file_path, 'r') as pofile: catalog = read_po(pofile) diff --git a/babel/messages/pofile.py b/babel/messages/pofile.py index edcb733bb..4002ce8b4 100644 --- a/babel/messages/pofile.py +++ b/babel/messages/pofile.py @@ -351,11 +351,11 @@ def parse(self, fileobj: IO[AnyStr] | Iterable[AnyStr]) -> None: continue if needs_decode: line = line.decode(self.catalog.charset) - if line.startswith('#'): - if line[1:].startswith('-'): + if line[:1] == '#': + if line[1:2] == '-': self._invalid_pofile(line, lineno, 'cannot 
parse po file with conflicts') - if line[1:].startswith('~'): + if line[1:2] == '~': self._process_message_line(lineno, line[2:].lstrip(), obsolete=True) else: try: From 3f6bbbbbd6d27cb45c8b3ee55d4eed4737633c41 Mon Sep 17 00:00:00 2001 From: Pavel Bozin Date: Fri, 10 Apr 2026 15:54:48 +0300 Subject: [PATCH 25/25] Refactor tests: split test_concat_merge.py into test_concat.py and test_merge.py - Use pytest fixtures (tmp_path, monkeypatch) instead of setup_method/teardown_method - Use pathlib.Path for file paths - Replace FakeStdout class with monkeypatch.setattr on sys.stdout - Replace snapshot assertions with targeted behavioral checks - Fix compendium typo (compenidum -> compendium) - Fix self.compendium default to [] so _get_messages_from_compendiums is safe without guard - Add test_conflicted_po_raises_on_read --- babel/messages/frontend.py | 2 +- tests/messages/frontend/test_concat.py | 334 ++++++++ tests/messages/frontend/test_concat_merge.py | 806 ------------------- tests/messages/frontend/test_merge.py | 329 ++++++++ 4 files changed, 664 insertions(+), 807 deletions(-) create mode 100644 tests/messages/frontend/test_concat.py delete mode 100644 tests/messages/frontend/test_concat_merge.py create mode 100644 tests/messages/frontend/test_merge.py diff --git a/babel/messages/frontend.py b/babel/messages/frontend.py index 27315affe..ff647bbbf 100644 --- a/babel/messages/frontend.py +++ b/babel/messages/frontend.py @@ -1068,7 +1068,7 @@ class MergeCatalog(CommandMixin): def initialize_options(self): self.input_files = None - self.compendium = None + self.compendium: list[str] = [] self.compendium_overwrite = False self.no_compendium_comment = False self.update = False diff --git a/tests/messages/frontend/test_concat.py b/tests/messages/frontend/test_concat.py new file mode 100644 index 000000000..a16b4d4d8 --- /dev/null +++ b/tests/messages/frontend/test_concat.py @@ -0,0 +1,334 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All 
rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. + +from __future__ import annotations + +import contextlib +import io +import pathlib +import sys + +import pytest +from freezegun import freeze_time + +from babel.messages import Catalog, frontend, pofile +from babel.messages.frontend import OptionError +from tests.messages.consts import TEST_PROJECT_DISTRIBUTION_DATA +from tests.messages.utils import Distribution + + +@pytest.fixture(autouse=True) +def frozen_time(): + with freeze_time("1994-11-11"): + yield + + +@pytest.fixture +def concat_cmd(): + dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + cmd = frontend.ConcatenateCatalog(dist) + cmd.initialize_options() + return cmd + + +@pytest.fixture +def po_files(tmp_path: pathlib.Path): + temp1 = tmp_path / 'msgcat_temp1.po' + temp2 = tmp_path / 'msgcat_temp2.po' + + with open(temp1, 'wb') as file: + catalog = Catalog() + catalog.add('other1', string='Other 1', locations=[('simple.py', 1)], flags=['flag1000']) + catalog.add('other2', string='Other 2', locations=[('simple.py', 10)]) + catalog.add('same', string='Same', locations=[('simple.py', 100)], flags=['flag1', 'flag1.2']) + catalog.add('almost_same', string='Almost same', locations=[('simple.py', 1000)], flags=['flag2']) + catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals'), locations=[('simple.py', 2000)]) + pofile.write_po(file, catalog) + + with open(temp2, 'wb') as file: + catalog = Catalog() + catalog.add('other3', string='Other 3', locations=[('hard.py', 1)]) + catalog.add('other4', string='Other 4', locations=[('hard.py', 10)]) 
+ catalog.add('almost_same', string='A bit same', locations=[('hard.py', 1000)], flags=['flag3']) + catalog.add('same', string='Same', locations=[('hard.py', 100)], flags=['flag4']) + catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals other'), locations=[('hard.py', 2000)]) + pofile.write_po(file, catalog) + + return temp1, temp2 + + +def test_no_input_files(concat_cmd): + with pytest.raises(OptionError): + concat_cmd.finalize_options() + + +def test_no_output_file(concat_cmd): + concat_cmd.input_files = ['project/i18n/messages.pot'] + concat_cmd.finalize_options() # output_file not required; defaults to stdout + + +def test_unique_exclusive_with_less_than(concat_cmd, po_files): + temp1, temp2 = po_files + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.unique = True + concat_cmd.less_than = 3 + with pytest.raises(OptionError): + concat_cmd.finalize_options() + + +def test_unique_exclusive_with_more_than(concat_cmd, po_files): + temp1, temp2 = po_files + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.unique = True + concat_cmd.more_than = 1 + with pytest.raises(OptionError): + concat_cmd.finalize_options() + + +def test_default(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + + assert 'msgid "other1"' in content + assert 'msgstr "Other 1"' in content + assert 'msgid "other3"' in content + + assert 'msgid "same"' in content + assert 'msgstr "Same"' in content + assert content.count('#-#-#-#-# msgcat_temp1.po') == 0 or 'msgid "same"' not in [ + block for block in content.split('\n\n') if '#-#-#-#-#' in block + ] + + almost_same_block = next(b for b in content.split('\n\n') if 'msgid "almost_same"' in b) + assert 'fuzzy' in almost_same_block + assert '#-#-#-#-#' in almost_same_block + assert 
'Almost same' in almost_same_block + assert 'A bit same' in almost_same_block + + plural_block = next(b for b in content.split('\n\n') if 'msgid "plural"' in b) + assert 'fuzzy' in plural_block + assert '#-#-#-#-#' in plural_block + + +def test_use_first(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.use_first = True + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + + assert '#-#-#-#-#' not in content + + almost_same_block = next(b for b in content.split('\n\n') if 'msgid "almost_same"' in b) + assert 'fuzzy' not in almost_same_block + assert 'msgstr "Almost same"' in almost_same_block + + plural_block = next(b for b in content.split('\n\n') if 'msgid "plural"' in b) + assert 'fuzzy' not in plural_block + assert 'msgstr[0] "Plural"' in plural_block + assert 'msgstr[1] "Plurals"' in plural_block + + +def test_unique(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.unique = True + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + + assert 'msgid "other1"' in content + assert 'msgid "other2"' in content + assert 'msgid "other3"' in content + assert 'msgid "other4"' in content + assert 'msgid "same"' not in content + assert 'msgid "almost_same"' not in content + + +def test_less_than_equivalent_to_unique(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.less_than = 2 + concat_cmd.finalize_options() + concat_cmd.run() + less_than_content = output_file.read_text() + + concat_cmd.less_than = None + concat_cmd.unique = True + 
concat_cmd.finalize_options() + concat_cmd.run() + unique_content = output_file.read_text() + + assert less_than_content == unique_content + + +def test_more_than(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.more_than = 1 + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + + assert 'msgid "other1"' not in content + assert 'msgid "other3"' not in content + assert 'msgid "same"' in content + assert 'msgid "almost_same"' in content + assert 'msgid "plural"' in content + + almost_same_block = next(b for b in content.split('\n\n') if 'msgid "almost_same"' in b) + assert 'fuzzy' in almost_same_block + + +def test_no_wrap_width_exclusive(concat_cmd, po_files): + temp1, _ = po_files + concat_cmd.input_files = [str(temp1)] + concat_cmd.no_wrap = True + concat_cmd.width = 80 + with pytest.raises(OptionError): + concat_cmd.finalize_options() + + +def test_stdout_output(concat_cmd, po_files, monkeypatch): + temp1, _ = po_files + concat_cmd.input_files = [str(temp1)] + concat_cmd.finalize_options() + + buf = io.BytesIO() + monkeypatch.setattr(sys, 'stdout', type('FakeStdout', (), {'buffer': buf})()) + concat_cmd.run() + + content = buf.getvalue().decode('utf-8') + assert 'msgid "other1"' in content + assert 'msgstr "Other 1"' in content + assert 'msgid "same"' in content + + +def test_stdout_dash(concat_cmd, po_files, monkeypatch): + temp1, _ = po_files + concat_cmd.input_files = [str(temp1)] + concat_cmd.output_file = '-' + concat_cmd.finalize_options() + + buf = io.BytesIO() + monkeypatch.setattr(sys, 'stdout', type('FakeStdout', (), {'buffer': buf})()) + concat_cmd.run() + + content = buf.getvalue().decode('utf-8') + assert 'msgid "other1"' in content + + +def test_same_string_no_conflict(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 
'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + same_block = next(b for b in content.split('\n\n') if 'msgid "same"' in b) + assert 'fuzzy' not in same_block + assert '#-#-#-#-#' not in same_block + assert 'msgstr "Same"' in same_block + + +def test_no_location(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.no_location = True + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + assert '#: ' not in content + assert 'msgid "other1"' in content + + +def test_sort_output(concat_cmd, po_files, tmp_path): + temp1, temp2 = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.output_file = str(output_file) + concat_cmd.sort_output = True + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + msgid_positions = { + 'almost_same': content.index('msgid "almost_same"'), + 'other1': content.index('msgid "other1"'), + 'other2': content.index('msgid "other2"'), + 'other3': content.index('msgid "other3"'), + 'other4': content.index('msgid "other4"'), + 'same': content.index('msgid "same"'), + } + ordered = sorted(msgid_positions, key=msgid_positions.get) + assert ordered == ['almost_same', 'other1', 'other2', 'other3', 'other4', 'same'] + + +def test_single_input_file(concat_cmd, po_files, tmp_path): + temp1, _ = po_files + output_file = tmp_path / 'msgcat.po' + concat_cmd.input_files = [str(temp1)] + concat_cmd.output_file = str(output_file) + concat_cmd.finalize_options() + concat_cmd.run() + + content = output_file.read_text() + assert 'msgid "other1"' in content + assert 'msgid "other2"' in content + assert 'msgid "same"' in content + assert 
'#-#-#-#-#' not in content + assert 'fuzzy' not in content + + +def test_unique_exclusive_with_more_than_nonzero(concat_cmd, po_files): + temp1, temp2 = po_files + concat_cmd.input_files = [str(temp1), str(temp2)] + concat_cmd.unique = True + concat_cmd.more_than = 0 + concat_cmd.finalize_options() + + +def test_conflicted_po_raises_on_read(tmp_path): + from babel.messages.pofile import PoFileError, read_po + + conflicted = tmp_path / 'conflicted.po' + conflicted.write_text( + 'msgid "hello"\n' + '#-#-#-#-# file1.po (PROJECT 1.0) #-#-#-#-#\n' + 'msgstr "Hello"\n' + ) + with pytest.raises(PoFileError): + with open(conflicted) as f: + read_po(f, abort_invalid=True) diff --git a/tests/messages/frontend/test_concat_merge.py b/tests/messages/frontend/test_concat_merge.py deleted file mode 100644 index e42c0c966..000000000 --- a/tests/messages/frontend/test_concat_merge.py +++ /dev/null @@ -1,806 +0,0 @@ -# -# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team -# All rights reserved. -# -# This software is licensed as described in the file LICENSE, which -# you should have received as part of this distribution. The terms -# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. -# -# This software consists of voluntary contributions made by many -# individuals. For the exact contribution history, see the revision -# history and logs, available at https://github.com/python-babel/babel/commits/master/. 
- -from __future__ import annotations - -import io -import os -import shutil -import sys -from datetime import datetime -from unittest.mock import patch - -import pytest -from freezegun import freeze_time - -from babel import __version__ as VERSION -from babel.dates import format_datetime -from babel.messages import Catalog, frontend, pofile -from babel.messages.frontend import OptionError -from babel.util import LOCALTZ -from tests.messages.consts import TEST_PROJECT_DISTRIBUTION_DATA, data_dir, i18n_dir -from tests.messages.utils import Distribution - - -@pytest.fixture(autouse=True) -def frozen_time(): - with freeze_time("1994-11-11"): - yield - - -class TestConcatanateCatalog: - - def setup_method(self): - self.olddir = os.getcwd() - os.chdir(data_dir) - - self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) - self.cmd = frontend.ConcatenateCatalog(self.dist) - self.cmd.initialize_options() - - self.temp1 = f'{i18n_dir}/msgcat_temp1.po' - self.temp2 = f'{i18n_dir}/msgcat_temp2.po' - self.output_file = f'{i18n_dir}/msgcat.po' - - with open(self.temp1, 'wb') as file: - catalog = Catalog() - catalog.add('other1', string='Other 1', locations=[('simple.py', 1)], flags=['flag1000']) - catalog.add('other2', string='Other 2', locations=[('simple.py', 10)]) - catalog.add('same', string='Same', locations=[('simple.py', 100)], flags=['flag1', 'flag1.2']) - catalog.add('almost_same', string='Almost same', locations=[('simple.py', 1000)], flags=['flag2']) - catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals'), locations=[('simple.py', 2000)]) - pofile.write_po(file, catalog) - - with open(self.temp2, 'wb') as file: - catalog = Catalog() - catalog.add('other3', string='Other 3', locations=[('hard.py', 1)]) - catalog.add('other4', string='Other 4', locations=[('hard.py', 10)]) - catalog.add('almost_same', string='A bit same', locations=[('hard.py', 1000)], flags=['flag3']) - catalog.add('same', string='Same', locations=[('hard.py', 100)], flags=['flag4']) - 
catalog.add(('plural', 'plurals'), string=('Plural', 'Plurals other'), locations=[('hard.py', 2000)]) - pofile.write_po(file, catalog) - - def teardown_method(self): - for file in [self.temp1, self.temp2, self.output_file]: - if os.path.isfile(file): - os.unlink(file) - - def _get_expected(self, messages, fuzzy=False): - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - fuzzy_header = '\n#, fuzzy' if fuzzy else '' - return ( - "# Translations template for PROJECT.\n" - "# Copyright (C) 1994 ORGANIZATION\n" - "# This file is distributed under the same license as the PROJECT project.\n" - "# FIRST AUTHOR , 1994.\n" - "#" + fuzzy_header + "\n" - 'msgid ""\n' - 'msgstr ""\n' - '"Project-Id-Version: PROJECT VERSION\\n"\n' - '"Report-Msgid-Bugs-To: EMAIL@ADDRESS\\n"\n' - f'"POT-Creation-Date: {date}\\n"\n' - '"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\\n"\n' - '"Last-Translator: FULL NAME \\n"\n' - '"Language-Team: LANGUAGE \\n"\n' - '"MIME-Version: 1.0\\n"\n' - '"Content-Type: text/plain; charset=utf-8\\n"\n' - '"Content-Transfer-Encoding: 8bit\\n"\n' - f'"Generated-By: Babel {VERSION}\\n"\n' - "\n" - ) + messages - - def test_no_input_files(self): - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_no_output_file(self): - self.cmd.input_files = ['project/i18n/messages.pot'] - self.cmd.finalize_options() # output_file not required; defaults to stdout - - def test_unique_exclusive_with_less_than(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.unique = True - self.cmd.less_than = 3 - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_unique_exclusive_with_more_than(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.unique = True - self.cmd.more_than = 1 - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_default(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.output_file = 
self.output_file - - self.cmd.finalize_options() - self.cmd.run() - - expected_content = self._get_expected(fr"""#: simple.py:1 -#, flag1000 -msgid "other1" -msgstr "Other 1" - -#: simple.py:10 -msgid "other2" -msgstr "Other 2" - -#: hard.py:100 simple.py:100 -#, flag1, flag1.2, flag4 -msgid "same" -msgstr "Same" - -#: hard.py:1000 simple.py:1000 -#, flag2, flag3, fuzzy -msgid "almost_same" -msgstr "" -"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" -"Almost same" -"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" -"A bit same" - -#: hard.py:2000 simple.py:2000 -#, fuzzy -msgid "plural" -msgid_plural "plurals" -msgstr "" -"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" -msgstr[0] "Plural" -msgstr[1] "Plurals" -"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" -msgstr[0] "Plural" -msgstr[1] "Plurals other" - -#: hard.py:1 -msgid "other3" -msgstr "Other 3" - -#: hard.py:10 -msgid "other4" -msgstr "Other 4" - -""", fuzzy=True) - - with open(self.output_file, 'r') as f: - actual_content = f.read() - assert expected_content == actual_content - - def test_use_first(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.output_file = self.output_file - self.cmd.use_first = True - - self.cmd.finalize_options() - self.cmd.run() - - expected_content = self._get_expected(fr"""#: simple.py:1 -#, flag1000 -msgid "other1" -msgstr "Other 1" - -#: simple.py:10 -msgid "other2" -msgstr "Other 2" - -#: hard.py:100 simple.py:100 -#, flag1, flag1.2, flag4 -msgid "same" -msgstr "Same" - -#: hard.py:1000 simple.py:1000 -#, flag2, flag3 -msgid "almost_same" -msgstr "Almost same" - -#: hard.py:2000 simple.py:2000 -msgid "plural" -msgid_plural "plurals" -msgstr[0] "Plural" -msgstr[1] "Plurals" - -#: hard.py:1 -msgid "other3" -msgstr "Other 3" - -#: hard.py:10 -msgid "other4" -msgstr "Other 4" - -""") - - with open(self.output_file, 'r') as f: - actual_content = f.read() - assert expected_content == actual_content - - def test_unique(self): - 
self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.output_file = self.output_file - self.cmd.unique = True - - self.cmd.finalize_options() - self.cmd.run() - - expected_content = self._get_expected(fr"""#: simple.py:1 -#, flag1000 -msgid "other1" -msgstr "Other 1" - -#: simple.py:10 -msgid "other2" -msgstr "Other 2" - -#: hard.py:1 -msgid "other3" -msgstr "Other 3" - -#: hard.py:10 -msgid "other4" -msgstr "Other 4" - -""") - - with open(self.output_file, 'r') as f: - actual_content = f.read() - assert expected_content == actual_content - - self.cmd.unique = False - self.cmd.less_than = 2 - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - actual_content = f.read() - assert expected_content == actual_content - - def test_more_than(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.output_file = self.output_file - self.cmd.more_than = 1 - - self.cmd.finalize_options() - self.cmd.run() - - expected_content = self._get_expected(fr"""#: hard.py:100 simple.py:100 -#, flag1, flag1.2, flag4 -msgid "same" -msgstr "Same" - -#: hard.py:1000 simple.py:1000 -#, flag2, flag3, fuzzy -msgid "almost_same" -msgstr "" -"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" -"Almost same" -"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" -"A bit same" - -#: hard.py:2000 simple.py:2000 -#, fuzzy -msgid "plural" -msgid_plural "plurals" -msgstr "" -"#-#-#-#-# msgcat_temp1.po (PROJECT VERSION) #-#-#-#-#" -msgstr[0] "Plural" -msgstr[1] "Plurals" -"#-#-#-#-# msgcat_temp2.po (PROJECT VERSION) #-#-#-#-#" -msgstr[0] "Plural" -msgstr[1] "Plurals other" - -""", fuzzy=True) - - with open(self.output_file, 'r') as f: - actual_content = f.read() - assert expected_content == actual_content - - def test_no_wrap_width_exclusive(self): - self.cmd.input_files = [self.temp1] - self.cmd.no_wrap = True - self.cmd.width = 80 - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def _capture_stdout(self): - buf = 
io.BytesIO() - - class FakeStdout: - buffer = buf - - return FakeStdout(), buf - - def test_stdout_output(self): - self.cmd.input_files = [self.temp1] - self.cmd.finalize_options() - - fake_stdout, buf = self._capture_stdout() - with patch('sys.stdout', fake_stdout): - self.cmd.run() - - content = buf.getvalue().decode('utf-8') - assert 'msgid "other1"' in content - assert 'msgstr "Other 1"' in content - assert 'msgid "same"' in content - - def test_stdout_dash(self): - self.cmd.input_files = [self.temp1] - self.cmd.output_file = '-' - self.cmd.finalize_options() - - fake_stdout, buf = self._capture_stdout() - with patch('sys.stdout', fake_stdout): - self.cmd.run() - - content = buf.getvalue().decode('utf-8') - assert 'msgid "other1"' in content - - def test_same_string_no_conflict(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.output_file = self.output_file - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - content = f.read() - - same_block = [line for line in content.split('\n\n') if 'msgid "same"' in line] - assert same_block - block = same_block[0] - assert 'fuzzy' not in block - assert '#-#-#-#-#' not in block - assert 'msgstr "Same"' in block - - def test_no_location(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.output_file = self.output_file - self.cmd.no_location = True - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - content = f.read() - - assert '#: ' not in content - assert 'msgid "other1"' in content - - def test_sort_output(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.output_file = self.output_file - self.cmd.sort_output = True - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - content = f.read() - - msgid_positions = { - 'almost_same': content.index('msgid "almost_same"'), - 'other1': content.index('msgid "other1"'), - 'other2': content.index('msgid "other2"'), - 
'other3': content.index('msgid "other3"'), - 'other4': content.index('msgid "other4"'), - 'same': content.index('msgid "same"'), - } - ordered = sorted(msgid_positions, key=msgid_positions.get) - assert ordered == ['almost_same', 'other1', 'other2', 'other3', 'other4', 'same'] - - def test_single_input_file(self): - self.cmd.input_files = [self.temp1] - self.cmd.output_file = self.output_file - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - content = f.read() - - assert 'msgid "other1"' in content - assert 'msgid "other2"' in content - assert 'msgid "same"' in content - assert '#-#-#-#-#' not in content - assert 'fuzzy' not in content - - def test_unique_exclusive_with_more_than_nonzero(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.unique = True - self.cmd.more_than = 0 - self.cmd.finalize_options() - - def test_less_than_equivalent_to_unique(self): - self.cmd.input_files = [self.temp1, self.temp2] - self.cmd.output_file = self.output_file - self.cmd.less_than = 2 - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - less_than_content = f.read() - - self.cmd.less_than = None - self.cmd.unique = True - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - unique_content = f.read() - - assert less_than_content == unique_content - - -class TestMergeCatalog: - - def setup_method(self): - self.olddir = os.getcwd() - os.chdir(data_dir) - - self.dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) - self.cmd = frontend.MergeCatalog(self.dist) - self.cmd.initialize_options() - - self.temp_def = f'{i18n_dir}/msgmerge_def.po' - self.temp_ref = f'{i18n_dir}/msgmerge_ref.pot' - self.compendium = f'{i18n_dir}/compenidum.po' - self.output_file = f'{i18n_dir}/msgmerge.po' - - with open(self.temp_ref, 'wb') as file: - catalog = Catalog() - for word in ['word1', 'word2', 'word3', 'word4']: - catalog.add(word) - pofile.write_po(file, catalog) - - 
with open(self.temp_def, 'wb') as file: - catalog = Catalog() - catalog.add('word1', string='Word 1') - catalog.add('word2', string='Word 2') - catalog.add('word3') - pofile.write_po(file, catalog) - - with open(self.compendium, 'wb') as file: - catalog = Catalog() - catalog.add('word1', string='Comp Word 1') - catalog.add('word2', string='Comp Word 2') - catalog.add('word4', string='Word 4') - catalog.add('word5', string='Word 5') - pofile.write_po(file, catalog) - - def teardown_method(self): - for file in [ - self.temp_def, - self.temp_def + '~', - self.temp_def + '.bac', - self.temp_ref, - self.compendium, - self.output_file - ]: - if os.path.exists(file) and os.path.isfile(file): - os.unlink(file) - - def _get_expected(self, messages): - date = format_datetime(datetime(1994, 11, 11, 00, 00), 'yyyy-MM-dd HH:mmZ', tzinfo=LOCALTZ, locale='en') - return fr"""# Translations template for PROJECT. -# Copyright (C) 1994 ORGANIZATION -# This file is distributed under the same license as the PROJECT project. -# FIRST AUTHOR , 1994. 
-# -#, fuzzy -msgid "" -msgstr "" -"Project-Id-Version: PROJECT VERSION\n" -"Report-Msgid-Bugs-To: EMAIL@ADDRESS\n" -"POT-Creation-Date: {date}\n" -"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n" -"Last-Translator: FULL NAME \n" -"Language-Team: LANGUAGE \n" -"MIME-Version: 1.0\n" -"Content-Type: text/plain; charset=utf-8\n" -"Content-Transfer-Encoding: 8bit\n" -"Generated-By: Babel {VERSION}\n" - -""" + messages - - def test_no_input_files(self): - with pytest.raises(OptionError): - self.cmd.finalize_options() - - with pytest.raises(OptionError): - self.cmd.input_files = ['1'] - self.cmd.finalize_options() - - with pytest.raises(OptionError): - self.cmd.input_files = ['1', '2', '3'] - self.cmd.finalize_options() - - def test_no_output_file(self): - self.cmd.input_files = ['1', '2'] - with pytest.raises(OptionError): - self.cmd.finalize_options() - - self.cmd.output_file = '2' - self.cmd.finalize_options() - - self.cmd.output_file = None - self.cmd.update = True - self.cmd.finalize_options() - - def test_default(self): - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.output_file = self.output_file - self.cmd.no_fuzzy_matching = True - self.cmd.finalize_options() - self.cmd.run() - - expected_content = self._get_expected(fr"""msgid "word1" -msgstr "Word 1" - -msgid "word2" -msgstr "Word 2" - -msgid "word3" -msgstr "" - -msgid "word4" -msgstr "" - -""") - - with open(self.output_file, 'r') as f: - actual_content = f.read() - assert expected_content == actual_content - - def test_compenidum(self): - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.output_file = self.output_file - self.cmd.compendium = [self.compendium,] - self.cmd.no_fuzzy_matching = True - self.cmd.no_compendium_comment = True - self.cmd.finalize_options() - self.cmd.run() - - expected_content = self._get_expected(fr"""msgid "word1" -msgstr "Word 1" - -msgid "word2" -msgstr "Word 2" - -msgid "word3" -msgstr "" - -msgid "word4" -msgstr "Word 4" - -""") - - with 
open(self.output_file, 'r') as f: - actual_content = f.read() - assert expected_content == actual_content - - def test_compenidum_overwrite(self): - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.output_file = self.output_file - self.cmd.compendium = [self.compendium,] - self.cmd.no_fuzzy_matching = True - self.cmd.no_compendium_comment = True - self.cmd.compendium_overwrite = True - self.cmd.finalize_options() - self.cmd.run() - - expected_content = self._get_expected(fr"""msgid "word1" -msgstr "Comp Word 1" - -msgid "word2" -msgstr "Comp Word 2" - -msgid "word3" -msgstr "" - -msgid "word4" -msgstr "Word 4" - -#~ msgid "word1" -#~ msgstr "Word 1" - -#~ msgid "word2" -#~ msgstr "Word 2" - -""") - - with open(self.output_file, 'r') as f: - actual_content = f.read() - assert expected_content == actual_content - - def test_update(self): - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.update = True - self.cmd.no_fuzzy_matching = True - self.cmd.finalize_options() - self.cmd.run() - - expected_content = self._get_expected(fr"""msgid "word1" -msgstr "Word 1" - -msgid "word2" -msgstr "Word 2" - -msgid "word3" -msgstr "" - -msgid "word4" -msgstr "" - -""") - - with open(self.temp_def, 'r') as f: - actual_content = f.read() - assert expected_content == actual_content - - def test_update_backup(self): - with open(self.temp_def, 'r') as f: - before_content = f.read() - - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.update = True - self.cmd.backup = True - self.cmd.no_fuzzy_matching = True - self.cmd.finalize_options() - self.cmd.run() - - assert os.path.exists(self.temp_def + '~') - with open(self.temp_def + '~', 'r') as f: - actual_content = f.read() - assert before_content == actual_content - - os.unlink(self.temp_def) - shutil.move(self.temp_def + '~', self.temp_def) - self.cmd.suffix = '.bac' - self.cmd.run() - - assert os.path.exists(self.temp_def + '.bac') - with open(self.temp_def + '.bac', 'r') as f: - 
actual_content = f.read() - assert before_content == actual_content - - def test_no_wrap_width_exclusive(self): - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.output_file = self.output_file - self.cmd.no_wrap = True - self.cmd.width = 80 - with pytest.raises(OptionError): - self.cmd.finalize_options() - - def test_compendium_with_comment(self): - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.output_file = self.output_file - self.cmd.compendium = [self.compendium] - self.cmd.no_fuzzy_matching = True - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - content = f.read() - - assert f'#. {self.compendium}' in content - assert 'msgid "word4"' in content - assert 'msgstr "Word 4"' in content - - def test_compendium_does_not_overwrite_existing(self): - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.output_file = self.output_file - self.cmd.compendium = [self.compendium] - self.cmd.no_fuzzy_matching = True - self.cmd.no_compendium_comment = True - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - content = f.read() - - blocks = content.split('\n\n') - word1_block = next((b for b in blocks if 'msgid "word1"' in b), None) - assert word1_block is not None - assert 'msgstr "Word 1"' in word1_block - assert 'Comp Word 1' not in word1_block - - def test_multiple_compendiums(self): - compendium2 = f'{i18n_dir}/compendium2.po' - try: - with open(compendium2, 'wb') as f: - cat = Catalog() - cat.add('word3', string='Word 3 from comp2') - pofile.write_po(f, cat) - - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.output_file = self.output_file - self.cmd.compendium = [self.compendium, compendium2] - self.cmd.no_fuzzy_matching = True - self.cmd.no_compendium_comment = True - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - content = f.read() - - assert 'msgstr "Word 4"' in content - assert 
'msgstr "Word 3 from comp2"' in content - finally: - if os.path.exists(compendium2): - os.unlink(compendium2) - - def test_compendium_fills_empty_translation(self): - compendium_with_word3 = f'{i18n_dir}/comp_word3.po' - try: - with open(compendium_with_word3, 'wb') as f: - cat = Catalog() - cat.add('word3', string='Word 3 comp') - pofile.write_po(f, cat) - - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.output_file = self.output_file - self.cmd.compendium = [compendium_with_word3] - self.cmd.no_fuzzy_matching = True - self.cmd.no_compendium_comment = True - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - content = f.read() - - assert 'msgstr "Word 3 comp"' in content - finally: - if os.path.exists(compendium_with_word3): - os.unlink(compendium_with_word3) - - def test_obsolete_messages(self): - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.output_file = self.output_file - self.cmd.no_fuzzy_matching = True - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - content = f.read() - - assert '#~ msgid' not in content - - extra_def = f'{i18n_dir}/extra_def.po' - try: - with open(extra_def, 'wb') as f: - cat = Catalog() - cat.add('word1', string='Word 1') - cat.add('old_word', string='Old Word') - pofile.write_po(f, cat) - - self.cmd.input_files = [extra_def, self.temp_ref] - self.cmd.finalize_options() - self.cmd.run() - - with open(self.output_file, 'r') as f: - content = f.read() - - assert '#~ msgid "old_word"' in content - assert '#~ msgstr "Old Word"' in content - finally: - if os.path.exists(extra_def): - os.unlink(extra_def) - - def test_compendium_not_applied_for_absent_messages(self): - self.cmd.input_files = [self.temp_def, self.temp_ref] - self.cmd.output_file = self.output_file - self.cmd.compendium = [self.compendium] - self.cmd.no_fuzzy_matching = True - self.cmd.no_compendium_comment = True - self.cmd.finalize_options() - self.cmd.run() - 
- with open(self.output_file, 'r') as f: - content = f.read() - - active_blocks = content.split('#~')[0] - assert 'word5' not in active_blocks diff --git a/tests/messages/frontend/test_merge.py b/tests/messages/frontend/test_merge.py new file mode 100644 index 000000000..6e6c2f45f --- /dev/null +++ b/tests/messages/frontend/test_merge.py @@ -0,0 +1,329 @@ +# +# Copyright (C) 2007-2011 Edgewall Software, 2013-2025 the Babel team +# All rights reserved. +# +# This software is licensed as described in the file LICENSE, which +# you should have received as part of this distribution. The terms +# are also available at https://github.com/python-babel/babel/blob/master/LICENSE. +# +# This software consists of voluntary contributions made by many +# individuals. For the exact contribution history, see the revision +# history and logs, available at https://github.com/python-babel/babel/commits/master/. + +from __future__ import annotations + +import pathlib +import shutil + +import pytest +from freezegun import freeze_time + +from babel.messages import Catalog, frontend, pofile +from babel.messages.frontend import OptionError +from tests.messages.consts import TEST_PROJECT_DISTRIBUTION_DATA +from tests.messages.utils import Distribution + + +@pytest.fixture(autouse=True) +def frozen_time(): + with freeze_time("1994-11-11"): + yield + + +@pytest.fixture +def merge_cmd(): + dist = Distribution(TEST_PROJECT_DISTRIBUTION_DATA) + cmd = frontend.MergeCatalog(dist) + cmd.initialize_options() + return cmd + + +@pytest.fixture +def merge_files(tmp_path: pathlib.Path): + temp_def = tmp_path / 'msgmerge_def.po' + temp_ref = tmp_path / 'msgmerge_ref.pot' + compendium = tmp_path / 'compendium.po' + + with open(temp_ref, 'wb') as file: + catalog = Catalog() + for word in ['word1', 'word2', 'word3', 'word4']: + catalog.add(word) + pofile.write_po(file, catalog) + + with open(temp_def, 'wb') as file: + catalog = Catalog() + catalog.add('word1', string='Word 1') + catalog.add('word2', 
string='Word 2') + catalog.add('word3') + pofile.write_po(file, catalog) + + with open(compendium, 'wb') as file: + catalog = Catalog() + catalog.add('word1', string='Comp Word 1') + catalog.add('word2', string='Comp Word 2') + catalog.add('word4', string='Word 4') + catalog.add('word5', string='Word 5') + pofile.write_po(file, catalog) + + return temp_def, temp_ref, compendium + + +def test_no_input_files(merge_cmd): + with pytest.raises(OptionError): + merge_cmd.finalize_options() + + with pytest.raises(OptionError): + merge_cmd.input_files = ['1'] + merge_cmd.finalize_options() + + with pytest.raises(OptionError): + merge_cmd.input_files = ['1', '2', '3'] + merge_cmd.finalize_options() + + +def test_no_output_file(merge_cmd, merge_files): + temp_def, temp_ref, _ = merge_files + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + with pytest.raises(OptionError): + merge_cmd.finalize_options() + + merge_cmd.output_file = str(temp_ref) + merge_cmd.finalize_options() + + merge_cmd.output_file = None + merge_cmd.update = True + merge_cmd.finalize_options() + + +def test_default(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, _ = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.no_fuzzy_matching = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + + assert 'msgid "word1"' in content + assert 'msgstr "Word 1"' in content + assert 'msgid "word2"' in content + assert 'msgstr "Word 2"' in content + + assert 'msgid "word4"' in content + word4_block = next(b for b in content.split('\n\n') if 'msgid "word4"' in b) + assert 'msgstr ""' in word4_block + + +def test_compendium(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + 
merge_cmd.compendium = [str(compendium)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + + assert 'msgstr "Word 4"' in content + + word1_block = next(b for b in content.split('\n\n') if 'msgid "word1"' in b) + assert 'msgstr "Word 1"' in word1_block + assert 'Comp Word 1' not in word1_block + + +def test_compendium_overwrite(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.compendium_overwrite = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + + word1_block = next(b for b in content.split('\n\n') if 'msgid "word1"' in b and '#~' not in b) + assert 'msgstr "Comp Word 1"' in word1_block + + assert '#~ msgid "word1"' in content + assert '#~ msgstr "Word 1"' in content + + +def test_update(merge_cmd, merge_files): + temp_def, temp_ref, _ = merge_files + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.update = True + merge_cmd.no_fuzzy_matching = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = temp_def.read_text() + assert 'msgstr "Word 1"' in content + assert 'msgid "word4"' in content + + +def test_update_backup(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, _ = merge_files + before_content = temp_def.read_text() + + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.update = True + merge_cmd.backup = True + merge_cmd.no_fuzzy_matching = True + merge_cmd.finalize_options() + merge_cmd.run() + + backup = pathlib.Path(str(temp_def) + '~') + assert backup.exists() + assert backup.read_text() == before_content + + temp_def.unlink() + 
shutil.move(str(backup), str(temp_def)) + merge_cmd.suffix = '.bac' + merge_cmd.run() + + bac = pathlib.Path(str(temp_def) + '.bac') + assert bac.exists() + assert bac.read_text() == before_content + + +def test_no_wrap_width_exclusive(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, _ = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.no_wrap = True + merge_cmd.width = 80 + with pytest.raises(OptionError): + merge_cmd.finalize_options() + + +def test_compendium_with_comment(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + assert f'#. 
{compendium}' in content + assert 'msgid "word4"' in content + assert 'msgstr "Word 4"' in content + + +def test_compendium_does_not_overwrite_existing(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + word1_block = next(b for b in content.split('\n\n') if 'msgid "word1"' in b) + assert 'msgstr "Word 1"' in word1_block + assert 'Comp Word 1' not in word1_block + + +def test_multiple_compendiums(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + compendium2 = tmp_path / 'compendium2.po' + output_file = tmp_path / 'msgmerge.po' + + with open(compendium2, 'wb') as f: + cat = Catalog() + cat.add('word3', string='Word 3 from comp2') + pofile.write_po(f, cat) + + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium), str(compendium2)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + assert 'msgstr "Word 4"' in content + assert 'msgstr "Word 3 from comp2"' in content + + +def test_compendium_fills_empty_translation(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, _ = merge_files + compendium_with_word3 = tmp_path / 'comp_word3.po' + output_file = tmp_path / 'msgmerge.po' + + with open(compendium_with_word3, 'wb') as f: + cat = Catalog() + cat.add('word3', string='Word 3 comp') + pofile.write_po(f, cat) + + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium_with_word3)] 
+ merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + assert 'msgstr "Word 3 comp"' in content + + +def test_obsolete_messages(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, _ = merge_files + output_file = tmp_path / 'msgmerge.po' + + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.no_fuzzy_matching = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + assert '#~ msgid' not in content + + extra_def = tmp_path / 'extra_def.po' + with open(extra_def, 'wb') as f: + cat = Catalog() + cat.add('word1', string='Word 1') + cat.add('old_word', string='Old Word') + pofile.write_po(f, cat) + + merge_cmd.input_files = [str(extra_def), str(temp_ref)] + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + assert '#~ msgid "old_word"' in content + assert '#~ msgstr "Old Word"' in content + + +def test_compendium_not_applied_for_absent_messages(merge_cmd, merge_files, tmp_path): + temp_def, temp_ref, compendium = merge_files + output_file = tmp_path / 'msgmerge.po' + merge_cmd.input_files = [str(temp_def), str(temp_ref)] + merge_cmd.output_file = str(output_file) + merge_cmd.compendium = [str(compendium)] + merge_cmd.no_fuzzy_matching = True + merge_cmd.no_compendium_comment = True + merge_cmd.finalize_options() + merge_cmd.run() + + content = output_file.read_text() + active_section = content.split('#~')[0] + assert 'word5' not in active_section