From f55be30e18a186c29d51b288c9a24e6d1a9cea38 Mon Sep 17 00:00:00 2001 From: Lubomír Sedlář Date: Sep 05 2019 13:20:16 +0000 Subject: Fix decoding errors on Python 2 When changelogs contain non-ASCII data, there could be a decoding problem on old Python versions when the changelog is serialized. This was not exercised by the test suite. This partially reverts 86c951c4661d3af234f8a7b7e560ab88e12a3f2b, but only for Python 2. Signed-off-by: Lubomír Sedlář --- diff --git a/compose_utils/changelog.py b/compose_utils/changelog.py index abc7e34..958a519 100644 --- a/compose_utils/changelog.py +++ b/compose_utils/changelog.py @@ -14,6 +14,8 @@ import json import re from distutils.version import LooseVersion +import six + import kobo.pkgset from kobo.rpmlib import parse_nvra, make_nvr, make_nvra, get_changelogs_from_header from kobo.threads import ThreadPool, WorkerThread @@ -37,6 +39,23 @@ def formatsize(size): return '{0:.{1}f} {2}'.format(size, prec, chosen) +def to_utf8(text): + if six.PY3: + # Nothing needs to happen on Python 3, we already get correctly decoded + # data from kobo or rpm. + return text + if not isinstance(text, str): + # Text is already decoded to unicode, nothing to do... 
+ return text + encodings = ["ascii", "utf8", "latin1", "latin2"] + for encoding in encodings: + try: + return text.decode(encoding) + except UnicodeDecodeError: + pass + return text.decode("ascii", "ignore") + + clogs = {} @@ -51,7 +70,7 @@ class SimpleRpmWrapperWithChangelog(kobo.pkgset.SimpleRpmWrapper): ts = kwargs.pop("ts", None) header = kobo.rpmlib.get_rpm_header(file_path, ts=ts) - self.summary = kobo.rpmlib.get_header_field(header, "summary") + self.summary = to_utf8(kobo.rpmlib.get_header_field(header, "summary")) if self.sourcerpm: key = self.sourcerpm @@ -96,10 +115,10 @@ def get_changelog_diff_from_headers(old, new, max_records=-1): result = [] try: old_time = old_changelog[0].time - old_nvr = LooseVersion(old_changelog[0].name.rsplit(None, 1)[-1]) + old_nvr = LooseVersion(to_utf8(old_changelog[0].name).rsplit(None, 1)[-1]) while new_changelog: entry = new_changelog.pop(0) - new_nvr = LooseVersion(entry.name.rsplit(None, 1)[-1]) + new_nvr = LooseVersion(to_utf8(entry.name).rsplit(None, 1)[-1]) if entry.time < old_time or ( entry.time == old_time and new_nvr <= old_nvr ): @@ -365,7 +384,9 @@ class ComposeChangelog(object): data["changelog"] = [] for i in get_changelog_diff_from_headers(old_package, new_package, max_logs): - data["changelog"].append("* %s %s\n%s" % (i.ctime, i.name, i.text)) + data["changelog"].append( + "* %s %s\n%s" % (i.ctime, to_utf8(i.name), to_utf8(i.text)) + ) # TODO: comps, system release # if rpm.versionCompare(old_package, new_package.header) == -1: diff --git a/tests/fixtures/changelog-data.json b/tests/fixtures/changelog-data.json index e54c8bc..62a7ef6 100644 --- a/tests/fixtures/changelog-data.json +++ b/tests/fixtures/changelog-data.json @@ -60,7 +60,7 @@ "dropped_rpms": ["pungi-ostree"], "size": 13, "size_change": 5, - "changelog": ["* Pungi changed.\n* A lot."] + "changelog": ["* Pungi changed.\n* A lot.\n* Příliš žluťoučký kůň."] } ], "downgraded_packages": [ diff --git a/tests/fixtures/verbose-full.txt 
b/tests/fixtures/verbose-full.txt index ab6e1c7..f53457d 100644 --- a/tests/fixtures/verbose-full.txt +++ b/tests/fixtures/verbose-full.txt @@ -35,6 +35,7 @@ Size change: 5 B Changelog: * Pungi changed. * A lot. + * Příliš žluťoučký kůň. diff --git a/tests/fixtures/verbose-short.txt b/tests/fixtures/verbose-short.txt index 5f20589..cf2aa0c 100644 --- a/tests/fixtures/verbose-short.txt +++ b/tests/fixtures/verbose-short.txt @@ -29,6 +29,7 @@ Size change: 5 B Changelog: * Pungi changed. * A lot. + * Příliš žluťoučký kůň. diff --git a/tests/test_changelog.py b/tests/test_changelog.py index ffda70e..c5b4603 100644 --- a/tests/test_changelog.py +++ b/tests/test_changelog.py @@ -10,6 +10,8 @@ except ImportError: from kobo.rpmlib import ChangelogEntry +import six + from .helpers import get_compose, get_fixture import productmd.compose @@ -28,7 +30,7 @@ DUMMY_FIREFOX = { 'old_rpms': ['Dummy-firefox'], 'common_rpms': ['Dummy-firefox'], 'rpms': ['Dummy-firefox'], - 'changelog': ['* Tue Mar 15 2016 Lubomír Sedlář - 1:0.1.0-1\n- new version'], + "changelog": [u"* Tue Mar 15 2016 Lubomír Sedlář - 1:0.1.0-1\n- new version"], 'added_rpms': [], 'dropped_rpms': [], 'nvr': 'Dummy-firefox-1:0.1.0-1', @@ -60,7 +62,7 @@ DUMMY_CLOUD_INIT = { 'old_rpms': ['cloud-init'], 'common_rpms': ['cloud-init'], 'rpms': ['cloud-init'], - 'changelog': ['* Tue Sep 05 2017 Lubomír Sedlář - 0.7.9-9.module_f8c7dcdc\n- First release'], + "changelog": [u"* Tue Sep 05 2017 Lubomír Sedlář - 0.7.9-9.module_f8c7dcdc\n- First release"], 'added_rpms': [], 'dropped_rpms': [], 'nvr': 'cloud-init-0.7.9-9.module_f8c7dcdc', @@ -210,19 +212,21 @@ class TestFormat(unittest.TestCase): changelog = ComposeChangelog() changelog._get_summary = mock.Mock(return_value=[]) with open(get_fixture('verbose-full.txt')) as f: - expected = f.read().split('\n') + expected = six.ensure_text(f.read(), encoding="utf-8").splitlines() self.maxDiff = None - self.assertEqual(changelog.get_verbose_log(self.data).split('\n'), - expected) + 
self.assertEqual( + changelog.get_verbose_log(self.data).splitlines(), expected + ) def test_verbose_short(self): changelog = ComposeChangelog() changelog._get_summary = mock.Mock(return_value=[]) with open(get_fixture('verbose-short.txt')) as f: - expected = f.read().split('\n') + expected = six.ensure_text(f.read(), encoding="utf-8").splitlines() self.maxDiff = None - self.assertEqual(changelog.get_verbose_log(self.data, shorten=True).split('\n'), - expected) + self.assertEqual( + changelog.get_verbose_log(self.data, shorten=True).splitlines(), expected + ) class TestCompareChangelogs(unittest.TestCase):