From 7f97e98fde32ca2e129309064e5b0838d56045b9 Mon Sep 17 00:00:00 2001 From: Adam Williamson Date: Oct 03 2024 18:54:14 +0000 Subject: Drop use of buildah for container manifests When trying to deploy the container stuff in production, we found that it blows up when running in an infra openshift container because the security policy prevents creation of a user namespace, which buildah does even for operations like 'login', 'manifest create' and 'manifest push'. To solve that, this removes all use of buildah. We construct the multi-arch manifest ourselves using code adapted from koji_flatpak (thanks Owen Taylor). We publish it with skopeo, not buildah. And we use skopeo inspect --raw instead of buildah inspect in the missing arch detection. Signed-off-by: Adam Williamson --- diff --git a/fedora-image-uploader/fedora_image_uploader/handler.py b/fedora-image-uploader/fedora_image_uploader/handler.py index f79a5f6..5358687 100644 --- a/fedora-image-uploader/fedora_image_uploader/handler.py +++ b/fedora-image-uploader/fedora_image_uploader/handler.py @@ -1,3 +1,4 @@ +import hashlib import json import logging import os @@ -86,7 +87,7 @@ class Uploader: prefix = registry_entry["credential_prefix"] user = os.environ[f"{prefix}USER"] password = os.environ[f"{prefix}PASSWORD"] - args = ["buildah", "login", f"-u={user}", "--password-stdin", registry] + args = ["skopeo", "login", f"-u={user}", "--password-stdin", registry] subprocess.run(args, text=True, check=True, timeout=15, input=password) def __call__(self, message: fm_message.Message): @@ -132,6 +133,95 @@ class Uploader: if self.container_repos: self.publish_container_manifests(ffrel) + def create_and_publish_container_manifest( + self, repo: str, refs: Iterable[tuple[str, str]], tags: Iterable[str] + ): + """ + Create a container manifest for a given set of refs. + + Mostly copied from + https://pagure.io/koji-flatpak/raw/bb45b055c47d199b21c6b163e33048dda7684289/ + f/koji_flatpak/plugins/flatpak_builder_plugin.py + License: LGPL v2.1 (not plus) + We do this ourselves instead of using podman or buildah because + they do not work in infra openshift containers. See + https://github.com/containers/buildah/issues/5750 for some + background. + """ + + def manifest_descriptor(ref, arch): + matched = False + for darch, oarch in DOCKER_ARCHES.items(): + if oarch == arch: + arch = darch + matched = True + break + if not matched: + raise ValueError(f"Could not find docker arch for arch {arch}") + inspargs = ["skopeo", "inspect", "--raw", f"docker://{ref}"] + manifest = _run(inspargs).stdout.encode(encoding="utf-8") + # load the result as JSON as a sanity check + try: + json.loads(manifest) + except (json.JSONDecodeError, TypeError) as err: + _log.error( + "Failed to retrieve valid manifest for %s arch %s - error: %s", ref, arch, err + ) + raise fm_exceptions.Nack() + return { + "mediaType": "application/vnd.oci.image.manifest.v1+json", + "digest": f"sha256:{hashlib.sha256(manifest).hexdigest()}", + "size": len(manifest), + "platform": { + "architecture": arch, + "os": "linux", + }, + } + + index = { + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.index.v1+json", + "manifests": [manifest_descriptor(ref, arch) for (ref, arch) in refs], + } + + index_contents = json.dumps(index, indent=4).encode("utf8") + index_contents_digest = hashlib.sha256(index_contents).hexdigest() + + archive_index = { + "schemaVersion": 2, + "mediaType": "application/vnd.oci.image.index.v1+json", + "manifests": [ + { + "mediaType": "application/vnd.oci.image.index.v1+json", + "digest": f"sha256:{index_contents_digest}", + "size": len(index_contents), + } + ], + } + + with tempfile.TemporaryDirectory() as workdir: + image_dir = os.path.join(workdir, "index_image") + blobdir = os.path.join(image_dir, "blobs/sha256") + os.makedirs(blobdir) + with open(os.path.join(blobdir, index_contents_digest), "wb") as f: + f.write(index_contents) + + with open(os.path.join(image_dir, "index.json"), "w") as f: + json.dump(archive_index, f, indent=4) + + for registry in self.conf["container"]["registries"]: + registry = registry["url"] + # something like "docker://registry.fedoraproject.org/fedora:40" + for target in [f"docker://{registry}/{repo}:{tag}" for tag in tags]: + pushargs = [ + "skopeo", + "copy", + f"oci:{image_dir}", + target, + "--multi-arch=index-only", + ] + _run(pushargs) + def publish_container_manifests(self, ffrel: ff_release.Release): """ Generate and publish multi-arch container manifests. These are @@ -155,8 +245,6 @@ class Uploader: maintag = tags[0] for repo, builtarches in self.container_repos.items(): - # our local name for the manifest we're creating - manref = f"localhost/fiu-temp-{repo}-{maintag}" # we don't have the image files any more, so we'll just # find them on the first registry firstreg = self.conf["container"]["registries"][0]["url"] @@ -171,28 +259,8 @@ class Uploader: # we assume all registries always have the same # content, so if we hit this failure, just return return - # wipe the manifest if it exists already - _run(("buildah", "rmi", manref), failok=True) - # create the manifest with all arches - createargs = ["buildah", "manifest", "create", manref] - createargs.extend(f"{firstregref}-{arch}" for arch in builtarches) - _run(createargs) - for registry in self.conf["container"]["registries"]: - registry = registry["url"] - # something like "registry.fedoraproject.org/fedora:40" - for target in [f"{registry}/{repo}:{tag}" for tag in tags]: - # push the manifest to this target - pushargs = ( - "buildah", - "manifest", - "push", - manref, - f"docker://{target}", - "--all", - ) - _run(pushargs) - # wipe the local copy - _run(("buildah", "rmi", manref), failok=True) + refs = [(f"{firstregref}-{arch}", arch) for arch in builtarches] + self.create_and_publish_container_manifest(repo, refs, tags) message = ContainerPublishedV1( topic=".".join( @@ -221,7 +289,7 @@ class Uploader: """ currarches = [] try: - ret = json.loads(_run(("buildah", "manifest", "inspect", source)).stdout) + ret = json.loads(_run(("skopeo", "inspect", "--raw", f"docker://{source}")).stdout) currarches = set([man["platform"]["architecture"] for man in ret["manifests"]]) currarches = [DOCKER_ARCHES[darch] for darch in currarches] except (json.JSONDecodeError, fm_exceptions.Nack): diff --git a/fedora-image-uploader/tests/conftest.py b/fedora-image-uploader/tests/conftest.py index ed8c43b..d7928b2 100644 --- a/fedora-image-uploader/tests/conftest.py +++ b/fedora-image-uploader/tests/conftest.py @@ -1,4 +1,6 @@ import os +import shutil +import tempfile from unittest import mock import pytest @@ -56,3 +58,24 @@ def azure_fm_conf(): }, ): yield + + +@pytest.fixture(scope="module") +def consistent_tempdir(): + """Mocks out mkdtemp with a version that always uses the same name, + and disables TemporaryDirectory's cleanup, so we can find and + examine stuff in the tempdir during tests. Clean up the directory + ourselves instead at times that are convenient to us. + """ + + def mkdtemp_same(*args, **kwargs): + dirname = os.path.join(tempfile.gettempdir(), "fiu-consistent-tempdir") + shutil.rmtree(dirname, ignore_errors=True) + os.mkdir(dirname, 0o700) + return os.path.abspath(dirname) + + with mock.patch("tempfile.mkdtemp", mkdtemp_same): + with mock.patch("tempfile.TemporaryDirectory.__exit__"): + yield + dirname = os.path.join(tempfile.gettempdir(), "fiu-consistent-tempdir") + shutil.rmtree(dirname, ignore_errors=True) diff --git a/fedora-image-uploader/tests/test_handler.py b/fedora-image-uploader/tests/test_handler.py index c300d85..1f5e8b6 100644 --- a/fedora-image-uploader/tests/test_handler.py +++ b/fedora-image-uploader/tests/test_handler.py @@ -1,6 +1,7 @@ import json import logging import os +import tempfile from unittest import mock import pytest @@ -82,8 +83,14 @@ from fedora_image_uploader.handler import _run ), ], ) -def test_containers(mock_subrun, _mock_login, mock_missing, fixtures_dir, caplog, compose): +def test_containers( + mock_subrun, _mock_login, mock_missing, fixtures_dir, caplog, compose, consistent_tempdir +): mock_subrun.return_value.returncode = 0 + # this is for the `skopeo inspect --raw` command we use to inspect + # the single-image manifests we just pushed when creating the + # multi-arch manifest + mock_subrun.return_value.stdout = '{"somejson": "something"}' message_file, cidorlabel, relnum, alias, milestone, expected_images = compose # mapping of reponames to expected image filename base strings repotoid = { @@ -131,27 +138,23 @@ def test_containers(mock_subrun, _mock_login, mock_missing, fixtures_dir, caplog # we will check that every command we expect is in here, remove # them all, and assert it's empty at the end runcalls = [" ".join(call.args[0]) for call in mock_subrun.call_args_list] + for call in runcalls: + # for convenient debugging of failures + print(call) # also check calls to _missing_manifest_arches misscalls = mock_missing.call_args_list for exprepo, arches in expected_images.items(): - # find the manifest creation calls (these are just per repo, - # and get images from the first configured registry) - expmanref = f"localhost/fiu-temp-{exprepo}-{maintag}" expfirstregref = f"registry.fedoraproject.org/{exprepo}:{maintag}" - expcalls = ( - f"buildah rmi {expmanref}", - f"buildah manifest create {expmanref} " - + " ".join(f"{expfirstregref}-{arch}" for arch in arches), - f"buildah rmi {expmanref}", - ) - for call in expcalls: - assert call in runcalls - runcalls.remove(call) # find the expected _missing_manifest_arches call expmcall = mock.call(expfirstregref, arches) assert expmcall in misscalls misscalls.remove(expmcall) + for arch in arches: + # find the expected skopeo inspect calls used for manifest construction + expcall = f"skopeo inspect --raw docker://{expfirstregref}-{arch}" + assert expcall in runcalls + runcalls.remove(expcall) for registry in ["registry.fedoraproject.org", "quay.io/fedora"]: # find the expected calls to skopeo copy for arch in arches: @@ -169,14 +172,51 @@ def test_containers(mock_subrun, _mock_login, mock_missing, fixtures_dir, caplog expcall = f"skopeo copy {expfn} docker://{registry}/{exprepo}:{maintag}-{arch}" assert expcall in runcalls runcalls.remove(expcall) - # find the expected calls to buildah for manifest publish - expcalls = [ - f"buildah manifest push {expmanref} docker://{registry}/{exprepo}:{exptag} --all" - for exptag in exptags - ] - for call in expcalls: - assert call in runcalls - runcalls.remove(call) + # find the expected skopeo calls for manifest publish, we cannot + # match these precisely as they have randomly-generated temporary + # directory names in them + for exptag in exptags: + mancall = [ + call + for call in runcalls + if call.startswith("skopeo copy oci:") + and call.endswith( + f"docker://{registry}/{exprepo}:{exptag} --multi-arch=index-only" + ) + ] + assert len(mancall) == 1 + runcalls.remove(mancall[0]) + + # check the multiarch manifest JSON is correct + manifestpath = os.path.join(tempfile.gettempdir(), "fiu-consistent-tempdir", "index_image") + indexfname = os.path.join(manifestpath, "index.json") + with open(indexfname, "r", encoding="utf-8") as indexfh: + index = json.load(indexfh) + # as our mock _run always returns the same fake manifest, the + # expected contents of the index depends only on the number of + # images (arches). we can only see the JSON for the *last* repo + # processed at this point + arches = list(expected_images.items())[-1][1] + if len(arches) == 3: + expdigest = "sha256:83f8fcc80dcc6661fad15a22e52db8c758b3be02b774584f26e0000207bf95ca" + explen = 1085 + elif len(arches) == 4: + expdigest = "sha256:da3ea76329f68466e4ebb8e64ed26af1f4df902faee63aed88ff39f40d20fe03" + explen = 1409 + else: + expdigest = f"FIXME PLEASE UPDATE FOR {str(len(arches))} ARCHES" + explen = 0 + assert index == { + "manifests": [ + { + "digest": expdigest, + "mediaType": "application/vnd.oci.image.index.v1+json", + "size": explen, + }, + ], + "mediaType": "application/vnd.oci.image.index.v1+json", + "schemaVersion": 2, + } assert len(runcalls) == 0 assert len(misscalls) == 0 @@ -186,7 +226,8 @@ def test_containers(mock_subrun, _mock_login, mock_missing, fixtures_dir, caplog mock_missing.return_value = set(("x86_64",)) consumer(msg) runcalls = [" ".join(call.args[0]) for call in mock_subrun.call_args_list] - assert all("buildah" not in call for call in runcalls) + assert all("skopeo inspect" not in call for call in runcalls) + assert all("skopeo copy oci:" not in call for call in runcalls) assert ( caplog.records[-1].getMessage() == "Arches x86_64 in current manifest were not built, not publishing manifest" @@ -201,10 +242,14 @@ def test_containers(mock_subrun, _mock_login, mock_missing, fixtures_dir, caplog @mock.patch("subprocess.run") def test_containers_missing(mock_subrun, caplog): """Tests for the _missing_manifest_arches functionality.""" - # mock result of the `buildah inspect` command, this is an edited + # mock result of the `skopeo inspect` command, this is an edited # version of the real f40 response with two "real" manifest dicts. # The real response has eight dicts - four 'oci.image' and four - # 'docker.distribution' dicts, one of each for the four arches + # 'docker.distribution' dicts, one of each for the four arches. + # this is actually the output you'd get from + # `buildah manifest inspect`, which pretty-prints the JSON; + # `skopeo inspect --raw` gives the same data but minified, which + # is harder to read mock_subrun.return_value.stdout = """ { "schemaVersion": 2, @@ -246,7 +291,7 @@ def test_containers_missing(mock_subrun, caplog): # check the correct command was run assert mock_subrun.call_count == 1 misscmd = " ".join(mock_subrun.call_args[0][0]) - assert misscmd == "buildah manifest inspect registry.fedoraproject.org/fedora:40" + assert misscmd == "skopeo inspect --raw docker://registry.fedoraproject.org/fedora:40" # request with *more* arches should return empty set assert ( consumer._missing_manifest_arches(