From a76d8620d81065b93b3a42f0eb0ef8a056c4e91b Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Jun 19 2024 19:16:18 +0000 Subject: [PATCH 1/3] Add helper to fallibly publish messages Since all the handlers are doing this the same way, add a little helper to try publishing and log failures when exceptions occur. --- diff --git a/fedora-image-uploader/fedora_image_uploader/handler.py b/fedora-image-uploader/fedora_image_uploader/handler.py index 5c11043..5e7ac28 100644 --- a/fedora-image-uploader/fedora_image_uploader/handler.py +++ b/fedora-image-uploader/fedora_image_uploader/handler.py @@ -170,19 +170,7 @@ class Uploader: }, ) if self.conf["container"].get("publish_amqp_messages", False): - try: - api.publish(message) - except ( - fm_exceptions.PublishTimeout, - fm_exceptions.PublishReturned, - ) as e: - _log.warning("Unable to publish ContainerPublishV1 message: %s", str(e)) - except fm_exceptions.PublishForbidden as e: - _log.error( - "Unable to publish message to topic %s, permission denied: %s", - message.topic, - str(e), - ) + fallible_publish(message) def _missing_manifest_arches(self, source: str, builtarches: Iterable[str]) -> set: """ @@ -356,18 +344,7 @@ class Uploader: # Gate publishing behind a feature flag so we can roll out updates while getting # proper permissions for publishing. if self.conf["azure"].get("publish_amqp_messages", False): - try: - api.publish(message=message) - except (fm_exceptions.PublishTimeout, fm_exceptions.PublishReturned) as e: - # There's always tomorrow for a new image, rather than restarting the whole - # process, we'll skip publishing the message and try again next time. 
- _log.warning("Unable to publish AzurePublishV1 message: %s", str(e)) - except fm_exceptions.PublishForbidden as e: - _log.error( - "Unable to publish message to topic %s, permission denied: %s", - message.topic, - str(e), - ) + fallible_publish(message) try: self.azure_cleanup_images() except Exception: @@ -500,3 +477,23 @@ class Uploader: self.container_repos[repo].append(arch) else: self.container_repos[repo] = [arch] + + +def fallible_publish(message): + """ + Helper to publish AMQP messages fallibly. + + Rather than try really hard to publish every message, if the broker is unavailable it's + reasonable to just wait until the next image (which happens daily) to get built and try + again then. + """ + try: + api.publish(message=message) + except (fm_exceptions.PublishTimeout, fm_exceptions.PublishReturned) as e: + _log.warning("Unable to publish %s message: %s", message.__class__.__name__, str(e)) + except fm_exceptions.PublishForbidden as e: + _log.error( + "Unable to publish message to topic %s, permission denied: %s", + message.topic, + str(e), + ) From fd94476e5cda18660643a0537f33438fad5ff90f Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Jun 19 2024 19:17:23 +0000 Subject: [PATCH 2/3] Only include handlers if there's a configuration section for them All the handlers need configuration to work properly. This ensures handlers aren't called unless they have some configuration available which makes it easier to roll out updated images without needing to land production configuration changes first. 
--- diff --git a/fedora-image-uploader/fedora_image_uploader/handler.py b/fedora-image-uploader/fedora_image_uploader/handler.py index 5e7ac28..cf3fdf8 100644 --- a/fedora-image-uploader/fedora_image_uploader/handler.py +++ b/fedora-image-uploader/fedora_image_uploader/handler.py @@ -55,7 +55,13 @@ class Uploader: self.requests = Session() retry_config = Retry(total=5, backoff_factor=1) self.requests.mount("https://", adapters.HTTPAdapter(max_retries=retry_config)) - self.handlers = (self.handle_azure, self.handle_container) + handlers = { + "azure": self.handle_azure, + "container": self.handle_container, + } + self.handlers = [ + handler for conf_key, handler in handlers.items() if conf_key in self.conf.keys() + ] # tracks the container repos we got images for, for manifest # creation purposes self.container_repos = dict() @@ -432,7 +438,7 @@ class Uploader: def handle_container(self, image: dict, ffrel: ff_release.Release): """Handle container images.""" - registries = self.conf.get("container", {}).get("registries") + registries = self.conf["container"].get("registries") if not registries: # we can't do anything if no registries are configured return From 2dbc87241b0112c4a8545d9b40121deec47ed0de Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Jun 24 2024 20:27:32 +0000 Subject: [PATCH 3/3] Add initial AWS support This likely doesn't work, but needs to be deployed to stage to test. 
--- diff --git a/Containerfile b/Containerfile index d139b52..9e862e8 100644 --- a/Containerfile +++ b/Containerfile @@ -19,7 +19,8 @@ RUN python3 -m venv venv && \ # These are not yet in an upstream release, when azure.azcollection > 2.3.0 check applicability # https://github.com/ansible-collections/azure/pull/1466 -RUN venv/bin/ansible-galaxy collection install --force azure.azcollection && \ +RUN venv/bin/ansible-galaxy collection install --force amazon.aws && \ + venv/bin/ansible-galaxy collection install --force azure.azcollection && \ pushd ~/.ansible/collections/ansible_collections/azure/azcollection/ && \ patch -p1 < /srv/image-uploader/src/patches/azure-pr-1466.patch && \ popd diff --git a/fedora-image-uploader-messages/fedora_image_uploader_messages/__init__.py b/fedora-image-uploader-messages/fedora_image_uploader_messages/__init__.py index afdae06..61add0b 100644 --- a/fedora-image-uploader-messages/fedora_image_uploader_messages/__init__.py +++ b/fedora-image-uploader-messages/fedora_image_uploader_messages/__init__.py @@ -1,3 +1,7 @@ __version__ = "1.0.0" -from .publish import AzurePublishedV1, ContainerPublishedV1 # noqa: F401 +from .publish import ( # noqa: F401 + AwsPublishedV1, + AzurePublishedV1, + ContainerPublishedV1, +) diff --git a/fedora-image-uploader-messages/fedora_image_uploader_messages/publish.py b/fedora-image-uploader-messages/fedora_image_uploader_messages/publish.py index 19414cf..867b1ea 100644 --- a/fedora-image-uploader-messages/fedora_image_uploader_messages/publish.py +++ b/fedora-image-uploader-messages/fedora_image_uploader_messages/publish.py @@ -21,6 +21,65 @@ class _PublishedV1(message.Message): return "fedora-image-uploader" +class AwsPublishedV1(_PublishedV1): + """Published when an AWS AMI is created from an image.""" + + topic = ".".join([_PublishedV1.topic, "aws"]) + body_schema = { + "id": f"{SCHEMA_URL}/v1/{'.'.join([_PublishedV1.topic, 'aws'])}", + "$schema": "https://json-schema.org/draft/2019-09/schema", + 
"description": ( + "Schema for messages sent by fedora-image-uploader when a " + "new Amazon Web Services image is published." + ), + "type": "object", + "properties": { + "architecture": { + "type": "string", + "description": "The machine architecture of the image (x86_64, aarch64, etc).", + }, + "compose_id": { + "type": "string", + "description": "The compose ID this image was created from.", + }, + "image_name": { + "type": "string", + "description": "The name of the AMI.", + }, + "regions": { + "type": "object", + "description": ( + "A map of regions to AMI IDs. The object keys are the AWS region and " + "the value is the AMI ID." + ), + }, + }, + "required": [ + "architecture", + "compose_id", + "image_name", + "regions", + ], + } + + @property + def summary(self): + return ( + f"{self.app_name} published an AWS image from compose {self.body['compose_id']} as " + f"{self.body['image_name']}" + ) + + def __str__(self): + regions_and_ids = [f"{region} as {id}" for region, id in self.body["regions"].items()] + return ( + "A new image has been published to Amazon Web Services:\n\n" + f"\tArchitecture: {self.body['architecture']}\n" + f"\tCompose ID: {self.body['compose_id']}\n" + f"\tImage Name: {self.body['image_name']}\n" + f"\tRegions: {', '.join(regions_and_ids)}\n" + ) + + class AzurePublishedV1(_PublishedV1): """ Published when an image is uploaded to the Azure image gallery. 
diff --git a/fedora-image-uploader/fedora_image_uploader/handler.py b/fedora-image-uploader/fedora_image_uploader/handler.py index cf3fdf8..be22ba5 100644 --- a/fedora-image-uploader/fedora_image_uploader/handler.py +++ b/fedora-image-uploader/fedora_image_uploader/handler.py @@ -15,7 +15,11 @@ from azure.mgmt.compute import ComputeManagementClient from fedfind import exceptions as ff_exceptions from fedfind import helpers as ff_helpers from fedfind import release as ff_release -from fedora_image_uploader_messages import AzurePublishedV1, ContainerPublishedV1 +from fedora_image_uploader_messages import ( + AwsPublishedV1, + AzurePublishedV1, + ContainerPublishedV1, +) from fedora_messaging import api, config from fedora_messaging import exceptions as fm_exceptions from fedora_messaging import message as fm_message @@ -56,6 +60,7 @@ class Uploader: retry_config = Retry(total=5, backoff_factor=1) self.requests.mount("https://", adapters.HTTPAdapter(max_retries=retry_config)) handlers = { + "aws": self.handle_aws, "azure": self.handle_azure, "container": self.handle_container, } @@ -249,7 +254,9 @@ class Uploader: return image_dest - def run_playbook(self, playbook: str, variables: dict, workdir: str): + def run_playbook( + self, playbook: str, variables: dict, workdir: str + ) -> ansible_runner.runner.Runner: """ Execute Ansible playbook in workdir using variables. 
@@ -272,6 +279,76 @@ class Uploader:
         if result.rc != 0:
             _log.error(f"Playbook failed with return code {result.rc}")
             raise fm_exceptions.Nack()
+        return result
+
+    def handle_aws(self, image: dict, ffrel: ff_release.Release):
+        """
+        Upload an AmazonEC2 Cloud Base image to AWS and announce the resulting AMIs.
+
+        Images that are not the Cloud_Base subvariant, or whose path does not contain
+        "AmazonEC2", are ignored.
+
+        Args:
+            image: The image metadata; "subvariant", "path" and "arch" are read here.
+            ffrel: The fedfind release the image belongs to.
+
+        Raises:
+            fedora_messaging.exceptions.Nack: If the playbook fails (from run_playbook).
+        """
+        if image.get("subvariant") != "Cloud_Base" or "AmazonEC2" not in image.get("path", ""):
+            return
+
+        with tempfile.TemporaryDirectory() as workdir:
+            image_path = self.download_image(image, workdir, decompress=True)
+            date = ffrel.metadata["composeinfo"]["payload"]["compose"]["date"]
+            respin = ffrel.metadata["composeinfo"]["payload"]["compose"]["respin"]
+            # A plain string, not a 1-tuple: this is passed to Ansible as the AMI name and
+            # published as "image_name", which the message schema declares as a string.
+            ami_name = (
+                f"Fedora-Cloud-Base-AmazonEC2.{image['arch']}-{ffrel.relnum}-{date}.{respin}"
+            )
+            variables = {
+                "base_region": self.conf["aws"]["base_region"],
+                "s3_bucket_name": self.conf["aws"]["s3_bucket_name"],
+                "ami_description": self.conf["aws"]["ami_description"],
+                "ami_volume_dev_name": self.conf["aws"]["ami_volume_dev_name"],
+                "ami_volume_type": self.conf["aws"]["ami_volume_type"],
+                "ami_volume_size": self.conf["aws"]["ami_volume_size"],
+                "ami_regions": self.conf["aws"]["ami_regions"],
+                "ami_name": ami_name,
+                "architecture": image["arch"],
+                "image_source": image_path,
+                "exclude_from_latest": True,
+                "ansible_remote_tmp": workdir,
+            }
+
+            playbook = os.path.join(PLAYBOOKS, "aws.yml")
+            run = self.run_playbook(playbook, variables, workdir)
+            # extract the AMI ids from the Ansible run
+            regions = dict()
+            for event in run.events:
+                # NOTE(review): looped tasks are expected to report each item as
+                # runner_item_on_ok rather than runner_on_ok - confirm on stage.
+                if event["event"] not in ("runner_on_ok", "runner_item_on_ok"):
+                    continue
+                res = event.get("event_data", {}).get("res", {})
+                # NOTE(review): "invocation" appears to be nested under the task result;
+                # the original top-level lookup is kept as a fallback. Confirm on stage.
+                invocation = res.get("invocation") or event.get("invocation") or {}
+                uploaded_ami = res.get("image_id")
+                region = invocation.get("module_args", {}).get("region")
+                # Successful tasks that report no image_id or region are skipped, not fatal.
+                if uploaded_ami and region:
+                    regions[region] = uploaded_ami
+            message = AwsPublishedV1(
+                body={
+                    "architecture": image["arch"],
+                    "compose_id": ffrel.cid,
+                    "image_name": ami_name,
+                    "regions": regions,
+                },
+            )
+            if self.conf["aws"].get("publish_amqp_messages", False):
+                fallible_publish(message)
 
     def handle_azure(self, image: dict, ffrel: ff_release.Release):
         """
diff --git 
a/fedora-image-uploader/fedora_image_uploader/playbooks/aws.yml b/fedora-image-uploader/fedora_image_uploader/playbooks/aws.yml
new file mode 100644
index 0000000..2c78020
--- /dev/null
+++ b/fedora-image-uploader/fedora_image_uploader/playbooks/aws.yml
@@ -0,0 +1,108 @@
+# This playbook expects the following environment variables to be set for authentication:
+# - AWS_ACCESS_KEY_ID
+# - AWS_SECRET_ACCESS_KEY
+
+---
+- name: Create Fedora AWS marketplace image
+  hosts: localhost
+  # defaults and values largely pulled from fedimg without much thought on my part
+  vars:
+    base_region: us-east-1
+    # current names are fedora-s3-bucket-fedimg{-testing}
+    s3_bucket_name: fedora-image-uploads
+    # current format seems to be Fedora-Cloud-Base-AmazonEC2.x86_64-40-20240619.0
+    ami_name: "Fedora Cloud"
+    ami_description: "Fedora Cloud"
+    ami_virt_type: "hvm"
+    ami_volume_dev_name: "/dev/sda1"
+    ami_volume_type: "gp3"
+    ami_volume_size: 7
+    ami_regions:
+      - 'af-south-1'
+      - 'eu-north-1'
+      - 'ap-south-1'
+      - 'eu-west-3'
+      - 'eu-west-2'
+      - 'eu-south-1'
+      - 'eu-west-1'
+      - 'ap-northeast-3'
+      - 'ap-northeast-2'
+      - 'me-south-1'
+      - 'ap-northeast-1'
+      - 'sa-east-1'
+      - 'ca-central-1'
+      - 'ap-east-1'
+      - 'ap-southeast-1'
+      - 'ap-southeast-2'
+      - 'ap-southeast-3'
+      - 'eu-central-1'
+      - 'us-east-1'
+      - 'us-east-2'
+      - 'us-west-1'
+      - 'us-west-2'
+    architecture: "x86_64"
+  tasks:
+
+    - name: Ensure S3 bucket exists
+      amazon.aws.s3_bucket:
+        state: present
+        name: "{{ s3_bucket_name }}"
+        region: "{{ base_region }}"
+
+    - name: Checksum local file
+      register: local_image_file
+      ansible.builtin.stat:
+        path: "{{ image_source }}"
+        checksum_algorithm: sha256
+        get_checksum: true
+
+    - name: Set s3_object_name to .raw
+      ansible.builtin.set_fact:
+        s3_object_name: "{{ local_image_file.stat.checksum }}.raw"
+
+    - name: Upload image to S3 bucket
+      register: s3_upload
+      amazon.aws.s3_object:
+        region: "{{ base_region }}"
+        src: "{{ image_source }}"
+        bucket: "{{ s3_bucket_name }}"
+        object: "{{ 
s3_object_name }}" + mode: put + overwrite: false + + # TODO unclear if this task or the next one is what I need. + - name: Import image to EC2 + amazon.aws.ec2_import_image: + region: "{{ base_region }}" + state: present + boot_mode: "uefi-preferred" + description: "{{ ami_description }}" + disk_containers: + description: "Fedora Cloud" + device_name: "{{ ami_volume_dev_name }}" + format: "raw" + user_bucket: + s3_bucket: "{{ s3_bucket_name }}" + s3_key: "{{ s3_object_name }}" + platform: "Linux" + + - name: Create AMI from S3 object + loop: "{{ ami_regions }}" + register: created_ami + amazon.aws.ec2_ami: + state: present + region: "{{ item }}" + name: "{{ ami_name }}" + boot_mode: "uefi-preferred" + description: "{{ ami_description }}" + image_location: "{{ s3_upload.url }}" + architecture: "{{ architecture }}" + virtualization_type: "{{ ami_virt_type }}" + enhanced_networking: true + device_mapping: + - device_name: "{{ ami_volume_dev_name }}" + volume_size: "{{ ami_volume_size }}" + volume_type: "{{ ami_volume_type }}" + delete_on_termination: true + launch_permissions: + group_names: ['all'] diff --git a/fedora-image-uploader/pyproject.toml b/fedora-image-uploader/pyproject.toml index 4fdac4f..8dd8a81 100644 --- a/fedora-image-uploader/pyproject.toml +++ b/fedora-image-uploader/pyproject.toml @@ -61,6 +61,8 @@ dependencies = [ "azure-mgmt-recoveryservicesbackup", "azure-mgmt-notificationhubs", "azure-mgmt-eventhub", + "boto3", + "botocore", "click", "fedora-messaging", "fedora-image-uploader-messages", diff --git a/fedora-messaging.toml.example b/fedora-messaging.toml.example index ac6a2bd..2516af6 100644 --- a/fedora-messaging.toml.example +++ b/fedora-messaging.toml.example @@ -56,6 +56,39 @@ storage_account_type = "Standard_LRS" [consumer_config.container] registries = ["registry.fedoraproject.org", "quay.io/fedora"] +[consumer_config.aws] +base_region = "us-east-1" +s3_bucket_name = "fedora-image-uploads" +ami_description = "Fedora Cloud base image." 
+ami_volume_dev_name = "/dev/sda1" +ami_volume_type = "gp3" +ami_volume_size = 7 +ami_regions = [ + "af-south-1", + "eu-north-1", + "ap-south-1", + "eu-west-3", + "eu-west-2", + "eu-south-1", + "eu-west-1", + "ap-northeast-3", + "ap-northeast-2", + "me-south-1", + "ap-northeast-1", + "sa-east-1", + "ca-central-1", + "ap-east-1", + "ap-southeast-1", + "ap-southeast-2", + "ap-southeast-3", + "eu-central-1", + "us-east-1", + "us-east-2", + "us-west-1", + "us-west-2", +] + + [qos] prefetch_size = 0 prefetch_count = 25