From 5c9104b6f4266852ebb92261ba6e04ecf9ada59c Mon Sep 17 00:00:00 2001 From: Cesar Munoz <56847527+LikeTheSalad@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:25:07 +0200 Subject: [PATCH 1/2] Adding retry mechanism to AWS lambda publishing --- .ci/publish-aws.sh | 95 ++++++++++++++++++++++++++++++++++------------ 1 file changed, 71 insertions(+), 24 deletions(-) diff --git a/.ci/publish-aws.sh b/.ci/publish-aws.sh index e260018992..c3258ec866 100755 --- a/.ci/publish-aws.sh +++ b/.ci/publish-aws.sh @@ -1,5 +1,5 @@ #!/usr/bin/env bash -set -euo pipefail +set -uo pipefail # # Publishes the created lambda layer zip to AWS as AWS Lambda Layers in every region. @@ -7,6 +7,7 @@ set -euo pipefail # # AWS_FOLDER is used for temporary output of publishing layers used to create the arn table. (Optional) # ELASTIC_LAYER_NAME is the name of the lambda layer e.g. elastic-apm-java-ver-3-44-1 for the git tag v3.44.1 (Required) +# MAX_RETRIES is the number of retries for transient failures (Optional, default: 3) # This needs to be set in GH actions @@ -16,6 +17,7 @@ export AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-eu-west-1} export AWS_FOLDER=${AWS_FOLDER:-.ci/.aws} FULL_LAYER_NAME=${ELASTIC_LAYER_NAME:?layer name not provided} +MAX_RETRIES=${MAX_RETRIES:-3} ALL_AWS_REGIONS=$(aws ec2 describe-regions --output json --no-cli-pager | jq -r '.Regions[].RegionName') rm -rf "${AWS_FOLDER}" @@ -25,30 +27,75 @@ zip_file="./apm-agent-lambda-layer/target/${FULL_LAYER_NAME}.zip" mv ./apm-agent-lambda-layer/target/elastic-apm-java-aws-lambda-layer-*.zip "${zip_file}" +failed_regions=() + +publish_to_region() { + local region=$1 + local attempt=1 + + while [ $attempt -le $MAX_RETRIES ]; do + echo "Publish ${FULL_LAYER_NAME} in ${region} (attempt ${attempt}/${MAX_RETRIES})" + if publish_output=$(aws lambda \ + --output json \ + publish-layer-version \ + --region="${region}" \ + --layer-name="${FULL_LAYER_NAME}" \ + --description="AWS Lambda Extension Layer for the Elastic APM Java Agent" \ + 
--license-info="Apache-2.0" \ + --compatible-runtimes java8.al2 java11 java17 java21 \ + --zip-file="fileb://${zip_file}" 2>&1); then + + echo "${publish_output}" > "${AWS_FOLDER}/${region}" + layer_version=$(echo "${publish_output}" | jq '.Version') + echo "Grant public layer access ${FULL_LAYER_NAME}:${layer_version} in ${region}" + + if grant_access_output=$(aws lambda \ + --output json \ + add-layer-version-permission \ + --region="${region}" \ + --layer-name="${FULL_LAYER_NAME}" \ + --action="lambda:GetLayerVersion" \ + --principal='*' \ + --statement-id="${FULL_LAYER_NAME}" \ + --version-number="${layer_version}" 2>&1); then + + echo "${grant_access_output}" > "${AWS_FOLDER}/.${region}-public" + return 0 + else + echo "WARNING: Failed to grant public access in ${region}: ${grant_access_output}" + fi + else + echo "WARNING: Failed to publish to ${region}: ${publish_output}" + fi + + attempt=$((attempt + 1)) + if [ $attempt -le $MAX_RETRIES ]; then + echo "Retrying in 10 seconds..." + sleep 10 + fi + done + + return 1 +} + for region in $ALL_AWS_REGIONS; do - echo "Publish ${FULL_LAYER_NAME} in ${region}" - publish_output=$(aws lambda \ - --output json \ - publish-layer-version \ - --region="${region}" \ - --layer-name="${FULL_LAYER_NAME}" \ - --description="AWS Lambda Extension Layer for the Elastic APM Java Agent" \ - --license-info="Apache-2.0" \ - --compatible-runtimes java8.al2 java11 java17 java21 \ - --zip-file="fileb://${zip_file}") - echo "${publish_output}" > "${AWS_FOLDER}/${region}" - layer_version=$(echo "${publish_output}" | jq '.Version') - echo "Grant public layer access ${FULL_LAYER_NAME}:${layer_version} in ${region}" - grant_access_output=$(aws lambda \ - --output json \ - add-layer-version-permission \ - --region="${region}" \ - --layer-name="${FULL_LAYER_NAME}" \ - --action="lambda:GetLayerVersion" \ - --principal='*' \ - --statement-id="${FULL_LAYER_NAME}" \ - --version-number="${layer_version}") - echo "${grant_access_output}" > 
"${AWS_FOLDER}/.${region}-public" + if ! publish_to_region "$region"; then + echo "ERROR: Failed to publish to ${region} after ${MAX_RETRIES} attempts, skipping" + failed_regions+=("$region") + fi done sh -c "./.ci/create-arn-table.sh" + +if [ ${#failed_regions[@]} -gt 0 ]; then + echo "" + echo "=========================================" + echo "WARNING: Failed to publish to the following regions:" + for region in "${failed_regions[@]}"; do + echo " - ${region}" + done + echo "=========================================" + echo "The ARN table has been generated for successful regions only." + echo "You may need to manually publish to the failed regions later." + exit 1 +fi From b900439c2c0a3756cf214472abc4eb7f47fa045e Mon Sep 17 00:00:00 2001 From: Cesar Munoz <56847527+LikeTheSalad@users.noreply.github.com> Date: Thu, 9 Apr 2026 16:29:49 +0200 Subject: [PATCH 2/2] Update max retry default --- .ci/publish-aws.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.ci/publish-aws.sh b/.ci/publish-aws.sh index c3258ec866..ea4d72758e 100755 --- a/.ci/publish-aws.sh +++ b/.ci/publish-aws.sh @@ -7,7 +7,7 @@ set -uo pipefail # # AWS_FOLDER is used for temporary output of publishing layers used to create the arn table. (Optional) # ELASTIC_LAYER_NAME is the name of the lambda layer e.g. elastic-apm-java-ver-3-44-1 for the git tag v3.44.1 (Required) -# MAX_RETRIES is the number of retries for transient failures (Optional, default: 3) +# MAX_RETRIES is the maximum number of publish attempts per region, so 1 means no retry (Optional, default: 1) # This needs to be set in GH actions @@ -17,7 +17,7 @@ export AWS_DEFAULT_REGION=${AWS_DEFAULT_REGION:-eu-west-1} export AWS_FOLDER=${AWS_FOLDER:-.ci/.aws} FULL_LAYER_NAME=${ELASTIC_LAYER_NAME:?layer name not provided} -MAX_RETRIES=${MAX_RETRIES:-3} +MAX_RETRIES=${MAX_RETRIES:-1} ALL_AWS_REGIONS=$(aws ec2 describe-regions --output json --no-cli-pager | jq -r '.Regions[].RegionName') rm -rf "${AWS_FOLDER}"