From b3bccd665540c5de2a89b87df123b112c1f05fc3 Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Sun, 1 May 2022 12:52:57 -0400 Subject: [PATCH] fix(ci): garbage collect instances no matter previous steps status (#4255) * fix(ci): garbage collect instances no matter the status As we're not going to reuse test instances, the safest method to apply is to always delete these instances if they fail, get skipped or succeed running a workflow * Apply suggestions from code review Co-authored-by: Deirdre Connolly * docs(ci): improve comment Co-authored-by: Deirdre Connolly --- .github/workflows/test-full-sync.yml | 22 ++++++------- .github/workflows/test.yml | 47 +++++++++++----------------- 2 files changed, 27 insertions(+), 42 deletions(-) diff --git a/.github/workflows/test-full-sync.yml b/.github/workflows/test-full-sync.yml index 0065becd..418bcc3c 100644 --- a/.github/workflows/test-full-sync.yml +++ b/.github/workflows/test-full-sync.yml @@ -105,16 +105,6 @@ jobs: service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com' token_format: 'access_token' - # Check if our destination compute instance exists and delete it - - name: Delete existing instance with same SHA - run: | - INSTANCE=$(gcloud compute instances list --filter=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)') - if [ -z "${INSTANCE}" ]; then - echo "No instance to delete" - else - gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet - fi - # Creates Compute Engine virtual machine instance w/ disks - name: Create GCP compute instance run: | @@ -211,8 +201,14 @@ jobs: --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}" - name: Delete test instance - # Do not delete the instance if the sync timeouts in GitHub - if: ${{ steps.full-sync.outcome == 'success' || steps.full-sync.outcome == 'failure' }} + # If the `full-sync` step times out (+6 hours), the 
previous step (creating the image) will be skipped. + # Even if the instance continues running, no image will be created, so it's better to delete it. + if: always() continue-on-error: true run: | - gcloud compute instances delete "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet + INSTANCE=$(gcloud compute instances list --filter=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)') + if [ -z "${INSTANCE}" ]; then + echo "No instance to delete" + else + gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet + fi diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 754b132f..3c92896c 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -199,18 +199,6 @@ jobs: service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com' token_format: 'access_token' - # Check if our destination compute instance exists and delete it - - name: Delete existing instance with same SHA - id: delete-old-instance - if: ${{ steps.changed-files-specific.outputs.any_changed == 'true' || github.event.inputs.regenerate-disks == 'true' || github.event_name == 'push'}} - run: | - INSTANCE=$(gcloud compute instances list --filter=regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)') - if [ -z "${INSTANCE}" ]; then - echo "No instance to delete" - else - gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet - fi - - name: Create GCP compute instance id: create-instance if: ${{ steps.changed-files-specific.outputs.any_changed == 'true' || github.event.inputs.regenerate-disks == 'true' || github.event_name == 'push'}} @@ -325,11 +313,18 @@ jobs: --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}" - name: Delete test instance - # Do not delete the 
instance if the sync timeouts in GitHub - if: ${{ steps.sync-to-checkpoint.outcome == 'success' || steps.sync-to-checkpoint.outcome == 'failure' }} + # If the `sync-to-checkpoint` step times out (+6 hours), the previous step (creating the image) will be skipped. + # Even if the instance continues running, no image will be created, so it's better to delete it. + if: always() continue-on-error: true run: | - gcloud compute instances delete "regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet + INSTANCE=$(gcloud compute instances list --filter=regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)') + if [ -z "${INSTANCE}" ]; then + echo "No instance to delete" + else + gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet + fi + # Test that Zebra syncs and fully validates a few thousand blocks from a cached post-checkpoint state test-stateful-sync: @@ -359,17 +354,6 @@ jobs: service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com' token_format: 'access_token' - # Check if our destination compute instance exists and delete it - - name: Delete existing instance with same SHA - id: delete-old-instance - run: | - INSTANCE=$(gcloud compute instances list --filter=sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)') - if [ -z "${INSTANCE}" ]; then - echo "No instance to delete" - else - gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet - fi - - name: Get disk state name from gcloud id: get-disk-name run: | @@ -456,8 +440,13 @@ jobs: exit ${EXIT_CODE} - name: Delete test instance - # Do not delete the instance if the sync timeouts in GitHub - if: ${{ steps.sync-past-checkpoint.outcome == 'success' || steps.sync-past-checkpoint.outcome == 'failure' }} + # We don't want to leave a failed instance in GCP 
using resources + if: always() continue-on-error: true run: | - gcloud compute instances delete "sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet + INSTANCE=$(gcloud compute instances list --filter=sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)') + if [ -z "${INSTANCE}" ]; then + echo "No instance to delete" + else + gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet + fi