From 03d123b42826d15b6c60808e4abdef0776c0ba36 Mon Sep 17 00:00:00 2001
From: Gustavo Valverde
Date: Wed, 9 Mar 2022 17:10:05 -0400
Subject: [PATCH] refactor(test): cleanup GCP instances on a single PR (#3766)

* refactor(test): reuse same GCP instance on a single PR

This also ensures the deployments are faster, and we only delete
the instance when merging or closing the PR, instead of doing it on
each push to the PR

* fix(deploy): add zone to updates

* fix: typo

* fix(ci): improve conditions for updates

* fix(deploy): delete old deployments instead of reusing it

* fix(deploy): keep delete command after run

* fix(deploy): always create an instance

* fix(deploy): delete disks on every delete command.

* imp(ci): use better id name

* Update .github/workflows/test.yml

Co-authored-by: Deirdre Connolly

* imp: handle errors correctly on deletion

* fix: do not hide valid errors

* fix: edge case where the container is not ready yet

Co-authored-by: Deirdre Connolly
---
Review notes (text between the "---" line and the diff is ignored by `git am`):
 * clean.yml: removed `needs: [ build ]` from the `delete` job; the
   workflow defines no `build` job, so the dependency cannot be resolved.
 * clean.yml: added explanatory comments, quoted the loop variable and
   ended the file with a newline.
 * The `index` line of clean.yml still carries the blob id of the
   originally committed content.

 .github/workflows/clean.yml          | 43 ++++++++++++++++++++++++++++
 .github/workflows/test-full-sync.yml | 17 +++++++++--
 .github/workflows/test.yml           | 43 ++++++++++++++++++++++++----
 3 files changed, 95 insertions(+), 8 deletions(-)
 create mode 100644 .github/workflows/clean.yml

diff --git a/.github/workflows/clean.yml b/.github/workflows/clean.yml
new file mode 100644
index 00000000..8d3cb033
--- /dev/null
+++ b/.github/workflows/clean.yml
@@ -0,0 +1,43 @@
+# Deletes the GCP compute instances that match the current ref slug when a
+# pull request targeting `main` is closed, or when dispatched manually.
+name: Clean
+
+on:
+  workflow_dispatch:
+  pull_request:
+    branches:
+      - main
+    types: [ closed ]
+
+env:
+  NETWORK: Mainnet
+  PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
+  REGION: us-central1
+  ZONE: us-central1-a
+
+jobs:
+  delete:
+    name: Delete test deployments
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2.4.0
+        with:
+          persist-credentials: false
+
+      - name: Inject slug/short variables
+        uses: rlespinasse/github-slug-action@v4
+
+      # Setup gcloud CLI
+      - name: Authenticate to Google Cloud
+        id: auth
+        uses: google-github-actions/auth@v0.5.0
+        with:
+          credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
+
+      # Best-effort cleanup (continue-on-error): every instance returned by
+      # the slug filter is deleted together with all of its disks.
+      - name: Delete test instance
+        continue-on-error: true
+        run: |
+          TEST_INSTANCES=$(gcloud compute instances list --filter="${{ env.GITHUB_REF_SLUG_URL }}" --format='value(NAME)')
+          for instance in ${TEST_INSTANCES}; do gcloud compute instances delete "${instance}" --zone "${{ env.ZONE }}" --delete-disks all --quiet; done
diff --git a/.github/workflows/test-full-sync.yml b/.github/workflows/test-full-sync.yml
index d1270d2c..c595ddaf 100644
--- a/.github/workflows/test-full-sync.yml
+++ b/.github/workflows/test-full-sync.yml
@@ -135,6 +135,17 @@ jobs:
         with:
           credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
 
+      # Check if our destination compute instance exists and delete it
+      - name: Delete existing instance with same SHA
+        id: delete-old-instance
+        run: |
+          INSTANCE=$(gcloud compute instances list --filter=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
+          if [ -z "${INSTANCE}" ]; then
+            echo "No instance to delete"
+          else
+            gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
+          fi
+
       # Creates Compute Engine virtual machine instance w/ disks
       - name: Create GCP compute instance
         id: create-instance
@@ -159,7 +170,7 @@ jobs:
       # https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
       - name: Get container name from logs
         id: get-container-name
-        if: steps.create-instance.outcome == 'success'
+        if: ${{ steps.create-instance.outcome == 'success' }}
         run: |
           INSTANCE_ID=$(gcloud compute instances describe full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
           echo "Using instance: $INSTANCE_ID"
@@ -170,9 +181,11 @@ jobs:
           done
           CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
           echo "::set-output name=zebra_container::$CONTAINER_NAME"
+          sleep 60
 
       - name: Full sync mainnet
         id: full-sync-mainnet
+        if: ${{ steps.create-instance.outcome == 'success' }}
         run: |
           gcloud compute ssh \
           full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
@@ -188,4 +201,4 @@ jobs:
         if: ${{ steps.full-sync-mainnet.outcome == 'success' || steps.full-sync-mainnet.outcome == 'failure' }}
         continue-on-error: true
         run: |
-          gcloud compute instances delete "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --delete-disks all --zone "${{ env.ZONE }}"
+          gcloud compute instances delete "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 7ccf2565..f3caa79f 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -28,7 +28,7 @@ on:
 
 env:
   CARGO_INCREMENTAL: '1'
-  ZEBRA_SKIP_IPV6_TESTS: "1"
+  ZEBRA_SKIP_IPV6_TESTS: '1'
   RUST_BACKTRACE: full
   RUST_LIB_BACKTRACE: full
   COLORBT_SHOW_HIDDEN: '1'
@@ -135,6 +135,11 @@ jobs:
           docker pull ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}
           docker run -e ZEBRA_SKIP_IPV6_TESTS --name zebrad-tests -t ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} cargo test --locked --release --features enable-sentry --workspace -- --include-ignored
 
+  # This test changes zebra-chain's activation heights,
+  # which can recompile all the Zebra crates,
+  # so we want its build products to be cached separately.
+  #
+  # Also, we don't want to accidentally use the fake heights in other tests.
   test-fake-activation-heights:
     name: Test with fake activation heights
     runs-on: ubuntu-latest
@@ -241,6 +246,18 @@ jobs:
         with:
           credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
 
+      # Check if our destination compute instance exists and delete it
+      - name: Delete existing instance with same SHA
+        id: delete-old-instance
+        if: ${{ steps.changed-files-specific.outputs.any_changed == 'true' || github.event.inputs.regenerate-disks == 'true' }}
+        run: |
+          INSTANCE=$(gcloud compute instances list --filter=regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
+          if [ -z "${INSTANCE}" ]; then
+            echo "No instance to delete"
+          else
+            gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
+          fi
+
       - name: Create GCP compute instance
         id: create-instance
         if: ${{ steps.changed-files-specific.outputs.any_changed == 'true' || github.event.inputs.regenerate-disks == 'true' }}
@@ -276,7 +293,7 @@ jobs:
       # https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
       - name: Get container name from logs
         id: get-container-name
-        if: steps.create-instance.outcome == 'success'
+        if: ${{ steps.create-instance.outcome == 'success' }}
         run: |
           INSTANCE_ID=$(gcloud compute instances describe regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
           echo "Using instance: $INSTANCE_ID"
@@ -287,10 +304,11 @@ jobs:
           done
           CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
           echo "::set-output name=zebra_container::$CONTAINER_NAME"
+          sleep 60
 
       - name: Regenerate stateful disks logs
         id: sync-to-checkpoint
-        if: steps.create-instance.outcome == 'success'
+        if: ${{ steps.create-instance.outcome == 'success' }}
         run: |
           gcloud compute ssh \
           regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
@@ -319,7 +337,7 @@ jobs:
         if: ${{ steps.sync-to-checkpoint.outcome == 'success' || steps.sync-to-checkpoint.outcome == 'failure' }}
         continue-on-error: true
         run: |
-          gcloud compute instances delete "regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --delete-disks all --zone "${{ env.ZONE }}"
+          gcloud compute instances delete "regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
 
   # Test that Zebra syncs and fully validates a few thousand blocks from a cached post-checkpoint state
   test-stateful-sync:
@@ -345,6 +363,18 @@ jobs:
         with:
           credentials_json: ${{ secrets.GOOGLE_CREDENTIALS }}
 
+      # Check if our destination compute instance exists and delete it
+      - name: Delete existing instance with same SHA
+        id: delete-old-instance
+        if: ${{ needs.regenerate-stateful-disks.outputs.any_changed != 'true' || github.event.inputs.regenerate-disks != 'true'}}
+        run: |
+          INSTANCE=$(gcloud compute instances list --filter=sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
+          if [ -z "${INSTANCE}" ]; then
+            echo "No instance to delete"
+          else
+            gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
+          fi
+
       - name: Get disk state name from gcloud
         id: get-disk-name
         if: ${{ needs.regenerate-stateful-disks.outputs.any_changed != 'true' || github.event.inputs.regenerate-disks != 'true'}}
@@ -391,7 +421,7 @@ jobs:
       # https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
       - name: Get container name from logs
         id: get-container-name
-        if: steps.create-instance.outcome == 'success'
+        if: ${{ steps.create-instance.outcome == 'success' }}
         run: |
           INSTANCE_ID=$(gcloud compute instances describe sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
           echo "Using instance: $INSTANCE_ID"
@@ -402,6 +432,7 @@ jobs:
           done
           CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
           echo "::set-output name=zebra_container::$CONTAINER_NAME"
+          sleep 60
 
       - name: Sync past mandatory checkpoint logs
         id: sync-past-checkpoint
@@ -420,4 +451,4 @@ jobs:
         if: ${{ steps.sync-past-checkpoint.outcome == 'success' || steps.sync-past-checkpoint.outcome == 'failure' }}
         continue-on-error: true
         run: |
-          gcloud compute instances delete "sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --delete-disks all --zone "${{ env.ZONE }}"
+          gcloud compute instances delete "sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet