refactor(ci): use distinctive names for cached state disks (#4073)

* fix(ci): correctly use lowered network caps
  In the Test workflow we were using a different approach than the one being used in the Full sync test. Also, in the Full sync test the variable was LOWER_NET_NAME, but NETWORK was being used in the disk name, with caps.
* imp(ci): get state version from local constants.rs
* imp(ci): use the same get name approach
* fix(ci): use the correct name for state version variable
* imp(ci)!: use different disk names for cached states
  Disk states synced to canopy and synced to the chain tip should have different names to reference correctly on actual and coming tests the needed disk.
* imp(ci): test-stateful-sync no longer depends on regenerate-stateful-disks
* Apply suggestions from code review
  Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com>
* fix(ci): use a better name for network string conversion
* Revert "Apply suggestions from code review"
  This reverts commit cbbfaf4e9c507c424db9ef3d9d31f6a52819b0bf.
* fix: do not get log information if sync was skipped
* fix(ci): do not lower the variable name
* fix(ci): use the same lowering case for network everywhere
* test: more .dockerignore conditions
* fix: use the right approach to lower caps
* remove extra .dockerignore
* trigger a change for stateful disk regeneration
* imp(ci): use `checkpoint` as the disk reference
* revert wrong delete
* fix(ci): add INSTANCE_ID and correct logging message
* imp(ci): add `v` prefix to state version number
* fix(ci): remove typo from logging message to get the height

Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com>
2022-04-12 01:34:15 -04:00 · 2022-04-12 01:34:15 -04:00 · 47c1c01fcf
parent 831a2009bd
commit 47c1c01fcf
4 changed files with 78 additions and 51 deletions
--- a/docker/.dockerignore
+++ b/docker/.dockerignore
--- a/.github/workflows/test-full-sync.yml
+++ b/.github/workflows/test-full-sync.yml
@ -143,6 +143,10 @@ jobs:
      contents: 'read'
      id-token: 'write'
    steps:
+      - uses: actions/checkout@v3.0.0
+        with:
+          persist-credentials: false
+
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
@ -150,7 +154,9 @@ jobs:

      - name: Downcase network name for disks
        run: |
-          echo LOWER_NET_NAME="${{ github.event.inputs.network || env.NETWORK }}" | awk '{print tolower($0)}' >> $GITHUB_ENV
+          NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }}
+          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
+
      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
@ -203,20 +209,9 @@ jobs:
              sleep 10
          done

+          echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
          echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV

-      - name: Get state version from logs
-        run: |
-          STATE_VERSION=""
-
-          while [[ ${STATE_VERSION} == "" ]]; do
-              STATE_VERSION=$(gcloud logging read --format='value(jsonPayload.MESSAGE)' '(resource.labels.instance_id="${{ env.INSTANCE_ID }}" AND jsonPayload.message=~".+Opened Zebra state cache.+v[0-9]+.+")' | grep -oE "v[0-9]+" || [[ $? == 1 ]] )
-              echo "STATE_VERSION: $STATE_VERSION"
-              sleep 10
-          done
-
-          echo "STATE_VERSION=$STATE_VERSION" >> $GITHUB_ENV
-
      - name: Full sync
        id: full-sync
        run: |
@ -237,6 +232,15 @@ jobs:

          exit ${EXIT_CODE}

+      - name: Get state version from constants.rs
+        run: |
+          STATE_VERSION=""
+
+          LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
+          echo "STATE_VERSION: $LOCAL_STATE_VERSION"
+
+          echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
+
      - name: Get sync height from logs
        run: |
          SYNC_HEIGHT=""
@ -253,7 +257,7 @@ jobs:
      # Force the image creation as the disk is still attached, even though it is not being used by the container
      - name: Create image from state disk
        run: |
-          gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ env.SYNC_HEIGHT }} \
+          gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ env.SYNC_HEIGHT }}-tip \
          --force \
          --source-disk=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
          --source-disk-zone=${{ env.ZONE }} \
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@ -226,6 +226,7 @@ jobs:
          persist-credentials: false
          fetch-depth: '2'

+      # TODO move the `changed-files-specific` step to the build job for a better dependency tree
      # Only run this job if the database format version has (likely) changed.
      #
      # If we have accidentally changed the format, but not changed the version,
@ -247,7 +248,8 @@ jobs:

      - name: Downcase network name for disks
        run: |
-          echo LOWER_NET_NAME="${{ github.event.inputs.network || env.NETWORK }}" | awk '{print tolower($0)}' >> $GITHUB_ENV
+          NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }}
+          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
@ -277,8 +279,8 @@ jobs:
          gcloud compute instances create-with-container "regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
          --boot-disk-size 100GB \
          --boot-disk-type pd-ssd \
-          --create-disk name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy",size=100GB,type=pd-ssd \
-          --container-mount-disk mount-path='/zebrad-cache',name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy" \
+          --create-disk name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint",size=100GB,type=pd-ssd \
+          --container-mount-disk mount-path='/zebrad-cache',name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint" \
          --container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
          --container-restart-policy=never \
          --container-stdin \
@ -288,10 +290,10 @@ jobs:
          --container-arg="--locked" \
          --container-arg="--release" \
          --container-arg="--features" \
-          --container-arg="enable-sentry,test_sync_to_mandatory_checkpoint_${{ env.lower_net_name }}" \
+          --container-arg="enable-sentry,test_sync_to_mandatory_checkpoint_${{ env.NETWORK }}" \
          --container-arg="--manifest-path" \
          --container-arg="zebrad/Cargo.toml" \
-          --container-arg="sync_to_mandatory_checkpoint_${{ env.lower_net_name }}" \
+          --container-arg="sync_to_mandatory_checkpoint_${{ env.NETWORK }}" \
          --container-env=ZEBRA_SKIP_IPV6_TESTS=1 \
          --machine-type ${{ env.MACHINE_TYPE }} \
          --scopes cloud-platform \
@ -309,14 +311,16 @@ jobs:
        run: |
          INSTANCE_ID=$(gcloud compute instances describe regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
          echo "Using instance: $INSTANCE_ID"
+
+          CONTAINER_NAME=""
          while [[ ${CONTAINER_NAME} != *"regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}"* ]]; do
              CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
              echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}"
              sleep 10
          done
-          CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
-          echo "::set-output name=zebra_container::$CONTAINER_NAME"
-          sleep 60
+
+          echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
+          echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV

      - name: Regenerate stateful disks
        id: sync-to-checkpoint
@ -327,7 +331,7 @@ jobs:
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
-          --command="docker logs --follow ${{ env.ZEBRA_CONTAINER }}"
+          --command="docker logs --follow ${{ env.CONTAINER_NAME }}"

          EXIT_CODE=$(\
          gcloud compute ssh \
@ -335,21 +339,42 @@ jobs:
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
-          --command="docker wait ${{ env.ZEBRA_CONTAINER }}")
+          --command="docker wait ${{ env.CONTAINER_NAME }}")

          exit ${EXIT_CODE}
-        env:
-          ZEBRA_CONTAINER: ${{ steps.get-container-name.outputs.zebra_container }}

-      # Create image from disk that will be used to sync past mandatory checkpoint test
+      - name: Get state version from constants.rs
+        if: ${{ steps.sync-to-checkpoint.outcome == 'success' }}
+        run: |
+          STATE_VERSION=""
+
+          LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
+          echo "STATE_VERSION: $LOCAL_STATE_VERSION"
+
+          echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
+
+      - name: Get sync height from logs
+        if: ${{ steps.sync-to-checkpoint.outcome == 'success' }}
+        run: |
+          SYNC_HEIGHT=""
+
+          while [[ ${SYNC_HEIGHT} == "" ]]; do
+              SYNC_HEIGHT=$(gcloud logging read --format='value(jsonPayload.MESSAGE)' --order="desc" --limit=1 '(resource.labels.instance_id="${{ env.INSTANCE_ID }}" AND jsonPayload.message=~".+flushing database to disk height.+Height\([0-9]+\).+")' | grep -oE 'Height\([0-9]+\)' | grep -oE '[0-9]+' || [[ $? == 1 ]] )
+              echo "SYNC_HEIGHT: $SYNC_HEIGHT"
+              sleep 10
+          done
+
+          echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV
+
+      # Create image from disk that will be used for the sync past mandatory checkpoint test
      # Force the image creation as the disk is still attached, even though it is not being used by the container
      - name: Create image from state disk
        # Only run if the earlier step succeeds
-        if: steps.sync-to-checkpoint.outcome == 'success'
+        if: ${{ steps.sync-to-checkpoint.outcome == 'success' }}
        run: |
-          gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy \
+          gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ env.SYNC_HEIGHT }}-checkpoint \
          --force \
-          --source-disk=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy \
+          --source-disk=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint \
          --source-disk-zone=${{ env.ZONE }} \
          --storage-location=us \
          --description="Created from head branch ${{ env.GITHUB_HEAD_REF_SLUG_URL }} targeting ${{ env.GITHUB_BASE_REF_SLUG }} from PR ${{ env.GITHUB_REF_SLUG_URL }} with commit ${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA }}"
@ -377,7 +402,8 @@ jobs:

      - name: Downcase network name for disks
        run: |
-          echo LOWER_NET_NAME="${{ github.event.inputs.network || env.NETWORK }}" | awk '{print tolower($0)}' >> $GITHUB_ENV
+          NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }}
+          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
@ -391,7 +417,6 @@ jobs:
      # Check if our destination compute instance exists and delete it
      - name: Delete existing instance with same SHA
        id: delete-old-instance
-        if: ${{ needs.regenerate-stateful-disks.outputs.any_changed != 'true' || github.event.inputs.regenerate-disks != 'true'}}
        run: |
          INSTANCE=$(gcloud compute instances list --filter=sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
          if [ -z "${INSTANCE}" ]; then
@ -402,12 +427,12 @@ jobs:

      - name: Get disk state name from gcloud
        id: get-disk-name
-        if: ${{ needs.regenerate-stateful-disks.outputs.any_changed != 'true' || github.event.inputs.regenerate-disks != 'true'}}
        run: |
-          output=$(gcloud compute images list --filter="zebrad-cache" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
-          echo "Disk: $output"
-          echo "Description: $(gcloud compute images describe $output --format='value(DESCRIPTION)')"
-          echo "::set-output name=sha::$output"
+          GCP_STATE_DISK=$(gcloud compute images list --filter="name~zebrad-cache AND name~-checkpoint" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
+          echo "Disk: $GCP_STATE_DISK"
+          echo "Description: $(gcloud compute images describe $GCP_STATE_DISK --format='value(DESCRIPTION)')"
+
+          echo "CACHED_DISK_NAME=$GCP_STATE_DISK" >> $GITHUB_ENV

      # Creates Compute Engine virtual machine instance w/ disks
      - name: Create GCP compute instance
@ -416,8 +441,8 @@ jobs:
          gcloud compute instances create-with-container "sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
          --boot-disk-size 100GB \
          --boot-disk-type pd-ssd \
-          --create-disk=image=${{ env.CACHED_DISK_NAME }},name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy,size=100GB,type=pd-ssd \
-          --container-mount-disk=mount-path='/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy \
+          --create-disk=image=${{ env.CACHED_DISK_NAME }},name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint,size=100GB,type=pd-ssd \
+          --container-mount-disk=mount-path='/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint \
          --container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
          --container-restart-policy=never \
          --container-stdin \
@ -427,18 +452,16 @@ jobs:
          --container-arg="--locked" \
          --container-arg="--release" \
          --container-arg="--features" \
-          --container-arg="enable-sentry,test_sync_past_mandatory_checkpoint_${{ env.lower_net_name }}" \
+          --container-arg="enable-sentry,test_sync_past_mandatory_checkpoint_${{ env.NETWORK }}" \
          --container-arg="--manifest-path" \
          --container-arg="zebrad/Cargo.toml" \
-          --container-arg="sync_past_mandatory_checkpoint_${{ env.lower_net_name }}" \
+          --container-arg="sync_past_mandatory_checkpoint_${{ env.NETWORK }}" \
          --container-env=ZEBRA_SKIP_IPV6_TESTS=1 \
          --machine-type ${{ env.MACHINE_TYPE }} \
          --scopes cloud-platform \
          --metadata=google-monitoring-enabled=true,google-logging-enabled=true \
          --tags zebrad \
          --zone "${{ env.ZONE }}"
-        env:
-          CACHED_DISK_NAME: ${{ steps.get-disk-name.outputs.sha }}

      # TODO: this approach is very messy, but getting the just-created container name is very error-prone and GCP doesn't have a workaround for this without requiring a TTY
      # This TODO relates to the following issues:
@ -450,14 +473,16 @@ jobs:
        run: |
          INSTANCE_ID=$(gcloud compute instances describe sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
          echo "Using instance: $INSTANCE_ID"
+
+          CONTAINER_NAME=""
          while [[ ${CONTAINER_NAME} != *"sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}"* ]]; do
              CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
              echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}"
              sleep 10
          done
-          CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
-          echo "::set-output name=zebra_container::$CONTAINER_NAME"
-          sleep 60
+
+          echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
+          echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV

      - name: Sync past mandatory checkpoint
        id: sync-past-checkpoint
@ -467,7 +492,7 @@ jobs:
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
-          --command="docker logs --follow ${{ env.ZEBRA_CONTAINER }}"
+          --command="docker logs --follow ${{ env.CONTAINER_NAME }}"

          EXIT_CODE=$(\
          gcloud compute ssh \
@ -475,11 +500,9 @@ jobs:
          --zone ${{ env.ZONE }} \
          --quiet \
          --ssh-flag="-o ServerAliveInterval=5" \
-          --command="docker wait ${{ env.ZEBRA_CONTAINER }}")
+          --command="docker wait ${{ env.CONTAINER_NAME }}")

          exit ${EXIT_CODE}
-        env:
-          ZEBRA_CONTAINER: ${{ steps.get-container-name.outputs.zebra_container }}

      - name: Delete test instance
        # Do not delete the instance if the sync times out in GitHub
--- a/zebra-state/src/constants.rs
+++ b/zebra-state/src/constants.rs
@ -12,7 +12,7 @@ pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY;
 ///
 /// For the best chain, coinbase spends are only allowed from blocks at or below
 /// the finalized tip. For other chains, coinbase spends can use outputs from
-/// early non-finalized blocks, or finalized blocks. But if that chain becomes
+/// early non-finalized blocks or finalized blocks. But if that chain becomes
 /// the best chain, all non-finalized blocks past the [`MAX_BLOCK_REORG_HEIGHT`]
 /// will be finalized. This includes all mature coinbase outputs.
 pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1;