From 47c1c01fcf48333cf9fc1753f50588f72608651c Mon Sep 17 00:00:00 2001 From: Gustavo Valverde Date: Tue, 12 Apr 2022 01:34:15 -0400 Subject: [PATCH] refactor(ci): use distinctive names for cached state disks (#4073) * fix(ci): correctly use lowered network caps In the Test workflow we were using a different approach than the one being used in the Full sync test. Also, in the Full sync test the variable was LOWER_NET_NAME, but NETWORK was being used in the disk name, with caps. * imp(ci): get state version from local constants.rs * imp(ci): use the same get name approach * fix(ci): use the correct name for state version variable * imp(ci)!: use different disk names for cached states Disk states synced to canopy and synced to the chain tip should have different names to reference correctly on actual and coming tests the needed disk. * imp(ci): test-stateful-sync no longer depends on regenerate-stateful-disks * Apply suggestions from code review Co-authored-by: Deirdre Connolly * fix(ci): use a better name for network string conversion * Revert "Apply suggestions from code review" This reverts commit cbbfaf4e9c507c424db9ef3d9d31f6a52819b0bf. * fix: do not get log information if sync was skipped * fix(ci): do not lower the variable name * fix(ci): use the same lowering case for network everywhere * test: more .dockerignore conditions * fix: use the right approach to lower caps * remove extra .dockerignore * trigger a change for stateful disk regeneration * imp(ci): use `checkpoint` as the disk reference * revert wrong delete * fix(ci): add INSTANCE_ID and correct logging message * imp(ci): add `v` prefix to state version number * fix(ci): remove typo from logging message to get the height Co-authored-by: Deirdre Connolly --- docker/.dockerignore => .dockerignore | 0 .github/workflows/test-full-sync.yml | 32 +++++---- .github/workflows/test.yml | 95 +++++++++++++++++---------- zebra-state/src/constants.rs | 2 +- 4 files changed, 78 insertions(+), 51 deletions(-) rename docker/.dockerignore => .dockerignore (100%) diff --git a/docker/.dockerignore b/.dockerignore similarity index 100% rename from docker/.dockerignore rename to .dockerignore diff --git a/.github/workflows/test-full-sync.yml b/.github/workflows/test-full-sync.yml index 8c301c8c..5aeeeb2f 100644 --- a/.github/workflows/test-full-sync.yml +++ b/.github/workflows/test-full-sync.yml @@ -143,6 +143,10 @@ jobs: contents: 'read' id-token: 'write' steps: + - uses: actions/checkout@v3.0.0 + with: + persist-credentials: false + - name: Inject slug/short variables uses: rlespinasse/github-slug-action@v4 with: @@ -150,7 +154,9 @@ jobs: - name: Downcase network name for disks run: | - echo LOWER_NET_NAME="${{ github.event.inputs.network || env.NETWORK }}" | awk '{print tolower($0)}' >> $GITHUB_ENV + NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }} + echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV + # Setup gcloud CLI - name: Authenticate to Google Cloud id: auth @@ -203,20 +209,9 @@ jobs: sleep 10 done + echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV - - name: Get state version from logs - run: | - STATE_VERSION="" - - while [[ ${STATE_VERSION} == "" ]]; do - STATE_VERSION=$(gcloud logging read --format='value(jsonPayload.MESSAGE)' '(resource.labels.instance_id="${{ env.INSTANCE_ID }}" AND jsonPayload.message=~".+Opened Zebra state cache.+v[0-9]+.+")' | grep -oE "v[0-9]+" || [[ $? 
== 1 ]] ) - echo "STATE_VERSION: $STATE_VERSION" - sleep 10 - done - - echo "STATE_VERSION=$STATE_VERSION" >> $GITHUB_ENV - - name: Full sync id: full-sync run: | @@ -237,6 +232,15 @@ jobs: exit ${EXIT_CODE} + - name: Get state version from constants.rs + run: | + STATE_VERSION="" + + LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1) + echo "STATE_VERSION: $LOCAL_STATE_VERSION" + + echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV + - name: Get sync height from logs run: | SYNC_HEIGHT="" @@ -253,7 +257,7 @@ jobs: # Force the image creation as the disk is still attached, even though is not being used by the container - name: Create image from state disk run: | - gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ env.SYNC_HEIGHT }} \ + gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ env.SYNC_HEIGHT }}-tip \ --force \ --source-disk=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ --source-disk-zone=${{ env.ZONE }} \ diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index baf23a3f..283cc249 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -226,6 +226,7 @@ jobs: persist-credentials: false fetch-depth: '2' + # TODO move the `changed-files-specific` step to the build job for a better dependency tree # Only run this job if the database format version has (likely) changed. # # If we have accidentally changed the format, but not changed the version, @@ -247,7 +248,8 @@ jobs: - name: Downcase network name for disks run: | - echo LOWER_NET_NAME="${{ github.event.inputs.network || env.NETWORK }}" | awk '{print tolower($0)}' >> $GITHUB_ENV + NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }} + echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV # Setup gcloud CLI - name: Authenticate to Google Cloud @@ -277,8 +279,8 @@ jobs: gcloud compute instances create-with-container "regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ --boot-disk-size 100GB \ --boot-disk-type pd-ssd \ - --create-disk name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy",size=100GB,type=pd-ssd \ - --container-mount-disk mount-path='/zebrad-cache',name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy" \ + --create-disk name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint",size=100GB,type=pd-ssd \ + --container-mount-disk mount-path='/zebrad-cache',name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint" \ --container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ --container-restart-policy=never \ --container-stdin \ @@ -288,10 +290,10 @@ jobs: --container-arg="--locked" \ --container-arg="--release" \ --container-arg="--features" \ - --container-arg="enable-sentry,test_sync_to_mandatory_checkpoint_${{ env.lower_net_name }}" \ + --container-arg="enable-sentry,test_sync_to_mandatory_checkpoint_${{ env.NETWORK }}" \ --container-arg="--manifest-path" \ --container-arg="zebrad/Cargo.toml" \ - --container-arg="sync_to_mandatory_checkpoint_${{ env.lower_net_name }}" \ + --container-arg="sync_to_mandatory_checkpoint_${{ env.NETWORK }}" \ --container-env=ZEBRA_SKIP_IPV6_TESTS=1 \ --machine-type ${{ env.MACHINE_TYPE }} \ --scopes cloud-platform \ @@ -309,14 +311,16 @@ jobs: 
run: | INSTANCE_ID=$(gcloud compute instances describe regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)') echo "Using instance: $INSTANCE_ID" + + CONTAINER_NAME="" while [[ ${CONTAINER_NAME} != *"regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}"* ]]; do CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.") echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}" sleep 10 done - CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.") - echo "::set-output name=zebra_container::$CONTAINER_NAME" - sleep 60 + + echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV + echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV - name: Regenerate stateful disks id: sync-to-checkpoint @@ -327,7 +331,7 @@ jobs: --zone ${{ env.ZONE }} \ --quiet \ --ssh-flag="-o ServerAliveInterval=5" \ - --command="docker logs --follow ${{ env.ZEBRA_CONTAINER }}" + --command="docker logs --follow ${{ env.CONTAINER_NAME }}" EXIT_CODE=$(\ gcloud compute ssh \ @@ -335,21 +339,42 @@ jobs: --zone ${{ env.ZONE }} \ --quiet \ --ssh-flag="-o ServerAliveInterval=5" \ - --command="docker wait ${{ env.ZEBRA_CONTAINER }}") + --command="docker wait ${{ env.CONTAINER_NAME }}") exit ${EXIT_CODE} - env: - ZEBRA_CONTAINER: ${{ steps.get-container-name.outputs.zebra_container }} - # Create image from disk that will be used to sync past mandatory checkpoint test + - name: Get state version from constants.rs + if: ${{ steps.sync-to-checkpoint.outcome == 'success' }} + run: | + STATE_VERSION="" + + LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1) + echo "STATE_VERSION: $LOCAL_STATE_VERSION" + + echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV + + - name: Get sync height from logs + if: ${{ steps.sync-to-checkpoint.outcome == 'success' }} + run: | + SYNC_HEIGHT="" + + while [[ ${SYNC_HEIGHT} == "" ]]; do + SYNC_HEIGHT=$(gcloud logging read --format='value(jsonPayload.MESSAGE)' --order="desc" --limit=1 '(resource.labels.instance_id="${{ env.INSTANCE_ID }}" AND jsonPayload.message=~".+flushing database to disk height.+Height\([0-9]+\).+")' | grep -oE 'Height\([0-9]+\)' | grep -oE '[0-9]+' || [[ $? 
== 1 ]] ) + echo "SYNC_HEIGHT: $SYNC_HEIGHT" + sleep 10 + done + + echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV + + # Create image from disk that will be used for the sync past mandatory checkpoint test # Force the image creation as the disk is still attached even though is not being used by the container - name: Create image from state disk # Only run if the earlier step succeeds - if: steps.sync-to-checkpoint.outcome == 'success' + if: ${{ steps.sync-to-checkpoint.outcome == 'success' }} run: | - gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy \ + gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ env.SYNC_HEIGHT }}-checkpoint \ --force \ - --source-disk=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy \ + --source-disk=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint \ --source-disk-zone=${{ env.ZONE }} \ --storage-location=us \ --description="Created from head branch ${{ env.GITHUB_HEAD_REF_SLUG_URL }} targeting ${{ env.GITHUB_BASE_REF_SLUG }} from PR ${{ env.GITHUB_REF_SLUG_URL }} with commit ${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA }}" @@ -377,7 +402,8 @@ jobs: - name: Downcase network name for disks run: | - echo LOWER_NET_NAME="${{ github.event.inputs.network || env.NETWORK }}" | awk '{print tolower($0)}' >> $GITHUB_ENV + NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }} + echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV # Setup gcloud CLI - name: Authenticate to Google Cloud @@ -391,7 +417,6 @@ jobs: # Check if our destination compute instance exists and delete it - name: Delete existing instance with same SHA id: delete-old-instance - if: ${{ needs.regenerate-stateful-disks.outputs.any_changed != 'true' || github.event.inputs.regenerate-disks != 'true'}} run: | INSTANCE=$(gcloud compute instances list --filter=sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)') if [ -z "${INSTANCE}" ]; then @@ -402,12 +427,12 @@ jobs: - name: Get disk state name from gcloud id: get-disk-name - if: ${{ needs.regenerate-stateful-disks.outputs.any_changed != 'true' || github.event.inputs.regenerate-disks != 'true'}} run: | - output=$(gcloud compute images list --filter="zebrad-cache" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) - echo "Disk: $output" - echo "Description: $(gcloud compute images describe $output --format='value(DESCRIPTION)')" - echo "::set-output name=sha::$output" + GCP_STATE_DISK=$(gcloud compute images list --filter="name~zebrad-cache AND name~-checkpoint" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1) + echo "Disk: $GCP_STATE_DISK" + echo "Description: $(gcloud compute images describe $GCP_STATE_DISK --format='value(DESCRIPTION)')" + + echo "CACHED_DISK_NAME=$GCP_STATE_DISK" >> $GITHUB_ENV # Creates Compute Engine virtual machine instance w/ disks - name: Create GCP compute instance @@ -416,8 +441,8 @@ jobs: gcloud compute instances create-with-container "sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \ --boot-disk-size 100GB \ --boot-disk-type pd-ssd \ - --create-disk=image=${{ env.CACHED_DISK_NAME }},name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy,size=100GB,type=pd-ssd \ - --container-mount-disk=mount-path='/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy \ + --create-disk=image=${{ env.CACHED_DISK_NAME 
}},name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint,size=100GB,type=pd-ssd \ + --container-mount-disk=mount-path='/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint \ --container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ --container-restart-policy=never \ --container-stdin \ @@ -427,18 +452,16 @@ jobs: --container-arg="--locked" \ --container-arg="--release" \ --container-arg="--features" \ - --container-arg="enable-sentry,test_sync_past_mandatory_checkpoint_${{ env.lower_net_name }}" \ + --container-arg="enable-sentry,test_sync_past_mandatory_checkpoint_${{ env.NETWORK }}" \ --container-arg="--manifest-path" \ --container-arg="zebrad/Cargo.toml" \ - --container-arg="sync_past_mandatory_checkpoint_${{ env.lower_net_name }}" \ + --container-arg="sync_past_mandatory_checkpoint_${{ env.NETWORK }}" \ --container-env=ZEBRA_SKIP_IPV6_TESTS=1 \ --machine-type ${{ env.MACHINE_TYPE }} \ --scopes cloud-platform \ --metadata=google-monitoring-enabled=true,google-logging-enabled=true \ --tags zebrad \ --zone "${{ env.ZONE }}" - env: - CACHED_DISK_NAME: ${{ steps.get-disk-name.outputs.sha }} # TODO: this approach is very mesy, but getting the just created container name is very error prone and GCP doesn't have a workaround for this without requiring a TTY # This TODO relates to the following issues: @@ -450,14 +473,16 @@ jobs: run: | INSTANCE_ID=$(gcloud compute instances describe sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)') echo "Using instance: $INSTANCE_ID" + + CONTAINER_NAME="" while [[ ${CONTAINER_NAME} != *"sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}"* ]]; do CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.") echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}" sleep 10 done - CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' 
| tr -d "'.") - echo "::set-output name=zebra_container::$CONTAINER_NAME" - sleep 60 + + echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV + echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV - name: Sync past mandatory checkpoint id: sync-past-checkpoint @@ -467,7 +492,7 @@ jobs: --zone ${{ env.ZONE }} \ --quiet \ --ssh-flag="-o ServerAliveInterval=5" \ - --command="docker logs --follow ${{ env.ZEBRA_CONTAINER }}" + --command="docker logs --follow ${{ env.CONTAINER_NAME }}" EXIT_CODE=$(\ gcloud compute ssh \ @@ -475,11 +500,9 @@ jobs: --zone ${{ env.ZONE }} \ --quiet \ --ssh-flag="-o ServerAliveInterval=5" \ - --command="docker wait ${{ env.ZEBRA_CONTAINER }}") + --command="docker wait ${{ env.CONTAINER_NAME }}") exit ${EXIT_CODE} - env: - ZEBRA_CONTAINER: ${{ steps.get-container-name.outputs.zebra_container }} - name: Delete test instance # Do not delete the instance if the sync timeouts in GitHub diff --git a/zebra-state/src/constants.rs b/zebra-state/src/constants.rs index 5a44e0bb..4144d25b 100644 --- a/zebra-state/src/constants.rs +++ b/zebra-state/src/constants.rs @@ -12,7 +12,7 @@ pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY; /// /// For the best chain, coinbase spends are only allowed from blocks at or below /// the finalized tip. For other chains, coinbase spends can use outputs from -/// early non-finalized blocks, or finalized blocks. But if that chain becomes +/// early non-finalized blocks or finalized blocks. But if that chain becomes /// the best chain, all non-finalized blocks past the [`MAX_BLOCK_REORG_HEIGHT`] /// will be finalized. This includes all mature coinbase outputs. pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1;
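
Note on the lowercasing step (illustrative, not part of the patch): both workflows now derive the lowercase network name with Bash parameter expansion and persist it through $GITHUB_ENV, instead of piping `echo LOWER_NET_NAME=...` through `awk '{print tolower($0)}'`, which lowercased the variable name along with its value (hence the old `env.lower_net_name` references). A minimal sketch of the new pattern, assuming a Bash 4+ runner shell and a hypothetical input value:

    #!/usr/bin/env bash
    # Hypothetical input; in the workflows this comes from
    # `${{ github.event.inputs.network || env.NETWORK }}`.
    NETWORK_CAPS="Mainnet"

    # `${var,,}` lowercases the whole value (Bash 4+ parameter expansion).
    NETWORK="${NETWORK_CAPS,,}"
    echo "NETWORK is ${NETWORK}"   # -> NETWORK is mainnet

    # Inside a workflow step the value is exported to later steps by
    # appending to the file that $GITHUB_ENV points to:
    # echo "NETWORK=${NETWORK}" >> "$GITHUB_ENV"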
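
Note on the new cached-state image names (a sketch under the patch's naming scheme, with placeholder values): images are now named zebrad-cache-<short-sha>-v<state-version>-<network>-<height> with a -checkpoint or -tip suffix, and the state version is read from zebra-state/src/constants.rs rather than from runtime logs. The grep pipeline below mirrors the "Get state version from constants.rs" steps; the other variables are stand-ins for values the workflows get from github-slug-action, the lowercased NETWORK, and the "Get sync height from logs" step.

    #!/usr/bin/env bash
    # Extract the numeric DATABASE_FORMAT_VERSION from the checked-out sources,
    # keeping only the last number on the matched line (the version itself).
    LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" \
        "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)

    # Placeholder values for illustration only.
    GITHUB_SHA_SHORT="abc1234"
    NETWORK="mainnet"
    SYNC_HEIGHT="1046400"

    # Checkpoint image name used by test.yml; test-full-sync.yml uses "-tip" instead.
    echo "zebrad-cache-${GITHUB_SHA_SHORT}-v${LOCAL_STATE_VERSION}-${NETWORK}-${SYNC_HEIGHT}-checkpoint"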