refactor(ci): use distinctive names for cached state disks (#4073)

* fix(ci): correctly use lowered network caps

In the Test workflow we were using a different approach than the one being used in the Full sync test.

Also, in the Full sync test the variable was LOWER_NET_NAME, but NETWORK was being used in the disk name, with caps.

* imp(ci): get state version from local constants.rs

* imp(ci): use the same get name approach

* fix(ci): use the correct name for state version variable

* imp(ci)!: use different disk names for cached states

Disk states synced to Canopy and disk states synced to the chain tip should have different names, so that current and upcoming tests can correctly reference the disk they need.

* imp(ci): test-stateful-sync no longer depends on regenerate-stateful-disks

* Apply suggestions from code review

Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com>

* fix(ci): use a better name for network string conversion

* Revert "Apply suggestions from code review"

This reverts commit cbbfaf4e9c507c424db9ef3d9d31f6a52819b0bf.

* fix: do not get log information if sync was skipped

* fix(ci): do not lower the variable name

* fix(ci): use the same lowering case for network everywhere

* test: more .dockerignore conditions

* fix: use the right approach to lower caps

* remove extra .dockerignore

* trigger a change for stateful disk regeneration

* imp(ci): use `checkpoint` as the disk reference

* revert wrong delete

* fix(ci): add INSTANCE_ID and correct logging message

* imp(ci): add `v` prefix to state version number

* fix(ci): remove typo from logging message to get the height

Co-authored-by: Deirdre Connolly <durumcrustulum@gmail.com>
This commit is contained in:
Gustavo Valverde 2022-04-12 01:34:15 -04:00 committed by GitHub
parent 831a2009bd
commit 47c1c01fcf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 78 additions and 51 deletions

View File

@ -143,6 +143,10 @@ jobs:
contents: 'read'
id-token: 'write'
steps:
- uses: actions/checkout@v3.0.0
with:
persist-credentials: false
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
with:
@ -150,7 +154,9 @@ jobs:
- name: Downcase network name for disks
run: |
echo LOWER_NET_NAME="${{ github.event.inputs.network || env.NETWORK }}" | awk '{print tolower($0)}' >> $GITHUB_ENV
NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
# Setup gcloud CLI
- name: Authenticate to Google Cloud
id: auth
@ -203,20 +209,9 @@ jobs:
sleep 10
done
echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV
- name: Get state version from logs
run: |
STATE_VERSION=""
while [[ ${STATE_VERSION} == "" ]]; do
STATE_VERSION=$(gcloud logging read --format='value(jsonPayload.MESSAGE)' '(resource.labels.instance_id="${{ env.INSTANCE_ID }}" AND jsonPayload.message=~".+Opened Zebra state cache.+v[0-9]+.+")' | grep -oE "v[0-9]+" || [[ $? == 1 ]] )
echo "STATE_VERSION: $STATE_VERSION"
sleep 10
done
echo "STATE_VERSION=$STATE_VERSION" >> $GITHUB_ENV
- name: Full sync
id: full-sync
run: |
@ -237,6 +232,15 @@ jobs:
exit ${EXIT_CODE}
- name: Get state version from constants.rs
run: |
STATE_VERSION=""
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
- name: Get sync height from logs
run: |
SYNC_HEIGHT=""
@ -253,7 +257,7 @@ jobs:
# Force the image creation as the disk is still attached, even though it is not being used by the container
- name: Create image from state disk
run: |
gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ env.SYNC_HEIGHT }} \
gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ env.SYNC_HEIGHT }}-tip \
--force \
--source-disk=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--source-disk-zone=${{ env.ZONE }} \

View File

@ -226,6 +226,7 @@ jobs:
persist-credentials: false
fetch-depth: '2'
# TODO move the `changed-files-specific` step to the build job for a better dependency tree
# Only run this job if the database format version has (likely) changed.
#
# If we have accidentally changed the format, but not changed the version,
@ -247,7 +248,8 @@ jobs:
- name: Downcase network name for disks
run: |
echo LOWER_NET_NAME="${{ github.event.inputs.network || env.NETWORK }}" | awk '{print tolower($0)}' >> $GITHUB_ENV
NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
# Setup gcloud CLI
- name: Authenticate to Google Cloud
@ -277,8 +279,8 @@ jobs:
gcloud compute instances create-with-container "regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
--boot-disk-size 100GB \
--boot-disk-type pd-ssd \
--create-disk name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy",size=100GB,type=pd-ssd \
--container-mount-disk mount-path='/zebrad-cache',name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy" \
--create-disk name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint",size=100GB,type=pd-ssd \
--container-mount-disk mount-path='/zebrad-cache',name="zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint" \
--container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
--container-restart-policy=never \
--container-stdin \
@ -288,10 +290,10 @@ jobs:
--container-arg="--locked" \
--container-arg="--release" \
--container-arg="--features" \
--container-arg="enable-sentry,test_sync_to_mandatory_checkpoint_${{ env.lower_net_name }}" \
--container-arg="enable-sentry,test_sync_to_mandatory_checkpoint_${{ env.NETWORK }}" \
--container-arg="--manifest-path" \
--container-arg="zebrad/Cargo.toml" \
--container-arg="sync_to_mandatory_checkpoint_${{ env.lower_net_name }}" \
--container-arg="sync_to_mandatory_checkpoint_${{ env.NETWORK }}" \
--container-env=ZEBRA_SKIP_IPV6_TESTS=1 \
--machine-type ${{ env.MACHINE_TYPE }} \
--scopes cloud-platform \
@ -309,14 +311,16 @@ jobs:
run: |
INSTANCE_ID=$(gcloud compute instances describe regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
echo "Using instance: $INSTANCE_ID"
CONTAINER_NAME=""
while [[ ${CONTAINER_NAME} != *"regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}"* ]]; do
CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}"
sleep 10
done
CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-regenerate-disk-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
echo "::set-output name=zebra_container::$CONTAINER_NAME"
sleep 60
echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV
- name: Regenerate stateful disks
id: sync-to-checkpoint
@ -327,7 +331,7 @@ jobs:
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command="docker logs --follow ${{ env.ZEBRA_CONTAINER }}"
--command="docker logs --follow ${{ env.CONTAINER_NAME }}"
EXIT_CODE=$(\
gcloud compute ssh \
@ -335,21 +339,42 @@ jobs:
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command="docker wait ${{ env.ZEBRA_CONTAINER }}")
--command="docker wait ${{ env.CONTAINER_NAME }}")
exit ${EXIT_CODE}
env:
ZEBRA_CONTAINER: ${{ steps.get-container-name.outputs.zebra_container }}
# Create image from disk that will be used to sync past mandatory checkpoint test
- name: Get state version from constants.rs
if: ${{ steps.sync-to-checkpoint.outcome == 'success' }}
run: |
STATE_VERSION=""
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
- name: Get sync height from logs
if: ${{ steps.sync-to-checkpoint.outcome == 'success' }}
run: |
SYNC_HEIGHT=""
while [[ ${SYNC_HEIGHT} == "" ]]; do
SYNC_HEIGHT=$(gcloud logging read --format='value(jsonPayload.MESSAGE)' --order="desc" --limit=1 '(resource.labels.instance_id="${{ env.INSTANCE_ID }}" AND jsonPayload.message=~".+flushing database to disk height.+Height\([0-9]+\).+")' | grep -oE 'Height\([0-9]+\)' | grep -oE '[0-9]+' || [[ $? == 1 ]] )
echo "SYNC_HEIGHT: $SYNC_HEIGHT"
sleep 10
done
echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV
# Create image from disk that will be used for the sync past mandatory checkpoint test
# Force the image creation as the disk is still attached, even though it is not being used by the container
- name: Create image from state disk
# Only run if the earlier step succeeds
if: steps.sync-to-checkpoint.outcome == 'success'
if: ${{ steps.sync-to-checkpoint.outcome == 'success' }}
run: |
gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy \
gcloud compute images create zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ env.SYNC_HEIGHT }}-checkpoint \
--force \
--source-disk=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy \
--source-disk=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint \
--source-disk-zone=${{ env.ZONE }} \
--storage-location=us \
--description="Created from head branch ${{ env.GITHUB_HEAD_REF_SLUG_URL }} targeting ${{ env.GITHUB_BASE_REF_SLUG }} from PR ${{ env.GITHUB_REF_SLUG_URL }} with commit ${{ env.GITHUB_EVENT_PULL_REQUEST_HEAD_SHA }}"
@ -377,7 +402,8 @@ jobs:
- name: Downcase network name for disks
run: |
echo LOWER_NET_NAME="${{ github.event.inputs.network || env.NETWORK }}" | awk '{print tolower($0)}' >> $GITHUB_ENV
NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
# Setup gcloud CLI
- name: Authenticate to Google Cloud
@ -391,7 +417,6 @@ jobs:
# Check if our destination compute instance exists and delete it
- name: Delete existing instance with same SHA
id: delete-old-instance
if: ${{ needs.regenerate-stateful-disks.outputs.any_changed != 'true' || github.event.inputs.regenerate-disks != 'true'}}
run: |
INSTANCE=$(gcloud compute instances list --filter=sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
@ -402,12 +427,12 @@ jobs:
- name: Get disk state name from gcloud
id: get-disk-name
if: ${{ needs.regenerate-stateful-disks.outputs.any_changed != 'true' || github.event.inputs.regenerate-disks != 'true'}}
run: |
output=$(gcloud compute images list --filter="zebrad-cache" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
echo "Disk: $output"
echo "Description: $(gcloud compute images describe $output --format='value(DESCRIPTION)')"
echo "::set-output name=sha::$output"
GCP_STATE_DISK=$(gcloud compute images list --filter="name~zebrad-cache AND name~-checkpoint" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
echo "Disk: $GCP_STATE_DISK"
echo "Description: $(gcloud compute images describe $GCP_STATE_DISK --format='value(DESCRIPTION)')"
echo "CACHED_DISK_NAME=$GCP_STATE_DISK" >> $GITHUB_ENV
# Creates Compute Engine virtual machine instance w/ disks
- name: Create GCP compute instance
@ -416,8 +441,8 @@ jobs:
gcloud compute instances create-with-container "sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
--boot-disk-size 100GB \
--boot-disk-type pd-ssd \
--create-disk=image=${{ env.CACHED_DISK_NAME }},name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy,size=100GB,type=pd-ssd \
--container-mount-disk=mount-path='/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.lower_net_name }}-canopy \
--create-disk=image=${{ env.CACHED_DISK_NAME }},name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint,size=100GB,type=pd-ssd \
--container-mount-disk=mount-path='/zebrad-cache',name=zebrad-cache-${{ env.GITHUB_SHA_SHORT }}-${{ env.NETWORK }}-checkpoint \
--container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
--container-restart-policy=never \
--container-stdin \
@ -427,18 +452,16 @@ jobs:
--container-arg="--locked" \
--container-arg="--release" \
--container-arg="--features" \
--container-arg="enable-sentry,test_sync_past_mandatory_checkpoint_${{ env.lower_net_name }}" \
--container-arg="enable-sentry,test_sync_past_mandatory_checkpoint_${{ env.NETWORK }}" \
--container-arg="--manifest-path" \
--container-arg="zebrad/Cargo.toml" \
--container-arg="sync_past_mandatory_checkpoint_${{ env.lower_net_name }}" \
--container-arg="sync_past_mandatory_checkpoint_${{ env.NETWORK }}" \
--container-env=ZEBRA_SKIP_IPV6_TESTS=1 \
--machine-type ${{ env.MACHINE_TYPE }} \
--scopes cloud-platform \
--metadata=google-monitoring-enabled=true,google-logging-enabled=true \
--tags zebrad \
--zone "${{ env.ZONE }}"
env:
CACHED_DISK_NAME: ${{ steps.get-disk-name.outputs.sha }}
# TODO: this approach is very messy, but getting the just created container name is very error-prone and GCP doesn't have a workaround for this without requiring a TTY
# This TODO relates to the following issues:
@ -450,14 +473,16 @@ jobs:
run: |
INSTANCE_ID=$(gcloud compute instances describe sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
echo "Using instance: $INSTANCE_ID"
CONTAINER_NAME=""
while [[ ${CONTAINER_NAME} != *"sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}"* ]]; do
CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}"
sleep 10
done
CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-sync-checkpoint-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
echo "::set-output name=zebra_container::$CONTAINER_NAME"
sleep 60
echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV
- name: Sync past mandatory checkpoint
id: sync-past-checkpoint
@ -467,7 +492,7 @@ jobs:
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command="docker logs --follow ${{ env.ZEBRA_CONTAINER }}"
--command="docker logs --follow ${{ env.CONTAINER_NAME }}"
EXIT_CODE=$(\
gcloud compute ssh \
@ -475,11 +500,9 @@ jobs:
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command="docker wait ${{ env.ZEBRA_CONTAINER }}")
--command="docker wait ${{ env.CONTAINER_NAME }}")
exit ${EXIT_CODE}
env:
ZEBRA_CONTAINER: ${{ steps.get-container-name.outputs.zebra_container }}
- name: Delete test instance
# Do not delete the instance if the sync times out in GitHub

View File

@ -12,7 +12,7 @@ pub use zebra_chain::transparent::MIN_TRANSPARENT_COINBASE_MATURITY;
///
/// For the best chain, coinbase spends are only allowed from blocks at or below
/// the finalized tip. For other chains, coinbase spends can use outputs from
/// early non-finalized blocks, or finalized blocks. But if that chain becomes
/// early non-finalized blocks or finalized blocks. But if that chain becomes
/// the best chain, all non-finalized blocks past the [`MAX_BLOCK_REORG_HEIGHT`]
/// will be finalized. This includes all mature coinbase outputs.
pub const MAX_BLOCK_REORG_HEIGHT: u32 = MIN_TRANSPARENT_COINBASE_MATURITY - 1;