Zebra/.github/workflows/test-full-sync.yml

219 lines
9.1 KiB
YAML

name: Full sync test
on:
workflow_dispatch:
inputs:
network:
default: 'Mainnet'
description: 'Network to deploy: Mainnet or Testnet'
required: true
checkpoint_sync:
default: 'true'
description: 'Configures `zebrad` to use as many checkpoints as possible'
required: true
pull_request:
branches:
- main
paths:
# code and tests (including full sync acceptance test changes)
# TODO: ignore changes in test code that isn't used in the full sync test
- '**/*.rs'
# hard-coded checkpoints
# TODO: ignore changes to proptest seed .txt files
- '**/*.txt'
# dependencies
- '**/Cargo.toml'
- '**/Cargo.lock'
# workflow definitions
- 'docker/**'
- '.github/workflows/test-full-sync.yml'
push:
branches:
- main
paths:
# code and tests (including full sync acceptance test changes)
# TODO: ignore changes in test code that isn't used in the full sync test
- '**/*.rs'
# hard-coded checkpoints
# TODO: ignore changes to proptest seed .txt files
- '**/*.txt'
# dependencies
- '**/Cargo.toml'
- '**/Cargo.lock'
# workflow definitions
- 'docker/**'
- '.github/workflows/test-full-sync.yml'
- '.github/workflows/docker-image-build.yml'
env:
NETWORK: Mainnet
PROJECT_ID: zealous-zebra
IMAGE_NAME: zebrad-test
GAR_BASE: us-docker.pkg.dev/zealous-zebra/zebra
REGION: us-central1
ZONE: us-central1-a
MACHINE_TYPE: c2d-standard-16
jobs:
build:
# TODO add `startsWith(github.head_ref, 'mergify/merge-queue/')` to the condition to
# only run on Mergify head branches, and on manual dispatch:
# https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#running-your-workflow-based-on-the-head-or-base-branch-of-a-pull-request-1
if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
uses: ./.github/workflows/docker-image-build.yml
with:
dockerfile_path: ./docker/Dockerfile
dockerfile_target: tester
image_name: zebrad-test
network: Mainnet
checkpoint_sync: true
rust_backtrace: full
rust_lib_backtrace: full
colorbt_show_hidden: '1'
zebra_skip_ipv6_tests: '1'
rust_log: info
# Test that Zebra can run a full mainnet sync after a PR is approved
test-full-sync:
name: Test full Mainnet sync
runs-on: ubuntu-latest
needs: [build]
permissions:
contents: 'read'
id-token: 'write'
steps:
- uses: actions/checkout@v3.0.2
with:
persist-credentials: false
- name: Inject slug/short variables
uses: rlespinasse/github-slug-action@v4
with:
short-length: 7
- name: Downcase network name for disks
run: |
NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }}
echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV
# Setup gcloud CLI
- name: Authenticate to Google Cloud
id: auth
uses: google-github-actions/auth@v0.7.1
with:
workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
token_format: 'access_token'
# Check if our destination compute instance exists and delete it
- name: Delete existing instance with same SHA
run: |
INSTANCE=$(gcloud compute instances list --filter=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
if [ -z "${INSTANCE}" ]; then
echo "No instance to delete"
else
gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
fi
# Creates Compute Engine virtual machine instance w/ disks
- name: Create GCP compute instance
run: |
gcloud compute instances create-with-container "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
--boot-disk-size 100GB \
--boot-disk-type pd-ssd \
--container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
--container-restart-policy=never \
--container-stdin \
--container-tty \
--container-env=TEST_FULL_SYNC=1,ZEBRA_FORCE_USE_COLOR=1,FULL_SYNC_MAINNET_TIMEOUT_MINUTES=600 \
--machine-type ${{ env.MACHINE_TYPE }} \
--scopes cloud-platform \
--metadata=google-monitoring-enabled=true,google-logging-enabled=true \
--tags zebrad \
--zone "${{ env.ZONE }}"
# TODO: this approach is very messy, but getting the just created container name is very error prone and GCP doesn't have a workaround for this without requiring a TTY
# This TODO relates to the following issues:
# https://github.com/actions/runner/issues/241
# https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
#
# Deploying a zebra container might take more than 30 seconds to completely start, so we're adding a timer at the end
# of this step before starting the following ones
- name: Get container name from logs
run: |
INSTANCE_ID=$(gcloud compute instances describe full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
CONTAINER_NAME=""
while [[ ${CONTAINER_NAME} != *"full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}"* ]]; do
CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 | grep -o '...-full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' | tr -d "'.")
echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}"
sleep 10
done
echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV
echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV
sleep 90
- name: Full sync
id: full-sync
run: |
for RETRY in 1 2 3 4; do
gcloud compute ssh \
full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=15" \
--command="docker logs --follow ${{ env.CONTAINER_NAME }}" \
|| echo "ssh disconnected $RETRY times"
done
EXIT_CODE=$(\
gcloud compute ssh \
full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--zone ${{ env.ZONE }} \
--quiet \
--ssh-flag="-o ServerAliveInterval=5" \
--command="docker wait ${{ env.CONTAINER_NAME }}")
exit ${EXIT_CODE}
- name: Get state version from constants.rs
run: |
STATE_VERSION=""
LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
echo "STATE_VERSION: $LOCAL_STATE_VERSION"
echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV
- name: Get sync height from logs
run: |
SYNC_HEIGHT=""
while [[ ${SYNC_HEIGHT} == "" ]]; do
SYNC_HEIGHT=$(gcloud logging read --format='value(jsonPayload.MESSAGE)' --order="desc" --limit=1 '(resource.labels.instance_id="${{ env.INSTANCE_ID }}" AND jsonPayload.message=~".+finished initial sync to chain tip.+Height\([0-9]+\).+")' | grep -oE 'Height\([0-9]+\)' | grep -oE '[0-9]+' || [[ $? == 1 ]] )
echo "SYNC_HEIGHT: $SYNC_HEIGHT"
sleep 10
done
echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV
# Create image from disk
# Force the image creation as the disk is still attached, even though is not being used by the container
- name: Create image from state disk
run: |
gcloud compute images create zebrad-cache-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-tip \
--force \
--source-disk=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
--source-disk-zone=${{ env.ZONE }} \
--storage-location=us \
--description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}"
- name: Delete test instance
# Do not delete the instance if the sync timeouts in GitHub
if: ${{ steps.full-sync.outcome == 'success' || steps.full-sync.outcome == 'failure' }}
continue-on-error: true
run: |
gcloud compute instances delete "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet