# Builds the `zebrad-test` Docker image, runs a full chain sync inside a
# GCP Compute Engine instance, and saves the resulting state disk as an image.
name: Full sync test

on:
  workflow_dispatch:
    inputs:
      network:
        default: 'Mainnet'
        description: 'Network to deploy: Mainnet or Testnet'
        required: true
      # NOTE(review): this input is not referenced anywhere in this file;
      # the build job below passes a hard-coded `checkpoint_sync: true`.
      checkpoint_sync:
        default: 'true'
        description: 'Configures `zebrad` to use as many checkpoints as possible'
        required: true
  pull_request:
    branches:
      - main
    paths:
      # code and tests (including full sync acceptance test changes)
      # TODO: ignore changes in test code that isn't used in the full sync test
      - '**/*.rs'
      # hard-coded checkpoints
      # TODO: ignore changes to proptest seed .txt files
      - '**/*.txt'
      # dependencies
      - '**/Cargo.toml'
      - '**/Cargo.lock'
      # workflow definitions
      - 'docker/**'
      - '.github/workflows/test-full-sync.yml'
  push:
    branches:
      - main
    paths:
      # code and tests (including full sync acceptance test changes)
      # TODO: ignore changes in test code that isn't used in the full sync test
      - '**/*.rs'
      # hard-coded checkpoints
      # TODO: ignore changes to proptest seed .txt files
      - '**/*.txt'
      # dependencies
      - '**/Cargo.toml'
      - '**/Cargo.lock'
      # workflow definitions
      - 'docker/**'
      - '.github/workflows/test-full-sync.yml'
      - '.github/workflows/docker-image-build.yml'

env:
  NETWORK: Mainnet
  PROJECT_ID: zealous-zebra
  IMAGE_NAME: zebrad-test
  GAR_BASE: us-docker.pkg.dev/zealous-zebra/zebra
  REGION: us-central1
  ZONE: us-central1-a
  MACHINE_TYPE: c2d-standard-16

jobs:
  build:
    # TODO add `startsWith(github.head_ref, 'mergify/merge-queue/')` to the condition to
    # only run on Mergify head branches, and on manual dispatch:
    # https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#running-your-workflow-based-on-the-head-or-base-branch-of-a-pull-request-1
    #
    # Pull request events do not match this condition, so the build (and the
    # sync job that `needs` it) only runs on pushes and manual dispatches.
    if: ${{ github.event_name == 'push' || github.event_name == 'workflow_dispatch' }}
    uses: ./.github/workflows/docker-image-build.yml
    with:
      dockerfile_path: ./docker/Dockerfile
      dockerfile_target: tester
      image_name: zebrad-test
      network: Mainnet
      checkpoint_sync: true
      rust_backtrace: full
      rust_lib_backtrace: full
      colorbt_show_hidden: '1'
      zebra_skip_ipv6_tests: '1'
      rust_log: info

  # Test that Zebra can run a full mainnet sync after a PR is approved
  test-full-sync:
    name: Test full Mainnet sync
    runs-on: ubuntu-latest
    needs: [build]
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3.0.2
        with:
          persist-credentials: false

      # Provides the GITHUB_REF_SLUG_URL and GITHUB_SHA_SHORT variables that
      # the instance, container and image names below are built from.
      - name: Inject slug/short variables
        uses: rlespinasse/github-slug-action@v4
        with:
          short-length: 7

      # Overrides NETWORK with its lowercase form (bash `${var,,}`); the value
      # is later embedded in the cached state image name.
      - name: Downcase network name for disks
        run: |
          NETWORK_CAPS=${{ github.event.inputs.network || env.NETWORK }}
          echo "NETWORK=${NETWORK_CAPS,,}" >> $GITHUB_ENV

      # Setup gcloud CLI
      - name: Authenticate to Google Cloud
        id: auth
        uses: google-github-actions/auth@v0.7.1
        with:
          workload_identity_provider: 'projects/143793276228/locations/global/workloadIdentityPools/github-actions/providers/github-oidc'
          service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com'
          token_format: 'access_token'

      # Check if our destination compute instance exists and delete it
      - name: Delete existing instance with same SHA
        run: |
          INSTANCE=$(gcloud compute instances list --filter=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --format='value(NAME)')
          if [ -z "${INSTANCE}" ]; then
            echo "No instance to delete"
          else
            gcloud compute instances delete "${INSTANCE}" --zone "${{ env.ZONE }}" --delete-disks all --quiet
          fi

      # Creates Compute Engine virtual machine instance w/ disks
      - name: Create GCP compute instance
        run: |
          gcloud compute instances create-with-container "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" \
            --boot-disk-size 100GB \
            --boot-disk-type pd-ssd \
            --container-image ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \
            --container-restart-policy=never \
            --container-stdin \
            --container-tty \
            --container-env=TEST_FULL_SYNC=1,ZEBRA_FORCE_USE_COLOR=1,FULL_SYNC_MAINNET_TIMEOUT_MINUTES=600 \
            --machine-type ${{ env.MACHINE_TYPE }} \
            --scopes cloud-platform \
            --metadata=google-monitoring-enabled=true,google-logging-enabled=true \
            --tags zebrad \
            --zone "${{ env.ZONE }}"

      # TODO: this approach is very messy, but getting the just created container name is very error prone and GCP doesn't have a workaround for this without requiring a TTY
      # This TODO relates to the following issues:
      # https://github.com/actions/runner/issues/241
      # https://www.googlecloudcommunity.com/gc/Infrastructure-Compute-Storage/SSH-into-Compute-Container-not-easily-possible/td-p/170915
      #
      # Deploying a zebra container might take more than 30 seconds to completely start, so we're adding a timer at the end
      # of this step before starting the following ones
      - name: Get container name from logs
        run: |
          INSTANCE_ID=$(gcloud compute instances describe full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} --zone ${{ env.ZONE }} --format='value(id)')
          echo "INSTANCE_ID=$INSTANCE_ID" >> $GITHUB_ENV

          # Poll the cos_system log until a message containing the container
          # name shows up; `tr` strips the quotes and dots that the grep
          # wildcards may have captured around the name.
          CONTAINER_NAME=""
          while [[ ${CONTAINER_NAME} != *"full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}"* ]]; do
            CONTAINER_NAME=$(gcloud logging read 'log_name=projects/${{ env.PROJECT_ID }}/logs/cos_system AND jsonPayload.MESSAGE:full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}' --format='value(jsonPayload.MESSAGE)' --limit=1 |
              grep -o '...-full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-....' |
              tr -d "'.")
            echo "Using container: ${CONTAINER_NAME} from instance: ${INSTANCE_ID}"
            sleep 10
          done

          echo "CONTAINER_NAME=$CONTAINER_NAME" >> $GITHUB_ENV
          sleep 90

      - name: Full sync
        id: full-sync
        run: |
          # Stream the container logs; if the ssh session ends with an error,
          # report it and reconnect, for up to four attempts in total.
          for RETRY in 1 2 3 4; do
            gcloud compute ssh \
              full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
              --zone ${{ env.ZONE }} \
              --quiet \
              --ssh-flag="-o ServerAliveInterval=15" \
              --command="docker logs --follow ${{ env.CONTAINER_NAME }}" \
              || echo "ssh disconnected $RETRY times"
          done

          # `docker wait` prints the container's exit code, which becomes the
          # exit status of this step.
          EXIT_CODE=$(gcloud compute ssh \
            full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
            --zone ${{ env.ZONE }} \
            --quiet \
            --ssh-flag="-o ServerAliveInterval=5" \
            --command="docker wait ${{ env.CONTAINER_NAME }}")

          exit ${EXIT_CODE}

      # Takes the last number found on lines matching DATABASE_FORMAT_VERSION
      # in the checked-out constants.rs and exports it as STATE_VERSION.
      - name: Get state version from constants.rs
        run: |
          LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" $GITHUB_WORKSPACE/zebra-state/src/constants.rs | grep -oE "[0-9]+" | tail -n1)
          echo "STATE_VERSION: $LOCAL_STATE_VERSION"
          echo "STATE_VERSION=$LOCAL_STATE_VERSION" >> $GITHUB_ENV

      # Polls the instance logs until the "finished initial sync to chain tip"
      # message is found, then exports the height it reports.
      # NOTE(review): the filter uses `jsonPayload.message` while the output
      # format uses `jsonPayload.MESSAGE` - confirm both fields exist in these logs.
      - name: Get sync height from logs
        run: |
          SYNC_HEIGHT=""

          while [[ ${SYNC_HEIGHT} == "" ]]; do
            # A grep exit status of 1 (no match yet) is tolerated so the loop
            # can retry; any other non-zero status is left as a failure.
            SYNC_HEIGHT=$(gcloud logging read --format='value(jsonPayload.MESSAGE)' --order="desc" --limit=1 '(resource.labels.instance_id="${{ env.INSTANCE_ID }}" AND jsonPayload.message=~".+finished initial sync to chain tip.+Height\([0-9]+\).+")' |
              grep -oE 'Height\([0-9]+\)' |
              grep -oE '[0-9]+' || [[ $? == 1 ]] )
            echo "SYNC_HEIGHT: $SYNC_HEIGHT"
            sleep 10
          done

          echo "SYNC_HEIGHT=$SYNC_HEIGHT" >> $GITHUB_ENV

      # Create image from disk
      # Force the image creation as the disk is still attached, even though it is not being used by the container
      - name: Create image from state disk
        run: |
          gcloud compute images create zebrad-cache-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-tip \
            --force \
            --source-disk=full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
            --source-disk-zone=${{ env.ZONE }} \
            --storage-location=us \
            --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}"

      - name: Delete test instance
        # Do not delete the instance if the sync times out in GitHub.
        #
        # `always()` is required here: without a status check function GitHub
        # applies an implicit `success()` to the condition, so this step would
        # be skipped after a failed sync (or any other failed step) and the
        # instance would be left running. The outcome test still keeps the
        # instance when the sync step was cancelled or skipped.
        if: ${{ always() && (steps.full-sync.outcome == 'success' || steps.full-sync.outcome == 'failure') }}
        continue-on-error: true
        run: |
          gcloud compute instances delete "full-sync-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}" --zone "${{ env.ZONE }}" --delete-disks all --quiet