From 38206373b4acd68a6386b1fd4a2ad793d6500c86 Mon Sep 17 00:00:00 2001 From: teor Date: Thu, 8 Jun 2023 01:35:32 +1000 Subject: [PATCH] fix(ci): Add jobs to allow the full sync to finish before the first stable release (#6846) * Delete an unused CI job that was previously partially deleted * Add 2 more jobs to the full sync test * Increase Rust test time: current expected time is 60 hours --- .github/workflows/deploy-gcp-tests.yml | 192 ++++++++++++++++++------- zebrad/tests/common/sync.rs | 2 +- 2 files changed, 139 insertions(+), 55 deletions(-) diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml index 6386a791..6aaf754c 100644 --- a/.github/workflows/deploy-gcp-tests.yml +++ b/.github/workflows/deploy-gcp-tests.yml @@ -570,59 +570,6 @@ jobs: ${{ vars.GAR_BASE }}/${{ vars.CI_IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }} \ " - - # follow the logs of the test we just launched, up to Sapling activation (or the test finishing) - # - # If `inputs.is_long_test` is `false`, this job is skipped. - logs-sprout: - name: Log ${{ inputs.test_id }} test (sprout) - # We run exactly one of without-cached-state or with-cached-state, and we always skip the other one. - needs: [ launch-with-cached-state, launch-without-cached-state ] - # If the previous job fails, we still want to show the logs. - if: ${{ !cancelled() && inputs.is_long_test }} - runs-on: ubuntu-latest - permissions: - contents: 'read' - id-token: 'write' - steps: - - uses: actions/checkout@v3.5.2 - with: - persist-credentials: false - fetch-depth: '2' - # We can't use the standard Rust problem matchers on these jobs, - # because they produce a lot of output. - # - # TODO: create a custom matcher config for these specific jobs - #- uses: r7kamura/rust-problem-matchers@v1.3.0 - - - name: Inject slug/short variables - uses: rlespinasse/github-slug-action@v4 - with: - short-length: 7 - - # Install our SSH secret - - name: Install private SSH key - uses: shimataro/ssh-key-action@v2.5.1 - with: - key: ${{ secrets.GCP_SSH_PRIVATE_KEY }} - name: google_compute_engine - known_hosts: unnecessary - - - name: Generate public SSH key - run: ssh-keygen -y -f ~/.ssh/google_compute_engine > ~/.ssh/google_compute_engine.pub - - # Setup gcloud CLI - - name: Authenticate to Google Cloud - id: auth - uses: google-github-actions/auth@v1.1.1 - with: - retries: '3' - workload_identity_provider: '${{ vars.GCP_WIF }}' - service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}' - - - name: Set up Cloud SDK - uses: google-github-actions/setup-gcloud@v1.1.1 - # follow the logs of the test we just launched, up to Canopy activation (or the test finishing) # # If `inputs.is_long_test` is `false`, this job is skipped. @@ -1382,11 +1329,148 @@ jobs: -e 'test result:.*finished in' \ " + # follow the logs of the test we just launched, up to block 2,030,000 or later + # (or the test finishing) + # + # We chose this height because it was about 4.5 hours from the last job, in June 2023. + logs-2030k: + name: Log ${{ inputs.test_id }} test (2030k) + needs: [ logs-1960k ] + # If the previous job fails, we still want to show the logs. + if: ${{ !cancelled() && inputs.is_long_test }} + runs-on: ubuntu-latest + permissions: + contents: 'read' + id-token: 'write' + steps: + - uses: actions/checkout@v3.5.2 + with: + persist-credentials: false + fetch-depth: '2' + + - name: Inject slug/short variables + uses: rlespinasse/github-slug-action@v4 + with: + short-length: 7 + + # Install our SSH secret + - name: Install private SSH key + uses: shimataro/ssh-key-action@v2.5.1 + with: + key: ${{ secrets.GCP_SSH_PRIVATE_KEY }} + name: google_compute_engine + known_hosts: unnecessary + + - name: Generate public SSH key + run: ssh-keygen -y -f ~/.ssh/google_compute_engine > ~/.ssh/google_compute_engine.pub + + # Setup gcloud CLI + - name: Authenticate to Google Cloud + id: auth + uses: google-github-actions/auth@v1.1.1 + with: + retries: '3' + workload_identity_provider: '${{ vars.GCP_WIF }}' + service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}' + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v1.1.1 + + # Show recent logs, following until block 2,030,000 (or the test finishes) + - name: Show logs for ${{ inputs.test_id }} test (2030k) + run: | + gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ + --zone ${{ vars.GCP_ZONE }} \ + --ssh-flag="-o ServerAliveInterval=5" \ + --ssh-flag="-o ConnectionAttempts=20" \ + --ssh-flag="-o ConnectTimeout=5" \ + --command \ + "\ + sudo docker logs \ + --tail all \ + --follow \ + ${{ inputs.test_id }} | \ + tee --output-error=exit /dev/stderr | \ + grep --max-count=1 --extended-regexp --color=always \ + -e 'estimated progress.*current_height.*=.*20[3-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \ + -e 'estimated progress.*current_height.*=.*2[1-9][0-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \ + -e 'estimated progress.*current_height.*=.*[3-9][0-9][0-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \ + -e 'test result:.*finished in' \ + " + + # follow the logs of the test we just launched, up to block 2,100,000 or later + # (or the test finishing) + # + # We chose this height because we guessed it was 4.5 hours from the last job, in June 2023. + logs-2100k: + name: Log ${{ inputs.test_id }} test (2100k) + needs: [ logs-2030k ] + # If the previous job fails, we still want to show the logs. + if: ${{ !cancelled() && inputs.is_long_test }} + runs-on: ubuntu-latest + permissions: + contents: 'read' + id-token: 'write' + steps: + - uses: actions/checkout@v3.5.2 + with: + persist-credentials: false + fetch-depth: '2' + + - name: Inject slug/short variables + uses: rlespinasse/github-slug-action@v4 + with: + short-length: 7 + + # Install our SSH secret + - name: Install private SSH key + uses: shimataro/ssh-key-action@v2.5.1 + with: + key: ${{ secrets.GCP_SSH_PRIVATE_KEY }} + name: google_compute_engine + known_hosts: unnecessary + + - name: Generate public SSH key + run: ssh-keygen -y -f ~/.ssh/google_compute_engine > ~/.ssh/google_compute_engine.pub + + # Setup gcloud CLI + - name: Authenticate to Google Cloud + id: auth + uses: google-github-actions/auth@v1.1.1 + with: + retries: '3' + workload_identity_provider: '${{ vars.GCP_WIF }}' + service_account: '${{ vars.GCP_DEPLOYMENTS_SA }}' + + - name: Set up Cloud SDK + uses: google-github-actions/setup-gcloud@v1.1.1 + + # Show recent logs, following until block 2,100,000 (or the test finishes) + - name: Show logs for ${{ inputs.test_id }} test (2100k) + run: | + gcloud compute ssh ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \ + --zone ${{ vars.GCP_ZONE }} \ + --ssh-flag="-o ServerAliveInterval=5" \ + --ssh-flag="-o ConnectionAttempts=20" \ + --ssh-flag="-o ConnectTimeout=5" \ + --command \ + "\ + sudo docker logs \ + --tail all \ + --follow \ + ${{ inputs.test_id }} | \ + tee --output-error=exit /dev/stderr | \ + grep --max-count=1 --extended-regexp --color=always \ + -e 'estimated progress.*current_height.*=.*2[1-9][0-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \ + -e 'estimated progress.*current_height.*=.*[3-9][0-9][0-9][0-9][0-9][0-9][0-9].*remaining_sync_blocks' \ + -e 'test result:.*finished in' \ + " + # follow the logs of the test we just launched, up to the last checkpoint, or the test finishing, # or for lightwalletd tests, about 5 hours into the full lightwalletd sync (block 1880k) logs-checkpoint: name: Log ${{ inputs.test_id }} test (checkpoint) - needs: [ logs-1960k ] + needs: [ logs-2100k ] # If the previous job fails, we still want to show the logs. if: ${{ !cancelled() && inputs.is_long_test }} runs-on: ubuntu-latest diff --git a/zebrad/tests/common/sync.rs b/zebrad/tests/common/sync.rs index ff483827..d7bc91d3 100644 --- a/zebrad/tests/common/sync.rs +++ b/zebrad/tests/common/sync.rs @@ -83,7 +83,7 @@ pub const FINISH_PARTIAL_SYNC_TIMEOUT: Duration = Duration::from_secs(11 * 60 * /// The maximum time to wait for Zebrad to synchronize up to the chain tip starting from the /// genesis block. -pub const FINISH_FULL_SYNC_TIMEOUT: Duration = Duration::from_secs(58 * 60 * 60); +pub const FINISH_FULL_SYNC_TIMEOUT: Duration = Duration::from_secs(72 * 60 * 60); /// The test sync height where we switch to using the default lookahead limit. ///