From e3a65d86e0be27c3ba897d8f15a84533de128b9a Mon Sep 17 00:00:00 2001
From: Gustavo Valverde <gustavo@iterativo.do>
Date: Fri, 13 May 2022 11:20:17 -0400
Subject: [PATCH] feat(ci): add `lightwalletd_full_sync` test to CI (#4268)

* Temporarily use an earlier lightwalletd version

This checks if commit
https://github.com/adityapk00/lightwalletd/commit/e146dbf5c2860e535fdbe2ba5ab20c4744d7c941
contains a mempool refresh deadlock bug.

* Actually rebuild the lightwalletd image

* Delete an unfinished comment

* Remove duplicate test in entrypoint.sh

* Keep a recent change to make tests consistent

* fix(ci): remove not used variable `lwd_state_dir`

* fix(ci): state was not being added to the image name

* fix(ci): mount a docker volume with lightwalletd dir

If the volume doesn't mount this lwd cached state dir, the content won't be saved to the mounted disk in the VM

* fix(ci): lwd state condition

* docs(ci): explain disk mounting logic

* docs(ci): explain disk mounting decision better

* docs(ci): add a description for confusing input names

Co-authored-by: teor <teor@riseup.net>
---
 .../continous-integration-docker.yml          | 20 +++-
 .github/workflows/deploy-gcp-tests.yml        | 98 +++++++++++++++++--
 .github/workflows/zcash-lightwalletd.yml      | 29 +++++-
 zebrad/tests/common/launch.rs                 |  2 +-
 zebrad/tests/common/lightwalletd.rs           |  7 +-
 5 files changed, 142 insertions(+), 14 deletions(-)

diff --git a/.github/workflows/continous-integration-docker.yml b/.github/workflows/continous-integration-docker.yml
index 8df82c41..567c5df7 100644
--- a/.github/workflows/continous-integration-docker.yml
+++ b/.github/workflows/continous-integration-docker.yml
@@ -264,9 +264,27 @@ jobs:
       app_name: lightwalletd
       test_id: lwd-send-transactions
       test_description: Test sending transactions via lightwalletd
-      test_variables: '-e TEST_LWD_TRANSACTIONS=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache -e LIGHTWALLETD_DATA_DIR=/var/cache/lightwalletd-cache'
+      test_variables: '-e TEST_LWD_TRANSACTIONS=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache -e LIGHTWALLETD_DATA_DIR=/var/cache/lwd-cache'
       needs_zebra_state: true
       saves_to_disk: false
       disk_suffix: tip
       root_state_path: '/var/cache'
       zebra_state_dir: 'zebrad-cache'
+
+  # Test full sync of lightwalletd with a Zebra tip state
+  lightwalletd-full-sync:
+    needs: build
+    uses: ./.github/workflows/deploy-gcp-tests.yml
+    if: ${{ github.event.inputs.regenerate-disks != 'true' && github.event.inputs.run-full-sync != 'true' }}
+    with:
+      app_name: lightwalletd
+      test_id: lwd-full-sync
+      test_description: Test lightwalletd full sync
+      test_variables: '-e TEST_LWD_FULL_SYNC=1 -e ZEBRA_TEST_LIGHTWALLETD=1 -e ZEBRA_FORCE_USE_COLOR=1 -e ZEBRA_CACHED_STATE_DIR=/var/cache/zebrad-cache -e LIGHTWALLETD_DATA_DIR=/var/cache/lwd-cache'
+      needs_zebra_state: true
+      saves_to_disk: true
+      disk_prefix: lwd-cache
+      disk_suffix: tip
+      root_state_path: '/var/cache'
+      zebra_state_dir: 'zebrad-cache'
+      lwd_state_dir: 'lwd-cache'
\ No newline at end of file
diff --git a/.github/workflows/deploy-gcp-tests.yml b/.github/workflows/deploy-gcp-tests.yml
index b6ca657e..64b66b27 100644
--- a/.github/workflows/deploy-gcp-tests.yml
+++ b/.github/workflows/deploy-gcp-tests.yml
@@ -20,18 +20,28 @@ on:
       test_variables:
         required: true
         type: string
+      # TODO: find a better name
       root_state_path:
         required: false
         type: string
         default: '/zebrad-cache'
+      # TODO: find a better name
       zebra_state_dir:
         required: false
         type: string
         default: ''
+        description: 'Name of the Zebra cached state directory and input image prefix to search in GCP'
+      # TODO: find a better name
+      lwd_state_dir:
+        required: false
+        type: string
+        default: ''
+        description: 'Name of the Lightwalletd cached state directory and input image prefix to search in GCP'
       disk_prefix:
         required: false
         type: string
         default: 'zebrad-cache'
+        description: 'Used to name the image, and by tests that do not need a `zebra_state_dir` to work but build a cached state'
       disk_suffix:
         required: false
         type: string
@@ -197,7 +207,7 @@ jobs:
           token_format: 'access_token'
 
       # Find a cached state disk for this job, matching all of:
-      # - disk kind (disk_prefix) - zebra or lwd
+      # - disk cached state (lwd_state_dir/zebra_state_dir or disk_prefix) - zebrad-cache or lwd-cache
       # - state version (from the source code) - v{N}
       # - network (network) - mainnet or testnet
       # - disk target height kind (disk_suffix) - checkpoint or tip
@@ -206,19 +216,19 @@ jobs:
       # - prefer images generated from the `main` branch, then any other branch
       # - prefer newer images to older images
       #
-      # Passes the disk name to subsequent steps using an environmental variable.
+      # Passes the disk name to subsequent steps using the $CACHED_DISK_NAME environment variable.
       - name: Find cached state disk
         id: get-disk-name
         run: |
           LOCAL_STATE_VERSION=$(grep -oE "DATABASE_FORMAT_VERSION: .* [0-9]+" "$GITHUB_WORKSPACE/zebra-state/src/constants.rs" | grep -oE "[0-9]+" | tail -n1)
-          echo "LOCAL_STATE_VERSION: $LOCAL_STATE_VERSION"
+          echo "STATE_VERSION: $LOCAL_STATE_VERSION"
 
           # Try to find an image generated from the main branch
           # Fields are listed in the "Create image from state disk" step
           #
           # TODO: require ${NETWORK} in the name after PR #4391 merges to main, and runs a full sync
           #       network should replace [a-z]*
-          CACHED_DISK_NAME=$(gcloud compute images list --filter="name~${{ inputs.disk_prefix }}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-[a-z]*-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
+          CACHED_DISK_NAME=$(gcloud compute images list --filter="name~${{ inputs.zebra_state_dir || inputs.disk_prefix }}-main-[0-9a-f]+-v${LOCAL_STATE_VERSION}-[a-z]*-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
           echo "main Disk: $CACHED_DISK_NAME"
 
           if [[ -z "$CACHED_DISK_NAME" ]]; then
@@ -226,7 +236,7 @@ jobs:
               #
               # TODO: require ${NETWORK} in the name after PRs #4391 and #4385 merge to main
               #       network should replace [a-z]*
-              CACHED_DISK_NAME=$(gcloud compute images list --filter="name~${{ inputs.disk_prefix }}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-[a-z]*-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
+              CACHED_DISK_NAME=$(gcloud compute images list --filter="name~${{ inputs.zebra_state_dir || inputs.disk_prefix }}-.+-[0-9a-f]+-v${LOCAL_STATE_VERSION}-[a-z]*-${{ inputs.disk_suffix }}" --format="value(NAME)" --sort-by=~creationTimestamp --limit=1)
               echo "Disk: $CACHED_DISK_NAME"
           fi
 
@@ -241,7 +251,8 @@ jobs:
 
           echo "CACHED_DISK_NAME=$CACHED_DISK_NAME" >> $GITHUB_ENV
 
-      # Creates Compute Engine virtual machine instance w/ disks
+      # Creates a Compute Engine virtual machine and attaches a cached state disk, using the
+      # $CACHED_DISK_NAME variable as the source image to populate the disk's cached state
       - name: Create GCP compute instance
         id: create-instance
         run: |
@@ -258,7 +269,22 @@ jobs:
           --zone ${{ env.ZONE }}
           sleep 60
 
+      # SSH into the just created VM, and create a Docker container to run the incoming test 
+      # from ${{ inputs.test_id }}, then create a docker volume with the recently attached disk.
+      # The disk will be mounted in ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }}.
+      #
+      # The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker
+      # container in one path:
+      # - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
+      #
+      # This path must match the variable used by the tests in Rust, which is also set in
+      # `continous-integration-docker.yml` to be able to run these tests.
+      #
+      # Although we're mounting the disk root, Zebra will only respect the value from
+      # $ZEBRA_CACHED_STATE_DIR. Inputs like ${{ inputs.zebra_state_dir }} are only used
+      # to match that variable's path.
       - name: Run ${{ inputs.test_id }} test
+        if: ${{ !inputs.lwd_state_dir }}
         run: |
           gcloud compute ssh \
           ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
@@ -274,6 +300,66 @@ jobs:
           --mount type=volume,src=${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
           ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}"
 
+      # SSH into the just created VM, and create a Docker container to run the incoming test 
+      # from ${{ inputs.test_id }}, then create a docker volume with the recently attached disk.
+      # The disk will be mounted in ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }},
+      # and ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }}
+      #
+      # In this step we're using the same disk for simplicity, as mounting multiple disks to the
+      # VM and to the container might require more steps in this workflow, and additional
+      # considerations.
+      #
+      # The disk mounted in the VM is located at /dev/sdb, we mount the root `/` of this disk to the docker
+      # container in two different paths:
+      # - /var/cache/zebrad-cache -> ${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} -> $ZEBRA_CACHED_STATE_DIR
+      # - /var/cache/lwd-cache -> ${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} -> $LIGHTWALLETD_DATA_DIR
+      #
+      # This doesn't cause any path conflicts, because Zebra and lightwalletd create different
+      # subdirectories for their data. (But Zebra, lightwalletd, and the test harness must not
+      # delete the whole cache directory.)
+      #
+      # These paths must match the variables used by the tests in Rust, which are also set in
+      # `continous-integration-docker.yml` to be able to run these tests.
+      #
+      # Although we're mounting the disk root to both directories, Zebra and Lightwalletd
+      # will only respect the values from $ZEBRA_CACHED_STATE_DIR and $LIGHTWALLETD_DATA_DIR.
+      # Inputs like ${{ inputs.lwd_state_dir }} are only used to match those variables' paths.
+      - name: Run ${{ inputs.test_id }} test
+        if: ${{ inputs.lwd_state_dir }}
+        run: |
+          gcloud compute ssh \
+          ${{ inputs.test_id }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }} \
+          --zone ${{ env.ZONE }} \
+          --quiet \
+          --ssh-flag="-o ServerAliveInterval=5" \
+          --command \
+          "\
+          docker volume create --driver local --opt type=ext4 --opt device=/dev/sdb \
+          ${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
+          && \
+          docker run ${{ inputs.test_variables }} -t --name ${{ inputs.test_id }} \
+          --mount type=volume,src=${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.zebra_state_dir }} \
+          --mount type=volume,src=${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }},dst=${{ inputs.root_state_path }}/${{ inputs.lwd_state_dir }} \
+          ${{ env.GAR_BASE }}/${{ env.IMAGE_NAME }}:sha-${{ env.GITHUB_SHA_SHORT }}"
+
+      # Create an image from disk that will be used for following/other tests
+      # This image can contain:
+      # - Zebra cached state
+      # - Zebra + lightwalletd cached state
+      # Which cached state is being saved to the disk is defined by ${{ inputs.disk_prefix }}
+      #
+      # Force the image creation (--force) as the disk is still attached, even though it is not
+      # being used by the container
+      - name: Create image from state disk
+        if: ${{ inputs.saves_to_disk }}
+        run: |
+          gcloud compute images create ${{ inputs.disk_prefix }}-${{ env.GITHUB_REF_SLUG_URL }}-${{ env.GITHUB_SHA_SHORT }}-v${{ env.STATE_VERSION }}-${{ env.NETWORK }}-${{ inputs.disk_suffix }} \
+          --force \
+          --source-disk=${{ inputs.disk_prefix }}-${{ inputs.test_id }}-${{ env.GITHUB_SHA_SHORT }} \
+          --source-disk-zone=${{ env.ZONE }} \
+          --storage-location=us \
+          --description="Created from commit ${{ env.GITHUB_SHA_SHORT }} with height ${{ env.SYNC_HEIGHT }}"
+
       - name: Delete test instance
         # We don't want to leave a failed instance in GCP using resources
         if: always()
diff --git a/.github/workflows/zcash-lightwalletd.yml b/.github/workflows/zcash-lightwalletd.yml
index 5c46e0ad..3045275f 100644
--- a/.github/workflows/zcash-lightwalletd.yml
+++ b/.github/workflows/zcash-lightwalletd.yml
@@ -2,6 +2,7 @@ name: zcash-lightwalletd
 
 on:
   workflow_dispatch:
+  
   push:
     branches:
       - 'main'
@@ -14,9 +15,25 @@ on:
       - 'zebrad/tests/acceptance.rs'
       - 'zebrad/src/config.rs'
       - 'zebrad/src/commands/start.rs'
-      # workflow definitions
-      #
-      # unlike the Zebra code, these workflow definitions do change the docker image
+      # these workflow definitions actually change the docker image
+      - 'docker/zcash-lightwalletd/Dockerfile'
+      - '.github/workflows/zcash-lightwalletd.yml'
+
+  # Update the lightwalletd image when each related PR changes
+  #
+  # TODO: after NU5 mainnet activation and wallet orchard features are stable,
+  #       consider just rebuilding the image on `main` merges
+  pull_request:
+    branches:
+      - main
+    paths:
+      # rebuild lightwalletd whenever the related Zebra code changes
+      # (this code isn't actually compiled in this docker image)
+      - 'zebra-rpc/**'
+      - 'zebrad/tests/acceptance.rs'
+      - 'zebrad/src/config.rs'
+      - 'zebrad/src/commands/start.rs'
+      # these workflow definitions actually change the docker image
       - 'docker/zcash-lightwalletd/Dockerfile'
       - '.github/workflows/zcash-lightwalletd.yml'
 
@@ -36,7 +53,11 @@ jobs:
       - uses: actions/checkout@v3.0.2
         with:
           repository: adityapk00/lightwalletd
-          ref: 'master'
+          # Temporarily use an earlier lightwalletd version,
+          # to check if commit e146dbf5c2860e535fdbe2ba5ab20c4744d7c941 contains a mempool refresh deadlock bug.
+          #
+          # TODO: switch back to 'master' after the bug is resolved
+          ref: 'c1bab818a683e4de69cd952317000f9bb2932274'
           persist-credentials: false
 
       - uses: actions/checkout@v3.0.2
diff --git a/zebrad/tests/common/launch.rs b/zebrad/tests/common/launch.rs
index 6e673e6c..f9db17f0 100644
--- a/zebrad/tests/common/launch.rs
+++ b/zebrad/tests/common/launch.rs
@@ -51,7 +51,7 @@ pub const LIGHTWALLETD_UPDATE_TIP_DELAY: Duration = Duration::from_secs(10 * 60)
 ///
 /// `lightwalletd` takes about half an hour to fully sync,
 /// and Zebra needs time to activate its mempool.
-pub const LIGHTWALLETD_FULL_SYNC_TIP_DELAY: Duration = Duration::from_secs(60 * 60);
+pub const LIGHTWALLETD_FULL_SYNC_TIP_DELAY: Duration = Duration::from_secs(45 * 60);
 
 /// Extension trait for methods on `tempfile::TempDir` for using it as a test
 /// directory for `zebrad`.
diff --git a/zebrad/tests/common/lightwalletd.rs b/zebrad/tests/common/lightwalletd.rs
index 4d649aea..0e876326 100644
--- a/zebrad/tests/common/lightwalletd.rs
+++ b/zebrad/tests/common/lightwalletd.rs
@@ -321,9 +321,12 @@ impl LightwalletdTestType {
 
     /// Returns the `zebrad` timeout for this test type.
     pub fn zebrad_timeout(&self) -> Duration {
+        // We use the same timeouts as lightwalletd,
+        // because the tests swap between checking zebrad and lightwalletd.
         match self {
             LaunchWithEmptyState => LIGHTWALLETD_DELAY,
-            FullSyncFromGenesis { .. } | UpdateCachedState => LIGHTWALLETD_UPDATE_TIP_DELAY,
+            FullSyncFromGenesis { .. } => LIGHTWALLETD_FULL_SYNC_TIP_DELAY,
+            UpdateCachedState => LIGHTWALLETD_UPDATE_TIP_DELAY,
         }
     }
 
@@ -331,8 +334,8 @@ impl LightwalletdTestType {
     pub fn lightwalletd_timeout(&self) -> Duration {
         match self {
             LaunchWithEmptyState => LIGHTWALLETD_DELAY,
-            UpdateCachedState => LIGHTWALLETD_UPDATE_TIP_DELAY,
             FullSyncFromGenesis { .. } => LIGHTWALLETD_FULL_SYNC_TIP_DELAY,
+            UpdateCachedState => LIGHTWALLETD_UPDATE_TIP_DELAY,
         }
     }