diff --git a/.github/workflows/delete-gcp-resources.yml b/.github/workflows/delete-gcp-resources.yml index 5cfff8c0..c82208ef 100644 --- a/.github/workflows/delete-gcp-resources.yml +++ b/.github/workflows/delete-gcp-resources.yml @@ -8,7 +8,10 @@ on: workflow_dispatch: env: - # Delete all resources created before $DELETE_AGE_DAYS days ago. + # Delete all instances created before $DELETE_INSTANCE_DAYS days ago. + # We keep this short to reduce CPU, RAM, and storage costs. + DELETE_INSTANCE_DAYS: 3 + # Delete all other resources created before $DELETE_AGE_DAYS days ago. # We keep this short to reduce storage costs. DELETE_AGE_DAYS: 2 # But keep the latest $KEEP_LATEST_IMAGE_COUNT images of each type. @@ -37,38 +40,73 @@ jobs: service_account: 'github-service-account@zealous-zebra.iam.gserviceaccount.com' token_format: 'access_token' + # Deletes all instances older than $DELETE_INSTANCE_DAYS days. + # + # We only delete instances that end in 7 or more hex characters, + # to avoid deleting managed instance groups and manually created instances. + # + # ${INSTANCE_AND_ZONE} expands to: + # <instance-name> --zone=<zone-name> + # so it can't be shell-quoted. + - name: Delete old instances + run: | + DELETE_BEFORE_DATE=$(date --date="$DELETE_INSTANCE_DAYS days ago" '+%Y%m%d') + + IFS=$'\n' + INSTANCES=$(gcloud compute instances list --sort-by=creationTimestamp --filter="name~-[0-9a-f]{7,}$ AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME,ZONE)' | \ + sed 's/\(.*\)\t\(.*\)/\1 --zone=\2/') + + for INSTANCE_AND_ZONE in $INSTANCES + do + IFS=$' ' + gcloud compute instances delete --verbosity=info ${INSTANCE_AND_ZONE} --delete-disks=all || continue + IFS=$'\n' + done + # Deletes all the instance templates older than $DELETE_AGE_DAYS days. 
- name: Delete old instance templates run: | DELETE_BEFORE_DATE=$(date --date="$DELETE_AGE_DAYS days ago" '+%Y%m%d') - TEMPLATES=$(gcloud compute instance-templates list --sort-by=creationTimestamp --filter="name~-[0-9a-f]+$ AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)') + TEMPLATES=$(gcloud compute instance-templates list --sort-by=creationTimestamp --filter="name~-[0-9a-f]{7,}$ AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)') for TEMPLATE in $TEMPLATES do - gcloud compute instance-templates delete ${TEMPLATE} || continue + gcloud compute instance-templates delete "${TEMPLATE}" || continue done # Deletes all the disks older than $DELETE_AGE_DAYS days. # - # Disks that are attached to an instance template can't be deleted, so it is safe to delete all disks here. + # Disks that are attached to an instance template can't be deleted, so it is safe to try to delete all disks here. + # + # ${DISK_AND_LOCATION} expands to: + # <disk-name> --[zone|region]=<location-name> + # so it can't be shell-quoted. 
- name: Delete old disks run: | DELETE_BEFORE_DATE=$(date --date="$DELETE_AGE_DAYS days ago" '+%Y%m%d') + IFS=$'\n' # Disks created by PR jobs, and other jobs that use a commit hash - COMMIT_DISKS=$(gcloud compute disks list --sort-by=creationTimestamp --filter="name~-[0-9a-f]+$ AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)') + COMMIT_DISKS=$(gcloud compute disks list --sort-by=creationTimestamp --filter="name~-[0-9a-f]{7,}$ AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME,LOCATION,LOCATION_SCOPE)' | \ + sed 's/\(.*\)\t\(.*\)\t\(.*\)/\1 --\3=\2/') # rewrite NAME<TAB>LOCATION<TAB>LOCATION_SCOPE as NAME --LOCATION_SCOPE=LOCATION - for DISK in $COMMIT_DISKS + for DISK_AND_LOCATION in $COMMIT_DISKS do - gcloud compute disks delete --verbosity=info ${DISK} || continue + IFS=$' ' # split the unquoted entry on spaces into the disk name and its location flag + gcloud compute disks delete --verbosity=info ${DISK_AND_LOCATION} || continue + IFS=$'\n' done + IFS=$'\n' # Disks created by managed instance groups, and other jobs that start with "zebrad-" - ZEBRAD_DISKS=$(gcloud compute disks list --sort-by=creationTimestamp --filter="name~^zebrad- AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)') + ZEBRAD_DISKS=$(gcloud compute disks list --sort-by=creationTimestamp --filter="name~^zebrad- AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME,LOCATION,LOCATION_SCOPE)' | \ + sed 's/\(.*\)\t\(.*\)\t\(.*\)/\1 --\3=\2/') # rewrite NAME<TAB>LOCATION<TAB>LOCATION_SCOPE as NAME --LOCATION_SCOPE=LOCATION - for DISK in $ZEBRAD_DISKS + for DISK_AND_LOCATION in $ZEBRAD_DISKS do - gcloud compute disks delete --verbosity=info ${DISK} || continue + IFS=$' ' # split the unquoted entry on spaces into the disk name and its location flag + gcloud compute disks delete --verbosity=info ${DISK_AND_LOCATION} || continue + IFS=$'\n' done # Deletes cache images older than $DELETE_AGE_DAYS days. 
@@ -98,7 +136,7 @@ jobs: continue fi - gcloud compute images delete ${IMAGE} || continue + gcloud compute images delete "${IMAGE}" || continue done ZEBRAD_TIP_IMAGES=$(gcloud compute images list --sort-by=~creationTimestamp --filter="name~^zebrad-cache-.*net-tip AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)') @@ -112,7 +150,7 @@ jobs: continue fi - gcloud compute images delete ${IMAGE} || continue + gcloud compute images delete "${IMAGE}" || continue done LWD_TIP_IMAGES=$(gcloud compute images list --sort-by=~creationTimestamp --filter="name~^lwd-cache-.*net-tip AND creationTimestamp < $DELETE_BEFORE_DATE" --format='value(NAME)') @@ -126,5 +164,5 @@ jobs: continue fi - gcloud compute images delete ${IMAGE} || continue + gcloud compute images delete "${IMAGE}" || continue done diff --git a/book/src/dev/continuous-integration.md b/book/src/dev/continuous-integration.md index 4bd35dff..e97da9c5 100644 --- a/book/src/dev/continuous-integration.md +++ b/book/src/dev/continuous-integration.md @@ -47,16 +47,13 @@ Please shut down large instances when they are not being used. ### Automated Deletion The [Delete GCP Resources](https://github.com/ZcashFoundation/zebra/blob/main/.github/workflows/delete-gcp-resources.yml) -workflow automatically deletes instance templates, disks, and images older than a few days. +workflow automatically deletes test instances, instance templates, disks, and images older than a few days. -Running instances and their disks are protected from deletion. - -If you want to keep instance templates, disks, or images in Google Cloud, name them so they don't match the automated names: -- deleted instance templates and disks end in a commit hash, so use a name ending in `-` or `-[^0-9a-f]+` -- deleted images start with `zebrad-cache` or `lwd-cache`, so use a name starting with anything else - -Our other Google Cloud projects don't have automated deletion, so you can also use them for experiments or production deployments. 
+If you want to keep instances, instance templates, disks, or images in Google Cloud, name them so they don't match the automated names: +- deleted instances, instance templates and disks end in a commit hash, so use a name that doesn't end in `-[0-9a-f]{7,}` +- deleted disks and images start with `zebrad-` or `lwd-`, so use a name starting with anything else +Our production Google Cloud project doesn't have automated deletion. ## Troubleshooting