# Process Samples
# (Appears to be captured from the GitHub Actions web view: "Process Samples #4386".)

# Workflow-level settings: display name, triggers, concurrency, shared
# environment variables and the default shell used by every `run:` step.
name: Process Samples

on:
  # Minute 0 of every third hour from 00:00 through 15:00
  # (hours 0, 3, 6, 9, 12, 15); GitHub evaluates cron schedules in UTC.
  schedule:
    - cron: '0 0-16/3 * * *'
  # Also allow manual runs from the Actions tab / API.
  workflow_dispatch:

# A single concurrency group keyed on the repository name, so two runs of
# this workflow never execute at the same time.
concurrency:
  group: ${{ github.repository }}

env:
  # Nextflow reads NXF_VER / NXF_WORK itself; the work directory is placed
  # inside the runner workspace.
  NXF_VER: "24.10.4"
  NXF_WORK: ${{ github.workspace }}/work
  NXF_OUTPUT: ${{ github.workspace }}/outputs
  # Run name derived from the workflow run number.
  NXF_NAME: github-${{ github.run_number }}
  GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
  # Samples per run; passed to the pipeline (--num_samples) and to
  # scripts/update_sample_status.py by the job steps.
  BATCH_SIZE: 50

defaults:
  run:
    # -e: abort the step on the first failing command.
    # -l / -i: login + interactive shell, so profile/rc files are sourced --
    # presumably what makes `mamba activate` available; confirm on the runner.
    shell: bash -eli {0}
jobs:
  # Single job: run the pipeline on a batch of new samples, aggregate the
  # results, merge them with the aggregates already in Cloud Storage, upload
  # everything, then record the processed samples in git.
  run:
    runs-on: self-hosted
    permissions:
      contents: write  # the "Commit and push changes" step pushes a commit
      id-token: write  # OIDC token for google-github-actions/auth
    steps:
      - name: Checkout main
        uses: actions/checkout@v4
        with:
          token: ${{ secrets.GITHUB_TOKEN }}

      # NOTE(review): `with:` inputs are not passed through a shell, so
      # `$CONDA_PREFIX/envs` is most likely taken literally and never matches
      # a real directory -- confirm, and replace it with a resolved path if
      # caching the conda envs is actually wanted.
      # NOTE(review): actions/cache saves in a post-job step, which runs after
      # "Clean workspace" has already removed `${{ env.NXF_WORK }}/conda`;
      # confirm this cache ever stores anything.
      - name: Cache envs
        id: cache-envs
        uses: actions/cache@v4
        with:
          path: |
            ${{ env.NXF_WORK }}/conda
            $CONDA_PREFIX/envs
          key: env-${{ env.NXF_VER }}-${{ github.job }}
          restore-keys: |
            env-${{ env.NXF_VER }}-${{ github.job }}
            env-${{ env.NXF_VER }}
            env-

      # Create or update the `freyja-sc2` environment from environment.yml;
      # --prune removes packages no longer listed in the file.
      - name: Check and setup conda environment
        run: |
          mamba env update --name freyja-sc2 --file environment.yml --prune

      # NOTE(review): the service_account value below looks like an
      # e-mail-obfuscation placeholder rather than a real address; restore the
      # real service account before using this file.
      - id: auth
        name: Authenticate with gcloud
        uses: google-github-actions/auth@v2
        with:
          workload_identity_provider: 'projects/12767718289/locations/global/workloadIdentityPools/github-actions/providers/freyja-sc2'
          service_account: '[email protected]'

      - name: Set up Cloud SDK
        uses: google-github-actions/setup-gcloud@v2
        with:
          version: '>= 363.0.0'

      # Runs the `fetch` entry workflow of main.nf on BATCH_SIZE samples.
      - name: Run pipeline on new samples
        run: |
          mamba activate freyja-sc2
          nextflow run main.nf \
            --num_samples $BATCH_SIZE \
            -profile local \
            -entry fetch

      - name: Aggregate outputs
        run: |
          mamba activate freyja-sc2
          python scripts/aggregate_demix.py
          python scripts/aggregate_variants.py
          python scripts/aggregate_metadata.py

      # Pull the aggregate files currently stored in the bucket into
      # outputs/aggregate/ so the next step can combine them with this run.
      - id: download-aggregated-outputs
        name: Download aggregated outputs
        run: |
          gcloud storage cp \
            gs://outbreak-ww-data/aggregate/aggregate_demix.json \
            outputs/aggregate/aggregate_demix.json \
            --billing-project=andersen-lab-primary
          gcloud storage cp \
            gs://outbreak-ww-data/aggregate/aggregate_variants.json \
            outputs/aggregate/aggregate_variants.json \
            --billing-project=andersen-lab-primary
          gcloud storage cp \
            gs://outbreak-ww-data/aggregate/aggregate_metadata.json \
            outputs/aggregate/aggregate_metadata.json \
            --billing-project=andersen-lab-primary

      - id: concatenate-outputs
        name: Concatenate outputs
        run: |
          mamba activate freyja-sc2
          python scripts/concat_agg_files.py

      - id: create-demix-by-week
        name: Create demix by week
        run: |
          mamba activate freyja-sc2
          python scripts/aggregate_demix_by_week.py

      # Upload the contents of outputs/ to the bucket; `parent: false` keeps
      # the leading `outputs/` directory name out of the destination path.
      - id: upload-outputs
        name: Upload Outputs to Cloud Storage
        uses: google-github-actions/upload-cloud-storage@v2
        with:
          path: 'outputs/'
          destination: 'outbreak-ww-data/'
          parent: false
          project_id: 'andersen-lab-primary'

      - name: Update processed samples
        run: |
          mamba activate freyja-sc2
          python scripts/update_sample_status.py $BATCH_SIZE

      # NOTE(review): the user.email value below looks like an
      # e-mail-obfuscation placeholder; restore the intended address.
      - name: Commit and push changes
        run: |
          git config --local user.name "$GITHUB_ACTOR"
          git config --local user.email "[email protected]"
          git remote set-url origin https://github.com/andersen-lab/freyja-sc2
          git add data/all_metadata.csv
          # `git commit` exits non-zero when nothing is staged, which would
          # fail the step (the default shell runs with -e). Only commit and
          # push when the file actually changed.
          if git diff --cached --quiet; then
            echo "No changes to data/all_metadata.csv; nothing to commit."
          else
            git commit -m "Update processed samples"
            # --force-with-lease still overwrites the branch in the normal
            # case, but refuses if the remote moved since checkout, so commits
            # pushed by someone else during the run are not silently discarded.
            git push --force-with-lease
          fi

      # Always clean up, even after a failed step, so a self-hosted runner
      # does not keep stale work/output directories. The glob does not match
      # dotfiles, so entries such as .git are left in place.
      - name: Clean workspace
        if: always()
        run: |
          rm -rf "${{ github.workspace }}"/*