diff --git a/CHANGELOG.md b/CHANGELOG.md index 71bb6ef2..e17460c1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). -## v2.3.0dev - [12-March-2025] +## v2.3.0dev - [02-April-2025] ### `Added` @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 2. The minimum required Nextflow version is now 24.10.5 3. Now the pipeline parameters are presented in separate sections on the PARAMS summary page in the report 4. Added parameter `hic_alphanumeric_sort` to allow disabling of FASTA sorting by sequence labels +5. Added parameter `use_all_cpus` to allow multithreaded tasks to consume all the CPUs available on a machine. This behavior is desirable on [MM Cloud](https://www.mmcloud.io) but highly undesirable on AWS Batch, SLURM, etc. ### `Fixed` diff --git a/conf/modules.config b/conf/modules.config index 090ebd3a..e17c2742 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -417,3 +417,4 @@ process { ] } } +process { ext.use_all_cpus = params.use_all_cpus } diff --git a/docs/parameters.md b/docs/parameters.md index 953b2e84..f0de04a0 100644 --- a/docs/parameters.md +++ b/docs/parameters.md @@ -131,3 +131,4 @@ Less common options for the pipeline, typically set in a config file. | `monochrome_logs` | Do not use coloured log outputs. | `boolean` | | | True | | `hook_url` | Incoming hook URL for messaging service | `string` | | | True | | `trace_report_suffix` | Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss. | `string` | | | True | +| `use_all_cpus` | Use all the available CPUs for each task | `boolean` | | | | diff --git a/modules/gallvp/bwa/mem/main.nf b/modules/gallvp/bwa/mem/main.nf index d18cd936..3e07edcd 100644 --- a/modules/gallvp/bwa/mem/main.nf +++ b/modules/gallvp/bwa/mem/main.nf @@ -35,15 +35,19 @@ process BWA_MEM { "bam" def reference = fasta && extension=="cram" ? "--reference ${fasta}" : "" if (!fasta && extension=="cram") error "Fasta reference is required for CRAM output" - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + INDEX=`find -L ./ -name "*.amb" | sed 's/\\.amb\$//'` bwa mem \\ $args \\ - -t $task.cpus \\ + -t \${task_cpus} \\ \$INDEX \\ $reads \\ - | samtools $samtools_command $args2 ${reference} --threads $task.cpus -o ${prefix}.${extension} - + | samtools $samtools_command $args2 ${reference} --threads \${task_cpus} -o ${prefix}.${extension} - cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/gallvp/cat/cat/main.nf b/modules/gallvp/cat/cat/main.nf index 2862c64c..10b2f517 100644 --- a/modules/gallvp/cat/cat/main.nf +++ b/modules/gallvp/cat/cat/main.nf @@ -36,12 +36,16 @@ process CAT_CAT { out_zip = prefix.endsWith('.gz') in_zip = file_list[0].endsWith('.gz') command1 = (in_zip && !out_zip) ? 'zcat' : 'cat' - command2 = (!in_zip && out_zip) ? "| pigz -c -p $task.cpus $args2" : '' + command2 = (!in_zip && out_zip) ? "| pigz -c -p \${task_cpus} $args2" : '' if(file_list.contains(prefix.trim())) { error "The name of the input file can't be the same as for the output prefix in the " + "module CAT_CAT (currently `$prefix`). Please choose a different one." } - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + $command1 \\ $args \\ ${file_list.join(' ')} \\ diff --git a/modules/gallvp/ltrfinder/main.nf b/modules/gallvp/ltrfinder/main.nf index 3e59e3c7..e17a2c92 100644 --- a/modules/gallvp/ltrfinder/main.nf +++ b/modules/gallvp/ltrfinder/main.nf @@ -21,10 +21,14 @@ process LTRFINDER { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + LTR_FINDER_parallel \\ -seq $fasta \\ - -threads $task.cpus \\ + -threads \${task_cpus} \\ $args mv "${fasta}.finder.combine.scn" "${prefix}.scn" diff --git a/modules/gallvp/ltrharvest/main.nf b/modules/gallvp/ltrharvest/main.nf index 1e5e06d2..8b89dc2d 100644 --- a/modules/gallvp/ltrharvest/main.nf +++ b/modules/gallvp/ltrharvest/main.nf @@ -21,11 +21,15 @@ process LTRHARVEST { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + LTR_HARVEST_parallel \\ -seq $fasta \\ $args \\ - -threads $task.cpus + -threads \${task_cpus} mv "${fasta}.harvest.combine.gff3" \\ "${prefix}.gff3" diff --git a/modules/gallvp/ltrretriever/lai/main.nf b/modules/gallvp/ltrretriever/lai/main.nf index 464b215b..00e5d7ea 100644 --- a/modules/gallvp/ltrretriever/lai/main.nf +++ b/modules/gallvp/ltrretriever/lai/main.nf @@ -27,12 +27,16 @@ process LTRRETRIEVER_LAI { def monoploid_param = monoploid_seqs ? "-mono $monoploid_seqs" : '' def lai_output_name = monoploid_seqs ? "${annotation_out}.${monoploid_seqs}.out.LAI" : "${annotation_out}.LAI" def VERSION = 'beta3.2' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + LAI \\ -genome $fasta \\ -intact $pass_list \\ -all $annotation_out \\ - -t $task.cpus \\ + -t \${task_cpus} \\ $monoploid_param \\ $args \\ > >(tee "${prefix}.LAI.log") \\ diff --git a/modules/gallvp/ltrretriever/ltrretriever/main.nf b/modules/gallvp/ltrretriever/ltrretriever/main.nf index 8e1e2beb..59f25912 100644 --- a/modules/gallvp/ltrretriever/ltrretriever/main.nf +++ b/modules/gallvp/ltrretriever/ltrretriever/main.nf @@ -41,7 +41,11 @@ process LTRRETRIEVER_LTRRETRIEVER { // // This copy with permissions logic can be removed once https://github.com/oushujun/LTR_retriever/issues/176 // has been resolved. - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + cp \\ $genome \\ $writable_genome @@ -56,7 +60,7 @@ process LTRRETRIEVER_LTRRETRIEVER { $infinder \\ $inmgescan \\ $non_tgca_file \\ - -threads $task.cpus \\ + -threads \${task_cpus} \\ $args \\ &> >(tee "${prefix}.log" 2>&1) \\ || echo "Errors from LTR_retriever printed to ${prefix}.log" diff --git a/modules/gallvp/minimap2/align/main.nf b/modules/gallvp/minimap2/align/main.nf index d82dc14d..cd87b53d 100644 --- a/modules/gallvp/minimap2/align/main.nf +++ b/modules/gallvp/minimap2/align/main.nf @@ -32,19 +32,23 @@ process MINIMAP2_ALIGN { def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" - def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" + def bam_output = bam_format ? "-a | samtools sort -@ \$((task_cpus-1)) -o ${bam_index} ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' def bam_input = "${reads.extension}".matches('sam|bam|cram') - def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def samtools_reset_fastq = bam_input ? "samtools reset --threads \$((task_cpus-1)) $args3 $reads | samtools fastq --threads \$((task_cpus-1)) $args4 |" : '' def query = bam_input ? "-" : reads def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + $samtools_reset_fastq \\ minimap2 \\ $args \\ - -t $task.cpus \\ + -t \${task_cpus} \\ $target \\ $query \\ $cigar_paf \\ diff --git a/modules/gallvp/seqkit/seq/main.nf b/modules/gallvp/seqkit/seq/main.nf index 9d76da21..d19eadbe 100644 --- a/modules/gallvp/seqkit/seq/main.nf +++ b/modules/gallvp/seqkit/seq/main.nf @@ -29,10 +29,14 @@ process SEQKIT_SEQ { extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension def call_gzip = extension.endsWith('.gz') ? "| gzip -c $args2" : '' if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + seqkit \\ seq \\ - --threads $task.cpus \\ + --threads \${task_cpus} \\ $args \\ $fastx \\ $call_gzip \\ diff --git a/modules/local/juicer_sort.nf b/modules/local/juicer_sort.nf index 4b0645bd..fbcd2f68 100644 --- a/modules/local/juicer_sort.nf +++ b/modules/local/juicer_sort.nf @@ -17,8 +17,12 @@ process JUICER_SORT { task.ext.when == null || task.ext.when script: + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' """ - sort --parallel=${task.cpus} \\ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + + sort --parallel=\${task_cpus} \\ -k2,2 -k6,6 \\ $out_links_txt \\ > out.sorted.links.txt diff --git a/modules/local/kraken2.nf b/modules/local/kraken2.nf index 53f722ac..2183fbd7 100644 --- a/modules/local/kraken2.nf +++ b/modules/local/kraken2.nf @@ -18,13 +18,17 @@ process KRAKEN2 { task.ext.when == null || task.ext.when script: + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + kraken2 \\ --output "${asm_tag}.kraken2.cut" \\ --report "${asm_tag}.kraken2.report" \\ --use-names \\ --db $db_path \\ - --threads ${task.cpus} \\ + --threads \${task_cpus} \\ $fasta_file > kraken2.log cat <<-END_VERSIONS > versions.yml diff --git a/modules/local/mummer.nf b/modules/local/mummer.nf index 401dbaa1..1a1be4a8 100644 --- a/modules/local/mummer.nf +++ b/modules/local/mummer.nf @@ -15,10 +15,14 @@ process MUMMER { task.ext.when == null || task.ext.when script: + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + nucmer \ --mum \\ - -t ${task.cpus} \\ + -t \${task_cpus} \\ -p "${target}.on.${reference}" \\ $ref_fasta \\ $target_fasta diff --git a/modules/local/ncbi_fcs_gx_screen_samples.nf b/modules/local/ncbi_fcs_gx_screen_samples.nf index 24276f61..55df8882 100644 --- a/modules/local/ncbi_fcs_gx_screen_samples.nf +++ b/modules/local/ncbi_fcs_gx_screen_samples.nf @@ -22,8 +22,12 @@ process NCBI_FCS_GX_SCREEN_SAMPLES { script: def VERSION = '0.5.4' + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' """ - export GX_NUM_CORES=$task.cpus + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + + export GX_NUM_CORES=\${task_cpus} for sample_fasta in $samples; do diff --git a/modules/nf-core/busco/busco/main.nf b/modules/nf-core/busco/busco/main.nf index 17496910..b4c8381d 100644 --- a/modules/nf-core/busco/busco/main.nf +++ b/modules/nf-core/busco/busco/main.nf @@ -45,7 +45,11 @@ process BUSCO_BUSCO { : "--lineage_dataset ${lineage}" def busco_lineage_dir = busco_lineages_path ? "--download_path ${busco_lineages_path}" : '' def clean_cmd = clean_intermediates ? 'rm -fr ./*-busco/*/auto_lineage ./*-busco/*/**/{miniprot,hmmer,.bbtools}_output' : '' - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) # Check for container variable initialisation script and source it. if [ -f "/usr/local/env-activate.sh" ]; then @@ -77,7 +81,7 @@ process BUSCO_BUSCO { cd .. busco \\ - --cpu $task.cpus \\ + --cpu \${task_cpus} \\ --in "\$INPUT_SEQS" \\ --out ${prefix}-busco \\ --mode $mode \\ diff --git a/modules/nf-core/fastp/main.nf b/modules/nf-core/fastp/main.nf index e1b9f565..cb94900f 100644 --- a/modules/nf-core/fastp/main.nf +++ b/modules/nf-core/fastp/main.nf @@ -35,14 +35,18 @@ process FASTP { def out_fq2 = discard_trimmed_pass ?: "--out2 ${prefix}_2.fastp.fastq.gz" // Added soft-links to original fastqs for consistent naming in MultiQC // Use single ended for interleaved. Add --interleaved_in in config. + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' if ( task.ext.args?.contains('--interleaved_in') ) { - """ + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz fastp \\ --stdout \\ --in1 ${prefix}.fastq.gz \\ - --thread $task.cpus \\ + --thread \${task_cpus} \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $adapter_list \\ @@ -58,12 +62,15 @@ process FASTP { """ } else if (meta.single_end) { """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + [ ! -f ${prefix}.fastq.gz ] && ln -sf $reads ${prefix}.fastq.gz fastp \\ --in1 ${prefix}.fastq.gz \\ $out_fq1 \\ - --thread $task.cpus \\ + --thread \${task_cpus} \\ --json ${prefix}.fastp.json \\ --html ${prefix}.fastp.html \\ $adapter_list \\ @@ -79,6 +86,9 @@ process FASTP { } else { def merge_fastq = save_merged ? "-m --merged_out ${prefix}.merged.fastq.gz" : '' """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + [ ! -f ${prefix}_1.fastq.gz ] && ln -sf ${reads[0]} ${prefix}_1.fastq.gz [ ! -f ${prefix}_2.fastq.gz ] && ln -sf ${reads[1]} ${prefix}_2.fastq.gz fastp \\ @@ -91,7 +101,7 @@ process FASTP { $adapter_list \\ $fail_fastq \\ $merge_fastq \\ - --thread $task.cpus \\ + --thread \${task_cpus} \\ --detect_adapter_for_pe \\ $args \\ 2> >(tee ${prefix}.fastp.log >&2) diff --git a/modules/nf-core/fastqc/main.nf b/modules/nf-core/fastqc/main.nf index 033f4154..d6b9794d 100644 --- a/modules/nf-core/fastqc/main.nf +++ b/modules/nf-core/fastqc/main.nf @@ -33,14 +33,18 @@ process FASTQC { // FastQC memory value allowed range (100 - 10000) def fastqc_memory = memory_in_mb > 10000 ? 10000 : (memory_in_mb < 100 ? 100 : memory_in_mb) - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + printf "%s %s\\n" ${rename_to} | while read old_name new_name; do [ -f "\${new_name}" ] || ln -s \$old_name \$new_name done fastqc \\ ${args} \\ - --threads ${task.cpus} \\ + --threads \${task_cpus} \\ --memory ${fastqc_memory} \\ ${renamed_files} diff --git a/modules/nf-core/gfastats/main.nf b/modules/nf-core/gfastats/main.nf index 0fb31832..6e6f53d2 100644 --- a/modules/nf-core/gfastats/main.nf +++ b/modules/nf-core/gfastats/main.nf @@ -32,10 +32,14 @@ process GFASTATS { def ibed = include_bed ? "--include-bed $include_bed" : "" def ebed = exclude_bed ? "--exclude-bed $exclude_bed" : "" def sak = instructions ? "--swiss-army-knife $instructions" : "" - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + gfastats \\ $args \\ - --threads $task.cpus \\ + --threads \${task_cpus} \\ $agp \\ $ibed \\ $ebed \\ diff --git a/modules/nf-core/merqury/merqury/main.nf b/modules/nf-core/merqury/merqury/main.nf index ca8795a9..c6fb6916 100644 --- a/modules/nf-core/merqury/merqury/main.nf +++ b/modules/nf-core/merqury/merqury/main.nf @@ -37,7 +37,11 @@ process MERQURY_MERQURY { // def args = task.ext.args ?: '' prefix = task.ext.prefix ?: "${meta.id}" def VERSION = 1.3 // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions. - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + # Nextflow changes the container --entrypoint to /bin/bash (container default entrypoint: /usr/local/env-execute) # Check for container variable initialisation script and source it. if [ -f "/usr/local/env-activate.sh" ]; then @@ -46,7 +50,7 @@ process MERQURY_MERQURY { set -u fi # limit meryl to use the assigned number of cores. - export OMP_NUM_THREADS=$task.cpus + export OMP_NUM_THREADS=\${task_cpus} merqury.sh \\ $meryl_db \\ diff --git a/modules/nf-core/meryl/count/main.nf b/modules/nf-core/meryl/count/main.nf index c90079d6..ad3418e7 100644 --- a/modules/nf-core/meryl/count/main.nf +++ b/modules/nf-core/meryl/count/main.nf @@ -22,11 +22,15 @@ process MERYL_COUNT { def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def reduced_mem = task.memory.multiply(0.9).toGiga() - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + for READ in ${reads}; do meryl count \\ k=${kvalue} \\ - threads=${task.cpus} \\ + threads=\${task_cpus} \\ memory=${reduced_mem} \\ ${args} \\ \$READ \\ diff --git a/modules/nf-core/meryl/unionsum/main.nf b/modules/nf-core/meryl/unionsum/main.nf index bc2853b0..9a2d06f3 100644 --- a/modules/nf-core/meryl/unionsum/main.nf +++ b/modules/nf-core/meryl/unionsum/main.nf @@ -21,10 +21,14 @@ process MERYL_UNIONSUM { script: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + meryl union-sum \\ k=$kvalue \\ - threads=$task.cpus \\ + threads=\${task_cpus} \\ memory=${task.memory.toGiga()} \\ $args \\ output ${prefix}.unionsum.meryl \\ diff --git a/modules/nf-core/minimap2/align/main.nf b/modules/nf-core/minimap2/align/main.nf index d82dc14d..cd87b53d 100644 --- a/modules/nf-core/minimap2/align/main.nf +++ b/modules/nf-core/minimap2/align/main.nf @@ -32,19 +32,23 @@ process MINIMAP2_ALIGN { def args4 = task.ext.args4 ?: '' def prefix = task.ext.prefix ?: "${meta.id}" def bam_index = bam_index_extension ? "${prefix}.bam##idx##${prefix}.bam.${bam_index_extension} --write-index" : "${prefix}.bam" - def bam_output = bam_format ? "-a | samtools sort -@ ${task.cpus-1} -o ${bam_index} ${args2}" : "-o ${prefix}.paf" + def bam_output = bam_format ? "-a | samtools sort -@ \$((task_cpus-1)) -o ${bam_index} ${args2}" : "-o ${prefix}.paf" def cigar_paf = cigar_paf_format && !bam_format ? "-c" : '' def set_cigar_bam = cigar_bam && bam_format ? "-L" : '' def bam_input = "${reads.extension}".matches('sam|bam|cram') - def samtools_reset_fastq = bam_input ? "samtools reset --threads ${task.cpus-1} $args3 $reads | samtools fastq --threads ${task.cpus-1} $args4 |" : '' + def samtools_reset_fastq = bam_input ? "samtools reset --threads \$((task_cpus-1)) $args3 $reads | samtools fastq --threads \$((task_cpus-1)) $args4 |" : '' def query = bam_input ? "-" : reads def target = reference ?: (bam_input ? error("BAM input requires reference") : reads) - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + $samtools_reset_fastq \\ minimap2 \\ $args \\ - -t $task.cpus \\ + -t \${task_cpus} \\ $target \\ $query \\ $cigar_paf \\ diff --git a/modules/nf-core/orthofinder/main.nf b/modules/nf-core/orthofinder/main.nf index a47c4dea..e799b5d9 100644 --- a/modules/nf-core/orthofinder/main.nf +++ b/modules/nf-core/orthofinder/main.nf @@ -24,12 +24,16 @@ process ORTHOFINDER { prefix = task.ext.prefix ?: "${meta.id}" def include_command = prior_run ? "-b $prior_run" : '' - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + mkdir temp_pickle orthofinder \\ - -t $task.cpus \\ - -a $task.cpus \\ + -t \${task_cpus} \\ + -a \${task_cpus} \\ -p temp_pickle \\ -f input \\ -n $prefix \\ diff --git a/modules/nf-core/seqkit/rmdup/main.nf b/modules/nf-core/seqkit/rmdup/main.nf index f943a76f..393a45c0 100644 --- a/modules/nf-core/seqkit/rmdup/main.nf +++ b/modules/nf-core/seqkit/rmdup/main.nf @@ -29,10 +29,14 @@ process SEQKIT_RMDUP { extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension // SeqKit/rmdup takes care of compressing the output: https://bioinf.shenwei.me/seqkit/usage/#rmdup if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + seqkit \\ rmdup \\ - --threads $task.cpus \\ + --threads \${task_cpus} \\ $args \\ $fastx \\ -o ${prefix}.${extension} \\ diff --git a/modules/nf-core/seqkit/seq/main.nf b/modules/nf-core/seqkit/seq/main.nf index 9d76da21..d19eadbe 100644 --- a/modules/nf-core/seqkit/seq/main.nf +++ b/modules/nf-core/seqkit/seq/main.nf @@ -29,10 +29,14 @@ process SEQKIT_SEQ { extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension def call_gzip = extension.endsWith('.gz') ? "| gzip -c $args2" : '' if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + seqkit \\ seq \\ - --threads $task.cpus \\ + --threads \${task_cpus} \\ $args \\ $fastx \\ $call_gzip \\ diff --git a/modules/nf-core/seqkit/sort/main.nf b/modules/nf-core/seqkit/sort/main.nf index 19f5a1a4..e4be2546 100644 --- a/modules/nf-core/seqkit/sort/main.nf +++ b/modules/nf-core/seqkit/sort/main.nf @@ -29,10 +29,14 @@ process SEQKIT_SORT { extension = fastx.toString().endsWith('.gz') ? "${extension}.gz" : extension def call_gzip = extension.endsWith('.gz') ? "| gzip -c $args2 " : '' if("${prefix}.${extension}" == "$fastx") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!" - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + seqkit \\ sort \\ - --threads $task.cpus \\ + --threads \${task_cpus} \\ $args \\ $fastx \\ $call_gzip \\ diff --git a/modules/nf-core/sratools/fasterqdump/main.nf b/modules/nf-core/sratools/fasterqdump/main.nf index d5b65bab..11af28e3 100644 --- a/modules/nf-core/sratools/fasterqdump/main.nf +++ b/modules/nf-core/sratools/fasterqdump/main.nf @@ -33,12 +33,16 @@ process SRATOOLS_FASTERQDUMP { } else if (certificate.toString().endsWith('.ngc')) { key_file += " --ngc ${certificate}" } - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + export NCBI_SETTINGS="\$PWD/${ncbi_settings}" fasterq-dump \\ $args \\ - --threads $task.cpus \\ + --threads \${task_cpus} \\ --outfile $outfile \\ ${key_file} \\ ${sra} @@ -48,7 +52,7 @@ process SRATOOLS_FASTERQDUMP { pigz \\ $args2 \\ --no-name \\ - --processes $task.cpus \\ + --processes \${task_cpus} \\ *.fastq cat <<-END_VERSIONS > versions.yml @@ -73,7 +77,11 @@ process SRATOOLS_FASTERQDUMP { key_file += " --ngc ${certificate}" } def touch_outfiles = meta.single_end ? "${prefix}.fastq" : "${prefix}_1.fastq ${prefix}_2.fastq" - """ + def use_all_cpus = task.ext.use_all_cpus ? 'yes' : 'no' + """ + n_proc=\$(nproc 2>/dev/null || < /proc/cpuinfo grep '^process' -c) + task_cpus=\$([ "$use_all_cpus" = "yes" ] && echo "\$n_proc" || echo "$task.cpus") + touch $touch_outfiles export NCBI_SETTINGS="\$PWD/${ncbi_settings}" @@ -81,7 +89,7 @@ process SRATOOLS_FASTERQDUMP { echo \\ "fasterq-dump \\ $args \\ - --threads $task.cpus \\ + --threads \${task_cpus} \\ --outfile $outfile \\ ${key_file} \\ ${sra}" @@ -91,7 +99,7 @@ process SRATOOLS_FASTERQDUMP { pigz \\ $args2 \\ --no-name \\ - --processes $task.cpus \\ + --processes \${task_cpus} \\ *.fastq cat <<-END_VERSIONS > versions.yml diff --git a/nextflow.config b/nextflow.config index b2fc814c..454d34c7 100644 --- a/nextflow.config +++ b/nextflow.config @@ -91,6 +91,7 @@ params { help_full = false show_hidden = false trace_report_suffix = new java.util.Date().format( 'yyyy-MM-dd_HH-mm-ss')// Config options + use_all_cpus = false // Config options config_profile_name = null diff --git a/nextflow_schema.json b/nextflow_schema.json index eaaa6d1c..5a1a99c4 100644 --- a/nextflow_schema.json +++ b/nextflow_schema.json @@ -460,6 +460,11 @@ "fa_icon": "far calendar", "description": "Suffix to add to the trace report filename. Default is the date and time in the format yyyy-MM-dd_HH-mm-ss.", "hidden": true + }, + "use_all_cpus": { + "type": "boolean", + "fa_icon": "fas fa-microchip", + "description": "Use all the available CPUs for each task" } } } diff --git a/subworkflows/local/utils_nfcore_assemblyqc_pipeline/main.nf b/subworkflows/local/utils_nfcore_assemblyqc_pipeline/main.nf index 3ef50014..01715cda 100644 --- a/subworkflows/local/utils_nfcore_assemblyqc_pipeline/main.nf +++ b/subworkflows/local/utils_nfcore_assemblyqc_pipeline/main.nf @@ -171,7 +171,7 @@ workflow PIPELINE_INITIALISATION { maternal_reads = ch_maternal_reads paternal_reads = ch_paternal_reads params_as_json = ch_params_as_json - summary_params_as_json = ch_summary_params_as_json | view + summary_params_as_json = ch_summary_params_as_json versions = ch_versions }