From 43f258d12c0d466548f219adceae3a5fb617815a Mon Sep 17 00:00:00 2001 From: silask Date: Mon, 29 May 2023 11:40:58 +0200 Subject: [PATCH] formating --- workflow/Snakefile | 3 +-- workflow/rules/assemble.smk | 4 ++-- workflow/rules/bin_quality.smk | 20 +++++++++++--------- workflow/rules/download.smk | 8 ++------ workflow/rules/dram.smk | 10 +++------- workflow/rules/genecatalog.smk | 22 ++++++++++------------ workflow/rules/genomes.smk | 2 +- workflow/rules/qc.smk | 10 +++++----- 8 files changed, 35 insertions(+), 44 deletions(-) diff --git a/workflow/Snakefile b/workflow/Snakefile index 63124fbb..22619007 100644 --- a/workflow/Snakefile +++ b/workflow/Snakefile @@ -407,7 +407,7 @@ for r in workflow.rules: if "mem" in r.resources: r.resources["mem_mb"] = r.resources["mem"] * 1000 else: - r.resources["mem_mb"] = config["mem"]*1000 + r.resources["mem_mb"] = config["mem"] * 1000 # snakemake has a new name for that if not "mem_mib" in r.resources: @@ -421,6 +421,5 @@ for r in workflow.rules: else: r.resources["time_min"] = config["runtime"]["default"] * 60 - if not "runtime" in r.resources: r.resources["runtime"] = r.resources["time_min"] diff --git a/workflow/rules/assemble.smk b/workflow/rules/assemble.smk index 1c89e6fd..84698c8a 100644 --- a/workflow/rules/assemble.smk +++ b/workflow/rules/assemble.smk @@ -558,7 +558,7 @@ if config["filter_contigs"]: covstats="{sample}/assembly/contig_stats/prefilter_coverage_stats.txt", params: pileup_secondary="t", - minmapq=config["minimum_map_quality"] + minmapq=config["minimum_map_quality"], log: "{sample}/logs/assembly/post_process/pilup_prefilter_contigs.log", conda: @@ -677,7 +677,7 @@ rule pileup_contigs_sample: if config.get("count_multi_mapped_reads", CONTIG_COUNT_MULTI_MAPPED_READS) else "f" ), - minmapq=config["minimum_map_quality"] + minmapq=config["minimum_map_quality"], benchmark: "logs/benchmarks/assembly/calculate_coverage/pileup/{sample}.txt" log: diff --git a/workflow/rules/bin_quality.smk b/workflow/rules/bin_quality.smk index eca178ff..80616c55 100644 --- a/workflow/rules/bin_quality.smk +++ b/workflow/rules/bin_quality.smk @@ -305,26 +305,28 @@ rule all_contigs2bins: def quality_filter_bins_input(wildcards): "Specify input files for quality_filter_bins rule" - - input_files= dict(paths=rules.get_bin_filenames.output.filenames, + input_files = dict( + paths=rules.get_bin_filenames.output.filenames, stats="Binning/{binner}/genome_stats.tsv", quality="Binning/{binner}/checkm2_quality_report.tsv", - gunc = "Binning/{binner}/gunc_report.tsv" - ) + gunc="Binning/{binner}/gunc_report.tsv", + ) # check if gunc is in config file - filter_chimieric_bins= config["filter_chimieric_bins"] - assert type(filter_chimieric_bins)==bool, f"filter_chimieric_bins in config file must be a boolean, got {filter_chimieric_bins}" + filter_chimieric_bins = config["filter_chimieric_bins"] + assert ( + type(filter_chimieric_bins) == bool + ), f"filter_chimieric_bins in config file must be a boolean, got {filter_chimieric_bins}" if not filter_chimieric_bins: del input_files["gunc"] - + # replace wildcards for key in input_files: - input_files[key]=input_files[key].format(binner=wildcards.binner) + input_files[key] = input_files[key].format(binner=wildcards.binner) return input_files - + rule quality_filter_bins: input: diff --git a/workflow/rules/download.smk b/workflow/rules/download.smk index 2ff6e473..cc67a07a 100644 --- a/workflow/rules/download.smk +++ b/workflow/rules/download.smk @@ -2,9 +2,6 @@ import hashlib import os - - - # this values are incuded in the snakefile DBDIR = os.path.realpath(config["database_dir"]) CHECKMDIR = os.path.join(DBDIR, "checkm") @@ -22,7 +19,8 @@ CONDAENV = "../envs" GTDB_VERSION = "V08_R214" GTDB_DATA_URL = "https://data.gtdb.ecogenomic.org/releases/release214/214.0/auxillary_files/gtdbtk_r214_data.tar.gz" -GTDBTK_DATA_PATH = os.path.join(DBDIR, "GTDB_"+GTDB_VERSION) +GTDBTK_DATA_PATH = os.path.join(DBDIR, "GTDB_" + GTDB_VERSION) + def md5(fname): # https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file @@ -35,8 +33,6 @@ def md5(fname): return hash_md5.hexdigest() - - # note: saving OG_fasta.tar.gz in order to not create secondary "success" file FILES = { "adapters.fa": "ae839dc79cfb855a1b750a0d593fe01e", diff --git a/workflow/rules/dram.smk b/workflow/rules/dram.smk index c89b80ff..84048f31 100644 --- a/workflow/rules/dram.smk +++ b/workflow/rules/dram.smk @@ -36,16 +36,12 @@ rule dram_download: " DRAM-setup.py export_config --output_file {output.config}" - - - - rule DRAM_annotate: input: fasta="genomes/genomes/{genome}.fasta", #checkm= "genomes/checkm/completeness.tsv", #gtdb_dir= "genomes/taxonomy/gtdb/classify", - config= get_dram_config, + config=get_dram_config, output: outdir=directory("genomes/annotations/dram/intermediate_files/{genome}"), threads: config["simplejob_threads"] @@ -131,8 +127,8 @@ rule DRAM_destill: rule get_all_modules: input: - annotations = "genomes/annotations/dram/annotations.tsv", - config = get_dram_config, + annotations="genomes/annotations/dram/annotations.tsv", + config=get_dram_config, output: "genomes/annotations/dram/kegg_modules.tsv", threads: 1 diff --git a/workflow/rules/genecatalog.smk b/workflow/rules/genecatalog.smk index 4e32b433..313cd485 100644 --- a/workflow/rules/genecatalog.smk +++ b/workflow/rules/genecatalog.smk @@ -242,21 +242,20 @@ rule concat_all_reads: input: lambda wc: get_quality_controlled_reads(wc, include_se=True), output: - temp("Intermediate/genecatalog/alignments/{sample}.fastq.gz") + temp("Intermediate/genecatalog/alignments/{sample}.fastq.gz"), log: - "logs/Genecatalog/alignment/concat_reads/{sample}.log" - threads: - 1 + "logs/Genecatalog/alignment/concat_reads/{sample}.log", + threads: 1 resources: - mem_mb=300 + mem_mb=300, shell: "cat {input} > {output} 2> {log}" - + rule align_reads_to_Genecatalog: input: target=rules.index_genecatalog.output, - query= rules.concat_all_reads.output[0] + query=rules.concat_all_reads.output[0], output: temp("Genecatalog/alignments/{sample}.bam"), log: @@ -271,7 +270,6 @@ rule align_reads_to_Genecatalog: "v1.19.0/bio/minimap2/aligner" - rule pileup_Genecatalog: input: bam=rules.align_reads_to_Genecatalog.output, @@ -279,7 +277,7 @@ rule pileup_Genecatalog: covstats=temp("Genecatalog/alignments/{sample}_coverage.tsv"), rpkm=temp("Genecatalog/alignments/{sample}_rpkm.tsv"), params: - minmapq=config["minimum_map_quality"] + minmapq=config["minimum_map_quality"], log: "logs/Genecatalog/alignment/{sample}_pileup.log", conda: @@ -308,7 +306,7 @@ rule gene_pileup_as_parquet: threads: 1 resources: mem=config["simplejob_mem"], - time_min=config["runtime"]["simplejob"]*60, + time_min=config["runtime"]["simplejob"] * 60, log: "logs/Genecatalog/counts/parse_gene_coverages/{sample}.log", run: @@ -533,8 +531,8 @@ rule combine_egg_nogg_annotations: del Tables combined.columns = EGGNOG_HEADER - combined['Seed_evalue'] = combined['Seed_evalue'].astype('bytes') - combined['Seed_Score'] = combined['Seed_Score'].astype('bytes') + combined["Seed_evalue"] = combined["Seed_evalue"].astype("bytes") + combined["Seed_Score"] = combined["Seed_Score"].astype("bytes") # combined.sort_values("Gene",inplace=True) diff --git a/workflow/rules/genomes.smk b/workflow/rules/genomes.smk index 59bce769..30e970f9 100644 --- a/workflow/rules/genomes.smk +++ b/workflow/rules/genomes.smk @@ -482,7 +482,7 @@ rule pileup_MAGs: bincov=temp("genomes/alignments/coverage_binned/{sample}.tsv.gz"), orf="genomes/alignments/orf_coverage/{sample}.tsv.gz", params: - minmapq=config["minimum_map_quality"] + minmapq=config["minimum_map_quality"], log: "logs/genomes/alignments/pilup_{sample}.log", conda: diff --git a/workflow/rules/qc.smk b/workflow/rules/qc.smk index a564877c..ce742491 100644 --- a/workflow/rules/qc.smk +++ b/workflow/rules/qc.smk @@ -88,7 +88,7 @@ rule initialize_qc: threads: config.get("simplejob_threads", 1) resources: mem=config["simplejob_mem"], - java_mem = int( config["simplejob_mem"] * JAVA_MEM_FRACTION) + java_mem=int(config["simplejob_mem"] * JAVA_MEM_FRACTION), shell: "reformat.sh " " {params.inputs} " @@ -170,7 +170,7 @@ if not SKIP_QC: threads: config.get("threads", 1) resources: mem=config["mem"], - java_mem = int(config["mem"]*JAVA_MEM_FRACTION) + java_mem=int(config["mem"] * JAVA_MEM_FRACTION), shell: "clumpify.sh " " {params.inputs} " @@ -276,7 +276,7 @@ if not SKIP_QC: threads: config.get("threads", 1) resources: mem=config["mem"], - java_mem = int(config["mem"]*JAVA_MEM_FRACTION) + java_mem=int(config["mem"] * JAVA_MEM_FRACTION), shell: " bbduk.sh {params.inputs} " " {params.ref} " @@ -315,7 +315,7 @@ if not SKIP_QC: threads: config.get("threads", 1) resources: mem=config["mem"], - java_mem = int(config["mem"]*JAVA_MEM_FRACTION) + java_mem=int(config["mem"] * JAVA_MEM_FRACTION), log: "logs/QC/build_decontamination_db.log", conda: @@ -463,7 +463,7 @@ if PAIRED_END: threads: config.get("simplejob_threads", 1) resources: mem=config["mem"], - java_mem = int(config["mem"]*JAVA_MEM_FRACTION) + java_mem=int(config["mem"] * JAVA_MEM_FRACTION), conda: "../envs/required_packages.yaml" log: