Skip to content

Commit

Permalink
formatting
Browse files Browse the repository at this point in the history
  • Loading branch information
SilasK committed May 29, 2023
1 parent 0a14794 commit 43f258d
Show file tree
Hide file tree
Showing 8 changed files with 35 additions and 44 deletions.
3 changes: 1 addition & 2 deletions workflow/Snakefile
Original file line number Diff line number Diff line change
Expand Up @@ -407,7 +407,7 @@ for r in workflow.rules:
if "mem" in r.resources:
r.resources["mem_mb"] = r.resources["mem"] * 1000
else:
r.resources["mem_mb"] = config["mem"]*1000
r.resources["mem_mb"] = config["mem"] * 1000

# snakemake has a new name for the memory resource (mem_mib)
if not "mem_mib" in r.resources:
Expand All @@ -421,6 +421,5 @@ for r in workflow.rules:
else:
r.resources["time_min"] = config["runtime"]["default"] * 60


if not "runtime" in r.resources:
r.resources["runtime"] = r.resources["time_min"]
4 changes: 2 additions & 2 deletions workflow/rules/assemble.smk
Original file line number Diff line number Diff line change
Expand Up @@ -558,7 +558,7 @@ if config["filter_contigs"]:
covstats="{sample}/assembly/contig_stats/prefilter_coverage_stats.txt",
params:
pileup_secondary="t",
minmapq=config["minimum_map_quality"]
minmapq=config["minimum_map_quality"],
log:
"{sample}/logs/assembly/post_process/pilup_prefilter_contigs.log",
conda:
Expand Down Expand Up @@ -677,7 +677,7 @@ rule pileup_contigs_sample:
if config.get("count_multi_mapped_reads", CONTIG_COUNT_MULTI_MAPPED_READS)
else "f"
),
minmapq=config["minimum_map_quality"]
minmapq=config["minimum_map_quality"],
benchmark:
"logs/benchmarks/assembly/calculate_coverage/pileup/{sample}.txt"
log:
Expand Down
20 changes: 11 additions & 9 deletions workflow/rules/bin_quality.smk
Original file line number Diff line number Diff line change
Expand Up @@ -305,26 +305,28 @@ rule all_contigs2bins:

def quality_filter_bins_input(wildcards):
"Specify input files for quality_filter_bins rule"


input_files= dict(paths=rules.get_bin_filenames.output.filenames,
input_files = dict(
paths=rules.get_bin_filenames.output.filenames,
stats="Binning/{binner}/genome_stats.tsv",
quality="Binning/{binner}/checkm2_quality_report.tsv",
gunc = "Binning/{binner}/gunc_report.tsv"
)
gunc="Binning/{binner}/gunc_report.tsv",
)

# check if gunc is in config file
filter_chimieric_bins= config["filter_chimieric_bins"]
assert type(filter_chimieric_bins)==bool, f"filter_chimieric_bins in config file must be a boolean, got {filter_chimieric_bins}"
filter_chimieric_bins = config["filter_chimieric_bins"]
assert (
type(filter_chimieric_bins) == bool
), f"filter_chimieric_bins in config file must be a boolean, got {filter_chimieric_bins}"
if not filter_chimieric_bins:
del input_files["gunc"]

# replace wildcards
for key in input_files:
input_files[key]=input_files[key].format(binner=wildcards.binner)
input_files[key] = input_files[key].format(binner=wildcards.binner)

return input_files


rule quality_filter_bins:
input:
Expand Down
8 changes: 2 additions & 6 deletions workflow/rules/download.smk
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@ import hashlib
import os





# these values are included in the snakefile
DBDIR = os.path.realpath(config["database_dir"])
CHECKMDIR = os.path.join(DBDIR, "checkm")
Expand All @@ -22,7 +19,8 @@ CONDAENV = "../envs"

GTDB_VERSION = "V08_R214"
GTDB_DATA_URL = "https://data.gtdb.ecogenomic.org/releases/release214/214.0/auxillary_files/gtdbtk_r214_data.tar.gz"
GTDBTK_DATA_PATH = os.path.join(DBDIR, "GTDB_"+GTDB_VERSION)
GTDBTK_DATA_PATH = os.path.join(DBDIR, "GTDB_" + GTDB_VERSION)


def md5(fname):
# https://stackoverflow.com/questions/3431825/generating-an-md5-checksum-of-a-file
Expand All @@ -35,8 +33,6 @@ def md5(fname):
return hash_md5.hexdigest()




# note: saving OG_fasta.tar.gz in order not to create a secondary "success" file
FILES = {
"adapters.fa": "ae839dc79cfb855a1b750a0d593fe01e",
Expand Down
10 changes: 3 additions & 7 deletions workflow/rules/dram.smk
Original file line number Diff line number Diff line change
Expand Up @@ -36,16 +36,12 @@ rule dram_download:
" DRAM-setup.py export_config --output_file {output.config}"






rule DRAM_annotate:
input:
fasta="genomes/genomes/{genome}.fasta",
#checkm= "genomes/checkm/completeness.tsv",
#gtdb_dir= "genomes/taxonomy/gtdb/classify",
config= get_dram_config,
config=get_dram_config,
output:
outdir=directory("genomes/annotations/dram/intermediate_files/{genome}"),
threads: config["simplejob_threads"]
Expand Down Expand Up @@ -131,8 +127,8 @@ rule DRAM_destill:

rule get_all_modules:
input:
annotations = "genomes/annotations/dram/annotations.tsv",
config = get_dram_config,
annotations="genomes/annotations/dram/annotations.tsv",
config=get_dram_config,
output:
"genomes/annotations/dram/kegg_modules.tsv",
threads: 1
Expand Down
22 changes: 10 additions & 12 deletions workflow/rules/genecatalog.smk
Original file line number Diff line number Diff line change
Expand Up @@ -242,21 +242,20 @@ rule concat_all_reads:
input:
lambda wc: get_quality_controlled_reads(wc, include_se=True),
output:
temp("Intermediate/genecatalog/alignments/{sample}.fastq.gz")
temp("Intermediate/genecatalog/alignments/{sample}.fastq.gz"),
log:
"logs/Genecatalog/alignment/concat_reads/{sample}.log"
threads:
1
"logs/Genecatalog/alignment/concat_reads/{sample}.log",
threads: 1
resources:
mem_mb=300
mem_mb=300,
shell:
"cat {input} > {output} 2> {log}"


rule align_reads_to_Genecatalog:
input:
target=rules.index_genecatalog.output,
query= rules.concat_all_reads.output[0]
query=rules.concat_all_reads.output[0],
output:
temp("Genecatalog/alignments/{sample}.bam"),
log:
Expand All @@ -271,15 +270,14 @@ rule align_reads_to_Genecatalog:
"v1.19.0/bio/minimap2/aligner"



rule pileup_Genecatalog:
input:
bam=rules.align_reads_to_Genecatalog.output,
output:
covstats=temp("Genecatalog/alignments/{sample}_coverage.tsv"),
rpkm=temp("Genecatalog/alignments/{sample}_rpkm.tsv"),
params:
minmapq=config["minimum_map_quality"]
minmapq=config["minimum_map_quality"],
log:
"logs/Genecatalog/alignment/{sample}_pileup.log",
conda:
Expand Down Expand Up @@ -308,7 +306,7 @@ rule gene_pileup_as_parquet:
threads: 1
resources:
mem=config["simplejob_mem"],
time_min=config["runtime"]["simplejob"]*60,
time_min=config["runtime"]["simplejob"] * 60,
log:
"logs/Genecatalog/counts/parse_gene_coverages/{sample}.log",
run:
Expand Down Expand Up @@ -533,8 +531,8 @@ rule combine_egg_nogg_annotations:
del Tables

combined.columns = EGGNOG_HEADER
combined['Seed_evalue'] = combined['Seed_evalue'].astype('bytes')
combined['Seed_Score'] = combined['Seed_Score'].astype('bytes')
combined["Seed_evalue"] = combined["Seed_evalue"].astype("bytes")
combined["Seed_Score"] = combined["Seed_Score"].astype("bytes")

# combined.sort_values("Gene",inplace=True)

Expand Down
2 changes: 1 addition & 1 deletion workflow/rules/genomes.smk
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ rule pileup_MAGs:
bincov=temp("genomes/alignments/coverage_binned/{sample}.tsv.gz"),
orf="genomes/alignments/orf_coverage/{sample}.tsv.gz",
params:
minmapq=config["minimum_map_quality"]
minmapq=config["minimum_map_quality"],
log:
"logs/genomes/alignments/pilup_{sample}.log",
conda:
Expand Down
10 changes: 5 additions & 5 deletions workflow/rules/qc.smk
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ rule initialize_qc:
threads: config.get("simplejob_threads", 1)
resources:
mem=config["simplejob_mem"],
java_mem = int( config["simplejob_mem"] * JAVA_MEM_FRACTION)
java_mem=int(config["simplejob_mem"] * JAVA_MEM_FRACTION),
shell:
"reformat.sh "
" {params.inputs} "
Expand Down Expand Up @@ -170,7 +170,7 @@ if not SKIP_QC:
threads: config.get("threads", 1)
resources:
mem=config["mem"],
java_mem = int(config["mem"]*JAVA_MEM_FRACTION)
java_mem=int(config["mem"] * JAVA_MEM_FRACTION),
shell:
"clumpify.sh "
" {params.inputs} "
Expand Down Expand Up @@ -276,7 +276,7 @@ if not SKIP_QC:
threads: config.get("threads", 1)
resources:
mem=config["mem"],
java_mem = int(config["mem"]*JAVA_MEM_FRACTION)
java_mem=int(config["mem"] * JAVA_MEM_FRACTION),
shell:
" bbduk.sh {params.inputs} "
" {params.ref} "
Expand Down Expand Up @@ -315,7 +315,7 @@ if not SKIP_QC:
threads: config.get("threads", 1)
resources:
mem=config["mem"],
java_mem = int(config["mem"]*JAVA_MEM_FRACTION)
java_mem=int(config["mem"] * JAVA_MEM_FRACTION),
log:
"logs/QC/build_decontamination_db.log",
conda:
Expand Down Expand Up @@ -463,7 +463,7 @@ if PAIRED_END:
threads: config.get("simplejob_threads", 1)
resources:
mem=config["mem"],
java_mem = int(config["mem"]*JAVA_MEM_FRACTION)
java_mem=int(config["mem"] * JAVA_MEM_FRACTION),
conda:
"../envs/required_packages.yaml"
log:
Expand Down

0 comments on commit 43f258d

Please sign in to comment.