Skip to content

Commit

Permalink
workflow v1.4.1
Browse files Browse the repository at this point in the history
+ pca.R
+ log (generated files)
+ plot (generated files)
  • Loading branch information
mv-lab committed Oct 1, 2019
1 parent 120a99a commit 2e3f479
Show file tree
Hide file tree
Showing 11 changed files with 66 additions and 129 deletions.
9 changes: 2 additions & 7 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,19 +5,14 @@
# Ignore notes, txt etc
command

# Ignore mockdata (backup) and others...
mockdata/
oldtoy/
backup
toydata/
igv/

# PyCache
*/__pycache__

# generated data files
data/*
!data/readme
log/
plots/

# rawdata (size problem)
#rawdata/reference/*
Expand Down
15 changes: 8 additions & 7 deletions Snakefile
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@

#include: "rules/newcommon.smk"
include: "rules/init.rules.smk"

##### Modules #####

include: "rules/denovo.rules.smk"
include: "rules/readqc.rules.smk"
include: "rules/align.rules.smk"
include: "rules/varcall.rules.smk"
include: "rules/stats.rules.smk"
include: "rules/snpeff.rules.smk"
#include: "rules/align.rules.smk"
#include: "rules/varcall.rules.smk"
#include: "rules/stats.rules.smk"
#include: "rules/snpeff.rules.smk"

##### Target rules #####

Expand All @@ -17,6 +18,6 @@ rule all:
rules.init.input,
rules.denovo.input,
rules.readqc.input,
rules.align.input,
rules.varcall.input,
rules.stats.input,
#rules.align.input,
#rules.varcall.input,
#rules.stats.input,
3 changes: 2 additions & 1 deletion configs/toolconfig.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ init:
refs:
Sorghum: genomes_and_annotations/genomes/Sorghum/genome.fa


mapping:
aligners:
- bwa
Expand All @@ -37,7 +38,7 @@ denovodist:
mash_sketchsize: 1e5
sourmash_sketchsize: 1e5
ksize: 21
kwip_sets: []
kwip_sets: []
mash_sets: # we can subset from everything for (sour)mash
- all_samples
sourmash_sets: []
Expand Down
84 changes: 1 addition & 83 deletions data/readme
Original file line number Diff line number Diff line change
Expand Up @@ -206,88 +206,6 @@ data/
│   └── mash
│   └── k21-s1e5
│   └── all_samples.dist
├── log
│   ├── abra
│   │   ├── bwa~Sorghum~all_samples.benchmark
│   │   └── bwa~Sorghum~all_samples.log
│   ├── adapterremoval
│   │   ├── run_1
│   │   │   ├── con.log
│   │   │   ├── D2.log
│   │   │   └── D3.log
│   │   └── run_2
│   │   └── con.log
│   ├── bamindex
│   │   └── data
│   │   └── abra
│   ├── bamstats
│   │   └── mergeallbamstats.log
│   ├── bamstats_sample
│   │   ├── bwa~Sorghum~con.tsv
│   │   ├── bwa~Sorghum~D2.tsv
│   │   └── bwa~Sorghum~D3.tsv
│   ├── bcf2vcf
│   │   └── data
│   │   └── variants
│   │   └── final
│   │   ├── freebayes~bwa~Sorghum~all_samples~filtered-default.log
│   │   └── freebayes~bwa~Sorghum~all_samples~filtered-strict.log
│   ├── bcffilter
│   │   └── freebayes~bwa~Sorghum~all_samples
│   │   ├── default
│   │   │   ├── Chr02_region1M_2M:000000001-001000000.log
│   │   │   └── Chr04_region1M_2M:000000001-001000000.log
│   │   └── strict
│   │   ├── Chr02_region1M_2M:000000001-001000000.log
│   │   └── Chr04_region1M_2M:000000001-001000000.log
│   ├── bwa
│   │   └── Sorghum
│   │   ├── run_1
│   │   │   ├── con.log
│   │   │   ├── D2.log
│   │   │   └── D3.log
│   │   └── run_2
│   │   └── con.log
│   ├── freebayes
│   │   └── bwa~Sorghum~all_samples
│   │   ├── Chr02_region1M_2M:000000001-001000000.benchmark
│   │   ├── Chr02_region1M_2M:000000001-001000000.log
│   │   ├── Chr04_region1M_2M:000000001-001000000.benchmark
│   │   └── Chr04_region1M_2M:000000001-001000000.log
│   ├── markdup
│   │   └── bwa
│   │   └── Sorghum
│   │   ├── run_1
│   │   │   ├── con.log
│   │   │   ├── D2.log
│   │   │   └── D3.log
│   │   └── run_2
│   │   └── con.log
│   ├── mash
│   │   ├── dist
│   │   │   └── k21-s1e5-all_samples.log
│   │   └── sketch
│   │   └── k21-s1e5-all_samples.log
│   ├── mergebcf
│   │   ├── freebayes~bwa~Sorghum~all_samples_filtered~default.log
│   │   └── freebayes~bwa~Sorghum~all_samples_filtered~strict.log
│   ├── mergesamplebam
│   │   └── bwa
│   │   └── Sorghum
│   │   ├── con.log
│   │   ├── D2.log
│   │   └── D3.log
│   ├── mergesetbam
│   │   └── bwa
│   │   └── Sorghum
│   │   └── all_samples.log
│   ├── qualimap_sample
│   │   ├── bwa~Sorghum~con.log
│   │   ├── bwa~Sorghum~D2.log
│   │   └── bwa~Sorghum~D3.log
│   ├── readstats
│   │   └── seqhax-stats-librun.log
│   └── samplefastq
├── plots
│   └── quals.svg
├── readme
Expand Down Expand Up @@ -340,4 +258,4 @@ data/
├── Chr02_region1M_2M:000000001-001000000.bcf
└── Chr04_region1M_2M:000000001-001000000.bcf

98 directories, 242 files
56 directories, 202 files
1 change: 1 addition & 0 deletions envs/condaenv.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@ dependencies:
- matplotlib =2.2
- pandas =0.23
- seaborn =0.8
- r-rgl
18 changes: 9 additions & 9 deletions rules/align.rules.smk
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ rule align_stats:
output:
expand("data/alnstats/everything_{type}.csv",
type=["SN", "IS", "COV"])
log: "data/log/bamstats/mergeallbamstats.log"
log: "log/align/bamstats/mergeallbamstats.log"
shell:
"python3 scripts/tidybamstat.py"
" -o data/alnstats/everything" # prefix
Expand Down Expand Up @@ -61,7 +61,7 @@ rule ngmap:
output:
bam=temp("data/alignments/byrun.raw/ngm/{ref}/{run}/{lib}.bam"),
log:
"data/log/ngm/{ref}/{run}/{lib}.log"
"log/align/ngm/{ref}/{run}/{lib}.log"
threads:
8
params:
Expand All @@ -86,7 +86,7 @@ rule bwamem:
ref=lambda wc: config['refs'][wc.ref],
output:
bam=temp("data/alignments/byrun.raw/bwa/{ref}/{run}/{lib}.bam"),
log: "data/log/bwa/{ref}/{run}/{lib}.log"
log: "log/align/bwa/{ref}/{run}/{lib}.log"
threads:
8
params:
Expand All @@ -109,7 +109,7 @@ rule bam_markdups_sort:
output:
bam=temp("data/alignments/byrun/{aligner}/{ref}/{run}/{lib}.bam"),
threads: 4
log: "data/log/markdup/{aligner}/{ref}/{run}/{lib}.log"
log: "log/align/markdup/{aligner}/{ref}/{run}/{lib}.log"
shell:
"( samtools fixmate "
" -m"
Expand Down Expand Up @@ -141,7 +141,7 @@ rule mergebam_samp:
output:
bam="data/alignments/samples/{aligner}/{ref}/{sample}.bam",
log:
"data/log/mergesamplebam/{aligner}/{ref}/{sample}.log"
"log/align/mergesamplebam/{aligner}/{ref}/{sample}.log"
threads: 8
priority: 1 # so the temps get cleaned sooner
shell:
Expand All @@ -159,7 +159,7 @@ rule qualimap_samp:
output:
"data/alignments/qualimap/samples/{aligner}~{ref}~{sample}/",
log:
"data/log/qualimap_sample/{aligner}~{ref}~{sample}.log"
"log/align/qualimap_sample/{aligner}~{ref}~{sample}.log"
threads: 4
shell:
"( unset DISPLAY; qualimap bamqc"
Expand Down Expand Up @@ -195,7 +195,7 @@ rule mergebam_set:
bam="data/alignments/sets/{aligner}~{ref}~{sampleset}.bam",
bai="data/alignments/sets/{aligner}~{ref}~{sampleset}.bam.bai",
log:
"data/log/mergesetbam/{aligner}/{ref}/{sampleset}.log"
"log/align/mergesetbam/{aligner}/{ref}/{sampleset}.log"
threads: 4
shell:
"( samtools merge"
Expand All @@ -215,7 +215,7 @@ rule bamstat_samps:
output:
"data/alignments/bamstats/sample/{aligner}~{ref}~{sample}.tsv",
log:
"data/log/bamstats_sample/{aligner}~{ref}~{sample}.tsv"
"log/align/bamstats_sample/{aligner}~{ref}~{sample}.tsv"
shell:
"(samtools stats -i 5000 -x {input} >{output}) >{log}"

Expand All @@ -229,7 +229,7 @@ rule bamidx:
output:
"{path}.bam.bai"
log:
"data/log/bamindex/{path}.log"
"log/align/bamindex/{path}.log"
shell:
"samtools index {input}"

Expand Down
24 changes: 17 additions & 7 deletions rules/denovo.rules.smk
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,21 @@ rule mash:
sketchsize=config["denovodist"]["mash_sketchsize"],
set=config["denovodist"]["mash_sets"]),

# Rule `pca`: runs the R script ../scripts/pca.R and declares
# plots/denovo/mash/pca.pdf as its single output.
# NOTE(review): what the script draws is not visible here -- presumably a PCA
# of the mash distance output; confirm against scripts/pca.R.
rule pca:
input:
# Reuses the file list requested by the `mash` target rule.
rules.mash.input,
output:
"plots/denovo/mash/pca.pdf"
script:
# NOTE(review): Snakemake resolves script paths relative to the file that
# defines the rule, so from rules/ this should point at the top-level
# scripts/ directory -- confirm.
"../scripts/pca.R"


# Aggregate target for the de novo module: its input list is the union of the
# inputs of the `kwip`, `mash` and `sourmash` target rules plus the output of
# the `pca` rule. It defines no output or command of its own.
rule denovo:
input:
rules.kwip.input,
rules.mash.input,
rules.sourmash.input,
# The PCA plot is requested through pca's *output* (unlike the entries above,
# which forward other target rules' inputs), so the pca rule itself must run.
rules.pca.output,


##### Actual rules #####
Expand All @@ -42,7 +52,7 @@ rule mashsketch:
output:
temp("data/denovo/mash/k{ksize}-s{sketchsize}/{set}.msh"),
log:
"data/log/mash/sketch/k{ksize}-s{sketchsize}-{set}.log"
"log/denovo/mash/sketch/k{ksize}-s{sketchsize}-{set}.log"
threads: 27
shell:
" mash sketch"
Expand All @@ -60,7 +70,7 @@ rule mashdist:
output:
dist="data/denovo/mash/k{ksize}-s{sketchsize}/{set}.dist",
log:
"data/log/mash/dist/k{ksize}-s{sketchsize}-{set}.log"
"log/denovo/mash/dist/k{ksize}-s{sketchsize}-{set}.log"
threads: 27
shell:
"mash dist"
Expand All @@ -78,7 +88,7 @@ rule countsketch:
info="data/denovo/kwip/sketch/k{ksize}-s{sketchsize}/{sample}.ct.gz.info",
tsv="data/denovo/kwip/sketch/k{ksize}-s{sketchsize}/{sample}.ct.gz.info.tsv",
log:
"data/log/kwip/sketch/k{ksize}-s{sketchsize}-{sample}.log"
"log/denovo/kwip/sketch/k{ksize}-s{sketchsize}-{sample}.log"
threads:
3
shell:
Expand All @@ -103,7 +113,7 @@ rule kwipdist:
d="data/denovo/kwip/k{ksize}-s{sketchsize}/{set}.dist",
k="data/denovo/kwip/k{ksize}-s{sketchsize}/{set}.kern",
log:
"data/log/kwip/dist/k{ksize}-s{sketchsize}-{set}.log"
"log/denovo/kwip/dist/k{ksize}-s{sketchsize}-{set}.log"
threads:
4
shell:
Expand All @@ -125,7 +135,7 @@ rule unique_kmers:
params:
kmersize=config["denovodist"]["ksize"],
log:
"data/log/readstats/unique-kmers/{set}.log",
"log/denovo/readstats/unique-kmers/{set}.log",
shell:
"( kdm-unique-kmers.py"
" -t {threads}"
Expand All @@ -141,7 +151,7 @@ rule sourmash_sketch:
output:
temp("data/denovo/sourmash/sketch/k{ksize}-s{sketchsize}/{sample}.smh"),
log:
"data/log/sourmash/sketch/k{ksize}-s{sketchsize}-{sample}.log"
"log/denovo/sourmash/sketch/k{ksize}-s{sketchsize}-{sample}.log"
shell:
"( sourmash compute"
" --name '{wildcards.sample}'"
Expand All @@ -159,7 +169,7 @@ rule sourmash_dist:
output:
"data/denovo/sourmash/k{ksize}-s{sketchsize}/{set}.dist",
log:
"data/log/sourmash/dist/k{ksize}-s{sketchsize}-{set}.log"
"log/denovo/sourmash/dist/k{ksize}-s{sketchsize}-{set}.log"
threads: 1
shell:
"(sourmash compare -k {wildcards.ksize} -o {output} {input} ) >{log} 2>&1"
1 change: 0 additions & 1 deletion rules/init.rules.smk
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ rule prepare_ref:
shell:
"samtools faidx {input} 2> {log}"


rule contigs:
input:
"genomes_and_annotations/genomes/{ref}/genome.fa.fai",
Expand Down
10 changes: 5 additions & 5 deletions rules/readqc.rules.smk
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ rule qcreads:
output:
reads="data/reads/runs/{run}/{lib}.fastq.gz",
log:
log="data/log/adapterremoval/{run}/{lib}.log",
log="log/adapterremoval/{run}/{lib}.log",
settings="data/stats/adapterremoval/{run}/{lib}.txt",
threads:
7
Expand Down Expand Up @@ -78,7 +78,7 @@ rule qcreads_il:
output:
reads="data/reads/runs/{run}/{lib}.fastq.gz",
log:
log="data/log/adapterremoval/{run}/{lib}.log",
log="log/adapterremoval/{run}/{lib}.log",
settings="data/stats/adapterremoval/{run}/{lib}.txt",
threads:
7
Expand Down Expand Up @@ -113,7 +113,7 @@ rule samplefastq:
input:
lambda wc: ["data/reads/runs/{run}/{lib}.fastq.gz".format(run=r, lib=l) for r, l in SAMP2RUNLIB[wc.sample]],
output: "data/reads/samples/{sample}.fastq.gz"
log: "data/log/samplefastq/{sample}.log"
log: "log/samplefastq/{sample}.log"
threads: 1
shell:
"cat {input} > {output}"
Expand All @@ -127,7 +127,7 @@ rule read_count_librun:
threads:
28
log:
"data/log/readstats/seqhax-stats-librun.log",
"log/readstats/seqhax-stats-librun.log",
shell:
"( seqhax stats"
" -t {threads}"
Expand All @@ -144,7 +144,7 @@ rule read_count_sample:
threads:
27
log:
"data/log/readstats/seqhax-stats-sample.log",
"log/readstats/seqhax-stats-sample.log",
shell:
"( seqhax stats"
" -t {threads}"
Expand Down
Loading

0 comments on commit 2e3f479

Please sign in to comment.