Skip to content

Commit

Permalink
Added separate options for reference and assembly SAM/BAM files; chan…
Browse files Browse the repository at this point in the history
…ged reads analysis in metaquast
  • Loading branch information
almiheenko committed Apr 4, 2017
1 parent 3bdbb27 commit 3c0a1ae
Show file tree
Hide file tree
Showing 8 changed files with 124 additions and 62 deletions.
33 changes: 2 additions & 31 deletions metaquast.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@

from quast_libs.metautils import Assembly, correct_meta_references, correct_assemblies, \
get_downloaded_refs_with_alignments, partition_contigs, calculate_ave_read_support
from quast_libs.options_parser import parse_options, remove_from_quast_py_args
from quast_libs.options_parser import parse_options, remove_from_quast_py_args, prepare_regular_quast_args

from quast_libs import contigs_analyzer, reads_analyzer, search_references_meta, plotter_data, qutils
from quast_libs.qutils import cleanup, check_dirpath, is_python2, run_parallel, get_reads_fpaths
Expand Down Expand Up @@ -206,25 +206,9 @@ def main(args):
combined_output_dirpath = os.path.join(output_dirpath, qconfig.combined_output_name)
qconfig.reference = combined_ref_fpath

reads_fpaths = get_reads_fpaths(logger)
if reads_fpaths or qconfig.sam or qconfig.bam:
corrected_contigs_fpaths = [assembly.fpath for assembly in assemblies]
qconfig.bed, qconfig.cov_fpath, qconfig.phys_cov_fpath =\
reads_analyzer.do(combined_ref_fpath, corrected_contigs_fpaths,
os.path.join(combined_output_dirpath, qconfig.reads_stats_dirname), corrected_ref_fpaths, external_logger=logger)

if qconfig.sam:
quast_py_args += ['--sam']
quast_py_args += [qconfig.sam]
if qconfig.bed:
quast_py_args += ['--sv-bed']
quast_py_args += [qconfig.bed]
if qconfig.cov_fpath:
quast_py_args += ['--cov']
quast_py_args += [qconfig.cov_fpath]
if qconfig.phys_cov_fpath:
quast_py_args += ['--phys-cov']
quast_py_args += [qconfig.phys_cov_fpath]

quast_py_args += ['--combined-ref']
if qconfig.draw_plots or qconfig.html_report:
Expand Down Expand Up @@ -311,20 +295,7 @@ def main(args):
if qconfig.calculate_read_support:
calculate_ave_read_support(combined_output_dirpath, assemblies)

for arg in args:
if arg in ('-s', "--scaffolds"):
quast_py_args.remove(arg)
quast_py_args += ['--no-check-meta']
qconfig.contig_thresholds = ','.join([str(threshold) for threshold in qconfig.contig_thresholds if threshold >= qconfig.min_contig])
if not qconfig.contig_thresholds:
qconfig.contig_thresholds = 'None'
quast_py_args = remove_from_quast_py_args(quast_py_args, '--contig-thresholds', qconfig.contig_thresholds)
quast_py_args += ['--contig-thresholds']
quast_py_args += [qconfig.contig_thresholds]
quast_py_args.remove('--combined-ref')
if qconfig.sam:
quast_py_args = remove_from_quast_py_args(quast_py_args, '--sam', qconfig.sam)

prepare_regular_quast_args(quast_py_args, combined_output_dirpath)
logger.main_info()
logger.main_info('Partitioning contigs into bins aligned to each reference..')

Expand Down
2 changes: 1 addition & 1 deletion quast.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def main(args):
reads_fpaths = get_reads_fpaths(logger)
cov_fpath = qconfig.cov_fpath
physical_cov_fpath = qconfig.phys_cov_fpath
if reads_fpaths or qconfig.sam or qconfig.bam:
if reads_fpaths or qconfig.reference_sam or qconfig.reference_sam or qconfig.sam_fpaths or qconfig.bam_fpaths:
bed_fpath, cov_fpath, physical_cov_fpath = reads_analyzer.do(ref_fpath, contigs_fpaths,
os.path.join(output_dirpath, qconfig.reads_stats_dirname),
external_logger=logger)
Expand Down
2 changes: 1 addition & 1 deletion quast_libs/create_meta_summary.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ def do(html_fpath, output_dirpath, combined_output_dirpath, output_dirpath_per_r
summary_plot_fpath = os.path.join(output_dirpath, plots_dirname, metric.replace(' ', '_'))
results, all_rows, cur_ref_names = \
get_results_for_metric(ref_names, metric, contigs_num, labels, output_dirpath_per_ref, qconfig.transposed_report_prefix + '.tsv')
if not results or not results[0]:
if not results or all(not value for result in results for value in result):
continue
if cur_ref_names:
transposed_table = [{'metricName': 'Assemblies',
Expand Down
10 changes: 5 additions & 5 deletions quast_libs/html_saver/static/scripts/build_total_report.js
Original file line number Diff line number Diff line change
Expand Up @@ -160,9 +160,9 @@ function buildTotalReport(assembliesNames, totalReport, order, glossary, qualiti
var refGenes = referenceValues['Reference genes'];
var refOperons = referenceValues['Reference operons'];
var refChr = referenceValues['Reference chromosomes'];
var totalReads = referenceValues['# reads'];
var refMappedReads = referenceValues['Reference mapped reads (%)'];
var refPairedReads = referenceValues['Reference properly paired reads (%)'];
var totalReads = referenceValues['# total'];
var refMappedReads = referenceValues['Reference mapped (%)'];
var refPairedReads = referenceValues['Reference properly paired (%)'];
var estRefLen = referenceValues['Estimated reference length'];

if (refLen)
Expand All @@ -187,9 +187,9 @@ function buildTotalReport(assembliesNames, totalReport, order, glossary, qualiti
}
if (totalReads)
$('#total_reads').show().find('.val').html(toPrettyString(totalReads));
if (refMappedReads)
if (refMappedReads !== undefined)
$('#reference_mapped_reads').show().find('.val').html(toPrettyString(refMappedReads));
if (refPairedReads)
if (refPairedReads !== undefined)
$('#reference_paired_reads').show().find('.val').html(toPrettyString(refPairedReads));
continue;
}
Expand Down
13 changes: 12 additions & 1 deletion quast_libs/html_saver/static/scripts/build_total_report_meta.js
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,11 @@ function buildGenomeTable(reports, group_n, numColumns) {
'</span>' +
'</td>';
var metrics = reports[report_n].report[group_n][1];
var referenceMetrics = ['Reference length', 'Reference fragments', 'Reference GC (%)',
'Reference genes', 'Reference operons'];
for (var metric_n = 0; metric_n < metrics.length; metric_n++) {
var metric = metrics[metric_n];
if (metric.metricName == 'Reference name') continue;
if (referenceMetrics.indexOf(metric.metricName) === -1) continue;

var value = metric.values[0];

Expand Down Expand Up @@ -293,6 +295,9 @@ function buildTotalReport(assembliesNames, report, order, date, minContig, gloss
var refGC = referenceValues['Reference GC (%)'];
var refGenes = referenceValues['Reference genes'];
var refOperons = referenceValues['Reference operons'];
var totalReads = referenceValues['# total'];
var refMappedReads = referenceValues['Reference mapped (%)'];
var refPairedReads = referenceValues['Reference properly paired (%)'];

var numColumns = 1; // no GC in combined reference

Expand Down Expand Up @@ -331,6 +336,12 @@ function buildTotalReport(assembliesNames, report, order, date, minContig, gloss
numColumns++;
}

if (totalReads)
$('#total_reads').show().find('.val').html(toPrettyString(totalReads));
if (refMappedReads !== undefined)
$('#reference_mapped_reads').show().find('.val').html(toPrettyString(refMappedReads));
if (refPairedReads !== undefined)
$('#reference_paired_reads').show().find('.val').html(toPrettyString(refPairedReads));
$('#main_ref_genome').html(buildGenomeTable(reports, group_n, numColumns));
continue;
}
Expand Down
80 changes: 74 additions & 6 deletions quast_libs/options_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
import sys

from quast_libs import qconfig, qutils
from quast_libs.qutils import assert_file_exists, set_up_output_dir, check_dirpath
from quast_libs.qutils import assert_file_exists, set_up_output_dir, check_dirpath, is_non_empty_file

test_data_dir_basename = 'test_data'
test_data_dir = join(qconfig.QUAST_HOME, test_data_dir_basename)
Expand Down Expand Up @@ -154,6 +154,27 @@ def parse_meta_references(option, opt_str, value, parser, logger):
ensure_value(qconfig, option.dest, []).extend(ref_fpaths)


def parse_files_list(option, opt_str, value, parser, extension, logger):
fpaths = []
values = value.split(',')
for i, value in enumerate(values):
if value.endswith(extension):
assert_file_exists(value, extension.upper() + ' file')
fpaths.append(value)
else:
logger.error("incorrect extension for " + extension.upper() + " file (" + str(value) + ")! ",
to_stderr=True, exit_with_code=2)

ensure_value(qconfig, option.dest, []).extend(fpaths)


def check_sam_bam_files(contigs_fpaths, sam_fpaths, bam_fpaths, logger):
if sam_fpaths and len(contigs_fpaths) != len(sam_fpaths):
logger.error('Number of SAM files does not match the number of files with contigs', to_stderr=True, exit_with_code=11)
if bam_fpaths and len(contigs_fpaths) != len(bam_fpaths):
logger.error('Number of BAM files does not match the number of files with contigs', to_stderr=True, exit_with_code=11)


def wrong_test_option(logger, msg, is_metaquast):
logger.error(msg)
qconfig.usage(meta=is_metaquast)
Expand All @@ -169,12 +190,42 @@ def clean_metaquast_args(quast_py_args, contigs_fpaths):
quast_py_args.remove(contigs_fpath)
for opt in opts_with_args_to_remove:
remove_from_quast_py_args(quast_py_args, opt, arg=True)

for opt in opts_to_remove:
remove_from_quast_py_args(quast_py_args, opt)
return quast_py_args


def prepare_regular_quast_args(quast_py_args, combined_output_dirpath):
opts_with_args_to_remove = ['--contig-thresholds', '--sv-bed',]
opts_to_remove = ['-s', '--scaffolds', '--combined-ref']
for opt in opts_with_args_to_remove:
remove_from_quast_py_args(quast_py_args, opt, arg=True)
for opt in opts_to_remove:
remove_from_quast_py_args(quast_py_args, opt)

quast_py_args += ['--no-check-meta']
qconfig.contig_thresholds = ','.join([str(threshold) for threshold in qconfig.contig_thresholds if threshold >= qconfig.min_contig])
if not qconfig.contig_thresholds:
qconfig.contig_thresholds = 'None'
quast_py_args += ['--contig-thresholds']
quast_py_args += [qconfig.contig_thresholds]

reads_stats_dirpath = os.path.join(combined_output_dirpath, qconfig.reads_stats_dirname)
reference_name = qutils.name_from_fpath(qconfig.combined_ref_name)
qconfig.bed = qconfig.bed or os.path.join(reads_stats_dirpath, reference_name + '.bed')
qconfig.cov_fpath = qconfig.cov_fpath or os.path.join(reads_stats_dirpath, reference_name + '.cov')
qconfig.phys_cov_fpath = qconfig.phys_cov_fpath or os.path.join(reads_stats_dirpath, reference_name + '.physical.cov')
if qconfig.bed and is_non_empty_file(qconfig.bed):
quast_py_args += ['--sv-bed']
quast_py_args += [qconfig.bed]
if qconfig.cov_fpath and is_non_empty_file(qconfig.cov_fpath):
quast_py_args += ['--cov']
quast_py_args += [qconfig.cov_fpath]
if qconfig.phys_cov_fpath and is_non_empty_file(qconfig.phys_cov_fpath):
quast_py_args += ['--phys-cov']
quast_py_args += [qconfig.phys_cov_fpath]


def parse_options(logger, quast_args, is_metaquast=False):
if '-h' in quast_args or '--help' in quast_args or '--help-hidden' in quast_args:
qconfig.usage('--help-hidden' in quast_args, meta=is_metaquast, short=False)
Expand Down Expand Up @@ -255,13 +306,27 @@ def parse_options(logger, quast_args, is_metaquast=False):
dest='unpaired_reads',
type='file')
),
(['--ref-sam'], dict(
dest='reference_sam',
type='file')
),
(['--ref-bam'], dict(
dest='reference_bam',
type='file')
),
(['--sam'], dict(
dest='sam',
type='file')
dest='sam_fpaths',
type='string',
action='callback',
callback_args=('.sam', logger),
callback=parse_files_list)
),
(['--bam'], dict(
dest='bam',
type='file')
dest='bam_fpaths',
type='string',
action='callback',
callback_args=('.bam', logger),
callback=parse_files_list)
),
(['--sv-bedpe'], dict(
dest='bed',
Expand Down Expand Up @@ -623,6 +688,9 @@ def parse_options(logger, quast_args, is_metaquast=False):
if is_metaquast:
quast_py_args = clean_metaquast_args(quast_py_args, contigs_fpaths)

if qconfig.sam_fpaths or qconfig.bam_fpaths:
check_sam_bam_files(contigs_fpaths, qconfig.sam_fpaths, qconfig.bam_fpaths, logger)

return quast_py_args, contigs_fpaths


14 changes: 10 additions & 4 deletions quast_libs/qconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,8 +206,10 @@
genes = None
operons = None
labels = None
sam = None
bam = None
sam_fpaths = None
bam_fpaths = None
reference_sam = None
reference_bam = None
bed = None
reads_fpaths = None
forward_reads = None
Expand Down Expand Up @@ -370,8 +372,12 @@ def usage(show_hidden=False, meta=False, short=True):
sys.stderr.write("-2 --reads2 <filename> File with reverse reads (in FASTQ format, may be gzipped)\n")
sys.stderr.write(" --12 <filename> File with interlaced forward and reverse paired-end reads. (in FASTQ format, may be gzipped)\n")
sys.stderr.write(" --single <filename> File with unpaired reads. (in FASTQ format, may be gzipped)\n")
sys.stderr.write(" --sam <filename> SAM alignment file\n")
sys.stderr.write(" --bam <filename> BAM alignment file\n")
sys.stderr.write(" --ref-sam <filename> SAM alignment file obtained by aligning reads to reference genome file\n")
sys.stderr.write(" --ref-bam <filename> BAM alignment file obtained by aligning reads to reference genome file\n")
sys.stderr.write(" --sam <filename,filename,...> Comma-separated list of SAM alignment files obtained by aligning reads to assemblies\n"
" (use the same order as for files with contigs)\n")
sys.stderr.write(" --bam <filename,filename,...> Comma-separated list of BAM alignment files obtained by aligning reads to assemblies\n"
" (use the same order as for files with contigs)\n")
sys.stderr.write(" Reads (or SAM/BAM file) are used for structural variation detection and\n")
sys.stderr.write(" coverage histogram building in Icarus\n")
sys.stderr.write(" --sv-bedpe <filename> File with structural variations (in BEDPE format)\n")
Expand Down
Loading

0 comments on commit 3c0a1ae

Please sign in to comment.