-
Notifications
You must be signed in to change notification settings - Fork 145
/
anvi-init-bam
executable file
·86 lines (64 loc) · 2.73 KB
/
anvi-init-bam
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
#!/usr/bin/env python
# -*- coding: utf-8
import os
import sys
import pysam
import anvio
import anvio.terminal as terminal
import anvio.filesnpaths as filesnpaths
from anvio.errors import ConfigError
from anvio.errors import FilesNPathsError
# Program metadata, exposed as module-level dunder names.
__copyright__ = "Copyleft 2015-2024, The Anvi'o Project (http://anvio.org/)"
__credits__ = []
__license__ = "GPL 3.0"
# The script reports the version of the installed anvio package as its own.
__version__ = anvio.__version__
__authors__ = ['meren', 'ekiefl']
# Names of what this program consumes and produces.
# NOTE(review): presumably read by anvi'o's documentation tooling -- confirm
# before changing the format of these lines.
__requires__ = ['raw-bam-file',]
__provides__ = ['bam-file',]
# Also passed to the argument parser as the program description.
__description__ = "Sort/Index BAM files"
# (description, URL) pairs pointing at further reading.
__resources__ = [("Another description as part of the metagenomic workflow", "http://merenlab.org/2016/06/22/anvio-tutorial-v2/#anvi-profile")]
# Shared terminal helpers; `progress` and `run` are used by init_bam_file()
# for status output. `pp` is not referenced elsewhere in the visible code.
progress = terminal.Progress()
run = terminal.Run()
pp = terminal.pretty_print
def init_bam_file(input_file_path, output_file_path = None, num_threads = 1):
    """Sort a BAM file and index the sorted result.

    Parameters
    ----------
    input_file_path : str
        Path to the BAM file to sort. It is converted to an absolute path.
    output_file_path : str, optional
        Where to write the sorted BAM file. Must end with '.bam'. When it is
        not given (or is empty), the output path is the absolute input path
        with '-sorted.bam' appended.
    num_threads : int
        Total number of threads to use for sorting. Must be at least 1.

    Raises
    ------
    ConfigError
        If `num_threads` is smaller than 1, if `output_file_path` does not
        end with '.bam', or if the sorted file is missing after sorting.

    Notes
    -----
    The input/output path checks are delegated to `filesnpaths`; presumably
    they raise `FilesNPathsError` on failure (the command-line entry point
    catches that exception) -- confirm against `anvio.filesnpaths`.
    """

    # samtools receives `num_threads - 1`, so anything below 1 would turn
    # into a negative thread count on its command line.
    if num_threads < 1:
        raise ConfigError("--num-threads should be at least 1.")

    input_file_path = os.path.abspath(input_file_path)

    if output_file_path:
        output_file_path = os.path.abspath(output_file_path)
        if not output_file_path.endswith('.bam'):
            raise ConfigError("The output file should end with extension '.bam'.")
    else:
        output_file_path = input_file_path + '-sorted.bam'

    filesnpaths.is_file_exists(input_file_path)
    filesnpaths.is_output_file_writable(output_file_path)

    progress.new('SORT')
    try:
        progress.update('Sorting BAM File... May take a while depending on the size.')
        # -@ is number of _additional_ threads (i.e the default is 0), so we subtract 1
        pysam.sort('-o', output_file_path, '-@', str(num_threads - 1), input_file_path)
    finally:
        # close the progress display even when sorting fails
        progress.end()

    # verify the output first, so success is never reported for a missing file
    if not os.path.exists(output_file_path):
        raise ConfigError("Sorry. Something went wrong. Samtools thinks it generated the sorted output, yet it is not there :(")

    run.info('Sorted BAM File', output_file_path)

    progress.new('INDEX')
    try:
        progress.update('Indexing BAM File...')
        pysam.index(output_file_path)
    finally:
        # close the progress display even when indexing fails
        progress.end()

    run.info('BAM File Index', output_file_path + '.bai', mc='green')
if __name__ == '__main__':
    # Command-line entry point: parse the arguments, sort and index the BAM
    # file, and turn known anvi'o errors into a printed message plus a
    # non-zero exit code (-1 for ConfigError, -2 for FilesNPathsError).
    from anvio.argparse import ArgumentParser

    parser = ArgumentParser(description=__description__)

    parser.add_argument('input_file', metavar = 'BAM_FILE', help = 'BAM file to analyze')
    parser.add_argument(*anvio.A('output-file'), **anvio.K('output-file'))
    parser.add_argument(*anvio.A('num-threads'), **anvio.K('num-threads'))

    args = parser.get_args(parser)

    try:
        # Validate inside the `try` so that a bad --num-threads value is
        # reported like every other ConfigError instead of escaping as an
        # uncaught exception with a traceback.
        if args.num_threads < 1:
            raise ConfigError("--num-threads should be at least 1.")

        init_bam_file(args.input_file, args.output_file, args.num_threads)
        sys.exit()
    except ConfigError as e:
        print(e)
        sys.exit(-1)
    except FilesNPathsError as e:
        print(e)
        sys.exit(-2)