-
Notifications
You must be signed in to change notification settings - Fork 145
/
Copy pathanvi-compute-completeness
executable file
·89 lines (67 loc) · 3.29 KB
/
anvi-compute-completeness
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/env python
# -*- coding: utf-8
import sys
from anvio.argparse import ArgumentParser
import anvio
import anvio.tables as t
import anvio.utils as utils
import anvio.dbops as dbops
import anvio.terminal as terminal
from anvio.errors import ConfigError, FilesNPathsError
from anvio.completeness import Completeness
# Program metadata. __version__ mirrors the version of the imported anvio package.
__copyright__ = "Copyleft 2015-2024, The Anvi'o Project (http://anvio.org/)"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__authors__ = ['meren']
# NOTE(review): these look like the names of the inputs this program expects
# (contigs database, splits list, HMM source) -- confirm against whatever reads
# __requires__, since nothing in this file consumes it.
__requires__ = ["contigs-db", "splits-txt", "hmm-source"]
# Passed to the argument parser as the program description in the entry point.
__description__ = "A script to generate completeness info for a given list of _splits_"
# Module-level terminal helpers; `run` is used for all info/warning output below.
run = terminal.Run()
# NOTE(review): `progress` is not referenced anywhere else in the visible file.
progress = terminal.Progress()
def compute_completeness(args):
    """Print completion and redundancy estimates for a set of splits.

    Parameters
    ----------
    args : argparse.Namespace-like object
        Must provide the attributes read below:
        ``contigs_db`` (path to the contigs database),
        ``completeness_source`` (handed to ``Completeness``),
        ``list_completeness_sources`` (if truthy, only list sources and exit),
        ``splits_of_interest`` (optional path to a file with one split name
        per line; blank lines and lines starting with ``#`` are ignored), and
        ``min_e_value`` (forwarded to ``Completeness.get_info_for_splits``).

    Behavior
    --------
    * With ``list_completeness_sources`` set, the available sources are
      printed and the process exits.
    * When a splits file is given, only names that are also present in the
      database are used. A warning is shown if some names did not match, and
      the process exits if none matched.
    * Without a splits file, every split in the database is used.
    * A ``ConfigError`` raised while computing completeness is printed and the
      process exits with status -1.
    """
    utils.is_contigs_db(args.contigs_db)

    # NOTE(review): the completeness source is passed as the second positional
    # argument -- confirm this matches the parameter order of
    # Completeness.__init__ in anvio.completeness.
    completeness = Completeness(args.contigs_db, args.completeness_source)

    if args.list_completeness_sources:
        run.info('Available singlecopy sources', ', '.join(completeness.sources))
        sys.exit()

    contigs_db = dbops.ContigsDatabase(args.contigs_db)
    try:
        splits_in_db = set(contigs_db.db.get_table_as_dict(t.splits_info_table_name).keys())
    finally:
        # Release the database connection even if reading the table fails.
        contigs_db.disconnect()

    if args.splits_of_interest:
        # Read the user's list with a context manager so the file handle is
        # closed deterministically. Lines are stripped *before* the comment
        # check so that indented '#' comments are skipped as well.
        with open(args.splits_of_interest) as splits_file:
            stripped_lines = (line.strip() for line in splits_file)
            splits_in_users_list = {name for name in stripped_lines if name and not name.startswith('#')}

        splits_of_interest = splits_in_db.intersection(splits_in_users_list)

        if len(splits_of_interest) != len(splits_in_users_list):
            if not splits_of_interest:
                run.warning('None of the split names you provided in %s matched split names in the database...' % args.splits_of_interest)
                sys.exit()
            else:
                run.warning('Only %d of %d split names you listed in "%s" matched split names in the database...'\
                                    % (len(splits_of_interest), len(splits_in_users_list), args.splits_of_interest))
    else:
        splits_of_interest = splits_in_db

    try:
        # Only results_dict is used for reporting; the other returned values
        # are unpacked to match the shape of the return value.
        p_completion, p_redundancy, domain, domain_probabilities, info_text, results_dict = completeness.get_info_for_splits(splits_of_interest, min_e_value = args.min_e_value)
    except ConfigError as e:
        print(e)
        sys.exit(-1)

    # Each value of results_dict is itself a dict; its first value is the
    # per-source record with the fields reported below.
    results = [list(v.values())[0] for v in results_dict.values()]

    run.warning('', header = 'Completeness for %d splits (p < %g)' % (len(splits_of_interest), args.min_e_value))

    for v in results:
        # '%sl' turns the domain name into an adjective (e.g. "bacteria" -> "bacterial").
        run.info('%s (%sl SCGs)' % (v['source'], v['domain']), '%.2f%% complete, %.2f%% redundant' % (v['percent_completion'], v['percent_redundancy']))

    print()
if __name__ == '__main__':
    # Command-line entry point: build the parser from anvi'o's shared argument
    # definitions, then run the computation and map known anvi'o errors to
    # exit codes.
    parser = ArgumentParser(description=__description__)

    # Registration order is preserved: it determines the order in the help text.
    argument_names = (
        'splits-of-interest',
        'contigs-db',
        'min-e-value',
        'list-completeness-sources',
        'completeness-source',
    )
    for argument_name in argument_names:
        parser.add_argument(*anvio.A(argument_name), **anvio.K(argument_name))

    try:
        args = parser.get_args(parser)
        compute_completeness(args)
    except (ConfigError, FilesNPathsError) as error:
        print(error)
        # ConfigError is checked first, so it exits with -1; a
        # FilesNPathsError that is not a ConfigError exits with -2.
        exit_code = -1 if isinstance(error, ConfigError) else -2
        sys.exit(exit_code)