-
Notifications
You must be signed in to change notification settings - Fork 145
/
Copy pathanvi-refine
executable file
·146 lines (117 loc) · 7.6 KB
/
anvi-refine
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
#!/usr/bin/env python
# -*- coding: utf-8
"""Further analyze one or more bins in a collection.
This is especially useful when there are one or more highly contaminated
bins in a merged profile.
"""
import sys
from anvio.argparse import ArgumentParser
import anvio
import anvio.utils as utils
import anvio.terminal as terminal
import anvio.interactive as interactive
from anvio.bottleroutes import BottleApplication
from anvio.errors import ConfigError, FilesNPathsError, DictIOError
# Program-level metadata. These are plain module attributes; the description
# below is also what the argument parser shows as the program description.
__copyright__ = "Copyleft 2015-2024, The Anvi'o Project (http://anvio.org/)"
__credits__ = []
__license__ = "GPL 3.0"
__version__ = anvio.__version__
__authors__ = ['meren', 'blankenberg']
__resources__ = [
    ("Refining a bin", "http://merenlab.org/2015/05/11/anvi-refine/"),
    ("Notes on genome refinement", "http://merenlab.org/2017/05/11/anvi-refine-by-veronika/"),
    ("A case study: Inspecting the genomic link between Archaea and Eukaryota", "http://merenlab.org/2017/01/03/loki-the-link-archaea-eukaryota/"),
    ("As part of the metagenomic workflow", "http://merenlab.org/2016/06/22/anvio-tutorial-v2/#anvi-refine"),
    ("A demo", "https://www.youtube.com/watch?v=vXPKP5vKiBM"),
]
__requires__ = ['profile-db', 'contigs-db', 'bin',]
__provides__ = ['bin',]
# Fixed a duplicated word here: the text used to read "interactive interactive".
__description__ = ("Start an anvi'o interactive interface to manually curate or refine a genome, "
                   "whether it is a metagenome-assembled, single-cell, or an isolate genome")

# Shared terminal helpers used for user-facing output further down.
run = terminal.Run()
progress = terminal.Progress()
if __name__ == '__main__':
    # ------------------------------------------------------------------
    # Command line interface. The positional and keyword arguments of every
    # flag come from anvio.A(...) / anvio.K(...); only the grouping and a
    # single help-text override are specific to this program.
    # ------------------------------------------------------------------
    parser = ArgumentParser(description=__description__)

    groupA = parser.add_argument_group(
        'DEFAULT INPUTS',
        "The interactive interface can be started with and without "
        "anvi'o databases. The default use assumes you have your "
        "profile and contigs database, however, it is also possible "
        "to start the interface using ad-hoc input files. See 'MANUAL "
        "INPUT' section for other set of parameters that are mutually "
        "exclusive with databases.")
    groupA.add_argument(*anvio.A('profile-db'), **anvio.K('profile-db'))
    groupA.add_argument(*anvio.A('contigs-db'), **anvio.K('contigs-db'))

    groupB = parser.add_argument_group(
        'REFINE-SPECIFICS',
        "Parameters that are essential to the refinement process.")
    groupB.add_argument(*anvio.A('collection-name'), **anvio.K('collection-name'))
    groupB.add_argument(*anvio.A('bin-id'), **anvio.K('bin-id'))
    groupB.add_argument(*anvio.A('bin-ids-file'), **anvio.K('bin-ids-file'))
    groupB.add_argument(*anvio.A('find-from-split-name'), **anvio.K('find-from-split-name'))

    groupC = parser.add_argument_group(
        'ADDITIONAL STUFF',
        "Parameters to provide additional layers, views, or layer data.")
    groupC.add_argument(*anvio.A('tree'), **anvio.K('tree'))
    # The default help for this flag is overridden to mention the --tree requirement.
    groupC.add_argument(
        *anvio.A('skip-hierarchical-clustering'),
        **anvio.K('skip-hierarchical-clustering', {
            'help': "Skip hierarchical clustering for the splits in the refined bin, "
                    "if you skip clustering you need to provide your own newick "
                    "formatted tree using --tree parameter."}))
    groupC.add_argument(*anvio.A('load-full-state'), **anvio.K('load-full-state'))
    groupC.add_argument(*anvio.A('additional-view'), **anvio.K('additional-view'))
    groupC.add_argument(*anvio.A('additional-layers'), **anvio.K('additional-layers'))
    groupC.add_argument(*anvio.A('annotation-source-for-per-split-summary'),
                        **anvio.K('annotation-source-for-per-split-summary'))

    groupD = parser.add_argument_group(
        'VISUALS RELATED',
        "Parameters that give access to various adjustments regarding "
        "the interface.")
    groupD.add_argument(*anvio.A('show-all-layers'), **anvio.K('show-all-layers'))
    groupD.add_argument(*anvio.A('split-hmm-layers'), **anvio.K('split-hmm-layers'))
    groupD.add_argument(*anvio.A('taxonomic-level'), **anvio.K('taxonomic-level'))
    groupD.add_argument(*anvio.A('hide-outlier-SNVs'), **anvio.K('hide-outlier-SNVs'))
    groupD.add_argument(*anvio.A('title'), **anvio.K('title'))
    groupD.add_argument(*anvio.A('export-svg'), **anvio.K('export-svg'))

    groupE = parser.add_argument_group(
        'SWEET PARAMS OF CONVENIENCE',
        "Parameters and flags that are not quite essential (but "
        "nice to have).")
    groupE.add_argument(*anvio.A('dry-run'), **anvio.K('dry-run'))
    groupE.add_argument(*anvio.A('skip-init-functions'), **anvio.K('skip-init-functions'))
    groupE.add_argument(*anvio.A('skip-news'), **anvio.K('skip-news'))
    # Previously registered in the middle of the server section; it has always
    # belonged to this group, so it now sits next to its siblings.
    groupE.add_argument(*anvio.A('skip-auto-ordering'), **anvio.K('skip-auto-ordering'))

    groupF = parser.add_argument_group('SERVER CONFIGURATION', "For power users.")
    groupF.add_argument(*anvio.A('ip-address'), **anvio.K('ip-address'))
    groupF.add_argument(*anvio.A('port-number'), **anvio.K('port-number'))
    groupF.add_argument(*anvio.A('browser-path'), **anvio.K('browser-path'))
    groupF.add_argument(*anvio.A('read-only'), **anvio.K('read-only'))
    groupF.add_argument(*anvio.A('server-only'), **anvio.K('server-only'))
    groupF.add_argument(*anvio.A('password-protected'), **anvio.K('password-protected'))

    args = parser.get_args(parser)

    # ------------------------------------------------------------------
    # Validate the request and prepare the interactive session. Known anvi'o
    # error types are printed and mapped to distinct negative exit codes.
    # ------------------------------------------------------------------
    try:
        # Missing attributes are treated as "not provided" (None).
        provided = vars(args)
        find_from_split_name = provided.get('find_from_split_name')
        collection_name = provided.get('collection_name')

        # The split-specific check must come first; otherwise the generic
        # collection check below would always shadow it.
        if find_from_split_name and not collection_name:
            raise ConfigError("If you would like anvi'o to find the bin name for your split, you should "
                              "also specify a collection name.")

        if not collection_name:
            raise ConfigError("You need to provide this program with a collection name :/")

        if find_from_split_name:
            # Resolve the bin that holds the requested split within the collection.
            rows = utils.get_bin_name_from_item_name(args.profile_db,
                                                     find_from_split_name,
                                                     collection_name=collection_name)

            if not rows:
                raise ConfigError("The split name you requested was not found in collection %s :/" % collection_name)

            if len(rows) != 1:
                raise ConfigError("There is something silly going on here. The split name is found in more "
                                  "than one collection. Which is not really possible so goodbye.")

            # Each row unpacks into four fields; only the bin name is needed here.
            _entry_id, _collection, _split_name, bin_name = rows[0]

            run.warning("Anvi'o found your split, and will set the bin name for your "
                        "refinement analysis to '%s'." % (bin_name),
                        header="SPLIT FOUND IN %s!" % (bin_name), lc="green")

            args.bin_id = bin_name

        # Either a single bin id or a file of bin ids is enough. The previous
        # condition (`not bin_id or bin_ids_file`) rejected every run that
        # supplied --bin-ids-file because of operator precedence.
        if not (args.bin_id or args.bin_ids_file):
            raise ConfigError("This program needs to know which bin(s) you wish to refine.")

        args.mode = 'refine'
        d = interactive.Interactive(args)

        args.port_number = utils.get_port_num(args.port_number, args.ip_address, run=run)
    except ConfigError as e:
        print(e)
        sys.exit(-1)
    except FilesNPathsError as e:
        print(e)
        sys.exit(-2)
    except DictIOError as e:
        print(e)
        sys.exit(-3)

    # A dry run stops after everything has been initialized, before serving.
    if args.dry_run:
        run.info_single('Dry run, eh? Fine. Bai!', nl_after=1)
        sys.exit()

    app = BottleApplication(d)
    app.run_application(args.ip_address, args.port_number)