-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathdownload_abide.py
executable file
·88 lines (63 loc) · 2.64 KB
/
download_abide.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Data download
Usage:
download_abide.py [--pipeline=cpac] [--strategy=filt_global] [<derivative> ...]
download_abide.py (-h | --help)
Options:
-h --help Show this screen
--pipeline=cpac Pipeline [default: cpac]
--strategy=filt_global Strategy [default: filt_global]
derivative Derivatives to download
"""
import csv
import os
import urllib
import urllib.request

from docopt import docopt
def collect_and_download(derivative, pipeline, strategy, out_dir):
s3_prefix = "https://s3.amazonaws.com/fcp-indi/data/Projects/ABIDE_Initiative"
derivative = derivative.lower()
pipeline = pipeline.lower()
strategy = strategy.lower()
if "roi" in derivative:
extension = ".1D"
else:
extension = ".nii.gz"
if not os.path.exists(out_dir):
os.makedirs(out_dir)
s3_pheno_file = open("./data/phenotypes/Phenotypic_V1_0b_preprocessed1.csv", "r")
pheno_list = s3_pheno_file.readlines()
header = pheno_list[0].split(",")
file_idx = header.index("FILE_ID")
s3_paths = []
for pheno_row in pheno_list[1:]:
cs_row = pheno_row.split(",")
row_file_id = cs_row[file_idx]
if row_file_id == "no_filename":
continue
filename = row_file_id + "_" + derivative + extension
s3_path = "/".join([s3_prefix, "Outputs", pipeline, strategy, derivative, filename])
s3_paths.append(s3_path)
total_num_files = len(s3_paths)
for path_idx, s3_path in enumerate(s3_paths):
rel_path = s3_path.lstrip(s3_prefix).split("/")[-1]
download_file = os.path.join(out_dir, rel_path)
download_dir = os.path.dirname(download_file)
if not os.path.exists(download_dir):
os.makedirs(download_dir)
if not os.path.exists(download_file):
print ("Retrieving: %s" % download_file)
urllib.request.urlretrieve(s3_path, download_file)
print ("%.3f%% percent complete" % (100*(float(path_idx+1)/total_num_files)))
else:
print ("File %s already exists, skipping..." % download_file)
if __name__ == "__main__":
    # Script entry point: parse the command line described by the module
    # docstring and download every requested derivative.
    arguments = docopt(__doc__)

    # With no derivatives on the command line, fetch the default ROI set.
    derivatives = arguments['<derivative>'] or [
        'rois_aal', 'rois_cc200', 'rois_dosenbach160',
        'rois_ez', 'rois_ho', 'rois_tt',
    ]

    # docopt keys options by their flag spelling ('--pipeline'), not by the
    # bare name; looking up 'pipeline' always fell back to the default and
    # silently ignored what the user asked for.
    pipeline = arguments.get('--pipeline') or 'cpac'
    strategy = arguments.get('--strategy') or 'filt_global'

    # Keep the on-disk layout in step with what is actually downloaded; for
    # the defaults this is still data/functionals/cpac/filt_global.
    out_dir = os.path.abspath(
        os.path.join("data", "functionals", pipeline.lower(), strategy.lower())
    )

    for derivative in derivatives:
        collect_and_download(derivative, pipeline, strategy, os.path.join(out_dir, derivative))