-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathgetwtcccdata.m
141 lines (105 loc) · 3.99 KB
/
getwtcccdata.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
% This is a script to import the WTCCC controls, bipolar disorder (BD)
% cases, coronary artery disease (CAD) cases, Crohn's disease (CD) cases,
% hypertension (HT) cases, rheumatoid arthritis (RA) cases, type 1 diabetes
% (T1D) cases, and type 2 diabetes (T2D) cases from files in BIMBAM format,
% in which the genotype data for each chromosome is stored in a separate
% file.
% Start from an empty workspace, as the original script did.
clear

% One row per WTCCC cohort. The columns are:
%
%   1. message printed before the cohort is read;
%   2. prefix shared by the cohort's position and genotype file names;
%   3. tag used to form the name of the output MAT-file;
%   4. n, the number of subjects;
%   5. p, the number of SNPs.
%
% The cohorts are, in order: 1958 birth controls (58BC), UK Blood Services
% controls (UKBS), bipolar disorder (BD), coronary artery disease (CAD),
% Crohn's disease (CD), hypertension (HT), rheumatoid arthritis (RA),
% type 1 diabetes (T1D) and type 2 diabetes (T2D).
datasets = { ...
  '1958 birth controls',           'birthctrl', '58bc', 1480, 459446; ...
  'UK blood services controls',    'bloodctrl', 'ukbs', 1458, 459446; ...
  'Bipolar disorder cases',        'bd',        'bd',   1868, 458868; ...
  'Coronary artery disease cases', 'cad',       'cad',  1926, 458868; ...
  'Crohn''s disease cases',        'cd',        'cd',   1748, 458868; ...
  'Hypertension cases',            'ht',        'ht',   1952, 458868; ...
  'Rheumatoid arthritis cases',    'ra',        'ra',   1860, 458868; ...
  'Type 1 diabetes cases',         't1d',       't1d',  1963, 458868; ...
  'Type 2 diabetes cases',         't2d',       't2d',  1924, 458868};

% Import each cohort in turn and store it in its own MAT-file.
for k = 1:size(datasets,1)

  % Release the previous cohort's data before reading the next one, so
  % that two cohorts are never held in memory at the same time. Clearing
  % names that do not exist yet (first iteration) is harmless.
  clear X labels chr pos minor major

  % Unpack the settings for this cohort.
  [name, prefix, tag, n, p] = datasets{k,:};

  % File name templates passed to readbimbam. The '%d' is left in place
  % here; presumably readbimbam fills in the chromosome number, since the
  % data for each chromosome are stored in a separate file.
  posfiles = [prefix '.wtccc.chr%d.pos'];
  genfiles = [prefix '.chr%d.mean.genotype.txt'];
  matfile  = ['wtccc_' tag '.mat'];

  % Read the cohort and save all six outputs of readbimbam. The '-v7.3'
  % MAT-file format is the one that supports variables larger than 2 GB.
  fprintf('%s\n',name);
  [X, labels, chr, pos, minor, major] = readbimbam(n,p,genfiles,posfiles);
  save(matfile,'X','labels','chr','pos','minor','major','-v7.3');
end