-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathgtex_analysis.py
92 lines (69 loc) · 3.2 KB
/
gtex_analysis.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Sat Mar 21 19:26:43 2020
@author: Joan Smith
"""
#%%
import pandas as pd
from matplotlib import pyplot as plt
import numpy as np
gtex = pd.read_csv("data/raw-data/GTEX/GTEx_Analysis_2017-06-05_v8_RNASeQCv1.1.9_gene_tpm.gct", sep="\t", header=2, index_col=1)
gtex = gtex.drop('Name', axis=1).astype(float)
attributes = pd.read_csv("data/raw-data/GTEX/GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt", sep='\t', index_col=0, dtype=None)
phenotypes = pd.read_csv("data/raw-data/GTEX/GTEx_Analysis_v8_Annotations_SubjectPhenotypesDS.txt", sep='\t', index_col=0)
phenotypes[['SEX']] = phenotypes[['SEX']].replace(2, 'F')
phenotypes[['SEX']] = phenotypes[['SEX']].replace(1, 'M')
#%%
def set_labels(ax, labels):
ax.set_xticks(np.arange(1, len(labels) + 1))
ax.set_xticklabels(labels)
#%%
def plot_by_pheno(ace2_tissue_w_pheno, col, tissue):
fig, ax = plt.subplots()
plt.title(tissue + '\nACE2 by ' + col.lower())
pheno = [(i[0], i[1].ACE2.dropna()) for i in ace2_tissue_w_pheno.groupby(col)]
labels, split = zip(*pheno)
ax.violinplot(split, showmeans=True)
set_labels(ax, labels)
ace2_tissue_w_pheno.reset_index().pivot_table(columns='SEX', values='ACE2', index='index').to_csv('gtex_' + tissue.lower() + '_sex.csv')
ace2_tissue_w_pheno.reset_index().pivot_table(columns='AGE', values='ACE2', index='index').to_csv('gtex_' + tissue.lower() + '_age.csv')
#%% LUNG
ace2 = gtex.loc['ACE2']
samples_w_lung = attributes[attributes['SMTS'] == 'Lung'].index
ace2_lung = gtex.loc['ACE2'][samples_w_lung].astype(float)
ace2_lung.index = ace2_lung.index.str[0:10]
ace2_lung_w_pheno = phenotypes.join(ace2_lung, how='inner')
plot_by_pheno(ace2_lung_w_pheno, 'AGE', 'Lung')
plot_by_pheno(ace2_lung_w_pheno, 'SEX', 'Lung')
#%% Esophogeal Mucosa
samples_w_esoph_muc = attributes[attributes['SMTSD'] == 'Esophagus - Mucosa'].index
ace2_esoph = gtex.loc['ACE2'][samples_w_esoph_muc].astype(float)
ace2_esoph.index = ace2_esoph.index.str[0:10]
ace2_esoph_w_pheno = phenotypes.join(ace2_esoph, how='inner')
plot_by_pheno(ace2_esoph_w_pheno, 'AGE', 'Esophagus - Mucosa')
plot_by_pheno(ace2_esoph_w_pheno, 'SEX', 'Esophagus - Mucosa')
#%% Salivary
samples_w_salivary = attributes[attributes['SMTS'] == 'Salivary Gland'].index
ace2_sal = gtex.loc['ACE2'][samples_w_salivary].astype(float)
ace2_sal.index = ace2_sal.index.str[0:10]
ace2_sal_w_pheno = phenotypes.join(ace2_sal, how='inner')
plot_by_pheno(ace2_sal_w_pheno, 'AGE', 'Salivary Gland')
plot_by_pheno(ace2_sal_w_pheno, 'SEX', 'Salivary Gland')
#%% All Tissue
#%% Plot All Tissue
ace2_tissue = gtex.loc[['ACE2']].T.join(attributes)
ace2_tissue['ACE2'] = np.log2(ace2_tissue['ACE2'] + 1)
fig, ax = plt.subplots()
plt.title('ACE2 by tissue')
order = ace2_tissue.groupby('SMTS')['ACE2'].apply(np.mean).sort_values()
print(order)
g = {i[0]: i[1].ACE2.dropna() for i in ace2_tissue.groupby('SMTS')}
ordered_g= [(k, g[k]) for k in order.index]
labels, split = zip(*ordered_g)
ax.violinplot(split, showmeans=True)
set_labels(ax, labels)
plt.xticks(rotation=45)
plt.show()
#%% Export Data for All Tissue
ace2_tissue.reset_index().pivot_table(columns='SMTS', values='ACE2', index='index').to_csv('ace2_by_tissue.csv')