-
Notifications
You must be signed in to change notification settings - Fork 1
/
run.py
executable file
·91 lines (74 loc) · 2.8 KB
/
run.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#!/usr/local/bin/python
import dynclipy
task = dynclipy.main()
# task = dynclipy.main(
# ["--dataset", "/code/example.h5", "--output", "/mnt/output"],
# "/code/definition.yml"
# )
from gpflow import settings
settings.session.intra_op_parallelism_threads = 1
settings.session.inter_op_parallelism_threads = 1
import pandas as pd
import numpy as np
import json
import os
import pandas as pd
import numpy as np
from GrandPrix import GrandPrix
import time as tm
checkpoints = {}
# ____________________________________________________________________________
# Load data ####
expression = task["expression"]
parameters = task["parameters"]
end_n = task["priors"]["end_n"]
if "timecourse_continuous" in task["priors"]:
timecourse_continuous = task["priors"]["timecourse_continuous"]
else:
timecourse_continuous = None
checkpoints["method_afterpreproc"] = tm.time()
# ____________________________________________________________________________
# Infer trajectory ####
# fit grandprix model, based on https://github.com/ManchesterBioinference/GrandPrix/blob/master/notebooks/Guo.ipynb
if end_n == 0:
end_n = 1
if timecourse_continuous is not None:
pt_np, var_np = GrandPrix.fit_model(
data = expression.values,
n_latent_dims = end_n,
n_inducing_points = parameters["n_inducing_points"],
latent_var = parameters["latent_var"],
latent_prior_mean = np.repeat(np.expand_dims(timecourse_continuous, 1), 4, 1),
latent_prior_var = parameters["latent_prior_var"]
)
else:
pt_np, var_np = GrandPrix.fit_model(
data = expression.values,
n_latent_dims = end_n,
n_inducing_points = parameters["n_inducing_points"],
latent_var = parameters["latent_var"]
)
checkpoints["method_aftermethod"] = tm.time()
# ____________________________________________________________________________
# Process output ####
# process to end_state_probabilities output format^
cell_ids = expression.index
pseudotime = pd.DataFrame({
"cell_id": expression.index,
"pseudotime": pt_np[:, 0]
})
end_state_probabilities = pd.DataFrame({"cell_id":expression.index})
for i in range(pt_np.shape[1] - 1):
split_id = "split_" + str(i)
probabilities = pt_np[:, 1]
probabilities = (probabilities - min(probabilities))/(max(probabilities) - min(probabilities))
end_state_probabilities[split_id + "_1"] = probabilities
end_state_probabilities[split_id + "_2"] = 1-probabilities
# save
dataset = dynclipy.wrap_data(cell_ids = expression.index)
dataset.add_end_state_probabilities(
end_state_probabilities = end_state_probabilities,
pseudotime = pseudotime
)
dataset.add_timings(timings = checkpoints)
dataset.write_output(task["output"])