-
Notifications
You must be signed in to change notification settings - Fork 51
/
Copy pathsurvival.py
338 lines (282 loc) · 14 KB
/
survival.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
"""Extension template for time-to-event predictors aka survival predictors."""
# todo: write an informative docstring for the file or module, remove the above
# todo: add an appropriate copyright notice for your estimator
# estimators contributed to skpro should have the copyright notice at the top
# estimators of your own do not need to have permissive or BSD-3 copyright
# todo: uncomment the following line, enter authors' GitHub IDs
# __author__ = [authorGitHubID, anotherAuthorGitHubID]
from skpro.survival.base import BaseSurvReg
# todo: add any necessary imports here
# todo: for imports of skpro soft dependencies:
# make sure to fill in the "python_dependencies" tag with the package import name
# import soft dependencies only inside methods of the class, not at the top of the file
# todo: change class name and write docstring
class ClassName(BaseSurvReg):
    """Custom probabilistic survival regressor. todo: write docstring.

    todo: describe your custom regressor here

    Parameters
    ----------
    parama : int
        descriptive explanation of parama
    paramb : string, optional (default='default')
        descriptive explanation of paramb
    paramc : boolean, optional (default= whether paramb is not the default)
        descriptive explanation of paramc
    and so on
    est : skpro.estimator, BaseEstimator descendant
        descriptive explanation of est
    est2: another estimator
        descriptive explanation of est2
    and so on
    """

    # todo: fill out estimator tags here
    # tags are inherited from parent class if they are not set
    # tags inherited from base are "safe defaults" which can usually be left as-is
    _tags = {"capability:survival": True}

    # todo: fill init
    # params should be written to self and never changed
    # super call must not be removed, change class name
    # parameter checks can go after super call
    def __init__(self, paramname, paramname2="paramname2default"):
        # estimators should precede parameters
        # if estimators have default values, set None and initialize below

        # todo: write any hyper-parameters and components to self
        self.paramname = paramname
        self.paramname2 = paramname2

        # leave this as is
        super().__init__()

        # todo: optional, parameter checking logic (if applicable) should happen here
        # if writes derived values to self, should *not* overwrite self.parama etc
        # instead, write to self._parama, self._newparam (starting with _)

        # todo: default estimators should have None arg defaults
        #  and be initialized here
        #  do this only with default estimators, not with parameters
        # if est2 is None:
        #     self.estimator = MyDefaultEstimator()

        # todo: if tags of estimator depend on component tags, set these here
        #  only needed if estimator is a composite
        #  tags set in the constructor apply to the object and override the class
        #
        # example 1: conditional setting of a tag
        # if est.foo == 42:
        #   self.set_tags(handles-missing-data=True)
        # example 2: cloning tags from component
        #   self.clone_tags(est2, ["enforce_index_type", "handles-missing-data"])

    # todo: implement this, mandatory
    def _fit(self, X, y, C=None):
        """Fit regressor to training data.

        Writes to self:
            Sets fitted model attributes ending in "_".
            Changes state to "fitted" = sets is_fitted flag to True

        Parameters
        ----------
        X : pandas DataFrame
            feature instances to fit regressor to
        y : pd.DataFrame, must be same length as X
            labels to fit regressor to
        C : pd.DataFrame, optional (default=None)
            censoring information for survival analysis,
            should have same column name as y, same length as X and y
            should have entries 0 and 1 (float or int)
            0 = uncensored, 1 = (right) censored
            if None, all observations are assumed to be uncensored
        """
        # insert logic for estimator here
        # fitted parameters should be written to parameters ending in underscore

        # the estimator should be able to cover
        # the case where C is a pd.DataFrame,
        # and the case where C is None (interpreted as "no censoring")

        # self must be returned at the end
        return self

    # todo: implement this, mandatory
    def _predict(self, X):
        """Predict labels for data from features.

        State required:
            Requires state to be "fitted" = self.is_fitted=True

        Accesses in self:
            Fitted model attributes ending in "_"

        Parameters
        ----------
        X : pandas DataFrame, must have same columns as X in `fit`
            data to predict labels for

        Returns
        -------
        y : pandas DataFrame, same length as `X`, same columns as `y` in `fit`
            labels predicted for `X`
        """
        # implement logic for prediction here
        # this can read out parameters fitted in fit, or hyperparameters from init
        # no attributes should be written to self
        y_pred = "placeholder"
        # returned object should be pd.DataFrame
        # same length as X, same columns as y in fit
        return y_pred

    # todo: implement at least one of the probabilistic prediction methods
    # _predict_proba, _predict_interval, _predict_quantiles
    # if one is implemented, the other two are filled in by default
    # implementation of _predict_proba is preferred, if possible
    #
    # CAVEAT: if not implemented, _predict_proba assumes normal distribution
    # this can be inconsistent with _predict_interval or _predict_quantiles
    def _predict_proba(self, X):
        """Predict distribution over labels for data from features.

        State required:
            Requires state to be "fitted".

        Accesses in self:
            Fitted model attributes ending in "_"

        Parameters
        ----------
        X : pandas DataFrame, must have same columns as X in `fit`
            data to predict labels for

        Returns
        -------
        y_pred : skpro BaseDistribution, same length as `X`
            labels predicted for `X`
        """
        # if implementing _predict_proba (otherwise delete this method)
        # todo: adapt the following by filling in logic to produce prediction values

        # boilerplate code to create correct output index
        index = X.index
        y_cols = self._y_cols  # columns from y in fit, not automatically stored
        columns = y_cols

        # values = logic to produce prediction values
        # replace this import by the distribution you are using
        # the distribution type can be conditional, e.g., data or parameter dependent
        from skpro.distributions import SomeDistribution

        values = None  # fill in values
        y_pred = SomeDistribution(values, index=index, columns=columns)

        return y_pred

    # todo: implement at least one of the probabilistic prediction methods, see above
    # delete the methods that are not implemented and filled by default
    def _predict_interval(self, X, coverage):
        """Compute/return interval predictions.

        private _predict_interval containing the core logic,
            called from predict_interval and default _predict_quantiles

        Parameters
        ----------
        X : pandas DataFrame, must have same columns as X in `fit`
            data to predict labels for
        coverage : guaranteed list of float of unique values
            nominal coverage(s) of predictive interval(s)

        Returns
        -------
        pred_int : pd.DataFrame
            Column has multi-index: first level is variable name from ``y`` in fit,
                second level coverage fractions for which intervals were computed,
                in the same order as in input `coverage`.
                Third level is string "lower" or "upper", for lower/upper interval end.
            Row index is equal to row index of ``X``.
            Entries are lower/upper bounds of interval predictions,
            for var in col index, at nominal coverage in second col index,
            lower/upper depending on third col index, for the row index.
            Upper/lower interval end are equivalent to
            quantile predictions at alpha = 0.5 - c/2, 0.5 + c/2 for c in coverage.
        """
        # if implementing _predict_interval (otherwise delete this method)
        # todo: adapt the following by filling in logic to produce prediction values

        # boilerplate code to create correct pandas output index
        # only if using pandas, for other mtypes, use appropriate data structure
        import pandas as pd

        index = X.index
        y_cols = self._y_cols  # columns from y in fit, not automatically stored
        columns = pd.MultiIndex.from_product(
            [y_cols, coverage, ["lower", "upper"]],
        )

        # values = logic to produce prediction values
        values = None  # fill in values
        pred_int = pd.DataFrame(values, index=index, columns=columns)

        return pred_int

    # todo: implement at least one of the probabilistic prediction methods, see above
    # delete the methods that are not implemented and filled by default
    def _predict_quantiles(self, X, alpha):
        """Compute/return quantile predictions.

        private _predict_quantiles containing the core logic,
            called from predict_quantiles and default _predict_interval

        Parameters
        ----------
        X : pandas DataFrame, must have same columns as X in `fit`
            data to predict labels for
        alpha : guaranteed list of float
            A list of probabilities at which quantile predictions are computed.

        Returns
        -------
        quantiles : pd.DataFrame
            Column has multi-index: first level is variable name from ``y`` in fit,
                second level being the values of alpha passed to the function.
            Row index is equal to row index of ``X``.
            Entries are quantile predictions, for var in col index,
                at quantile probability in second col index, for the row index.
        """
        # if implementing _predict_quantiles (otherwise delete this method)
        # todo: adapt the following by filling in logic to produce prediction values

        # boilerplate code to create correct pandas output index
        # only if using pandas, for other mtypes, use appropriate data structure
        import pandas as pd

        index = X.index
        y_cols = self._y_cols  # columns from y in fit, not automatically stored
        columns = pd.MultiIndex.from_product(
            [y_cols, alpha],
        )

        # values = logic to produce prediction values
        values = None  # fill in values
        quantiles = pd.DataFrame(values, index=index, columns=columns)

        return quantiles

    # todo: return default parameters, so that a test instance can be created
    # required for automated unit and integration testing of estimator
    @classmethod
    def get_test_params(cls, parameter_set="default"):
        """Return testing parameter settings for the estimator.

        Parameters
        ----------
        parameter_set : str, default="default"
            Name of the set of test parameters to return, for use in tests. If no
            special parameters are defined for a value, will return `"default"` set.

        Returns
        -------
        params : dict or list of dict, default = {}
            Parameters to create testing instances of the class
            Each dict are parameters to construct an "interesting" test instance, i.e.,
            `MyClass(**params)` or `MyClass(**params[i])` creates a valid test instance.
            `create_test_instance` uses the first (or only) dictionary in `params`
        """
        # todo: set the testing parameters for the estimators
        # Testing parameters can be dictionary or list of dictionaries
        #
        # this can, if required, use:
        #   class properties (e.g., inherited); parent class test case
        #   imported objects such as estimators from skpro or sklearn
        # important: all such imports should be *inside get_test_params*, not at the top
        #            since imports are used only at testing time
        #
        # The parameter_set argument is not used for most automated, module level tests.
        # It can be used in custom, estimator specific tests, for "special" settings.
        # A parameter dictionary must be returned *for all values* of parameter_set,
        # i.e., "parameter_set not available" errors should never be raised.
        #
        # A good parameter set should primarily satisfy two criteria,
        # 1. Chosen set of parameters should have a low testing time,
        #    ideally in the magnitude of few seconds for the entire test suite.
        #    This is vital for the cases where default values result in
        #    "big" models which not only increases test time but also
        #    run into the risk of test workers crashing.
        # 2. There should be a minimum two such parameter sets with different
        #    sets of values to ensure a wide range of code coverage is provided.
        #
        # example 1: specify params as dictionary
        # any number of params can be specified
        # params = {"est": value0, "parama": value1, "paramb": value2}
        #
        # example 2: specify params as list of dictionary
        # note: Only first dictionary will be used by create_test_instance
        # params = [{"est": value1, "parama": value2},
        #           {"est": value3, "parama": value4}]
        #
        # example 3: parameter set depending on param_set value
        #   note: only needed if a separate parameter set is needed in tests
        # if parameter_set == "special_param_set":
        #     params = {"est": value1, "parama": value2}
        #     return params
        #
        # # "default" params
        # params = {"est": value3, "parama": value4}
        # return params
        #
        # todo: replace the minimal defaults below with "interesting" settings.
        # A return is mandatory: falling off the end would return None and
        # violate the documented contract ("dict or list of dict"), breaking
        # create_test_instance and the automated test suite.
        # Two sets with different values, per criterion 2 above; both satisfy
        # the __init__ signature of this template (paramname is required).
        params1 = {"paramname": 42}
        params2 = {"paramname": 0, "paramname2": "non-default"}
        return [params1, params2]