forked from statsmodels/statsmodels
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtmodel.py
227 lines (170 loc) · 6.96 KB
/
tmodel.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
"""Linear Model with Student-t distributed errors
Because the t distribution has fatter tails than the normal distribution, it
can be used to model observations with heavier tails and observations that have
some outliers. For the latter case, the t-distribution provides more robust
estimators for mean or mean parameters (what about var?).
References
----------
Kenneth L. Lange, Roderick J. A. Little, Jeremy M. G. Taylor (1989)
Robust Statistical Modeling Using the t Distribution
Journal of the American Statistical Association
Vol. 84, No. 408 (Dec., 1989), pp. 881-896
Published by: American Statistical Association
Stable URL: http://www.jstor.org/stable/2290063
not read yet
Created on 2010-09-24
Author: josef-pktd
License: BSD
TODO
----
* add starting values based on OLS
* bugs: store_params doesn't seem to be defined, I think this was a module
global for debugging - commented out
* parameter restriction: check whether version with some fixed parameters works
"""
#mostly copied from the examples directory written for trying out generic mle.
import numpy as np
from scipy import special #, stats
#redefine some shortcuts
np_log = np.log
np_pi = np.pi
sps_gamln = special.gammaln
from statsmodels.base.model import GenericLikelihoodModel
class TLinearModel(GenericLikelihoodModel):
'''Maximum Likelihood Estimation of Linear Model with t-distributed errors
This is an example for generic MLE.
Except for defining the negative log-likelihood method, all
methods and results are generic. Gradients and Hessian
and all resulting statistics are based on numerical
differentiation.
'''
def initialize(self):
# TODO: here or in __init__
self.k_vars = self.exog.shape[1]
if not hasattr(self, 'fix_df'):
self.fix_df = False
if self.fix_df is False:
# df will be estimated, no parameter restrictions
self.fixed_params = None
self.fixed_paramsmask = None
self.k_params = self.exog.shape[1] + 2
extra_params_names = ['df', 'scale']
else:
# df fixed
self.k_params = self.exog.shape[1] + 1
fixdf = np.nan * np.zeros(self.exog.shape[1] + 2)
fixdf[-2] = self.fix_df
self.fixed_params = fixdf
self.fixed_paramsmask = np.isnan(fixdf)
extra_params_names = ['scale']
self._set_extra_params_names(extra_params_names)
self._set_start_params()
super(TLinearModel, self).initialize()
def _set_start_params(self, start_params=None, use_kurtosis=False):
if start_params is not None:
self.start_params = start_params
else:
from statsmodels.regression.linear_model import OLS
res_ols = OLS(self.endog, self.exog).fit()
start_params = 0.1*np.ones(self.k_params)
start_params[:self.k_vars] = res_ols.params
if self.fix_df is False:
if use_kurtosis:
kurt = stats.kurtosis(res_ols.resid)
df = 6./kurt + 4
else:
df = 5
start_params[-2] = df
#TODO adjust scale for df
start_params[-1] = np.sqrt(res_ols.scale)
self.start_params = start_params
def loglike(self, params):
return -self.nloglikeobs(params).sum(0)
def nloglikeobs(self, params):
"""
Loglikelihood of linear model with t distributed errors.
Parameters
----------
params : array
The parameters of the model. The last 2 parameters are degrees of
freedom and scale.
Returns
-------
loglike : array, (nobs,)
The log likelihood of the model evaluated at `params` for each
observation defined by self.endog and self.exog.
Notes
-----
.. math :: \\ln L=\\sum_{i=1}^{n}\\left[-\\lambda_{i}+y_{i}x_{i}^{\\prime}\\beta-\\ln y_{i}!\\right]
The t distribution is the standard t distribution and not a standardized
t distribution, which means that the scale parameter is not equal to the
standard deviation.
self.fixed_params and self.expandparams can be used to fix some
parameters. (I doubt this has been tested in this model.)
"""
#print len(params),
#store_params.append(params)
if not self.fixed_params is None:
#print 'using fixed'
params = self.expandparams(params)
beta = params[:-2]
df = params[-2]
scale = np.abs(params[-1]) #TODO check behavior around zero
loc = np.dot(self.exog, beta)
endog = self.endog
x = (endog - loc)/scale
#next part is stats.t._logpdf
lPx = sps_gamln((df+1)/2) - sps_gamln(df/2.)
lPx -= 0.5*np_log(df*np_pi) + (df+1)/2.*np_log(1+(x**2)/df)
lPx -= np_log(scale) # correction for scale
return -lPx
def predict(self, params, exog=None):
if exog is None:
exog = self.exog
return np.dot(exog, params[:self.exog.shape[1]])
from scipy import stats
from statsmodels.tsa.arma_mle import Arma
class TArma(Arma):
'''Univariate Arma Model with t-distributed errors
This inherit all methods except loglike from tsa.arma_mle.Arma
This uses the standard t-distribution, the implied variance of
the error is not equal to scale, but ::
error_variance = df/(df-2)*scale**2
Notes
-----
This might be replaced by a standardized t-distribution with scale**2
equal to variance
'''
def loglike(self, params):
return -self.nloglikeobs(params).sum(0)
#add for Jacobian calculation bsejac in GenericMLE, copied from loglike
def nloglikeobs(self, params):
"""
Loglikelihood for arma model for each observation, t-distribute
Notes
-----
The ancillary parameter is assumed to be the last element of
the params vector
"""
errorsest = self.geterrors(params[:-2])
#sigma2 = np.maximum(params[-1]**2, 1e-6) #do I need this
#axis = 0
#nobs = len(errorsest)
df = params[-2]
scale = np.abs(params[-1])
llike = - stats.t._logpdf(errorsest/scale, df) + np_log(scale)
return llike
#TODO rename fit_mle -> fit, fit -> fit_ls
def fit_mle(self, order, start_params=None, method='nm', maxiter=5000,
tol=1e-08, **kwds):
nar, nma = order
if start_params is not None:
if len(start_params) != nar + nma + 2:
raise ValueError('start_param need sum(order) + 2 elements')
else:
start_params = np.concatenate((0.05*np.ones(nar + nma), [5, 1]))
res = super(TArma, self).fit_mle(order=order,
start_params=start_params,
method=method, maxiter=maxiter,
tol=tol, **kwds)
return res