forked from yukit-k/ai-for-trading
-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathsharpe_ratio_solution.py
379 lines (225 loc) · 9.13 KB
/
sharpe_ratio_solution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
#!/usr/bin/env python
# coding: utf-8
# # Sharpe ratio (Solution)
# ## Install packages
# In[ ]:
import sys
# In[ ]:
# Install the packages listed in requirements.txt via pip.
# NOTE(review): presumably IPython expands `{sys.executable}` to the running
# interpreter before executing the command — confirm.
get_ipython().system('{sys.executable} -m pip install -r requirements.txt')
# In[ ]:
import cvxpy as cvx
import numpy as np
import pandas as pd
import time
import os
import quiz_helper
import matplotlib.pyplot as plt
# In[ ]:
# Enable the notebook's inline matplotlib backend.
get_ipython().run_line_magic('matplotlib', 'inline')
# Apply the 'ggplot' style sheet and a (14, 8) default figure size to all plots.
plt.style.use('ggplot')
plt.rcParams['figure.figsize'] = (14, 8)
# ### data bundle
# In[ ]:
import os
import quiz_helper
from zipline.data import bundles
# In[ ]:
# Point ZIPLINE_ROOT at the quiz data directory, resolved relative to the
# current working directory.
os.environ['ZIPLINE_ROOT'] = os.path.join(os.getcwd(), '..', '..','data','module_4_quizzes_eod')
# Register a csvdir-based equities bundle under the name defined in quiz_helper.
ingest_func = bundles.csvdir.csvdir_equities(['daily'], quiz_helper.EOD_BUNDLE_NAME)
bundles.register(quiz_helper.EOD_BUNDLE_NAME, ingest_func)
print('Data Registered')
# ### Build pipeline engine
# In[ ]:
from zipline.pipeline import Pipeline
from zipline.pipeline.factors import AverageDollarVolume
from zipline.utils.calendars import get_calendar
# Universe filter: the top 500 assets by AverageDollarVolume with window_length=120.
universe = AverageDollarVolume(window_length=120).top(500)
# Load the registered bundle and build the pipeline engine on the NYSE calendar.
trading_calendar = get_calendar('NYSE')
bundle_data = bundles.load(quiz_helper.EOD_BUNDLE_NAME)
engine = quiz_helper.build_pipeline_engine(bundle_data, trading_calendar)
# ### View Data
# With the pipeline engine built, let's get the stocks at the end of the period in the universe we're using. We'll use these tickers to generate the returns data for our risk model.
# In[ ]:
# Run the universe screen for the single end date and collect the entries of
# the second index level of the result as a plain list.
universe_end_date = pd.Timestamp('2016-01-05', tz='UTC')
universe_tickers = (
    engine.run_pipeline(
        Pipeline(screen=universe),
        universe_end_date,
        universe_end_date,
    )
    .index.get_level_values(1)
    .values.tolist()
)
universe_tickers
# # Get Returns data
# In[ ]:
from zipline.data.data_portal import DataPortal
# Data portal built from the bundle's asset finder, daily bar reader and
# adjustment reader; no minute-bar reader is supplied.
data_portal = DataPortal(
    bundle_data.asset_finder,
    trading_calendar=trading_calendar,
    first_trading_day=bundle_data.equity_daily_bar_reader.first_trading_day,
    equity_minute_reader=None,
    equity_daily_reader=bundle_data.equity_daily_bar_reader,
    adjustment_reader=bundle_data.adjustment_reader)
# ## Get pricing data helper function
# In[ ]:
def get_pricing(data_portal, trading_calendar, assets, start_date, end_date, field='close'):
    """Fetch a daily history window for ``assets`` ending at ``end_date``.

    Both dates are reduced to their calendar day, localized to UTC, and looked
    up in ``trading_calendar.closes.index`` to work out how many bars to
    request.

    Args:
        data_portal: Object exposing ``get_history_window``.
        trading_calendar: Object whose ``closes.index`` contains the session
            labels that the two dates are looked up in.
        assets: Assets to request; forwarded unchanged.
        start_date: Start of the range; must support ``strftime``.
        end_date: End of the range; must support ``strftime``.
        field: Field to request (defaults to ``'close'``).

    Returns:
        Whatever ``data_portal.get_history_window`` returns for the request.

    Raises:
        KeyError: If either date is not present in
            ``trading_calendar.closes.index``.
    """
    # No `offset=` keyword: current pandas releases reject it with a TypeError,
    # and the lookups below depend only on the timestamp's value.
    end_dt = pd.Timestamp(end_date.strftime('%Y-%m-%d'), tz='UTC')
    start_dt = pd.Timestamp(start_date.strftime('%Y-%m-%d'), tz='UTC')

    sessions = trading_calendar.closes.index
    end_loc = sessions.get_loc(end_dt)
    start_loc = sessions.get_loc(start_dt)

    return data_portal.get_history_window(
        assets=assets,
        end_dt=end_dt,
        # Number of sessions between the two dates.
        # NOTE(review): presumably this leaves the start session itself out of
        # the window — confirm against get_history_window.
        bar_count=end_loc - start_loc,
        frequency='1d',
        field=field,
        data_frequency='daily')
# ## get pricing data into a dataframe
# In[ ]:
# Fetch five years of prices ending at universe_end_date and convert them to
# returns: take the percentage change, drop the first row, and fill any
# remaining missing values with 0.
returns_df = (
    get_pricing(
        data_portal,
        trading_calendar,
        universe_tickers,
        universe_end_date - pd.DateOffset(years=5),
        universe_end_date,
    )
    .pct_change()[1:]
    .fillna(0)
)
returns_df
# ## Sector data helper function
# We'll create an object for you, which defines a sector for each stock. The sectors are represented by integers. We inherit from the Classifier class. [Documentation for Classifier](https://www.quantopian.com/posts/pipeline-classifiers-are-here), and the [source code for Classifier](https://github.com/quantopian/zipline/blob/master/zipline/pipeline/classifiers/classifier.py)
# In[ ]:
from zipline.pipeline.classifiers import Classifier
from zipline.utils.numpy_utils import int64_dtype
class Sector(Classifier):
    """Classifier that labels each asset with an integer sector code.

    The codes are read from a saved NumPy array when the object is created.
    """

    dtype = int64_dtype
    window_length = 0
    inputs = ()
    missing_value = -1

    def __init__(self):
        # Sector codes loaded from disk.
        # NOTE(review): presumably laid out so that indexing by asset id
        # yields that asset's sector — confirm against the data file.
        self.data = np.load('../../data/project_4_sector/data.npy')

    def _compute(self, arrays, dates, assets, mask):
        """Return the sector code where ``mask`` is true, else ``missing_value``.

        ``arrays`` and ``dates`` are not used.
        """
        sector_codes = self.data[assets]
        return np.where(mask, sector_codes, self.missing_value)
# In[ ]:
# Instantiate the classifier; its constructor loads the sector array from disk.
sector = Sector()
# ## We'll use 2 years of data to calculate the factor
# **Note:** Going back 2 years falls on a day when the market is closed. Pipeline package doesn't handle start or end dates that don't fall on days when the market is open. To fix this, we went back 2 extra days to fall on the next day when the market is open.
# In[ ]:
# Start of the factor window: 2 years and 2 days before universe_end_date.
factor_start_date = universe_end_date - pd.DateOffset(years=2, days=2)
factor_start_date
# ## Create smoothed momentum factor
# In[ ]:
from zipline.pipeline.factors import Returns
from zipline.pipeline.factors import SimpleMovingAverage
# Pipeline restricted to the stock universe.
p = Pipeline(screen=universe)

# Momentum factor: returns with window_length=252, de-meaned by sector
# (using the custom Sector classifier), then ranked and z-scored.
factor = (
    Returns(window_length=252, mask=universe)
    .demean(groupby=Sector())
    .rank()
    .zscore()
)

# Smoothed variant: simple moving average of the factor above with a window
# length of 5, ranked and z-scored again (no second de-meaning by sector).
factor_smoothed = (
    SimpleMovingAverage(inputs=[factor], window_length=5)
    .rank()
    .zscore()
)

# Add the unsmoothed and the smoothed factor to the pipeline.
p.add(factor, 'Momentum_Factor')
p.add(factor_smoothed, 'Smoothed_Momentum_Factor')
# ## visualize the pipeline
#
# Note that if the image is difficult to read in the notebook, right-click and view the image in a separate tab.
# In[ ]:
# Render the pipeline's graph as a PNG image.
p.show_graph(format='png')
# ## run pipeline and view the factor data
# In[ ]:
# Run the pipeline from factor_start_date through universe_end_date.
df = engine.run_pipeline(p, factor_start_date, universe_end_date)
# In[ ]:
# Preview the first rows of the result.
df.head()
# ## Evaluate Factors
#
# We'll go over some tools that we can use to evaluate alpha factors. To do so, we'll use the [alphalens library](https://github.com/quantopian/alphalens)
#
# ## Import alphalens
# In[ ]:
import alphalens as al
# ## Get price data
#
# Note, we already got the price data and converted it to returns, which we used to calculate a factor. We'll retrieve the price data again, but won't convert these to returns. This is because we'll use alphalens functions that take their input as prices and not returns.
#
# ## Define the list of assets
# Just to make sure we get the prices for the stocks that have factor values, we'll get the list of assets, which may be a subset of the original universe
# In[ ]:
# Get the list of stocks in our portfolio: the values of the second level of
# the factor DataFrame's index.
assets = df.index.levels[1].values.tolist()
# Compare the size of the original universe with the number of stocks that have factor values.
print(f"stock universe number of stocks {len(universe_tickers)}, and number of stocks for which we have factor values {len(assets)}")
# In[ ]:
factor_start_date
# In[ ]:
# Prices (not converted to returns) over the same window as the factor.
pricing = get_pricing(
    data_portal,
    trading_calendar,
    assets,  # assets rather than universe_tickers; in this example they are the same
    factor_start_date,  # same start and end dates as used when calculating the factor
    universe_end_date)
# ## Prepare data for use in alphalens
#
# In[ ]:
factor_names = df.columns
print(f"The factor names are {factor_names}")

# For every factor column, build the alphalens input from the factor values
# and the price data, requesting a single period of 1.
factor_data = {}
for factor_name in factor_names:
    print("Formatting factor data for: " + factor_name)
    factor_data[factor_name] = al.utils.get_clean_factor_and_forward_returns(
        factor=df[factor_name],
        prices=pricing,
        periods=[1],
    )
# ### factor returns
# In[ ]:
# Compute the factor returns for each factor with alphalens. The resulting
# list follows the order of factor_names.
ls_factor_return = []
for factor_name in factor_names:
    # use the alphalens function "factor_returns" to calculate factor returns
    factor_return = al.performance.factor_returns(factor_data[factor_name])
    # label the result's column with the factor it belongs to
    factor_return.columns = [factor_name]
    ls_factor_return.append(factor_return)
# # Quiz 1: Sharpe ratio
#
# Generally, a Sharpe ratio of 1 or higher indicates a better factor than one with a lower Sharpe ratio. In other words, the returns that would have been accrued by a portfolio that was weighted according to the alpha factor would have had an annualized return that is greater than or equal to its annualized volatility.
#
# Recall that to annualize the Sharpe ratio (from daily to annual), multiply it by $ \sqrt{252} $
# In[ ]:
def sharpe_ratio(df, frequency="daily"):
    """Compute the annualized Sharpe ratio of each return series.

    The ratio is the mean return divided by the standard deviation of the
    returns (no risk-free rate is subtracted), multiplied by an annualization
    factor.

    Args:
        df: Returns to evaluate. A DataFrame yields one ratio per column; a
            Series is treated as a single-column frame.
        frequency: Sampling frequency of the returns. ``"daily"`` scales by
            sqrt(252) and ``"monthly"`` by sqrt(12); any other value applies
            no scaling.

    Returns:
        DataFrame with a single ``'Sharpe Ratio'`` column, rounded to 2
        decimal places, with one row per input column.
    """
    if frequency == "daily":
        # daily to annual conversion
        annualization_factor = np.sqrt(252)
    elif frequency == "monthly":
        # monthly to annual conversion
        annualization_factor = np.sqrt(12)
    else:
        # no conversion
        annualization_factor = 1

    # A Series would reduce to scalars below, which the DataFrame constructor
    # rejects; promote it to a one-column frame so it gets one labelled row.
    returns = df.to_frame() if isinstance(df, pd.Series) else df

    ratios = annualization_factor * returns.mean() / returns.std()
    df_sharpe = pd.DataFrame(data=ratios, columns=['Sharpe Ratio']).round(2)
    return df_sharpe
# ## Quiz 2
#
# Compare the sharpe ratio of the unsmoothed versus smoothed version of the factors.
# ## Answer 2
# In[ ]:
# TODO: calculate sharpe ratio on the unsmoothed factor
sharpe_ratio(ls_factor_return[0])
# In[ ]:
# TODO: calculate sharpe ratio on the smoothed factor
sharpe_ratio(ls_factor_return[1])
# ## Answer 2 continued
# The smoothed factor has a slightly lower sharpe ratio in this example.
# In[ ]: