changed alphalens tutorial notebooks

fewqo · Nov 2, 2018 · 38eeb9c · 38eeb9c
1 parent 8963f25
commit 38eeb9c
Show file tree

Hide file tree

Showing 3 changed files with 67 additions and 77 deletions.
diff --git a/notebooks/tutorials/3_alphalens_lesson_2/notebook.ipynb b/notebooks/tutorials/3_alphalens_lesson_2/notebook.ipynb
@@ -6,7 +6,7 @@
    "source": [
     "#### Companion notebook for Alphalens tutorial lesson 2\n",
     "\n",
-    "# Creating tear sheets with Alphalens\n",
+    "# Creating Tear Sheets With Alphalens\n",
     "\n",
     "In the previous lesson, you learned what Alphalens is. In this lesson, you will learn a four step process for how to use it:\n",
     "\n",
@@ -30,9 +30,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from quantopian.pipeline.data import factset \n",
-    "\n",
     "from quantopian.pipeline import Pipeline\n",
+    "from quantopian.pipeline.data import factset\n",
     "from quantopian.research import run_pipeline\n",
     "from quantopian.pipeline.filters import QTradableStocksUS\n",
     "\n",
@@ -46,10 +45,10 @@
     "        screen = QTradableStocksUS() & asset_growth.notnull()\n",
     "    )\n",
     "\n",
-    "factor_data = run_pipeline(pipeline=make_pipeline(), start_date='2014-1-1', end_date='2016-1-1')\n",
+    "pipeline_output = run_pipeline(pipeline=make_pipeline(), start_date='2014-1-1', end_date='2016-1-1')\n",
     "\n",
     "# Show the first 5 rows of pipeline_output\n",
-    "factor_data.head(5) "
+    "pipeline_output.head(5) "
    ]
   },
   {
@@ -74,10 +73,10 @@
    "outputs": [],
    "source": [
     "pricing_data = get_pricing(\n",
-    "    symbols=factor_data.index.levels[1], # Finds all assets that appear at least once in \"factor_data\"  \n",
+    "    symbols=pipeline_output.index.levels[1], # Finds all assets that appear at least once in \"pipeline_output\"  \n",
     "    start_date='2014-1-1',\n",
     "    end_date='2016-2-1', # must be after run_pipeline()'s end date. Explained more in lesson 4\n",
-    "    fields='open_price' # Generally, you should use open pricing. Explained more in lesson 4\n",
+    "    fields='open_price' # Generally, you should use open pricing.\n",
     ")\n",
     "\n",
     "# Show the first 5 rows of pricing_data\n",
@@ -90,7 +89,7 @@
    "source": [
     "## Align Data\n",
     "\n",
-    "`get_clean_factor_and_forward_returns()` aligns factor data from a Pipeline with pricing data from `get_pricing()`, and returns an object suitable for analysis with Alphalens' charting functions. It requires two arguments:\n",
+    "`get_clean_factor_and_forward_returns()` aligns the factor data created by `run_pipeline()` with the pricing data created by `get_pricing()`, and returns an object suitable for analysis with Alphalens' charting functions. It requires two arguments:\n",
     "- The factor data we created with `run_pipeline()`.\n",
     "- The pricing data we created with `get_pricing()`.\n",
     "\n",
@@ -107,13 +106,13 @@
    "source": [
     "from alphalens.utils import get_clean_factor_and_forward_returns\n",
     "\n",
-    "merged_data = get_clean_factor_and_forward_returns(\n",
-    "    factor=factor_data, \n",
+    "factor_data = get_clean_factor_and_forward_returns(\n",
+    "    factor=pipeline_output, \n",
     "    prices=pricing_data\n",
     ")\n",
     "\n",
     "# Show the first 5 rows of factor_data\n",
-    "merged_data.head(5) "
+    "factor_data.head(5) "
    ]
   },
   {
@@ -135,7 +134,7 @@
    "source": [
     "from alphalens.tears import create_full_tear_sheet\n",
     "\n",
-    "create_full_tear_sheet(merged_data)"
+    "create_full_tear_sheet(factor_data)"
    ]
   },
   {

diff --git a/notebooks/tutorials/3_alphalens_lesson_3/notebook.ipynb b/notebooks/tutorials/3_alphalens_lesson_3/notebook.ipynb
@@ -23,9 +23,9 @@
    "source": [
     "### Our Starting Alpha Factor\n",
     "\n",
-    "The following code expresses an alpha factor based on a company's net income and market cap, and then creates an information tear sheet for that alpha factor. We will start analyzing the alpha factor by looking at it's information coefficient (IC). The IC is a number ranging from -1, to 1, which quantifies the predictiveness of an alpha factor. Any number above 0 is considered somewhat predictive.\n",
+    "The following code expresses an alpha factor based on a company's net income and market cap, then creates an information tear sheet for that alpha factor. We will start analyzing the alpha factor by looking at its information coefficient (IC). The IC is a number ranging from -1 to 1, which quantifies the predictiveness of an alpha factor. Any number above 0 is considered somewhat predictive.\n",
     "\n",
-    "The first number you should look at is the IC mean, which is an alpha factor's average IC over a given time period. You want your factor's IC Mean to be as high as possible. Generally speaking, a factor is worth investigating if it has an IC mean over 0. If it has an IC mean close to .1 (or higher) over a large trading universe, that factor is probably really good.\n",
+    "The first number you should look at is the IC mean, which is an alpha factor's average IC over a given time period. You want your factor's IC mean to be as high as possible. Generally speaking, a factor is worth investigating if it has an IC mean over 0. If it has an IC mean close to 0.1 (or higher) over a large trading universe, that factor is probably **exceptionally good**. In fact, if your alpha factor's IC mean is over 0.1, you might want to check that there isn't some lookahead bias.\n",
     "\n",
     "**Run the cell below to create an information tear sheet for our alpha factor. Notice how the IC Mean figures (the first numbers on the first chart) are all positive. That is a good sign!**"
    ]
@@ -74,11 +74,11 @@
     "    )\n",
     "\n",
     "\n",
-    "factor_data = run_pipeline(make_pipeline(), '2010-1-1', '2012-1-1')\n",
-    "pricing_data = get_pricing(factor_data.index.levels[1], '2010-1-1', '2012-2-1', fields='open_price')\n",
-    "merged_data = get_clean_factor_and_forward_returns(factor_data, pricing_data)\n",
+    "pipeline_output = run_pipeline(make_pipeline(), '2010-1-1', '2012-1-1')\n",
+    "pricing_data = get_pricing(pipeline_output.index.levels[1], '2010-1-1', '2012-2-1', fields='open_price')\n",
+    "factor_data = get_clean_factor_and_forward_returns(pipeline_output, pricing_data)\n",
     "\n",
-    "create_information_tear_sheet(merged_data)"
+    "create_information_tear_sheet(factor_data)"
    ]
   },
   {
@@ -89,7 +89,7 @@
     "\n",
     "**Alphalens is useful for identifying alpha factors that aren't predictive early in the quant workflow. This allows you to avoid wasting time running a full backtest on a factor that could have been discarded earlier in the process.**\n",
     "\n",
-    "Run the following cell to express another alpha factor called `price_to_book`, combine it with `projected_market_cap` using zscores and winsorizing, then creates another information tearsheet based on our new (and hopefully improved) alpha factor. \n",
+    "Run the following cell to express another alpha factor called `price_to_book`, combine it with `projected_market_cap` using zscores and winsorization, then create another information tear sheet based on our new (and hopefully improved) alpha factor. \n",
     "\n",
     "Notice how the IC figures are lower than they were in the first chart. That means the factor we added is making our predictions worse!"
    ]
@@ -131,11 +131,11 @@
     "\n",
     "\n",
     "\n",
-    "factor_data = run_pipeline(make_pipeline(), '2010-1-1', '2012-1-1')\n",
-    "pricing_data = get_pricing(factor_data.index.levels[1], '2010-1-1', '2012-2-1', fields='open_price')\n",
-    "new_merged_data = get_clean_factor_and_forward_returns(factor_data, pricing_data)\n",
+    "pipeline_output = run_pipeline(make_pipeline(), '2010-1-1', '2012-1-1')\n",
+    "pricing_data = get_pricing(pipeline_output.index.levels[1], '2010-1-1', '2012-2-1', fields='open_price')\n",
+    "new_factor_data = get_clean_factor_and_forward_returns(pipeline_output, pricing_data)\n",
     "\n",
-    "create_information_tear_sheet(new_merged_data)"
+    "create_information_tear_sheet(new_factor_data)"
    ]
   },
   {
@@ -150,7 +150,7 @@
     "\n",
     "This function creates six types of charts, but the two most important ones are:\n",
     "\n",
-    "- **Mean period wise returns by factor quantile:** This chart shows the average return for each quantile in your universe, per time period. You want the quantiles on the right to have higher average returns than the quantiles on the left.\n",
+    "- **Mean period-wise returns by factor quantile:** This chart shows the average return for each quantile in your universe, per time period. You want the quantiles on the right to have higher average returns than the quantiles on the left.\n",
     "- **Cumulative return by quantile:** This chart shows you how each quantile performed over time. You want to see quantile 1 consistently performing the worst, quantile 5 consistently performing the best, and the other quantiles in the middle.\n",
     "\n",
     "**Run the following cell, and notice how quantile 5 doesn't have the highest returns. Ideally, you want quantile 1 to have the lowest returns, and quantile 5 to have the highest returns. This tear sheet is telling us we still have work to do!**"
@@ -166,7 +166,7 @@
    "source": [
     "from alphalens.tears import create_returns_tear_sheet\n",
     "\n",
-    "create_returns_tear_sheet(merged_data)"
+    "create_returns_tear_sheet(factor_data)"
    ]
   },
   {

diff --git a/notebooks/tutorials/3_alphalens_lesson_4/notebook.ipynb b/notebooks/tutorials/3_alphalens_lesson_4/notebook.ipynb
@@ -26,54 +26,45 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "from quantopian.pipeline.data import factset\n",
-    "\n",
     "from quantopian.pipeline import Pipeline\n",
+    "from quantopian.pipeline.data import factset\n",
     "from quantopian.research import run_pipeline\n",
-    "from quantopian.pipeline.factors import SimpleMovingAverage\n",
-    "from quantopian.pipeline.classifiers.fundamentals import Sector\n",
     "from quantopian.pipeline.filters import QTradableStocksUS\n",
+    "from quantopian.pipeline.classifiers.fundamentals import Sector\n",
     "from alphalens.utils import get_clean_factor_and_forward_returns\n",
     "\n",
     "\n",
     "def make_pipeline():\n",
-    "\n",
-    "    # 1 year moving average of net income\n",
-    "    net_income_moving_average = SimpleMovingAverage( \n",
-    "        inputs=[factset.Fundamentals.net_inc_af], \n",
-    "        window_length=252\n",
-    "    )\n",
     "    \n",
-    "    # 1 year moving average of market cap\n",
-    "    market_cap_moving_average = SimpleMovingAverage( \n",
-    "        inputs=[factset.Fundamentals.mkt_val], \n",
-    "        window_length=252\n",
-    "    )\n",
-    "    \n",
-    "    average_market_cap_per_net_income = (market_cap_moving_average / net_income_moving_average)\n",
-    "    \n",
-    "    # the last quarter's net income\n",
-    "    net_income = factset.Fundamentals.net_inc_qf.latest \n",
+    "    change_in_working_capital = factset.Fundamentals.wkcap_chg_qf.latest\n",
+    "    ciwc_processed = change_in_working_capital.winsorize(.2, .98).zscore()\n",
     "    \n",
-    "    projected_market_cap = average_market_cap_per_net_income * net_income\n",
+    "    sales_per_working_capital = factset.Fundamentals.sales_wkcap_qf.latest\n",
+    "    spwc_processed = sales_per_working_capital.winsorize(.2, .98).zscore()\n",
+    "\n",
+    "    factor_to_analyze = (ciwc_processed + spwc_processed).zscore()\n",
     "\n",
     "    sector = Sector()\n",
-    "    \n",
+    "\n",
     "    return Pipeline(\n",
     "        columns = {\n",
-    "            'projected_market_cap': projected_market_cap,\n",
-    "            'sector': sector\n",
+    "            'factor_to_analyze': factor_to_analyze,\n",
+    "            'sector': sector,\n",
     "        },\n",
-    "        screen = QTradableStocksUS() & projected_market_cap.notnull() & sector.notnull()\n",
+    "        screen = (\n",
+    "            QTradableStocksUS()\n",
+    "            & factor_to_analyze.notnull()\n",
+    "            & sector.notnull()\n",
+    "        )\n",
     "    )\n",
     "\n",
     "\n",
-    "factor_data = run_pipeline(make_pipeline(), '2013-1-1', '2014-1-1')\n",
-    "pricing_data = get_pricing(factor_data.index.levels[1], '2013-1-1', '2014-3-1', fields='open_price')\n",
+    "pipeline_output = run_pipeline(make_pipeline(), '2013-1-1', '2014-1-1')\n",
+    "pricing_data = get_pricing(pipeline_output.index.levels[1], '2013-1-1', '2014-3-1', fields='open_price')\n",
     "\n",
     "\n",
-    "merged_data = get_clean_factor_and_forward_returns(\n",
-    "    factor_data['projected_market_cap'], # This is how you analyze a specific pipeline column with Alphalens\n",
+    "factor_data = get_clean_factor_and_forward_returns(\n",
+    "    pipeline_output['factor_to_analyze'], # How to analyze a specific pipeline column with Alphalens\n",
     "    pricing_data, \n",
     "    periods=range(1,32,3)\n",
     ")"
@@ -101,7 +92,7 @@
    "outputs": [],
    "source": [
     "from alphalens.performance import mean_information_coefficient\n",
-    "mean_information_coefficient(merged_data).plot(title=\"IC Decay\");"
+    "mean_information_coefficient(factor_data).plot(title=\"IC Decay\");"
    ]
   },
   {
@@ -119,13 +110,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "merged_data = get_clean_factor_and_forward_returns(\n",
-    "    factor_data['projected_market_cap'], \n",
+    "factor_data = get_clean_factor_and_forward_returns(\n",
+    "    pipeline_output['factor_to_analyze'], \n",
     "    pricing_data,\n",
     "    periods=range(1,252,20) # The third argument to the range statement changes the \"step\" of the range\n",
     ")\n",
     "\n",
-    "mean_information_coefficient(merged_data).plot()"
+    "mean_information_coefficient(factor_data).plot()"
    ]
   },
   {
@@ -140,7 +131,7 @@
     "\n",
     "In this case, we'll change `get_pricing()`'s `end_date` to be at least a year after `run_pipeline()`'s `end_date`.\n",
     "\n",
-    "**Run the following cell to make those changes. As you can see, this alpha factor's IC decays quickly after a few days, but comes back even stronger than before six months into the future. Interesting!**"
+    "**Run the following cell to make those changes. As you can see, this alpha factor's IC decays quickly after a quarter, but comes back even stronger six months into the future. Interesting!**"
    ]
   },
   {
@@ -149,26 +140,26 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "factor_data = run_pipeline(\n",
+    "pipeline_output = run_pipeline(\n",
     "    make_pipeline(),\n",
     "    start_date='2013-1-1', \n",
     "    end_date='2014-1-1' #  *** NOTE *** Our factor data ends in 2014\n",
     ")\n",
     "\n",
     "pricing_data = get_pricing(\n",
-    "    factor_data.index.levels[1], \n",
+    "    pipeline_output.index.levels[1], \n",
     "    start_date='2013-1-1',\n",
     "    end_date='2015-2-1', # *** NOTE *** Our pricing data ends in 2015\n",
     "    fields='open_price'\n",
     ")\n",
     "\n",
-    "merged_data = get_clean_factor_and_forward_returns(\n",
-    "    factor_data['projected_market_cap'], \n",
+    "factor_data = get_clean_factor_and_forward_returns(\n",
+    "    pipeline_output['factor_to_analyze'], \n",
     "    pricing_data,\n",
     "    periods=range(1,252,20) # Change the step to 10 or more for long look forward periods to save time\n",
     ")\n",
     "\n",
-    "mean_information_coefficient(merged_data).plot()"
+    "mean_information_coefficient(factor_data).plot()"
    ]
   },
   {
@@ -201,14 +192,14 @@
     "\n",
     "sector_labels, sector_labels[-1] = dict(Sector.SECTOR_NAMES), \"Unknown\"\n",
     "\n",
-    "result = get_clean_factor_and_forward_returns(\n",
-    "    factor=factor_data['projected_market_cap'],\n",
+    "factor_data = get_clean_factor_and_forward_returns(\n",
+    "    factor=pipeline_output['factor_to_analyze'],\n",
     "    prices=pricing_data,\n",
-    "    groupby=factor_data['sector'],\n",
+    "    groupby=pipeline_output['sector'],\n",
     "    groupby_labels=sector_labels,\n",
     ")\n",
     "\n",
-    "create_returns_tear_sheet(factor_data=result, by_group=True)"
+    "create_returns_tear_sheet(factor_data=factor_data, by_group=True)"
    ]
   },
   {
@@ -236,15 +227,15 @@
    },
    "outputs": [],
    "source": [
-    "result = get_clean_factor_and_forward_returns(\n",
-    "    factor_data['projected_market_cap'],\n",
+    "factor_data = get_clean_factor_and_forward_returns(\n",
+    "    pipeline_output['factor_to_analyze'],\n",
     "    prices=pricing_data,\n",
-    "    groupby=factor_data['sector'],\n",
+    "    groupby=pipeline_output['sector'],\n",
     "    groupby_labels=sector_labels,\n",
     "    binning_by_group=True,\n",
     ")\n",
     "\n",
-    "create_returns_tear_sheet(result, by_group=True, group_neutral=True)"
+    "create_returns_tear_sheet(factor_data, by_group=True, group_neutral=True)"
    ]
   },
   {
@@ -264,21 +255,21 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 2",
    "language": "python",
-   "name": "python3"
+   "name": "python2"
   },
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 3
+    "version": 2
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.7.0"
+   "pygments_lexer": "ipython2",
+   "version": "2.7.12"
   }
  },
  "nbformat": 4,