update 9 notebooks to use az.extract + PyMC v5 upgrade (pymc-devs#522)
* update moderation analysis notebook

* update GLM binomial regression

* update simpsons paradox

* update rolling regression

* update glm robust with outlier detection

* update truncated/censored notebook

* update stochastic volatility

* update dp mixture

* update putting

* update author blocks where I'd missed them before

* add link to PR in author block

* finish putting notebook

* polish off rough edges

* remove wrong/duplicate v5 update claim in author block
drbenvincent authored Feb 7, 2023
1 parent 081f2f9 commit ce0f674
Showing 18 changed files with 583 additions and 3,088 deletions.
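
The recurring change in this commit is swapping the manual `.stack(sample=("chain", "draw"))` idiom for ArviZ's `az.extract` helper. The snippet below is an editorial sketch of that migration on a toy model — the model, variable names, and sample sizes are placeholders, not code taken from the changed notebooks:

```python
import arviz as az
import numpy as np
import pymc as pm

# Toy model standing in for any of the notebooks touched by this commit
rng = np.random.default_rng(0)
observed = rng.normal(loc=1.0, scale=2.0, size=100)

with pm.Model() as model:
    mu = pm.Normal("mu", 0, 10)
    sigma = pm.HalfNormal("sigma", 5)
    pm.Normal("y", mu=mu, sigma=sigma, observed=observed)
    idata = pm.sample(chains=2, draws=500, random_seed=rng)
    idata.extend(pm.sample_posterior_predictive(idata, random_seed=rng))

# Old idiom: flatten (chain, draw) into a single "sample" dimension by hand
post_old = idata.posterior.stack(sample=("chain", "draw"))

# New idiom: az.extract does the flattening (and optional subsampling)
post = az.extract(idata)                               # posterior group by default
post_sub = az.extract(idata, num_samples=400)          # random subset of draws
ppc = az.extract(idata, group="posterior_predictive")  # other InferenceData groups

# Reductions that used .mean(("chain", "draw")) now reduce over "sample"
print(post["mu"].mean(dim="sample").item())
```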
206 changes: 26 additions & 180 deletions examples/case_studies/moderation_analysis.ipynb

Large diffs are not rendered by default.

11 changes: 6 additions & 5 deletions examples/case_studies/moderation_analysis.myst.md
@@ -7,7 +7,7 @@ jupytext:
kernelspec:
  display_name: pymc_env
  language: python
-  name: pymc_env
+  name: python3
---

(moderation_analysis)=
@@ -71,7 +71,7 @@ def posterior_prediction_plot(result, x, moderator, m_quantiles, ax=None):
    if ax is None:
        fig, ax = plt.subplots(1, 1)
-    post = result.posterior.stack(sample=("chain", "draw"))
+    post = az.extract(result)
    xi = xr.DataArray(np.linspace(np.min(x), np.max(x), 20), dims=["x_plot"])
    m_levels = result.constant_data["m"].quantile(m_quantiles).rename({"quantile": "m_level"})
@@ -99,13 +99,13 @@ def posterior_prediction_plot(result, x, moderator, m_quantiles, ax=None):
    return ax


-def plot_moderation_effect(m, m_quantiles, ax=None):
+def plot_moderation_effect(result, m, m_quantiles, ax=None):
    """Spotlight graph"""
    if ax is None:
        fig, ax = plt.subplots(1, 1)
-    post = result.posterior.stack(sample=("chain", "draw"))
+    post = az.extract(result)
    # calculate 95% CI region and median
    xi = xr.DataArray(np.linspace(np.min(m), np.max(m), 20), dims=["x_plot"])
@@ -316,7 +316,7 @@ We can also visualise the moderation effect by plotting $\beta_1 + \beta_2 \cdot

```{code-cell} ipython3
fig, ax = plt.subplots(1, 2, figsize=(10, 5))
-plot_moderation_effect(m, m_quantiles, ax[0])
+plot_moderation_effect(result, m, m_quantiles, ax[0])
az.plot_posterior(result, var_names="β2", ax=ax[1]);
```

@@ -362,6 +362,7 @@ But readers are strongly encouraged to read {cite:t}`mcclelland2017multicollinea
- Authored by Benjamin T. Vincent in June 2021
- Updated by Benjamin T. Vincent in March 2022
- Updated by Benjamin T. Vincent in February 2023 to run on PyMC v5
+- Updated to use `az.extract` by [Benjamin T. Vincent](https://github.com/drbenvincent) in February 2023 ([pymc-examples#522](https://github.com/pymc-devs/pymc-examples/pull/522))

+++

984 changes: 111 additions & 873 deletions examples/case_studies/putting_workflow.ipynb

Large diffs are not rendered by default.

85 changes: 37 additions & 48 deletions examples/case_studies/putting_workflow.myst.md
@@ -5,7 +5,7 @@ jupytext:
    format_name: myst
    format_version: 0.13
kernelspec:
-  display_name: Python 3.10.6 ('pymc_env')
+  display_name: pymc_env
  language: python
  name: python3
substitutions:
@@ -43,8 +43,6 @@ import scipy.stats as st
import xarray as xr
from xarray_einstats.stats import XrContinuousRV
-print(f"Running on PyMC3 v{pm.__version__}")
```

```{code-cell} ipython3
@@ -172,30 +170,30 @@ We plot 50 posterior draws of $p(\text{success})$ along with the expected value.
```{code-cell} ipython3
# Draw posterior predictive samples
with logit_model:
-    # hard to plot more than 400 sensibly
-    # we generate a posterior predictive sample for only 1 in every 10 draws
-    logit_trace.extend(pm.sample_posterior_predictive(logit_trace.sel(draw=slice(None, None, 10))))
-logit_post = logit_trace.posterior
-logit_ppc = logit_trace.posterior_predictive
-const_data = logit_trace.constant_data
-logit_ppc_success = (logit_ppc["success"] / const_data["tries"]).stack(sample=("chain", "draw"))
+    logit_trace.extend(pm.sample_posterior_predictive(logit_trace))
+# hard to plot more than 400 sensibly
+logit_post = az.extract(logit_trace, num_samples=400)
+logit_ppc = az.extract(logit_trace, group="posterior_predictive", num_samples=400)
+const_data = logit_trace["constant_data"]
+logit_ppc_success = logit_ppc["success"] / const_data["tries"]
# Plotting
ax = plot_golf_data(golf_data)
t_ary = np.linspace(CUP_RADIUS - BALL_RADIUS, golf_data.distance.max(), 200)
t = xr.DataArray(t_ary, coords=[("distance", t_ary)])
logit_post["expit"] = scipy.special.expit(logit_post["a"] * t + logit_post["b"])
-logit_post_subset = az.extract_dataset(logit_post, num_samples=50, rng=RANDOM_SEED)
ax.plot(
    t,
-    logit_post_subset["expit"],
+    logit_post["expit"].T,
    lw=1,
    color="C1",
    alpha=0.5,
)
-ax.plot(t, logit_post["expit"].mean(("chain", "draw")), color="C2")
+ax.plot(t, logit_post["expit"].mean(dim="sample"), color="C2")
ax.plot(golf_data.distance, logit_ppc_success, "k.", alpha=0.01)
ax.set_title("Logit mean and posterior predictive");
@@ -204,9 +202,7 @@ ax.set_title("Logit mean and posterior predictive");
The fit is ok, but not great! It is a good start for a baseline, and lets us answer curve-fitting type questions. We may not trust much extrapolation beyond the end of the data, especially given how the curve does not fit the last four values very well. For example, putts from 50 feet are expected to be made with probability:

```{code-cell} ipython3
-prob_at_50 = (
-    scipy.special.expit(logit_post["a"] * 50 + logit_post["b"]).mean(("chain", "draw")).item()
-)
+prob_at_50 = scipy.special.expit(logit_post["a"] * 50 + logit_post["b"]).mean(dim="sample").item()
print(f"{100 * prob_at_50:.5f}%")
```

@@ -220,13 +216,13 @@ this appeared here in using

```python
# Right!
-scipy.special.expit(logit_trace.posterior["a"] * 50 + logit_trace.posterior["b"]).mean(('chain', 'draw'))
+scipy.special.expit(logit_trace.posterior["a"] * 50 + logit_trace.posterior["b"]).mean(dim="sample")
```
rather than

```python
# Wrong!
-scipy.special.expit(logit_trace.posterior["a"].mean(('chain', 'draw')) * 50 + logit_trace.posterior["b"].mean(('chain', 'draw')))
+scipy.special.expit(logit_trace.posterior["a"].mean(dim="sample") * 50 + logit_trace.posterior["b"].mean(dim="sample"))
```

to calculate our expectation at 50 feet.
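
Editorial aside: the Right!/Wrong! distinction above is Jensen's inequality at work — for a nonlinear function like `expit`, averaging the per-draw probabilities is not the same as plugging in posterior means. A quick synthetic illustration (the draws below are made up, not the notebook's posterior):

```python
import numpy as np
from scipy.special import expit

# Hypothetical posterior draws for the slope and intercept (illustrative only)
rng = np.random.default_rng(1234)
a = rng.normal(-0.25, 0.02, size=2_000)
b = rng.normal(2.0, 0.1, size=2_000)

right = expit(a * 50 + b).mean()         # E[f(a, b)]: average the per-draw probabilities
wrong = expit(a.mean() * 50 + b.mean())  # f(E[a], E[b]): plug in posterior means first
print(f"{right:.4%} vs {wrong:.4%}")     # the two estimates differ noticeably
```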
@@ -333,7 +329,7 @@ This is a little funny! Most obviously, it should probably be not this common to
with angle_model:
    angle_trace.extend(pm.sample(1000, tune=1000, target_accept=0.85))
-angle_post = angle_trace.posterior
+angle_post = az.extract(angle_trace)
```

```{code-cell} ipython3
@@ -343,21 +339,21 @@ angle_post["expit"] = forward_angle_model(angle_post["variance_of_shot"], t)
ax.plot(
    t,
-    az.extract_dataset(angle_post, num_samples=50)["expit"],
+    angle_post["expit"][:, ::100],
    lw=1,
    color="C1",
    alpha=0.1,
)
ax.plot(
    t,
-    angle_post["expit"].mean(("chain", "draw")),
+    angle_post["expit"].mean(dim="sample"),
    label="Geometry-based model",
)
ax.plot(
    t,
-    logit_post["expit"].mean(("chain", "draw")),
+    logit_post["expit"].mean(dim="sample"),
    label="Logit-binomial model",
)
ax.set_title("Comparing the fit of geometry-based and logit-binomial model")
@@ -374,21 +370,18 @@ print(f"{100 * angle_prob_at_50.mean().item():.2f}% vs {100 * prob_at_50:.5f}%")
We can also recreate our prior predictive plot, giving us some confidence that the prior was not leading to unreasonable situations in the posterior distribution: the variance in angle is quite small!

```{code-cell} ipython3
-angle_of_shot = XrContinuousRV(
-    st.norm, 0, az.extract_dataset(angle_post, num_samples=500)["variance_of_shot"]
-).rvs(
+angle_of_shot = XrContinuousRV(st.norm, 0, angle_post["variance_of_shot"]).rvs(
    random_state=RANDOM_SEED
)  # radians
distance = 20  # feet
end_positions = xr.Dataset(
-    {"endx": distance * np.cos(angle_of_shot), "endy": distance * np.sin(angle_of_shot)}
+    {"endx": distance * np.cos(angle_of_shot.data), "endy": distance * np.sin(angle_of_shot.data)}
)
fig, ax = plt.subplots()
-for sample in range(end_positions.dims["sample"]):
-    end = end_positions.isel(sample=sample)
-    ax.plot([0, end["endx"]], [0, end["endy"]], "k-o", lw=1, mfc="w", alpha=0.5)
+for x, y in zip(end_positions.endx, end_positions.endy):
+    ax.plot([0, x], [0, y], "k-o", lw=1, mfc="w", alpha=0.5)
ax.plot(0, 0, "go", label="Start", mfc="g", ms=20)
ax.plot(distance, 0, "ro", label="Goal", mfc="r", ms=20)
@@ -482,16 +475,15 @@ and convergence warnings have no other solution than using a different model tha
ax = plot_golf_data(new_golf_data)
plot_golf_data(golf_data, ax=ax, color="C1")
-new_angle_post = new_angle_trace.posterior
+new_angle_post = az.extract(new_angle_trace)
ax.plot(
    t,
-    forward_angle_model(angle_post["variance_of_shot"], t).mean(("chain", "draw")),
+    forward_angle_model(angle_post["variance_of_shot"], t).mean(dim="sample"),
    label="Trained on original data",
)
ax.plot(
    t,
-    forward_angle_model(new_angle_post["variance_of_shot"], t).mean(("chain", "draw")),
+    forward_angle_model(new_angle_post["variance_of_shot"], t).mean(dim="sample"),
    label="Trained on new data",
)
ax.set_title("Retraining the model on new data")
@@ -576,11 +568,11 @@ def forward_distance_angle_model(variance_of_shot, variance_of_distance, t):
ax = plot_golf_data(new_golf_data)
-distance_angle_post = distance_angle_trace.posterior
+distance_angle_post = az.extract(distance_angle_trace)
ax.plot(
    t,
-    forward_angle_model(new_angle_post["variance_of_shot"], t).mean(("chain", "draw")),
+    forward_angle_model(new_angle_post["variance_of_shot"], t).mean(dim="sample"),
    label="Just angle",
)
ax.plot(
@@ -589,7 +581,7 @@ ax.plot(
        distance_angle_post["variance_of_shot"],
        distance_angle_post["variance_of_distance"],
        t,
-    ).mean(("chain", "draw")),
+    ).mean(dim="sample"),
    label="Distance and angle",
)
@@ -675,15 +667,15 @@ with disp_distance_angle_model:
```{code-cell} ipython3
ax = plot_golf_data(new_golf_data, ax=None)
-disp_distance_angle_post = disp_distance_angle_trace.posterior
+disp_distance_angle_post = az.extract(disp_distance_angle_trace)
ax.plot(
    t,
    forward_distance_angle_model(
        distance_angle_post["variance_of_shot"],
        distance_angle_post["variance_of_distance"],
        t,
-    ).mean(("chain", "draw")),
+    ).mean(dim="sample"),
    label="Distance and angle",
)
ax.plot(
@@ -692,7 +684,7 @@ ax.plot(
        disp_distance_angle_post["variance_of_shot"],
        disp_distance_angle_post["variance_of_distance"],
        t,
-    ).mean(("chain", "draw")),
+    ).mean(dim="sample"),
    label="Dispersed model",
)
ax.set_title("Comparing dispersed model with binomial distance/angle model")
@@ -703,14 +695,14 @@ This new model does better between 10 and 30 feet, as we can also see using the

```{code-cell} ipython3
const_data = distance_angle_trace.constant_data
-old_pp = distance_angle_trace.posterior_predictive
+old_pp = az.extract(distance_angle_trace, group="posterior_predictive")
old_residuals = 100 * ((const_data["successes"] - old_pp["success"]) / const_data["tries"]).mean(
-    ("chain", "draw")
+    dim="sample"
)
-pp = disp_distance_angle_trace.posterior_predictive
+pp = az.extract(disp_distance_angle_trace, group="posterior_predictive")
residuals = 100 * (const_data["successes"] / const_data["tries"] - pp["p_success"]).mean(
-    ("chain", "draw")
+    dim="sample"
)
fig, ax = plt.subplots()
@@ -789,7 +781,7 @@ Note that this is again something we might check experimentally. In particular,
def expected_num_putts(trace, distance_to_hole, trials=100_000):
    distance_to_hole = distance_to_hole * np.ones(trials)
-    combined_trace = trace.posterior.stack(sample=("chain", "draw"))
+    combined_trace = az.extract(trace)
    n_samples = combined_trace.dims["sample"]
@@ -844,6 +836,7 @@ fig.suptitle("Simulated number of putts from\na few distances");
* Adapted by Colin Carroll from the [Model building and expansion for golf putting] case study in the Stan documentation ([pymc#3666](https://github.com/pymc-devs/pymc/pull/3666))
* Updated by Marco Gorelli ([pymc-examples#39](https://github.com/pymc-devs/pymc-examples/pull/39))
* Updated by Oriol Abril-Pla to use PyMC v4 and xarray-einstats
+* Updated by [Benjamin T. Vincent](https://github.com/drbenvincent) to use `az.extract` in February 2023 ([pymc-examples#522](https://github.com/pymc-devs/pymc-examples/pull/522))

+++

@@ -864,7 +857,3 @@ fig.suptitle("Simulated number of putts from\na few distances");

:::{include} ../page_footer.md
:::

-```{code-cell} ipython3
-```
223 changes: 32 additions & 191 deletions examples/case_studies/stochastic_volatility.ipynb

Large diffs are not rendered by default.

19 changes: 10 additions & 9 deletions examples/case_studies/stochastic_volatility.myst.md
@@ -5,7 +5,7 @@ jupytext:
    format_name: myst
    format_version: 0.13
kernelspec:
-  display_name: Python 3 (ipykernel)
+  display_name: pymc_env
  language: python
  name: python3
---
@@ -106,7 +106,7 @@ pm.model_to_graphviz(stochastic_vol_model)
with stochastic_vol_model:
    idata = pm.sample_prior_predictive(500, random_seed=rng)
-prior_predictive = idata.prior_predictive.stack(pooled_chain=("chain", "draw"))
+prior_predictive = az.extract(idata, group="prior_predictive")
```

We plot and inspect the prior predictive. This is *many* orders of magnitude larger than the actual returns we observed. In fact, I cherry-picked a few draws to keep the plot from looking silly. This may suggest changing our priors: a return that our model considers plausible would violate all sorts of constraints by a huge margin: the total value of all goods and services the world produces is ~$\$10^9$, so we might reasonably *not* expect any returns above that magnitude.
@@ -117,7 +117,7 @@ That said, we get somewhat reasonable results fitting this model anyways, and it
fig, ax = plt.subplots(figsize=(14, 4))
returns["change"].plot(ax=ax, lw=1, color="black")
ax.plot(
-    prior_predictive["returns"].isel(pooled_chain=slice(4, 6, None)),
+    prior_predictive["returns"][:, 0::10],
    "g",
    alpha=0.5,
    lw=1,
@@ -138,17 +138,17 @@ Once we are happy with our model, we can sample from the posterior. This is a so

```{code-cell} ipython3
with stochastic_vol_model:
-    idata.extend(pm.sample(2000, tune=2000, random_seed=rng))
+    idata.extend(pm.sample(random_seed=rng))
-posterior = idata.posterior.stack(pooled_chain=("chain", "draw"))
+posterior = az.extract(idata)
posterior["exp_volatility"] = np.exp(posterior["volatility"])
```

```{code-cell} ipython3
with stochastic_vol_model:
    idata.extend(pm.sample_posterior_predictive(idata, random_seed=rng))
-posterior_predictive = idata.posterior_predictive.stack(pooled_chain=("chain", "draw"))
+posterior_predictive = az.extract(idata, group="posterior_predictive")
```

Note that the `step_size` parameter does not look perfect: the different chains look somewhat different. This again indicates some weakness in our model: it may make sense to allow the step_size to change over time, especially over this 11 year time span.
@@ -162,7 +162,7 @@ Now we can look at our posterior estimates of the volatility in S&P 500 returns
```{code-cell} ipython3
fig, ax = plt.subplots(figsize=(14, 4))
-y_vals = posterior["exp_volatility"].isel(pooled_chain=slice(None, None, 5))
+y_vals = posterior["exp_volatility"]
x_vals = y_vals.time.astype(np.datetime64)
plt.plot(x_vals, y_vals, "k", alpha=0.002)
@@ -177,9 +177,9 @@ Finally, we can use the posterior predictive distribution to see the how the lea
fig, axes = plt.subplots(nrows=2, figsize=(14, 7), sharex=True)
returns["change"].plot(ax=axes[0], color="black")
-axes[1].plot(posterior["exp_volatility"].isel(pooled_chain=slice(None, None, 100)), "r", alpha=0.5)
+axes[1].plot(posterior["exp_volatility"], "r", alpha=0.5)
axes[0].plot(
-    posterior_predictive["returns"].isel(pooled_chain=slice(None, None, 100)),
+    posterior_predictive["returns"],
    "g",
    alpha=0.5,
    zorder=-10,
@@ -215,6 +215,7 @@ axes[1].set_title("Posterior volatility");
* Updated by Michael Osthege on June 1, 2022 ([pymc-examples#343](https://github.com/pymc-devs/pymc-examples/pull/343))
* Updated by Christopher Krapu on June 17, 2022 ([pymc-examples#378](https://github.com/pymc-devs/pymc-examples/pull/378))
* Updated for compatibility with PyMC v5 by Beryl Kanali and Sangam Swadik on Jan 22, 2023 ([pymc-examples#517](https://github.com/pymc-devs/pymc-examples/pull/517))
+* Updated by [Benjamin T. Vincent](https://github.com/drbenvincent) to use `az.extract` ([pymc-examples#522](https://github.com/pymc-devs/pymc-examples/pull/522))

+++

150 changes: 19 additions & 131 deletions examples/generalized_linear_models/GLM-binomial-regression.ipynb

Large diffs are not rendered by default.

