Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Meta learners #170

Draft
wants to merge 62 commits into
base: main
Choose a base branch
from
Draft
Changes from 1 commit
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
2c0d551
implemented frequentist S, T and X learners
Feb 24, 2023
ab999b6
Reformatted. Added bootstrapping. Added DRLearner.
Feb 26, 2023
9791b9b
Fixed doc-string for DRLearner
Feb 26, 2023
100f8d7
renamed meta_learners.py to skl_meta_learners.py
Feb 26, 2023
5874281
imported skl_meta_learners
Feb 26, 2023
df90a52
minor code style fixes
Feb 27, 2023
b8a3dff
mostly stylistic changes
Feb 27, 2023
020a65f
fixed an import
Feb 27, 2023
667d3b4
bootstrapping does not overwrite self.models anymore
matekadlicsko Feb 28, 2023
d05c156
fixed a citation in docstring
matekadlicsko Mar 1, 2023
542e129
added _fit function to reduce boilerplate code
matekadlicsko Mar 1, 2023
5f8a62f
refactored
matekadlicsko Mar 1, 2023
759b9e2
added BARTModel
matekadlicsko Mar 1, 2023
8c03319
outlined pymc meta-learners
matekadlicsko Mar 1, 2023
18baff5
minor changes helping pymc integration
matekadlicsko Mar 2, 2023
f9d9817
minor changes
matekadlicsko Mar 2, 2023
9917a83
continuing to integrate pymc models
matekadlicsko Mar 2, 2023
a8d6467
bugfix
matekadlicsko Mar 2, 2023
55b43df
more minor bugfixes
matekadlicsko Mar 2, 2023
9d5bb61
added logistic regression
matekadlicsko Mar 2, 2023
3f77e76
added bayesian DRLearner
matekadlicsko Mar 4, 2023
faf0db5
fixed some issues with X and DR learners
matekadlicsko Mar 5, 2023
c1bbf33
small bugfixes
matekadlicsko Mar 6, 2023
2f689dd
added (incomplete) notebook explaining meta-learners
matekadlicsko Mar 6, 2023
b57e31a
wrote section on X-learner
matekadlicsko Mar 7, 2023
483d55b
fixed major error in DRLearner implementation
matekadlicsko Mar 7, 2023
d62eb18
minor changes
matekadlicsko Mar 8, 2023
95e010e
implemented cross_fitting option for DR-learner
matekadlicsko Mar 9, 2023
3e1182d
wrote subsection on DR-learner
matekadlicsko Mar 9, 2023
806cd0f
added docstring + some small changes suggested by @juanitorduz
matekadlicsko Mar 10, 2023
21d0b15
fixed a dependency
matekadlicsko Mar 12, 2023
c4f124b
improvements on LogisticRegression
matekadlicsko Mar 12, 2023
90fddd7
several improvements
matekadlicsko Mar 12, 2023
917216c
BayesianDR now works
matekadlicsko Mar 15, 2023
bb588b9
BayesianXLearner now works
matekadlicsko Mar 15, 2023
f39b856
removed redundant _compute_cate function
matekadlicsko Mar 15, 2023
2ca0ebd
formatting
matekadlicsko Mar 15, 2023
48c8105
added score method
matekadlicsko Mar 16, 2023
ddaebb4
formatting
matekadlicsko Mar 16, 2023
3bb16fe
reworded introduction + included some suggestions by @juanitorduz
matekadlicsko Mar 16, 2023
0d98c53
minor changes
matekadlicsko Mar 16, 2023
02b78e1
formatting
matekadlicsko Mar 17, 2023
3e845bf
added correct docstring
matekadlicsko Mar 17, 2023
d4830cc
added aesara to list of dependencies
matekadlicsko Mar 22, 2023
02d592c
improved docstrings.
matekadlicsko Mar 27, 2023
2007685
XLearner computations were wrong
matekadlicsko Mar 27, 2023
a936306
added summary file
matekadlicsko Mar 29, 2023
e682b27
summary now returns a summary object
matekadlicsko Mar 29, 2023
4751aeb
minor fix
matekadlicsko Mar 29, 2023
aba9255
new summary objects are displayed
matekadlicsko Mar 29, 2023
5fe6c53
changed plot method
matekadlicsko Apr 2, 2023
8fd71ec
Added some docstrings
matekadlicsko Apr 2, 2023
14fac30
fixed pymc-bart import
matekadlicsko Apr 9, 2023
1cbe477
summary now performs bootstrapping only once
matekadlicsko Apr 9, 2023
46a33d2
added summary
matekadlicsko Apr 9, 2023
d88472c
imported summary
matekadlicsko Apr 9, 2023
c154979
Merge branch 'pymc-labs:main' into meta-learners
matekadlicsko Apr 13, 2023
18b6934
made notebook a bit more clear
matekadlicsko Apr 17, 2023
1beda78
Merge branch 'meta-learners' of https://github.com/matekadlicsko/Caus…
matekadlicsko Apr 17, 2023
b43752e
Merge branch 'pymc-labs:main' into meta-learners
matekadlicsko Apr 20, 2023
92b655d
Merge branch 'pymc-labs:main' into meta-learners
matekadlicsko May 10, 2023
9d26c40
Merge branch 'pymc-labs:main' into meta-learners
matekadlicsko Jun 8, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
fixed major error in DRLearner implementation
  • Loading branch information
matekadlicsko committed Mar 7, 2023
commit 483d55bc6922b89159a6ff8e9615c5ddb6e1e7ee
79 changes: 53 additions & 26 deletions causalpy/skl_meta_learners.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
from copy import deepcopy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from copy import deepcopy
from sklearn.linear_model import LogisticRegression
from sklearn.utils import check_consistent_length
from sklearn.model_selection import train_test_split

from causalpy.utils import _is_variable_dummy_coded, _fit

Expand Down Expand Up @@ -86,7 +87,6 @@ def bootstrap(

results = []

# TODO: paralellize this loop
for _ in range(n_iter):
# Take sample with replacement from our data in a way that we have
# the same number of treated and untreated data points as in the whole
Expand Down Expand Up @@ -148,15 +148,28 @@ def bias(
q: float = .05,
n_iter: int = 1000
):
cates = self.bootstrap(X_ins, y, treated, X, n_iter)
"Calculates bootstrap estimate of bias of CATE estimator."
if X is None:
X = X_ins

pred = self.predict_cate(X=X)
bs_pred = self.bootstrap(X_ins, y, treated, X, n_iter).mean(axis=0)

return (bs_pred - pred).mean()



def summary(self, n_iter=1000):
# TODO: we run self.bootstrap twice independently.
bias = self.bias(self.X, self.y, self.treated, self.X, n_iter=n_iter)
conf_ints = self.ate_confidence_interval(
self.X, self.y, self.treated, self.X, n_iter=n_iter)
print(f"Number of observations: {self.X.shape[0]}")
print(f"Number of treated observations: {self.treated.sum()}")
print(f"Average treatement effect (ATE): {self.predict_ate(self.X)}")
print(f"Confidence interval for ATE: {conf_ints}")
self.X, self.y, self.treated, self.X, n_iter=n_iter
)
print(f"Number of observations: {self.X.shape[0]}")
print(f"Number of treated observations: {self.treated.sum()}")
print(f"Average treatement effect (ATE): {self.predict_ate(self.X)}")
print(f"95% Confidence interval for ATE: {conf_ints}")
print(f"Estimated bias: {bias}")


class SLearner(SkMetaLearner):
Expand Down Expand Up @@ -372,9 +385,13 @@ def __init__(
model=None,
treated_model=None,
untreated_model=None,
propensity_score_model=LogisticRegression(penalty=None)
pseudo_outcome_model=None,
propensity_score_model=LogisticRegression(penalty=None),
cross_fitting=False
):
super().__init__(X=X, y=y, treated=treated)

self.cross_fitting = cross_fitting

if model is None and (untreated_model is None or treated_model is None):
raise ValueError(
Expand All @@ -390,36 +407,36 @@ def __init__(
if model is not None:
treated_model = deepcopy(model)
untreated_model = deepcopy(model)
pseudo_outcome_model = deepcopy(model)

# Estimate response function
self.models = {
"treated": treated_model,
"untreated": untreated_model,
"propensity": propensity_score_model
"propensity": propensity_score_model,
"pseudo_outcome": pseudo_outcome_model
}

COORDS = {"coeffs": X.columns, "obs_indx": np.arange(X.shape[0])}
self.fit(X, y, treated, coords=COORDS)

# Estimate CATE
self.cate = self._compute_cate(X, y, treated)

def _compute_cate(self, X, y, treated):
g = self.models["propensity"].predict_proba(X)[:, 1]
m0 = self.models["untreated"].predict(X)
m1 = self.models["treated"].predict(X)
self.cate = pseudo_outcome_model.predict(X)

cate = (treated * (y - m1) / g + m1
- ((1 - treated) * (y - m0) / (1 - g) + m0))

return cate

def fit(self, X: pd.DataFrame, y: pd.Series, treated: pd.Series, coords=None):
# Split data to two independent samples of equal size
(
X0, X1,
y0, y1,
treated0, treated1
) = train_test_split(X, y, treated, stratify=treated, test_size=.5)

# Split data to treated and untreated subsets
X_t, y_t = X[treated == 1], y[treated == 1]
X_u, y_u = X[treated == 0], y[treated == 0]
X_t, y_t = X0[treated0 == 1], y0[treated0 == 1]
X_u, y_u = X0[treated0 == 0], y0[treated0 == 0]

treated_model, untreated_model, propensity_score_model = self.models.values()
treated_model, untreated_model, propensity_score_model, pseudo_outcome_model = self.models.values()

# Estimate response functions
_fit(treated_model, X_t, y_t, coords)
Expand All @@ -428,9 +445,19 @@ def fit(self, X: pd.DataFrame, y: pd.Series, treated: pd.Series, coords=None):
# Fit propensity score model
_fit(propensity_score_model, X, treated, coords)

g = propensity_score_model.predict_proba(X1)[:, 1]
mu_0 = untreated_model.predict(X1)
mu_1 = treated_model.predict(X1)
mu_w = np.where(treated1==0, mu_0, mu_1)

pseudo_outcome = (
(treated1 - g) / (g * (1 - g)) * (y1 - mu_w) + mu_1 - mu_0
)

# Fit pseudo-outcome model
_fit(pseudo_outcome_model, X1, pseudo_outcome, coords)

return self

def predict_cate(self, X):
m1 = self.models["treated"].predict(X)
m0 = self.models["untreated"].predict(X)
return m1 - m0
return self.models["pseudo_outcome"].predict(X)