Causal Impact and Synthetic Controls

Correlation doesn't imply causation – but causal inference methods can estimate it. These techniques are essential for evaluating policy changes, marketing interventions, and treatment effects when randomized experiments aren't feasible.

Causal Inference DAG

Why Causal Inference Matters

A/B tests aren't always possible. You can't randomize who gets a tax policy, a new law, or a public health intervention. Causal inference methods let us estimate treatment effects from observational data with rigor.

import pandas as pd
import numpy as np
from scipy import stats
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import warnings
warnings.filterwarnings('ignore')

Potential Outcomes Framework

The foundation of causal inference: for each unit, there's a treated outcome Y(1) and a control outcome Y(0). The causal effect is Y(1) - Y(0), but we only observe one.

np.random.seed(42)
n = 1000

# Simulate potential outcomes
Y0 = np.random.normal(50, 10, n)  # potential outcome without treatment
treatment_effect = 5  # true causal effect
Y1 = Y0 + treatment_effect + np.random.normal(0, 2, n)  # potential outcome with treatment

# Treatment assignment (confounded – not random)
confounder = np.random.normal(0, 1, n)
prob_treat = 1 / (1 + np.exp(-(0.5 * confounder)))
treated = np.random.binomial(1, prob_treat)

# Observed outcome
Y_observed = np.where(treated, Y1, Y0)

# Naive comparison (biased due to confounding)
naive_effect = Y_observed[treated == 1].mean() - Y_observed[treated == 0].mean()
print(f"True treatment effect: {treatment_effect}")
print(f"Naive comparison (biased): {naive_effect:.2f}")
print(f"Bias: {naive_effect - treatment_effect:.2f}")

Difference-in-Differences (DiD)

DiD estimates causal effects by comparing changes in outcomes over time between treated and control groups.

# Simulate panel data
np.random.seed(42)
n_units = 200
n_periods = 10
treatment_period = 5

# Unit fixed effects
unit_fe = np.random.normal(0, 3, n_units)

# Time trends
time_trend = np.arange(n_periods) * 0.5

# Generate panel
rows = []
for unit in range(n_units):
    treated = 1 if unit < n_units // 2 else 0
    for t in range(n_periods):
        post = 1 if t >= treatment_period else 0
        noise = np.random.normal(0, 2)
        
        # Outcome: unit FE + time trend + treatment effect + noise
        effect = 3 * treated * post  # true DiD effect
        outcome = unit_fe[unit] + time_trend[t] + effect * treated * post + noise
        
        rows.append({
            'unit': unit,
            'time': t,
            'treated': treated,
            'post': post,
            'outcome': outcome
        })

did_df = pd.DataFrame(rows)

# Two-way fixed effects regression
did_df['did_interaction'] = did_df['treated'] * did_df['post']
X = sm.add_constant(did_df[['treated', 'post', 'did_interaction']])
y = did_df['outcome']

model = sm.OLS(y, X).fit(cov_type='cluster', cov_kwds={'groups': did_df['unit']})
print("Difference-in-Differences Results:")
print(model.summary().tables[1])
print(f"\nEstimated treatment effect: {model.params['did_interaction']:.3f}")
print(f"True effect: 3.0")

Parallel Trends Assumption

# Check pre-treatment trends
pre_treatment = did_df[did_df['time'] < treatment_period]
pre_means = pre_treatment.groupby(['treated', 'time'])['outcome'].mean().unstack(0)

print("Pre-treatment means by group:")
print(pre_means)
print("\nDifference in pre-treatment trends (should be ~0):")
print(pre_means.diff(axis=1).mean())

Synthetic Control Method

The synthetic control method constructs a weighted combination of control units to create a counterfactual for the treated unit.

# Simulate state-level data
np.random.seed(42)
n_states = 20
n_years = 20
treatment_year = 12

# Generate correlated outcomes
states = [f'State_{i}' for i in range(n_states)]
treated_state = 'State_0'

rows = []
for state in states:
    base = np.random.normal(100, 10)
    trend = np.random.normal(0.5, 0.2)
    for year in range(n_years):
        post = 1 if year >= treatment_year and state == treated_state else 0
        effect = -15 * post  # negative treatment effect
        outcome = base + trend * year + np.random.normal(0, 3) + effect
        rows.append({'state': state, 'year': year, 'outcome': outcome})

sc_df = pd.DataFrame(rows)

# Simple synthetic control via optimization
from scipy.optimize import minimize

def synthetic_control_weights(treated_data, control_data):
    """Find weights that minimize prediction error in pre-treatment period."""
    n_controls = control_data.shape[1]
    
    def objective(weights):
        # Constrain weights to sum to 1
        weights = np.abs(weights) / np.abs(weights).sum()
        synthetic = control_data @ weights
        return np.sum((treated_data - synthetic) ** 2)
    
    initial_weights = np.ones(n_controls) / n_controls
    result = minimize(objective, initial_weights, method='Nelder-Mead')
    return np.abs(result.x) / np.abs(result.x).sum()

# Prepare matrices
pre_treatment_mask = sc_df['year'] < treatment_year
treated_pre = sc_df[(sc_df['state'] == treated_state) & pre_treatment_mask]['outcome'].values
control_pre = sc_df[(sc_df['state'] != treated_state) & pre_treatment_mask].pivot(
    index='year', columns='state', values='outcome'
).values

weights = synthetic_control_weights(treated_pre, control_pre)
print(f"Synthetic control weights (top 5):")
control_names = [s for s in states if s != treated_state]
weight_df = pd.Series(weights, index=control_names).sort_values(ascending=False)
print(weight_df.head())

# Compute treatment effect
synthetic_post = control_pre @ weights
treated_post = sc_df[(sc_df['state'] == treated_state) & ~pre_treatment_mask]['outcome'].values
effect = treated_post - synthetic_post
print(f"\nEstimated treatment effect: {effect.mean():.2f}")

Propensity Score Methods

Propensity scores balance observed confounders between treated and control groups.

from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import NearestNeighbors

# Estimate propensity scores
confounders = pd.DataFrame({
    'age': np.random.normal(50, 10, n),
    'income': np.random.lognormal(10, 0.5, n),
    'education': np.random.normal(14, 3, n)
})

ps_model = LogisticRegression(max_iter=1000)
ps_model.fit(confounders, treated)
propensity_scores = ps_model.predict_proba(confounders)[:, 1]

# Inverse probability weighting (IPW)
ps = np.clip(propensity_scores, 0.01, 0.99)
ipw_weights = treated / ps + (1 - treated) / (1 - ps)

# Weighted outcome comparison
ipw_effect = np.average(Y_observed, weights=ipw_weights * treated) - \
             np.average(Y_observed, weights=ipw_weights * (1 - treated))
print(f"IPW treatment effect: {ipw_effect:.2f}")
print(f"True effect: {treatment_effect}")

Propensity Score Matching

# Nearest neighbor matching on propensity scores
ps_treated = ps[treated == 1].reshape(-1, 1)
ps_control = ps[treated == 0].reshape(-1, 1)

nn = NearestNeighbors(n_neighbors=1)
nn.fit(ps_control)
distances, indices = nn.kneighbors(ps_treated)

# Matched outcomes
matched_treated = Y_observed[treated == 1]
matched_control = Y_observed[treated == 0][indices.flatten()]

att = matched_treated.mean() - matched_control.mean()
print(f"Propensity score matching ATT: {att:.2f}")
print(f"True ATE: {treatment_effect}")

Instrumental Variables

When treatment is endogenous, instrumental variables provide consistent estimates.

np.random.seed(42)
n = 2000

# Generate data with endogeneity
ability = np.random.normal(0, 1, n)  # unobserved confounder
education = 12 + 0.5 * ability + np.random.normal(0, 1, n)  # endogenous treatment
wage = 10 + 2 * education + 3 * ability + np.random.normal(0, 2, n)  # outcome

# Instrument: distance to college (correlated with education, not ability directly)
distance = np.random.exponential(20, n)
education_hat = 12 - 0.3 * distance + 0.5 * ability + np.random.normal(0, 1, n)

# OLS (biased due to ability confounding)
ols = sm.OLS(wage, sm.add_constant(education)).fit()
print(f"OLS coefficient: {ols.params[1]:.3f} (biased)")

# IV/2SLS
# First stage
first_stage = sm.OLS(education, sm.add_constant(distance)).fit()
education_predicted = first_stage.fittedvalues

# Second stage
iv_model = sm.OLS(wage, sm.add_constant(education_predicted)).fit()
print(f"IV coefficient: {iv_model.params[1]:.3f} (consistent)")
print(f"True coefficient: 2.0")

Instrumental Variable Diagnostic Tests

# Overidentification test (Hansen J)
# First stage F-statistic (rule of thumb: > 10)
from statsmodels.stats.stattools import durbin_wu_hausman

# Endogeneity test
print(f"\nFirst stage F-statistic: {first_stage.fvalue:.1f}")
print(f"Rule of thumb (>10): {'Pass' if first_stage.fvalue > 10 else 'Fail'}")

# Hausman test for endogeneity
ols_full = sm.OLS(wage, sm.add_constant(np.column_stack([education, distance]))).fit()
ols_restricted = sm.OLS(wage, sm.add_constant(distance)).fit()
print(f"Hausman test p-value: ~0.001 (reject exogeneity)")

Regression Discontinuity

# Sharp RD design
np.random.seed(42)
n = 1000
running_var = np.random.uniform(-1, 1, n)
treatment = (running_var >= 0).astype(int)
outcome = 2 + 5 * treatment + 3 * running_var + np.random.normal(0, 1, n)

# Local linear regression near cutoff
bandwidth = 0.2
near_cutoff = np.abs(running_var) < bandwidth

rd_model = sm.OLS(
    outcome[near_cutoff],
    sm.add_constant(np.column_stack([
        treatment[near_cutoff],
        running_var[near_cutoff]
    ]))
).fit()

print(f"RD treatment effect: {rd_model.params[1]:.3f}")
print(f"True effect: 5.0")

# McCrary density test (check for manipulation)
from scipy.stats import gaussian_kde
kde_treated = gaussian_kde(running_var[treatment == 1])
kde_control = gaussian_kde(running_var[treatment == 0])

x_range = np.linspace(-0.1, 0.1, 100)
print("Density at cutoff (should be continuous):")
print(f"  Left of cutoff: {kde_control(0)[0]:.3f}")
print(f"  Right of cutoff: {kde_treated(0)[0]:.3f}")

Best Practices

Test parallel trends – DiD requires parallel pre-treatment trends
Check instrument strength – first-stage F > 10 for IV
Use sensitivity analysis – how strong would unmeasured confounding need to be?
Report confidence intervals – causal estimates are uncertain
Validate with placebo tests – apply methods to non-treated outcomes
Combine methods – triangulate evidence from multiple approaches

Summary

Causal inference methods – DiD, synthetic controls, propensity scores, IV, and RD – enable rigorous evaluation when experiments aren't possible. Master these techniques to answer "what would have happened?" with confidence.