The Interview Question
ℹ️
Question: You're building a Bayesian model to estimate click-through rates (CTR) for ads:
- Prior knowledge: Most ads have CTR between 0.5% and 5%
- Observed data: 1000 impressions, 15 clicks
- Requirements: Posterior distribution, credible intervals, decision-making under uncertainty
Walk through your Bayesian analysis:
- How do you choose appropriate prior distributions?
- How do you compute the posterior distribution?
- How do you make decisions using Bayesian inference?
- When is Bayesian better than frequentist approaches?
Detailed Answer
1. Bayesian Inference Framework
Bayesian statistics provides a principled way to update beliefs given new evidence. Unlike frequentist statistics, it treats parameters as random variables with distributions.
import numpy as np
import pandas as pd
from scipy import stats
from scipy.stats import beta, binom, norm
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
class BayesianAnalyzer:
"""Bayesian inference framework"""
def __init__(self):
self.prior = None
self.likelihood = None
self.posterior = None
def beta_binomial_conjugate(self, alpha_prior, beta_prior,
successes, trials):
"""
Beta-Binomial conjugate update
Prior: Beta(α, β)
Likelihood: Binomial(n, p)
Posterior: Beta(α + successes, β + failures)
"""
# Prior parameters
self.prior = beta(alpha_prior, beta_prior)
# Calculate posterior parameters
alpha_posterior = alpha_prior + successes
beta_posterior = beta_prior + (trials - successes)
# Posterior distribution
self.posterior = beta(alpha_posterior, beta_posterior)
# Summary statistics
results = {
'prior': {
'alpha': alpha_prior,
'beta': beta_prior,
'mean': alpha_prior / (alpha_prior + beta_prior),
'variance': (alpha_prior * beta_prior) /
((alpha_prior + beta_prior)**2 * (alpha_prior + beta_prior + 1))
},
'data': {
'successes': successes,
'trials': trials,
'observed_rate': successes / trials
},
'posterior': {
'alpha': alpha_posterior,
'beta': beta_posterior,
'mean': alpha_posterior / (alpha_posterior + beta_posterior),
'variance': (alpha_posterior * beta_posterior) /
((alpha_posterior + beta_posterior)**2 * (alpha_posterior + beta_posterior + 1)),
'std': np.sqrt((alpha_posterior * beta_posterior) /
((alpha_posterior + beta_posterior)**2 * (alpha_posterior + beta_posterior + 1)))
}
}
# Credible intervals
results['credible_intervals'] = {
'90%': (self.posterior.ppf(0.05), self.posterior.ppf(0.95)),
'95%': (self.posterior.ppf(0.025), self.posterior.ppf(0.975)),
'99%': (self.posterior.ppf(0.005), self.posterior.ppf(0.995))
}
# Probability of exceeding thresholds
results['probabilities'] = {
'p_gt_0.01': 1 - self.posterior.cdf(0.01),
'p_gt_0.02': 1 - self.posterior.cdf(0.02),
'p_gt_0.05': 1 - self.posterior.cdf(0.05)
}
return results
def normal_normal_conjugate(self, prior_mean, prior_var,
data_mean, data_var, n_obs):
"""
Normal-Normal conjugate update for mean estimation
Prior: N(μ₀, σ₀²)
Likelihood: N(μ, σ²/n)
Posterior: N(μₙ, σₙ²)
"""
# Prior precision (inverse variance)
prior_precision = 1 / prior_var
# Data precision
data_precision = n_obs / data_var
# Posterior parameters
posterior_precision = prior_precision + data_precision
posterior_var = 1 / posterior_precision
posterior_mean = (prior_precision * prior_mean + data_precision * data_mean) / posterior_precision
self.prior = norm(prior_mean, np.sqrt(prior_var))
self.posterior = norm(posterior_mean, np.sqrt(posterior_var))
results = {
'prior': {
'mean': prior_mean,
'std': np.sqrt(prior_var)
},
'data': {
'mean': data_mean,
'std': np.sqrt(data_var),
'n': n_obs
},
'posterior': {
'mean': posterior_mean,
'std': np.sqrt(posterior_var)
}
}
return results
def plot_distributions(self, x_range=None):
"""Plot prior, likelihood, and posterior"""
if self.prior is None or self.posterior is None:
print("Run analysis first")
return
if x_range is None:
x_range = np.linspace(
max(0, self.prior.ppf(0.01)),
min(1, self.prior.ppf(0.99)),
1000
)
fig, axes = plt.subplots(1, 3, figsize=(15, 5))
# Prior
axes[0].plot(x_range, self.prior.pdf(x_range), 'b-', linewidth=2, label='Prior')
axes[0].fill_between(x_range, self.prior.pdf(x_range), alpha=0.3)
axes[0].set_title('Prior Distribution')
axes[0].set_xlabel('Parameter Value')
axes[0].set_ylabel('Density')
axes[0].legend()
# Posterior
axes[1].plot(x_range, self.posterior.pdf(x_range), 'r-', linewidth=2, label='Posterior')
axes[1].fill_between(x_range, self.posterior.pdf(x_range), alpha=0.3, color='red')
axes[1].set_title('Posterior Distribution')
axes[1].set_xlabel('Parameter Value')
axes[1].set_ylabel('Density')
axes[1].legend()
# Comparison
axes[2].plot(x_range, self.prior.pdf(x_range), 'b-', linewidth=2, label='Prior')
axes[2].plot(x_range, self.posterior.pdf(x_range), 'r-', linewidth=2, label='Posterior')
axes[2].set_title('Prior vs Posterior')
axes[2].set_xlabel('Parameter Value')
axes[2].set_ylabel('Density')
axes[2].legend()
plt.tight_layout()
plt.savefig('bayesian_distributions.png', dpi=150, bbox_inches='tight')
plt.show()
# Example usage for CTR estimation
analyzer = BayesianAnalyzer()
# Prior: Beta(2, 20) - weakly informative, centered around 0.1
# This represents belief that CTR is likely low
alpha_prior = 2
beta_prior = 20
# Data: 15 clicks out of 1000 impressions
successes = 15
trials = 1000
results = analyzer.beta_binomial_conjugate(alpha_prior, beta_prior, successes, trials)
print("Bayesian CTR Analysis")
print("=" * 60)
print(f"Prior: Beta({results['prior']['alpha']}, {results['prior']['beta']})")
print(f"Prior mean: {results['prior']['mean']:.4f}")
print(f"\nData: {results['data']['successes']} clicks / {results['data']['trials']} impressions")
print(f"Observed CTR: {results['data']['observed_rate']:.4f}")
print(f"\nPosterior: Beta({results['posterior']['alpha']}, {results['posterior']['beta']})")
print(f"Posterior mean: {results['posterior']['mean']:.4f}")
print(f"Posterior std: {results['posterior']['std']:.4f}")
print(f"\n95% Credible Interval: ({results['credible_intervals']['95%'][0]:.4f}, {results['credible_intervals']['95%'][1]:.4f})")
print(f"\nP(CTR > 1%): {results['probabilities']['p_gt_0.01']:.4f}")
print(f"P(CTR > 2%): {results['probabilities']['p_gt_0.02']:.4f}")
2. MCMC Sampling
import pymc as pm
import arviz as az
class MCMCSampler:
"""Markov Chain Monte Carlo sampling"""
def __init__(self):
self.trace = None
self.model = None
def beta_binomial_model(self, observed_clicks, observed_impressions,
prior_alpha=1, prior_beta=1):
"""PyMC model for Beta-Binomial"""
with pm.Model() as model:
# Prior
ctr = pm.Beta('ctr', alpha=prior_alpha, beta=prior_beta)
# Likelihood
clicks = pm.Binomial('clicks',
n=observed_impressions,
p=ctr,
observed=observed_clicks)
# Inference
trace = pm.sample(2000, tune=1000, cores=2, return_inferencedata=True)
self.model = model
self.trace = trace
return trace
def hierarchical_model(self, group_data):
"""Hierarchical Bayesian model for multiple groups"""
n_groups = len(group_data)
clicks = [d['clicks'] for d in group_data]
impressions = [d['impressions'] for d in group_data]
with pm.Model() as model:
# Hyperprior for group-level parameters
mu_prior = pm.Beta('mu_prior', alpha=2, beta=2)
kappa_prior = pm.Exponential('kappa_prior', lam=1)
# Group-level CTRs
ctr = pm.Beta('ctr',
alpha=mu_prior * kappa_prior,
beta=(1 - mu_prior) * kappa_prior,
shape=n_groups)
# Likelihood for each group
for i in range(n_groups):
pm.Binomial(f'clicks_{i}',
n=impressions[i],
p=ctr[i],
observed=clicks[i])
# Inference
trace = pm.sample(2000, tune=1000, cores=2)
self.model = model
self.trace = trace
return trace
def diagnose_convergence(self):
"""Diagnose MCMC convergence"""
if self.trace is None:
print("Run sampling first")
return
# Summary statistics
summary = az.summary(self.trace)
print("MCMC Summary:")
print(summary)
# Plot diagnostics
fig = az.plot_trace(self.trace, figsize=(12, 8))
plt.tight_layout()
plt.savefig('mcmc_diagnostics.png', dpi=150, bbox_inches='tight')
plt.show()
# Check R-hat (should be close to 1)
r_hat = summary['r_hat'].max()
print(f"\nMax R-hat: {r_hat:.4f}")
print(f"Convergence: {'Good' if r_hat < 1.1 else 'Poor'}")
# Check effective sample size
ess = summary['ess_bulk'].min()
print(f"Min Effective Sample Size: {ess:.0f}")
return summary
def posterior_predictive(self, n_samples=1000):
"""Generate posterior predictive samples"""
if self.trace is None:
print("Run sampling first")
return
with self.model:
ppc = pm.sample_posterior_predictive(self.trace, samples=n_samples)
return ppc
# Example usage
# sampler = MCMCSampler()
# trace = sampler.beta_binomial_model(
# observed_clicks=15,
# observed_impressions=1000,
# prior_alpha=2,
# prior_beta=20
# )
# summary = sampler.diagnose_convergence()
3. Bayesian Decision Making
class BayesianDecisionMaker:
"""Make decisions using Bayesian inference"""
def __init__(self, posterior_dist):
self.posterior = posterior_dist
def expected_loss(self, action, loss_function):
"""Calculate expected loss for an action"""
# Sample from posterior
samples = self.posterior.rvs(10000)
# Calculate loss for each sample
losses = loss_function(action, samples)
# Expected loss
expected_loss = np.mean(losses)
return expected_loss
def optimal_decision(self, actions, loss_functions):
"""Find optimal decision minimizing expected loss"""
results = []
for action, loss_func in zip(actions, loss_functions):
exp_loss = self.expected_loss(action, loss_func)
results.append({
'action': action,
'expected_loss': exp_loss
})
# Find minimum expected loss
optimal = min(results, key=lambda x: x['expected_loss'])
return optimal, results
def threshold_decision(self, threshold=0.02, cost_fp=1, cost_fn=10):
"""Decision based on threshold with asymmetric costs"""
samples = self.posterior.rvs(10000)
# Calculate expected costs
# False positive: declare "good" when actually "bad"
fp_cost = np.mean(samples < threshold) * cost_fp
# False negative: declare "bad" when actually "good"
fn_cost = np.mean(samples >= threshold) * cost_fn
# Decision
decision = "Promote" if fn_cost < fp_cost else "Don't Promote"
return {
'decision': decision,
'fp_cost': fp_cost,
'fn_cost': fn_cost,
'total_cost': fp_cost + fn_cost,
'probability_above_threshold': np.mean(samples >= threshold)
}
def simulate_decision(self, n_simulations=10000):
"""Simulate decision outcomes"""
samples = self.posterior.rvs(n_simulations)
# Simulate different decisions
decisions = {
'promote': samples >= 0.02,
'dont_promote': samples < 0.02
}
# Calculate outcomes
outcomes = {}
for decision, mask in decisions.items():
if mask.any():
outcomes[decision] = {
'probability': mask.mean(),
'expected_ctr': samples[mask].mean() if mask.any() else 0,
'worst_case': samples[mask].min() if mask.any() else 0,
'best_case': samples[mask].max() if mask.any() else 0
}
return outcomes
# Example usage
posterior = beta(results['posterior']['alpha'], results['posterior']['beta'])
decision_maker = BayesianDecisionMaker(posterior)
# Define actions and loss functions
actions = ['promote', 'dont_promote']
loss_functions = [
lambda action, samples: np.where(samples < 0.02, 10, 0), # Loss if promoting low CTR
lambda action, samples: np.where(samples >= 0.02, 5, 0) # Loss if not promoting high CTR
]
optimal, all_results = decision_maker.optimal_decision(actions, loss_functions)
print(f"Optimal decision: {optimal['action']}")
print(f"Expected loss: {optimal['expected_loss']:.4f}")
4. Prior Selection Strategies
class PriorSelector:
"""Strategies for selecting prior distributions"""
@staticmethod
def uninformative_prior(param_type='proportion'):
"""Uninformative (flat) prior"""
if param_type == 'proportion':
return {'alpha': 1, 'beta': 1} # Uniform
elif param_type == 'mean':
return {'mean': 0, 'var': 1e6} # Very wide normal
@staticmethod
def weakly_informative(param_type='proportion'):
"""Weakly informative prior"""
if param_type == 'proportion':
return {'alpha': 2, 'beta': 2} # Beta centered at 0.5
elif param_type == 'mean':
return {'mean': 0, 'var': 100}
@staticmethod
def informative_from_expert knowledge, confidence):
"""Informative prior from expert knowledge"""
# Convert expert knowledge to Beta parameters
# knowledge: expected proportion
# confidence: strength of belief (equivalent sample size)
alpha = knowledge * confidence
beta = (1 - knowledge) * confidence
return {'alpha': alpha, 'beta': beta}
@staticmethod
def empirical_prior(data, param_type='proportion'):
"""Empirical prior from historical data"""
if param_type == 'proportion':
# Estimate from historical data
p_hat = data.mean()
n_eff = len(data)
# Method of moments
alpha = p_hat * n_eff
beta = (1 - p_hat) * n_eff
return {'alpha': alpha, 'beta': beta}
@staticmethod
def compare_priors(priors, true_value=None):
"""Visualize different priors"""
x = np.linspace(0, 1, 1000)
plt.figure(figsize=(10, 6))
for name, params in priors.items():
if 'alpha' in params:
y = beta(params['alpha'], params['beta']).pdf(x)
plt.plot(x, y, label=f"{name} (α={params['alpha']}, β={params['beta']})")
if true_value is not None:
plt.axvline(x=true_value, color='black', linestyle='--',
label=f'True value: {true_value}')
plt.xlabel('Parameter Value')
plt.ylabel('Density')
plt.title('Comparison of Prior Distributions')
plt.legend()
plt.grid(True, alpha=0.3)
plt.savefig('prior_comparison.png', dpi=150, bbox_inches='tight')
plt.show()
# Example: Compare different priors
priors = {
'Uninformative': PriorSelector.uninformative_prior('proportion'),
'Weakly Informative': PriorSelector.weakly_informative('proportion'),
'Expert Knowledge': {'alpha': 5, 'beta': 95}, # CTR ~5%
'Historical Data': PriorSelector.empirical_prior(np.random.beta(2, 20, 1000))
}
# PriorSelector.compare_priors(priors, true_value=0.05)
5. Real-World Application: A/B Testing with Bayes
class BayesianABTest:
"""Bayesian A/B testing framework"""
def __init__(self):
self.results = {}
def test_conversion_rates(self, control_data, treatment_data,
prior_alpha=1, prior_beta=1):
"""Bayesian test for conversion rates"""
with pm.Model() as model:
# Priors
p_control = pm.Beta('p_control', alpha=prior_alpha, beta=prior_beta)
p_treatment = pm.Beta('p_treatment', alpha=prior_alpha, beta=prior_beta)
# Likelihoods
obs_control = pm.Binomial('obs_control',
n=control_data['impressions'],
p=p_control,
observed=control_data['clicks'])
obs_treatment = pm.Binomial('obs_treatment',
n=treatment_data['impressions'],
p=p_treatment,
observed=treatment_data['clicks'])
# Derived quantities
lift = pm.Deterministic('lift', (p_treatment - p_control) / p_control)
prob_treatment_better = pm.Deterministic('prob_better',
pm.math.switch(p_treatment > p_control, 1, 0))
# Sample
trace = pm.sample(2000, tune=1000, cores=2, return_inferencedata=True)
# Analyze results
results = {
'control_mean': trace.posterior['p_control'].mean().item(),
'treatment_mean': trace.posterior['p_treatment'].mean().item(),
'lift_mean': trace.posterior['lift'].mean().item(),
'prob_treatment_better': trace.posterior['prob_better'].mean().item(),
'lift_ci': (
np.percentile(trace.posterior['lift'].values, 2.5),
np.percentile(trace.posterior['lift'].values, 97.5)
)
}
# Decision
if results['prob_treatment_better'] > 0.95:
results['recommendation'] = 'Implement treatment'
elif results['prob_treatment_better'] > 0.90:
results['recommendation'] = 'Leaning toward treatment'
elif results['prob_treatment_better'] < 0.10:
results['recommendation'] = 'Implement control'
else:
results['recommendation'] = 'Continue testing'
self.results = results
return results
def test_revenue(self, control_revenue, treatment_revenue,
prior_mu=0, prior_sigma=100):
"""Bayesian test for revenue (continuous metric)"""
with pm.Model() as model:
# Priors for means
mu_control = pm.Normal('mu_control', mu=prior_mu, sigma=prior_sigma)
mu_treatment = pm.Normal('mu_treatment', mu=prior_mu, sigma=prior_sigma)
# Priors for standard deviations
sigma_control = pm.HalfNormal('sigma_control', sigma=50)
sigma_treatment = pm.HalfNormal('sigma_treatment', sigma=50)
# Likelihoods
obs_control = pm.Normal('obs_control',
mu=mu_control,
sigma=sigma_control,
observed=control_revenue)
obs_treatment = pm.Normal('obs_treatment',
mu=mu_treatment,
sigma=sigma_treatment,
observed=treatment_revenue)
# Derived quantities
difference = pm.Deterministic('difference', mu_treatment - mu_control)
prob_better = pm.Deterministic('prob_better',
pm.math.switch(mu_treatment > mu_control, 1, 0))
# Sample
trace = pm.sample(2000, tune=1000, cores=2, return_inferencedata=True)
results = {
'control_mean': trace.posterior['mu_control'].mean().item(),
'treatment_mean': trace.posterior['mu_treatment'].mean().item(),
'difference_mean': trace.posterior['difference'].mean().item(),
'prob_treatment_better': trace.posterior['prob_better'].mean().item()
}
return results
# Example usage
# bayesian_ab = BayesianABTest()
# results = bayesian_ab.test_conversion_rates(
# control_data={'impressions': 5000, 'clicks': 50},
# treatment_data={'impressions': 5000, 'clicks': 75}
# )
💡
Pro Tip: Bayesian methods are particularly valuable when you have prior knowledge, need uncertainty quantification, or want to make decisions based on probabilities rather than p-values.
6. Common Follow-Up Questions
Follow-up 1: When is Bayesian better than frequentist?
def compare_approaches(scenario):
"""Compare Bayesian and frequentist approaches"""
comparisons = {
'small_sample': {
'bayesian': 'Better - can incorporate prior information',
'frequentist': 'May lack power with small samples',
'recommendation': 'Bayesian'
},
'large_sample': {
'bayesian': 'Results converge with frequentist',
'frequentist': 'Provides valid inferences',
'recommendation': 'Either (results similar)'
},
'need_uncertainty': {
'bayesian': 'Provides full posterior distribution',
'frequentist': 'Provides confidence intervals (often misunderstood)',
'recommendation': 'Bayesian'
},
'sequential_analysis': {
'bayesian': 'Can update beliefs as data arrives',
'frequentist': 'Requires correction for multiple testing',
'recommendation': 'Bayesian'
},
'regulatory': {
'bayesian': 'May not be accepted by regulators',
'frequentist': 'Standard in many fields',
'recommendation': 'Frequentist'
}
}
return comparisons.get(scenario, "Unknown scenario")
# Example
# print(compare_approaches('small_sample'))
Follow-up 2: How do you choose between prior distributions?
def prior_sensitivity_analysis(data, priors):
"""Analyze sensitivity to prior choice"""
results = {}
for prior_name, prior_params in priors.items():
# Fit model with different priors
analyzer = BayesianAnalyzer()
result = analyzer.beta_binomial_conjugate(
prior_params['alpha'],
prior_params['beta'],
data['successes'],
data['trials']
)
results[prior_name] = {
'posterior_mean': result['posterior']['mean'],
'posterior_std': result['posterior']['std'],
'ci_95': result['credible_intervals']['95%']
}
# Compare results
comparison = pd.DataFrame(results).T
print("Sensitivity Analysis:")
print(comparison)
# Check if conclusions change
means = [r['posterior_mean'] for r in results.values()]
if max(means) - min(means) < 0.01:
print("\nResults are robust to prior choice")
else:
print("\nResults are sensitive to prior choice - consider using weakly informative prior")
return comparison
# Example
# priors = {
# 'Uninformative': {'alpha': 1, 'beta': 1},
# 'Weakly Informative': {'alpha': 2, 'beta': 2},
# 'Informative': {'alpha': 5, 'beta': 95}
# }
# sensitivity = prior_sensitivity_analysis({'successes': 15, 'trials': 1000}, priors)
Company-Specific Tips
ℹ️
Google Tips:
- Google values Bayesian methods for uncertainty quantification
- Be prepared to explain when Bayesian is preferred
- Know how to implement Bayesian A/B testing
- Understand hierarchical models for user behavior
Meta Tips:
- Meta uses Bayesian methods for recommendation systems
- Know how to do Bayesian optimization
- Be comfortable with probabilistic programming (PyMC, Stan)
- Understand how to incorporate prior knowledge
Quiz Section
Related Topics
- Hypothesis Testing — Frequentist approach comparison
- A/B Testing — Bayesian A/B testing
- Probabilistic Programming — PyMC, Stan, TensorFlow Probability
- Bayesian Optimization — Hyperparameter tuning