Two-Sample (Independent) T-Test
Tests whether two independent groups have equal population means.
Two Versions
Pooled T-Test (equal variances assumed)
Welch's T-Test (unequal variances — default recommendation)
Use Welch's by default — it performs well under both equal and unequal variances, while the pooled test can give misleading p-values when the variances differ, especially with unequal group sizes.
Complete Python Implementation
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Fix the global RNG seed so the simulated scores are reproducible run to run.
np.random.seed(42)

# Scenario: two teaching methods — do they produce different test scores?
# The draw order (A first, then B) matters for reproducing the same samples.
method_a = np.random.normal(loc=75, scale=10, size=35)  # n=35, μ=75, σ=10
method_b = np.random.normal(loc=80, scale=12, size=40)  # n=40, μ=80, σ=12
def two_sample_t_test(group1, group2, alpha=0.05, equal_var=False):
    """Run an independent two-sample t-test and print a summary report.

    Args:
        group1: Observations for the first group (any 1-D array-like).
        group2: Observations for the second group (any 1-D array-like).
        alpha: Significance level; the confidence interval is reported at
            the ``1 - alpha`` level.
        equal_var: If True, run the pooled (equal-variance) test; if False
            (default), run Welch's test.

    Returns:
        Tuple ``(t_stat, p_value, ci, cohen_d)`` where ``ci`` is the
        ``(lower, upper)`` confidence interval for mean(group1) - mean(group2)
        and ``cohen_d`` is the pooled-SD standardized mean difference.
    """
    # Accept lists/tuples as well as ndarrays.
    group1 = np.asarray(group1, dtype=float)
    group2 = np.asarray(group2, dtype=float)

    n1, n2 = len(group1), len(group2)
    x1, x2 = group1.mean(), group2.mean()
    s1, s2 = group1.std(ddof=1), group2.std(ddof=1)
    # Squared standard error of each group mean.
    v1, v2 = s1**2 / n1, s2**2 / n2

    t_stat, p_value = stats.ttest_ind(group1, group2, equal_var=equal_var)

    # Pooled standard deviation: used for Cohen's d and for the pooled SE.
    sp = np.sqrt(((n1 - 1) * s1**2 + (n2 - 1) * s2**2) / (n1 + n2 - 2))

    # Degrees of freedom and standard error must match the chosen test so
    # the confidence interval agrees with the reported t statistic.
    if not equal_var:
        # Welch-Satterthwaite degrees of freedom
        df = (v1 + v2) ** 2 / (v1**2 / (n1 - 1) + v2**2 / (n2 - 1))
        se_diff = np.sqrt(v1 + v2)
    else:
        df = n1 + n2 - 2
        se_diff = sp * np.sqrt(1 / n1 + 1 / n2)

    # (1 - alpha) CI for the difference in means
    t_crit = stats.t.ppf(1 - alpha / 2, df=df)
    diff = x1 - x2
    ci = (diff - t_crit * se_diff, diff + t_crit * se_diff)

    # Cohen's d (pooled)
    cohen_d = diff / sp

    test_name = "Welch's" if not equal_var else "Pooled"
    ci_level = (1 - alpha) * 100  # label follows alpha instead of a fixed 95
    print(f"=== {test_name} Two-Sample T-Test ===")
    print(f"Group 1: n={n1}, x̄={x1:.2f}, s={s1:.2f}")
    print(f"Group 2: n={n2}, x̄={x2:.2f}, s={s2:.2f}")
    print(f"Difference (G1-G2): {diff:.2f}")
    print(f"t({df:.1f}) = {t_stat:.4f}")
    print(f"p-value = {p_value:.4f}")
    print(f"{ci_level:g}% CI for μ₁-μ₂: ({ci[0]:.2f}, {ci[1]:.2f})")
    print(f"Cohen's d = {cohen_d:.4f}")
    print(f"Decision: {'Reject H₀' if p_value < alpha else 'Fail to reject H₀'}")
    return t_stat, p_value, ci, cohen_d
# Run both versions: Welch's (recommended) first, then pooled, with a blank
# line separating the two printed reports.
for report_index, assume_equal in enumerate((False, True)):
    if report_index:
        print()
    two_sample_t_test(method_a, method_b, equal_var=assume_equal)

# Levene's test for equality of variances between the two groups.
levene_result = stats.levene(method_a, method_b)
stat_lev, p_lev = levene_result
print(f"\nLevene's test for equal variances: F={stat_lev:.4f}, p={p_lev:.4f}")
variance_verdict = 'assumed' if p_lev > 0.05 else 'NOT assumed'
print(f"Equal variances {variance_verdict}")
Visualization
# Two side-by-side panels: box plots (left) and fitted normal curves (right).
fig, axes = plt.subplots(1, 2, figsize=(12, 5))

# Box plot comparison
axes[0].boxplot([method_a, method_b],
                patch_artist=True,
                boxprops=dict(facecolor='lightblue'))
# Set tick labels separately: boxplot's `labels=` keyword is deprecated in
# newer Matplotlib (renamed `tick_labels`), while this works on all versions.
axes[0].set_xticklabels(['Method A', 'Method B'])
axes[0].set_title('Score Distribution by Teaching Method')
axes[0].set_ylabel('Test Score')

# Distribution overlap: normal curves fitted to each sample.
x = np.linspace(40, 120, 500)
# Use the sample SD (ddof=1) so the curves match the statistics used in the
# t-test, and evaluate each density once for both the line and the fill.
pdf_a = stats.norm.pdf(x, method_a.mean(), method_a.std(ddof=1))
pdf_b = stats.norm.pdf(x, method_b.mean(), method_b.std(ddof=1))
axes[1].plot(x, pdf_a, 'b-', linewidth=2, label='Method A')
axes[1].plot(x, pdf_b, 'r-', linewidth=2, label='Method B')
axes[1].fill_between(x, pdf_a, alpha=0.3, color='blue')
axes[1].fill_between(x, pdf_b, alpha=0.3, color='red')
axes[1].set_title('Distribution Overlap')
axes[1].legend()

plt.tight_layout()
# Save before show(): some backends clear the figure once the window closes.
plt.savefig('two_sample_t.png', dpi=150)
plt.show()
Key Takeaways
- Use Welch's t-test by default — robust to unequal variances
- Pooled t-test is valid only when the population variances are equal; Levene's test can flag a violation, but a non-significant result does not prove the variances are equal
- scipy.stats.ttest_ind(..., equal_var=False) gives Welch's test; SciPy's default is equal_var=True (pooled), so pass it explicitly (R's t.test uses Welch's by default)
- Report: t, df (Welch's df is non-integer), p, 95% CI for difference, Cohen's d
- Cohen's d: 0.2 small, 0.5 medium, 0.8 large effect size
- Independence assumption is critical — use paired t-test if subjects are matched