ARIMA and Prophet: Time Series Forecasting
This lesson covers two powerful forecasting approaches: classical ARIMA models and Facebook's Prophet for automated forecasting.
ARIMA Model
<svg width="600" height="400" viewBox="0 0 600 400" xmlns="http://www.w3.org/2000/svg">
<rect width="600" height="400" fill="#f8f9fa" rx="10"/>
<text x="300" y="30" text-anchor="middle" font-size="18" font-weight="bold" fill="#2c3e50">ARIMA(p,d,q) Components</text>
<!-- AR Component -->
<rect x="50" y="70" width="160" height="100" fill="#3498db" rx="5"/>
<text x="130" y="95" text-anchor="middle" font-size="14" font-weight="bold" fill="white">AR(p)</text>
<text x="130" y="115" text-anchor="middle" font-size="11" fill="white">Autoregressive</text>
<text x="130" y="135" text-anchor="middle" font-size="10" fill="white">Y_t = c + Φâ‚Y_{t-1}</text>
<text x="130" y="155" text-anchor="middle" font-size="10" fill="white">+ Φ₂Y_{t-2} + ...</text>
<!-- I Component -->
<rect x="220" y="70" width="160" height="100" fill="#2ecc71" rx="5"/>
<text x="300" y="95" text-anchor="middle" font-size="14" font-weight="bold" fill="white">I(d)</text>
<text x="300" y="115" text-anchor="middle" font-size="11" fill="white">Integrated</text>
<text x="300" y="135" text-anchor="middle" font-size="10" fill="white">Differencing</text>
<text x="300" y="155" text-anchor="middle" font-size="10" fill="white">d = 1: Y'_t = Y_t - Y_{t-1}</text>
<!-- MA Component -->
<rect x="390" y="70" width="160" height="100" fill="#e74c3c" rx="5"/>
<text x="470" y="95" text-anchor="middle" font-size="14" font-weight="bold" fill="white">MA(q)</text>
<text x="470" y="115" text-anchor="middle" font-size="11" fill="white">Moving Average</text>
<text x="470" y="135" text-anchor="middle" font-size="10" fill="white">Y_t = c + ε_t</text>
<text x="470" y="155" text-anchor="middle" font-size="10" fill="white">+ θâ‚ε_{t-1} + ...</text>
<!-- Selection Guide -->
<text x="300" y="210" text-anchor="middle" font-size="14" font-weight="bold" fill="#2c3e50">Model Selection Guide:</text>
<rect x="50" y="230" width="500" height="120" fill="white" stroke="#7f8c8d" stroke-width="1" rx="5"/>
<text x="70" y="255" font-size="11" fill="#3498db">• ACF cuts off at lag q → MA(q)</text>
<text x="70" y="275" font-size="11" fill="#3498db">• PACF cuts off at lag p → AR(p)</text>
<text x="70" y="295" font-size="11" fill="#2ecc71">• Both decay slowly → ARIMA(p,d,q)</text>
<text x="70" y="315" font-size="11" fill="#e74c3c">• Use AIC/BIC to select best (p,d,q)</text>
<text x="70" y="335" font-size="11" fill="#7f8c8d">• Auto-ARIMA tests multiple combinations</text>
</svg>
ARIMA Implementation
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima
import warnings
warnings.filterwarnings('ignore')
# Load data
ts = pd.read_csv('sales.csv', index_col='date', parse_dates=True)['sales']
# Split train/test
train_size = int(len(ts) * 0.8)
train, test = ts[:train_size], ts[train_size:]
# Auto ARIMA for parameter selection
auto_model = auto_arima(
train,
start_p=0, max_p=5,
start_q=0, max_q=5,
d=None, # Let auto_arima determine d
seasonal=True,
m=12, # Seasonal period
start_P=0, max_P=2,
start_Q=0, max_Q=2,
D=None,
trace=True,
error_action='ignore',
suppress_warnings=True,
stepwise=True
)
print(f"Best ARIMA order: {auto_model.order}")
print(f"Best seasonal order: {auto_model.seasonal_order}")
# Fit ARIMA model
model = ARIMA(train, order=auto_model.order)
fitted = model.fit()
print(fitted.summary())
# Forecast
forecast = fitted.forecast(steps=len(test))
SARIMA (Seasonal ARIMA)
# SARIMA with seasonal component
model_sarima = SARIMAX(
train,
order=auto_model.order,
seasonal_order=auto_model.seasonal_order,
enforce_stationarity=False,
enforce_invertibility=False
)
fitted_sarima = model_sarima.fit(disp=False)
print(fitted_sarima.summary())
# Forecast with confidence intervals
forecast_result = fitted_sarima.get_forecast(steps=len(test))
forecast = forecast_result.predicted_mean
conf_int = forecast_result.conf_int()
# Plot forecast
plt.figure(figsize=(12, 6))
plt.plot(train.index, train, label='Training Data')
plt.plot(test.index, test, label='Actual')
plt.plot(test.index, forecast, label='Forecast', color='red')
plt.fill_between(test.index, conf_int.iloc[:, 0], conf_int.iloc[:, 1],
color='pink', alpha=0.3, label='95% CI')
plt.legend()
plt.title('SARIMA Forecast')
plt.show()
Prophet Implementation
from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly
# Prepare data for Prophet (requires 'ds' and 'y' columns)
df_prophet = pd.DataFrame({
'ds': ts.index,
'y': ts.values
})
# Split data
train_prophet = df_prophet[:train_size]
test_prophet = df_prophet[train_size:]
# Initialize and fit model
model_prophet = Prophet(
yearly_seasonality=True,
weekly_seasonality=True,
daily_seasonality=False,
seasonality_mode='multiplicative',
changepoint_prior_scale=0.05,
seasonality_prior_scale=10
)
# Add custom seasonalities
model_prophet.add_seasonality(
name='monthly',
period=30.5,
fourier_order=5
)
model_prophet.fit(train_prophet)
# Make forecast
future = model_prophet.make_future_dataframe(periods=len(test), freq='D')
forecast_prophet = model_prophet.predict(future)
# Plot components
fig = model_prophet.plot_components(forecast_prophet)
plt.show()
# Plot forecast
fig = model_prophet.plot(forecast_prophet)
plt.plot(test_prophet['ds'], test_prophet['y'], 'r.', label='Actual')
plt.legend()
plt.show()
Model Evaluation
from sklearn.metrics import (
mean_absolute_error,
mean_squared_error,
mean_absolute_percentage_error
)
def evaluate_forecast(actual, predicted):
"""Calculate comprehensive forecast metrics"""
mae = mean_absolute_error(actual, predicted)
rmse = np.sqrt(mean_squared_error(actual, predicted))
mape = mean_absolute_percentage_error(actual, predicted) * 100
# Symmetric MAPE
smape = np.mean(2 * np.abs(actual - predicted) /
(np.abs(actual) + np.abs(predicted))) * 100
# Mean Absolute Scaled Error (vs naive forecast)
naive_forecast = actual.shift(1).dropna()
actual_trimmed = actual[1:]
mase = np.mean(np.abs(actual_trimmed - predicted[1:])) / \
np.mean(np.abs(actual_trimmed - naive_forecast))
metrics = {
'MAE': mae,
'RMSE': rmse,
'MAPE': mape,
'SMAPE': smape,
'MASE': mase
}
return metrics
# Evaluate ARIMA
arima_metrics = evaluate_forecast(test, forecast)
print("ARIMA Metrics:")
for metric, value in arima_metrics.items():
print(f" {metric}: {value:.4f}")
# Evaluate Prophet
prophet_forecast = forecast_prophet.set_index('ds').loc[test.index, 'yhat']
prophet_metrics = evaluate_forecast(test.values, prophet_forecast.values)
print("\nProphet Metrics:")
for metric, value in prophet_metrics.items():
print(f" {metric}: {value:.4f}")
Diagnostics
from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.graphics.gofplots import qqplot
def diagnostic_plots(fitted_model, ts):
"""Plot diagnostic checks for model residuals"""
residuals = fitted_model.resid
fig, axes = plt.subplots(2, 2, figsize=(12, 8))
# Residuals over time
axes[0, 0].plot(residuals)
axes[0, 0].axhline(y=0, color='r', linestyle='--')
axes[0, 0].set_title('Residuals Over Time')
# Histogram
axes[0, 1].hist(residuals, bins=50, edgecolor='black', density=True)
axes[0, 1].set_title('Residual Distribution')
# ACF of residuals
from statsmodels.graphics.tsaplots import plot_acf
plot_acf(residuals.dropna(), lags=40, ax=axes[1, 0])
axes[1, 0].set_title('ACF of Residuals')
# Q-Q plot
qqplot(residuals.dropna(), line='45', fit=True, ax=axes[1, 1])
axes[1, 1].set_title('Q-Q Plot')
plt.tight_layout()
plt.show()
# Ljung-Box test
lb_test = acorr_ljungbox(residuals.dropna(), lags=[10, 20], return_df=True)
print("Ljung-Box Test:")
print(lb_test)
diagnostic_plots(fitted_sarima, train)
Key Takeaways
- Use auto_arima to find optimal (p,d,q) parameters
- SARIMA handles seasonal patterns with (P,D,Q,s)
- Prophet is more robust to missing data and outliers
- Always check residuals for white noise
- Compare multiple models using MAPE, RMSE, and MASE