CW

ARIMA and Prophet: Time Series Forecasting

Module 10: Specialized MLFree Lesson

Advertisement

ARIMA and Prophet: Time Series Forecasting

This lesson covers two powerful forecasting approaches: classical ARIMA models and Facebook's Prophet for automated forecasting.

ARIMA Model

<svg width="600" height="400" viewBox="0 0 600 400" xmlns="http://www.w3.org/2000/svg">
  <rect width="600" height="400" fill="#f8f9fa" rx="10"/>
  <text x="300" y="30" text-anchor="middle" font-size="18" font-weight="bold" fill="#2c3e50">ARIMA(p,d,q) Components</text>
  
  <!-- AR Component -->
  <rect x="50" y="70" width="160" height="100" fill="#3498db" rx="5"/>
  <text x="130" y="95" text-anchor="middle" font-size="14" font-weight="bold" fill="white">AR(p)</text>
  <text x="130" y="115" text-anchor="middle" font-size="11" fill="white">Autoregressive</text>
  <text x="130" y="135" text-anchor="middle" font-size="10" fill="white">Y_t = c + Φ₁Y_{t-1}</text>
  <text x="130" y="155" text-anchor="middle" font-size="10" fill="white">+ Φ₂Y_{t-2} + ...</text>
  
  <!-- I Component -->
  <rect x="220" y="70" width="160" height="100" fill="#2ecc71" rx="5"/>
  <text x="300" y="95" text-anchor="middle" font-size="14" font-weight="bold" fill="white">I(d)</text>
  <text x="300" y="115" text-anchor="middle" font-size="11" fill="white">Integrated</text>
  <text x="300" y="135" text-anchor="middle" font-size="10" fill="white">Differencing</text>
  <text x="300" y="155" text-anchor="middle" font-size="10" fill="white">d = 1: Y'_t = Y_t - Y_{t-1}</text>
  
  <!-- MA Component -->
  <rect x="390" y="70" width="160" height="100" fill="#e74c3c" rx="5"/>
  <text x="470" y="95" text-anchor="middle" font-size="14" font-weight="bold" fill="white">MA(q)</text>
  <text x="470" y="115" text-anchor="middle" font-size="11" fill="white">Moving Average</text>
  <text x="470" y="135" text-anchor="middle" font-size="10" fill="white">Y_t = c + ε_t</text>
  <text x="470" y="155" text-anchor="middle" font-size="10" fill="white">+ θ₁ε_{t-1} + ...</text>
  
  <!-- Selection Guide -->
  <text x="300" y="210" text-anchor="middle" font-size="14" font-weight="bold" fill="#2c3e50">Model Selection Guide:</text>
  
  <rect x="50" y="230" width="500" height="120" fill="white" stroke="#7f8c8d" stroke-width="1" rx="5"/>
  <text x="70" y="255" font-size="11" fill="#3498db">• ACF cuts off at lag q → MA(q)</text>
  <text x="70" y="275" font-size="11" fill="#3498db">• PACF cuts off at lag p → AR(p)</text>
  <text x="70" y="295" font-size="11" fill="#2ecc71">• Both decay slowly → ARIMA(p,d,q)</text>
  <text x="70" y="315" font-size="11" fill="#e74c3c">• Use AIC/BIC to select best (p,d,q)</text>
  <text x="70" y="335" font-size="11" fill="#7f8c8d">• Auto-ARIMA tests multiple combinations</text>
</svg>

ARIMA Implementation

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.arima.model import ARIMA
from statsmodels.tsa.statespace.sarimax import SARIMAX
from pmdarima import auto_arima
import warnings
warnings.filterwarnings('ignore')

# Load data
ts = pd.read_csv('sales.csv', index_col='date', parse_dates=True)['sales']

# Split train/test
train_size = int(len(ts) * 0.8)
train, test = ts[:train_size], ts[train_size:]

# Auto ARIMA for parameter selection
auto_model = auto_arima(
    train,
    start_p=0, max_p=5,
    start_q=0, max_q=5,
    d=None,  # Let auto_arima determine d
    seasonal=True,
    m=12,  # Seasonal period
    start_P=0, max_P=2,
    start_Q=0, max_Q=2,
    D=None,
    trace=True,
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True
)

print(f"Best ARIMA order: {auto_model.order}")
print(f"Best seasonal order: {auto_model.seasonal_order}")

# Fit ARIMA model
model = ARIMA(train, order=auto_model.order)
fitted = model.fit()
print(fitted.summary())

# Forecast
forecast = fitted.forecast(steps=len(test))

SARIMA (Seasonal ARIMA)

# SARIMA with seasonal component
model_sarima = SARIMAX(
    train,
    order=auto_model.order,
    seasonal_order=auto_model.seasonal_order,
    enforce_stationarity=False,
    enforce_invertibility=False
)

fitted_sarima = model_sarima.fit(disp=False)
print(fitted_sarima.summary())

# Forecast with confidence intervals
forecast_result = fitted_sarima.get_forecast(steps=len(test))
forecast = forecast_result.predicted_mean
conf_int = forecast_result.conf_int()

# Plot forecast
plt.figure(figsize=(12, 6))
plt.plot(train.index, train, label='Training Data')
plt.plot(test.index, test, label='Actual')
plt.plot(test.index, forecast, label='Forecast', color='red')
plt.fill_between(test.index, conf_int.iloc[:, 0], conf_int.iloc[:, 1], 
                 color='pink', alpha=0.3, label='95% CI')
plt.legend()
plt.title('SARIMA Forecast')
plt.show()

Prophet Implementation

from prophet import Prophet
from prophet.plot import plot_plotly, plot_components_plotly

# Prepare data for Prophet (requires 'ds' and 'y' columns)
df_prophet = pd.DataFrame({
    'ds': ts.index,
    'y': ts.values
})

# Split data
train_prophet = df_prophet[:train_size]
test_prophet = df_prophet[train_size:]

# Initialize and fit model
model_prophet = Prophet(
    yearly_seasonality=True,
    weekly_seasonality=True,
    daily_seasonality=False,
    seasonality_mode='multiplicative',
    changepoint_prior_scale=0.05,
    seasonality_prior_scale=10
)

# Add custom seasonalities
model_prophet.add_seasonality(
    name='monthly',
    period=30.5,
    fourier_order=5
)

model_prophet.fit(train_prophet)

# Make forecast
future = model_prophet.make_future_dataframe(periods=len(test), freq='D')
forecast_prophet = model_prophet.predict(future)

# Plot components
fig = model_prophet.plot_components(forecast_prophet)
plt.show()

# Plot forecast
fig = model_prophet.plot(forecast_prophet)
plt.plot(test_prophet['ds'], test_prophet['y'], 'r.', label='Actual')
plt.legend()
plt.show()

Model Evaluation

from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    mean_absolute_percentage_error
)

def evaluate_forecast(actual, predicted):
    """Calculate comprehensive forecast metrics"""
    mae = mean_absolute_error(actual, predicted)
    rmse = np.sqrt(mean_squared_error(actual, predicted))
    mape = mean_absolute_percentage_error(actual, predicted) * 100
    
    # Symmetric MAPE
    smape = np.mean(2 * np.abs(actual - predicted) / 
                    (np.abs(actual) + np.abs(predicted))) * 100
    
    # Mean Absolute Scaled Error (vs naive forecast)
    naive_forecast = actual.shift(1).dropna()
    actual_trimmed = actual[1:]
    mase = np.mean(np.abs(actual_trimmed - predicted[1:])) / \
           np.mean(np.abs(actual_trimmed - naive_forecast))
    
    metrics = {
        'MAE': mae,
        'RMSE': rmse,
        'MAPE': mape,
        'SMAPE': smape,
        'MASE': mase
    }
    
    return metrics

# Evaluate ARIMA
arima_metrics = evaluate_forecast(test, forecast)
print("ARIMA Metrics:")
for metric, value in arima_metrics.items():
    print(f"  {metric}: {value:.4f}")

# Evaluate Prophet
prophet_forecast = forecast_prophet.set_index('ds').loc[test.index, 'yhat']
prophet_metrics = evaluate_forecast(test.values, prophet_forecast.values)
print("\nProphet Metrics:")
for metric, value in prophet_metrics.items():
    print(f"  {metric}: {value:.4f}")

Diagnostics

from statsmodels.stats.diagnostic import acorr_ljungbox
from statsmodels.graphics.gofplots import qqplot

def diagnostic_plots(fitted_model, ts):
    """Plot diagnostic checks for model residuals"""
    residuals = fitted_model.resid
    
    fig, axes = plt.subplots(2, 2, figsize=(12, 8))
    
    # Residuals over time
    axes[0, 0].plot(residuals)
    axes[0, 0].axhline(y=0, color='r', linestyle='--')
    axes[0, 0].set_title('Residuals Over Time')
    
    # Histogram
    axes[0, 1].hist(residuals, bins=50, edgecolor='black', density=True)
    axes[0, 1].set_title('Residual Distribution')
    
    # ACF of residuals
    from statsmodels.graphics.tsaplots import plot_acf
    plot_acf(residuals.dropna(), lags=40, ax=axes[1, 0])
    axes[1, 0].set_title('ACF of Residuals')
    
    # Q-Q plot
    qqplot(residuals.dropna(), line='45', fit=True, ax=axes[1, 1])
    axes[1, 1].set_title('Q-Q Plot')
    
    plt.tight_layout()
    plt.show()
    
    # Ljung-Box test
    lb_test = acorr_ljungbox(residuals.dropna(), lags=[10, 20], return_df=True)
    print("Ljung-Box Test:")
    print(lb_test)

diagnostic_plots(fitted_sarima, train)

Key Takeaways

  1. Use auto_arima to find optimal (p,d,q) parameters
  2. SARIMA handles seasonal patterns with (P,D,Q,s)
  3. Prophet is more robust to missing data and outliers
  4. Always check residuals for white noise
  5. Compare multiple models using MAPE, RMSE, and MASE

Advertisement

Need Expert Data Science Help?

Get personalized tutoring, project support, or professional consulting.

Advertisement