CW

Time Series Basics: Trend, Seasonality and Stationarity

Module 10: Specialized ML · Free Lesson

Advertisement

Time Series Basics: Trend, Seasonality and Stationarity

Time series data requires special handling due to temporal dependencies. This lesson covers fundamental concepts for time series analysis.

Time Series Components

<svg width="600" height="400" viewBox="0 0 600 400" xmlns="http://www.w3.org/2000/svg">
  <rect width="600" height="400" fill="#f8f9fa" rx="10"/>
  <text x="300" y="30" text-anchor="middle" font-size="18" font-weight="bold" fill="#2c3e50">Time Series Decomposition</text>
  
  <!-- Original Series -->
  <text x="50" y="70" font-size="12" font-weight="bold" fill="#2c3e50">Y(t) = T(t) + S(t) + R(t)</text>
  
  <rect x="50" y="80" width="500" height="60" fill="white" stroke="#3498db" stroke-width="2" rx="5"/>
  <text x="300" y="115" text-anchor="middle" font-size="12" fill="#3498db">Original Series (Y)</text>
  <path d="M60,110 Q150,90 250,110 T450,100 T540,105" stroke="#3498db" stroke-width="2" fill="none"/>
  
  <!-- Trend -->
  <rect x="50" y="150" width="500" height="60" fill="white" stroke="#2ecc71" stroke-width="2" rx="5"/>
  <text x="300" y="185" text-anchor="middle" font-size="12" fill="#2ecc71">Trend (T) - Long-term movement</text>
  <path d="M60,195 L540,175" stroke="#2ecc71" stroke-width="2" fill="none"/>
  
  <!-- Seasonality -->
  <rect x="50" y="220" width="500" height="60" fill="white" stroke="#e74c3c" stroke-width="2" rx="5"/>
  <text x="300" y="255" text-anchor="middle" font-size="12" fill="#e74c3c">Seasonal (S) - Regular patterns</text>
  <path d="M60,260 Q100,245 140,260 T220,260 T300,260 T380,260 T460,260 T540,260" stroke="#e74c3c" stroke-width="2" fill="none"/>
  
  <!-- Residual -->
  <rect x="50" y="290" width="500" height="60" fill="white" stroke="#f39c12" stroke-width="2" rx="5"/>
  <text x="300" y="325" text-anchor="middle" font-size="12" fill="#f39c12">Residual (R) - Random noise</text>
  <path d="M60,330 L100,320 L140,335 L180,325 L220,330 L260,315 L300,340 L340,320 L380,335 L420,325 L460,330 L500,320 L540,335" stroke="#f39c12" stroke-width="2" fill="none"/>
  
  <text x="300" y="380" text-anchor="middle" font-size="11" fill="#7f8c8d">Additive model: Y = T + S + R | Multiplicative: Y = T × S × R</text>
</svg>

Loading and Preparing Time Series

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta

# Load time series data; 'date' is parsed to datetimes so it can be used as
# the index of the frame.
df = pd.read_csv('sales_data.csv', parse_dates=['date'])
df = df.set_index('date')

# Create time series
ts = df['sales']

# Ensure regular frequency: asfreq('D') reindexes to one row per calendar day,
# leaving NaN for days that were absent from the file.
ts = ts.asfreq('D')  # Daily frequency
# Forward fill missing values. Series.ffill() replaces the deprecated
# fillna(method='ffill') spelling and fills gaps the same way.
ts = ts.ffill()

# Check frequency
print(f"Frequency: {ts.index.freq}")
print(f"Date range: {ts.index.min()} to {ts.index.max()}")
print(f"Length: {len(ts)}")

Visual Inspection

def plot_time_series(ts, title="Time Series", window=12):
    """Show a 2x2 visual summary of a time series.

    Panels: the raw series, the series with rolling mean and rolling standard
    deviation overlaid, a histogram of the non-missing values, and one box
    plot per calendar month.

    Args:
        ts: pandas Series indexed by datetimes (``ts.index.month`` is read).
        title: Prefix used in the title of the raw-series panel.
        window: Number of observations in the rolling mean/std window.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Raw series
    axes[0, 0].plot(ts, linewidth=0.8)
    axes[0, 0].set_title(f'{title} - Raw')
    axes[0, 0].set_ylabel('Value')

    # Rolling statistics
    rolling_mean = ts.rolling(window=window).mean()
    rolling_std = ts.rolling(window=window).std()
    axes[0, 1].plot(ts, linewidth=0.8, label='Original')
    axes[0, 1].plot(rolling_mean, label='Rolling Mean')
    axes[0, 1].plot(rolling_std, label='Rolling Std')
    axes[0, 1].legend()
    axes[0, 1].set_title('Rolling Statistics')

    # Distribution
    observed = ts.dropna()
    axes[1, 0].hist(observed, bins=50, edgecolor='black')
    axes[1, 0].set_title('Distribution')

    # Monthly boxplot. A Series has no .boxplot() method (only DataFrame
    # does), so split the values by calendar month and draw them with
    # matplotlib directly, placing each box at its month number.
    month_of = observed.index.month
    months = sorted(month_of.unique())
    samples = [observed[month_of == month].to_numpy() for month in months]
    if samples:  # nothing to draw for an all-missing series
        axes[1, 1].boxplot(samples, positions=months)
    axes[1, 1].set_title('Monthly Distribution')
    axes[1, 1].set_xlabel('Month')

    plt.tight_layout()
    plt.show()

plot_time_series(ts)

Time Series Decomposition

from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import STL

# Observations per seasonal cycle, shared by all three decompositions.
# The series was put on a daily frequency (asfreq('D')), so a weekly pattern
# spans 7 observations; 12 would only be the right period for monthly data.
# Use 365 instead to model a yearly cycle (needs at least two years of data).
SEASONAL_PERIOD = 7

# Additive decomposition: Y = T + S + R
decomposition_add = seasonal_decompose(ts, model='additive', period=SEASONAL_PERIOD)
fig = decomposition_add.plot()
plt.tight_layout()
plt.show()

# Multiplicative decomposition: Y = T * S * R
# NOTE: this model is only meaningful for strictly positive values; verify
# the series has no zeros or negatives before relying on it.
decomposition_mul = seasonal_decompose(ts, model='multiplicative', period=SEASONAL_PERIOD)
fig = decomposition_mul.plot()
plt.tight_layout()
plt.show()

# STL decomposition (robust to outliers)
stl = STL(ts, period=SEASONAL_PERIOD, robust=True)
result = stl.fit()
fig = result.plot()
plt.tight_layout()
plt.show()

Stationarity Tests

from statsmodels.tsa.stattools import adfuller, kpss, acf, pacf

def adf_test(series):
    """Run the Augmented Dickey-Fuller test and print a short report.

    Missing values are dropped before testing. The report lists the test
    statistic, the p-value, the critical values, and a verdict that is True
    when the p-value is below 0.05.

    Args:
        series: pandas Series to test.
    """
    outcome = adfuller(series.dropna())
    statistic = outcome[0]
    p_value = outcome[1]
    critical_values = outcome[4]

    print('ADF Test Results:')
    print(f'ADF Statistic: {statistic:.4f}')
    print(f'p-value: {p_value:.4f}')
    print('Critical Values:')
    for level in critical_values:
        print(f'  {level}: {critical_values[level]:.4f}')
    # A small p-value rejects the unit-root null, i.e. supports stationarity.
    print(f'Stationary: {p_value < 0.05}')

def kpss_test(series, regression='ct'):
    """Run the KPSS test and print a short report.

    KPSS has the opposite null hypothesis to ADF: the null is that the series
    IS stationary, so a p-value above 0.05 means stationarity is not rejected.

    Args:
        series: pandas Series to test; missing values are dropped first.
        regression: ``'ct'`` (default) tests stationarity around a
            deterministic trend; ``'c'`` tests stationarity around a constant
            level. The verdict line is labelled accordingly, because a
            trending series can pass the ``'ct'`` test without being
            level-stationary.
    """
    result = kpss(series.dropna(), regression=regression, nlags='auto')
    # Name the hypothesis that was actually tested so a trend-stationary
    # result is not misread as plain stationarity.
    verdict_label = 'Trend-stationary' if regression == 'ct' else 'Stationary'
    print('KPSS Test Results:')
    print(f'KPSS Statistic: {result[0]:.4f}')
    print(f'p-value: {result[1]:.4f}')
    print(f'{verdict_label}: {result[1] > 0.05}')

# Run both stationarity checks (ADF first, then KPSS) on the untransformed series
print("Original Series:")
for run_test in (adf_test, kpss_test):
    run_test(ts)

Making Series Stationary

# Observations per seasonal cycle. The series is on a daily frequency
# (asfreq('D')), so a weekly pattern spans 7 observations; a lag of 12 would
# only be a seasonal lag for monthly data.
SEASONAL_PERIOD = 7

# 1. Differencing
ts_diff1 = ts.diff().dropna()
ts_diff2 = ts.diff().diff().dropna()

# Seasonal differencing: subtract the value one full cycle earlier
ts_seasonal_diff = ts.diff(SEASONAL_PERIOD).dropna()

# The log and Box-Cox transforms are only defined for strictly positive
# values. Fail early with a clear message instead of letting np.log emit
# -inf/NaN that would silently propagate into the differenced series.
ts_observed = ts.dropna()
if (ts_observed <= 0).any():
    raise ValueError(
        "Log and Box-Cox transforms require strictly positive values; "
        "shift the series or drop non-positive observations first."
    )

# 2. Log transformation (for multiplicative seasonality)
ts_log = np.log(ts)
ts_log_diff = ts_log.diff().dropna()

# 3. Box-Cox transformation (returns the transformed values and the fitted lambda)
from scipy.stats import boxcox
ts_boxcox, lambda_opt = boxcox(ts_observed)

# Plot transformed series
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
axes[0].plot(ts_diff1)
axes[0].set_title('First Difference')
axes[1].plot(ts_seasonal_diff)
axes[1].set_title('Seasonal Difference')
axes[2].plot(ts_log_diff)
axes[2].set_title('Log Transform + Difference')
plt.tight_layout()
plt.show()

# Test stationarity after transformation
print("\nAfter Differencing:")
adf_test(ts_diff1)

ACF and PACF

from statsmodels.graphics.tsaplots import plot_acf, plot_pacf

# One row per series: ACF in the left column, PACF in the right column.
panels = [
    (ts.dropna(), 'Original'),
    (ts_diff1, 'Differenced'),
]

fig, axes = plt.subplots(2, 2, figsize=(12, 8))

for row, (series, label) in enumerate(panels):
    acf_axis = axes[row, 0]
    pacf_axis = axes[row, 1]

    plot_acf(series, lags=40, ax=acf_axis)
    acf_axis.set_title(f'ACF - {label}')

    plot_pacf(series, lags=40, ax=pacf_axis)
    pacf_axis.set_title(f'PACF - {label}')

plt.tight_layout()
plt.show()

Autocorrelation Analysis

def analyze_autocorrelation(ts, max_lags=40):
    """Compute ACF/PACF values and print which lags are significant.

    A lag counts as significant when the absolute correlation exceeds the
    approximate 95% bound ``1.96 / sqrt(n)``, where ``n`` is the number of
    non-missing observations. Lag 0 is excluded from both the printed lists
    and the interpretation, because a series is always perfectly correlated
    with itself and that says nothing about its structure.

    Args:
        ts: pandas Series to analyze; missing values are dropped first.
        max_lags: Highest lag passed to ``acf`` and ``pacf``.

    Returns:
        Tuple ``(acf_values, pacf_values)`` as returned by ``acf`` and
        ``pacf``, lag 0 included.
    """
    clean = ts.dropna()
    acf_values = acf(clean, nlags=max_lags)
    pacf_values = pacf(clean, nlags=max_lags)

    # Find significant lags
    n = len(clean)
    significance_level = 1.96 / np.sqrt(n)

    def _significant_lags(values):
        """Return the non-zero lags whose |correlation| exceeds the bound."""
        lags = np.flatnonzero(np.abs(values) > significance_level)
        return lags[lags > 0]

    significant_acf = _significant_lags(acf_values)
    significant_pacf = _significant_lags(pacf_values)

    print(f"Significant ACF lags: {significant_acf}")
    print(f"Significant PACF lags: {significant_pacf}")

    # Interpret patterns. Only genuine (non-zero) lags are counted, so the
    # PACF message is not printed when nothing beyond lag 0 is significant.
    if len(significant_acf) > 2:
        print("Pattern: Slow decay in ACF suggests non-stationarity")
    if len(significant_pacf) > 0:
        print(f"Pattern: PACF cuts off after lag {significant_pacf[-1]}")

    return acf_values, pacf_values

acf_vals, pacf_vals = analyze_autocorrelation(ts_diff1)

Key Takeaways

  1. Always visualize time series before modeling
  2. Test for stationarity using ADF and KPSS tests
  3. Use differencing or transformations to achieve stationarity
  4. ACF/PACF plots help identify model orders
  5. Consider seasonal patterns in decomposition

Advertisement

Need Expert Data Science Help?

Get personalized tutoring, project support, or professional consulting.

Advertisement