Time Series Basics: Trend, Seasonality and Stationarity
Time series data requires special handling due to temporal dependencies. This lesson covers fundamental concepts for time series analysis.
Time Series Components
<svg width="600" height="400" viewBox="0 0 600 400" xmlns="http://www.w3.org/2000/svg">
<rect width="600" height="400" fill="#f8f9fa" rx="10"/>
<text x="300" y="30" text-anchor="middle" font-size="18" font-weight="bold" fill="#2c3e50">Time Series Decomposition</text>
<!-- Original Series -->
<text x="50" y="70" font-size="12" font-weight="bold" fill="#2c3e50">Y(t) = T(t) + S(t) + R(t)</text>
<rect x="50" y="80" width="500" height="60" fill="white" stroke="#3498db" stroke-width="2" rx="5"/>
<text x="300" y="115" text-anchor="middle" font-size="12" fill="#3498db">Original Series (Y)</text>
<path d="M60,110 Q150,90 250,110 T450,100 T540,105" stroke="#3498db" stroke-width="2" fill="none"/>
<!-- Trend -->
<rect x="50" y="150" width="500" height="60" fill="white" stroke="#2ecc71" stroke-width="2" rx="5"/>
<text x="300" y="185" text-anchor="middle" font-size="12" fill="#2ecc71">Trend (T) - Long-term movement</text>
<path d="M60,195 L540,175" stroke="#2ecc71" stroke-width="2" fill="none"/>
<!-- Seasonality -->
<rect x="50" y="220" width="500" height="60" fill="white" stroke="#e74c3c" stroke-width="2" rx="5"/>
<text x="300" y="255" text-anchor="middle" font-size="12" fill="#e74c3c">Seasonal (S) - Regular patterns</text>
<path d="M60,260 Q100,245 140,260 T220,260 T300,260 T380,260 T460,260 T540,260" stroke="#e74c3c" stroke-width="2" fill="none"/>
<!-- Residual -->
<rect x="50" y="290" width="500" height="60" fill="white" stroke="#f39c12" stroke-width="2" rx="5"/>
<text x="300" y="325" text-anchor="middle" font-size="12" fill="#f39c12">Residual (R) - Random noise</text>
<path d="M60,330 L100,320 L140,335 L180,325 L220,330 L260,315 L300,340 L340,320 L380,335 L420,325 L460,330 L500,320 L540,335" stroke="#f39c12" stroke-width="2" fill="none"/>
<text x="300" y="380" text-anchor="middle" font-size="11" fill="#7f8c8d">Additive model: Y = T + S + R | Multiplicative: Y = T × S × R</text>
</svg>
Loading and Preparing Time Series
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime, timedelta
# Load time series data: the 'date' column is parsed as datetimes and becomes the index
df = pd.read_csv('sales_data.csv', parse_dates=['date'])
df = df.set_index('date')

# Create time series from the 'sales' column
ts = df['sales']

# Ensure regular frequency: conform the index to daily steps
ts = ts.asfreq('D')  # Daily frequency
# Forward fill missing values. Series.ffill() is the supported spelling;
# fillna(method='ffill') is deprecated in pandas 2.1+ and emits a FutureWarning.
ts = ts.ffill()

# Check frequency
print(f"Frequency: {ts.index.freq}")
print(f"Date range: {ts.index.min()} to {ts.index.max()}")
print(f"Length: {len(ts)}")
Visual Inspection
def plot_time_series(ts, title="Time Series", window=12):
    """Show a 2x2 visual overview of a time series.

    Panels: the raw series, the series with its rolling mean and rolling
    standard deviation, a histogram of the non-missing values, and one box
    plot per calendar month.

    Args:
        ts: pandas Series to plot. The monthly panel reads ``ts.index.month``,
            so the index must be a DatetimeIndex.
        title: Prefix used in the raw-series panel title.
        window: Number of observations in the rolling mean/std window.
    """
    fig, axes = plt.subplots(2, 2, figsize=(14, 10))

    # Raw series
    axes[0, 0].plot(ts, linewidth=0.8)
    axes[0, 0].set_title(f'{title} - Raw')
    axes[0, 0].set_ylabel('Value')

    # Rolling statistics
    rolling_mean = ts.rolling(window=window).mean()
    rolling_std = ts.rolling(window=window).std()
    axes[0, 1].plot(ts, linewidth=0.8, label='Original')
    axes[0, 1].plot(rolling_mean, label='Rolling Mean')
    axes[0, 1].plot(rolling_std, label='Rolling Std')
    axes[0, 1].legend()
    axes[0, 1].set_title('Rolling Statistics')

    # Distribution
    observed = ts.dropna()
    axes[1, 0].hist(observed, bins=50, edgecolor='black')
    axes[1, 0].set_title('Distribution')

    # Monthly boxplot. A pandas Series has no ``boxplot`` method, so group the
    # non-missing values by calendar month and draw them with matplotlib.
    months = []
    samples = []
    for month, values in observed.groupby(observed.index.month):
        months.append(month)
        samples.append(values.to_numpy())
    if samples:
        # Using the month numbers as positions also labels the x ticks with them.
        axes[1, 1].boxplot(samples, positions=months)
    axes[1, 1].set_title('Monthly Distribution')
    axes[1, 1].set_xlabel('Month')

    plt.tight_layout()
    plt.show()
# Draw the overview panels for the series; the default title "Time Series" is used
plot_time_series(ts)
Time Series Decomposition
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.tsa.seasonal import STL
def _show_components(fitted):
    """Plot a fitted decomposition, tighten the layout, display it, return the figure."""
    figure = fitted.plot()
    plt.tight_layout()
    plt.show()
    return figure


# NOTE(review): every decomposition below uses period=12, while ts was
# conformed to daily frequency with asfreq('D') earlier in the lesson.
# Confirm that a 12-observation cycle is the intended seasonal period.

# Additive decomposition
decomposition_add = seasonal_decompose(ts, model='additive', period=12)
fig = _show_components(decomposition_add)

# Multiplicative decomposition
decomposition_mul = seasonal_decompose(ts, model='multiplicative', period=12)
fig = _show_components(decomposition_mul)

# STL decomposition (robust to outliers)
stl = STL(ts, period=12, robust=True)
result = stl.fit()
fig = _show_components(result)
Stationarity Tests
from statsmodels.tsa.stattools import adfuller, kpss, acf, pacf
def adf_test(series):
    """Run the Augmented Dickey-Fuller test on ``series`` and print a report.

    Missing values are dropped before testing. The report lists the test
    statistic, the p-value, each critical value, and a ``Stationary`` flag
    that is True when the p-value is below 0.05.
    """
    outcome = adfuller(series.dropna())
    statistic, p_value, critical_values = outcome[0], outcome[1], outcome[4]

    print('ADF Test Results:')
    print(f'ADF Statistic: {statistic:.4f}')
    print(f'p-value: {p_value:.4f}')
    print('Critical Values:')
    for level in critical_values:
        print(f' {level}: {critical_values[level]:.4f}')

    print(f'Stationary: {p_value < 0.05}')
def kpss_test(series, regression='ct'):
    """Run the KPSS test on ``series`` and print a short report.

    KPSS has the opposite hypotheses to ADF: its null hypothesis is that the
    series is stationary, so a p-value above 0.05 (null not rejected) is
    reported as ``Stationary: True``.

    Args:
        series: pandas Series to test; missing values are dropped first.
        regression: Forwarded to statsmodels ``kpss``. ``'ct'`` (the default,
            same as before) tests stationarity around a deterministic trend;
            ``'c'`` tests stationarity around a constant level, which is the
            counterpart of an ADF test run with its default constant-only
            regression.
    """
    result = kpss(series.dropna(), regression=regression, nlags='auto')
    print('KPSS Test Results:')
    print(f'KPSS Statistic: {result[0]:.4f}')
    # statsmodels interpolates this p-value from a table, so it is bounded
    # to the range [0.01, 0.1].
    print(f'p-value: {result[1]:.4f}')
    print(f'Stationary: {result[1] > 0.05}')
# Test for stationarity
# Both tests run on the same series: adf_test reports stationary when its
# p-value is below 0.05, kpss_test when its p-value is above 0.05.
print("Original Series:")
adf_test(ts)
kpss_test(ts)
Making Series Stationary
# 1. Differencing: first- and second-order differences
ts_diff1 = ts.diff().dropna()
ts_diff2 = ts.diff().diff().dropna()

# Seasonal differencing: subtract the value 12 observations earlier
# NOTE(review): ts is at daily frequency, so this is a 12-day lag — confirm
ts_seasonal_diff = ts.diff(periods=12).dropna()

# 2. Log transformation (for multiplicative seasonality)
# NOTE(review): assumes ts is strictly positive; np.log gives -inf for zeros
# and NaN for negative values — confirm against the data
ts_log = np.log(ts)
ts_log_diff = ts_log.diff().dropna()

# 3. Box-Cox transformation (scipy requires strictly positive input)
from scipy.stats import boxcox
ts_boxcox, lambda_opt = boxcox(ts.dropna())

# Plot transformed series, one panel per transformation
fig, axes = plt.subplots(1, 3, figsize=(15, 4))
panels = (
    (ts_diff1, 'First Difference'),
    (ts_seasonal_diff, 'Seasonal Difference'),
    (ts_log_diff, 'Log Transform + Difference'),
)
for panel_axis, (panel_series, panel_title) in zip(axes, panels):
    panel_axis.plot(panel_series)
    panel_axis.set_title(panel_title)
plt.tight_layout()
plt.show()

# Test stationarity after transformation
print("\nAfter Differencing:")
adf_test(ts_diff1)
ACF and PACF
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# One grid row per series: ACF in the left column, PACF in the right.
# Top row is the original series, bottom row the differenced series.
rows = (
    (ts.dropna(), 'ACF - Original', 'PACF - Original'),
    (ts_diff1, 'ACF - Differenced', 'PACF - Differenced'),
)
for row, (series, acf_title, pacf_title) in enumerate(rows):
    acf_ax, pacf_ax = axes[row, 0], axes[row, 1]
    plot_acf(series, lags=40, ax=acf_ax)
    acf_ax.set_title(acf_title)
    plot_pacf(series, lags=40, ax=pacf_ax)
    pacf_ax.set_title(pacf_title)

plt.tight_layout()
plt.show()
Autocorrelation Analysis
def analyze_autocorrelation(ts, max_lags=40):
    """Analyze ACF and PACF patterns and print the significant lags.

    A lag is treated as significant when the absolute coefficient exceeds
    ``1.96 / sqrt(n)``, where ``n`` is the number of non-missing observations.
    Lag 0 is excluded from the significance lists: its coefficient is always
    1, so it says nothing about the series.

    Args:
        ts: pandas Series to analyze; missing values are dropped first.
        max_lags: Highest lag passed to ``acf`` and ``pacf``.

    Returns:
        Tuple ``(acf_values, pacf_values)`` as returned by ``acf`` and
        ``pacf``, including the lag-0 entry.
    """
    observed = ts.dropna()  # drop missing values once and reuse
    acf_values = acf(observed, nlags=max_lags)
    pacf_values = pacf(observed, nlags=max_lags)

    # Find significant lags
    n = len(observed)
    significance_level = 1.96 / np.sqrt(n)

    def _significant_lags(values):
        """Return the non-zero lags whose coefficient exceeds the threshold."""
        lags = np.flatnonzero(np.abs(values) > significance_level)
        return lags[lags > 0]

    significant_acf = _significant_lags(acf_values)
    significant_pacf = _significant_lags(pacf_values)
    print(f"Significant ACF lags: {significant_acf}")
    print(f"Significant PACF lags: {significant_pacf}")

    # Interpret patterns
    # At least two significant non-zero lags (same threshold as before, when
    # lag 0 was counted as well).
    if len(significant_acf) > 1:
        print("Pattern: Slow decay in ACF suggests non-stationarity")
    # Report only when a non-zero lag is significant, so the message can no
    # longer read "cuts off after lag 0". The lag shown is the largest
    # significant one.
    if len(significant_pacf) > 0:
        print(f"Pattern: PACF cuts off after lag {significant_pacf[-1]}")
    return acf_values, pacf_values
# Analyze the first-differenced series and keep the returned ACF/PACF arrays
acf_vals, pacf_vals = analyze_autocorrelation(ts_diff1)
Key Takeaways
- Always visualize time series before modeling
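- Test for stationarity using both the ADF test (null hypothesis: unit root, i.e. non-stationary) and the KPSS test (null hypothesis: stationary); because their null hypotheses are opposite, agreement between them is stronger evidence than either test alone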
- Use differencing or transformations to achieve stationarity
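- ACF/PACF plots help identify model orders: a sharp cut-off in the PACF suggests the autoregressive order, and a sharp cut-off in the ACF suggests the moving-average order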
- Consider seasonal patterns in decomposition