Time Series Forecasting with LLMs
Time Series Tokenization
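LLMs operate on sequences of discrete tokens. Converting a time series into a token-like representation (for example by quantile binning or by slicing it into fixed-length windows) makes it possible to reuse pre-trained language models for temporal forecasting.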
import numpy as np
import pandas as pd
from typing import List, Tuple
class TimeSeriesTokenizer:
    """Quantile-binning tokenizer that maps real values to integer token ids.

    ``fit`` learns ``vocab_size - 1`` interior percentile breakpoints, which
    split the fitted values into ``vocab_size`` equal-frequency bins.
    ``tokenize`` returns the bin index (``0 .. vocab_size - 1``) of each value
    and ``detokenize`` maps a bin index back to one representative value per
    bin.

    NOTE(review): this file defines a second class with the same name (the
    sliding-window variant). If both snippets are loaded into one module the
    later definition shadows this one -- consider renaming one of them.
    """

    def __init__(self, vocab_size: int = 256):
        """Store the number of bins; nothing is learned until ``fit``.

        Args:
            vocab_size: Number of distinct token ids (bins). Must be >= 1.

        Raises:
            ValueError: If ``vocab_size`` is smaller than 1.
        """
        if vocab_size < 1:
            raise ValueError(f"vocab_size must be >= 1, got {vocab_size}")
        self.vocab_size = vocab_size
        self.breakpoints = None  # interior bin edges, set by fit()
        self.bin_centers = None  # one representative value per bin, set by fit()

    def fit(self, data: np.ndarray):
        """Learn bin edges and per-bin representative values from ``data``.

        Args:
            data: Array-like of numeric values (flattened by ``np.percentile``).

        Returns:
            ``self``, so calls can be chained.

        Raises:
            ValueError: If ``data`` is empty.
        """
        values = np.asarray(data)
        if values.size == 0:
            raise ValueError("cannot fit TimeSeriesTokenizer on empty data")

        # vocab_size + 1 evenly spaced percentiles delimit vocab_size bins.
        edges = np.linspace(0, 100, self.vocab_size + 1)
        self.breakpoints = np.percentile(values, edges[1:-1])
        # The percentile halfway through each bin is that bin's representative
        # value. There are vocab_size of them, so every token id decodes to a
        # distinct point (the breakpoints alone only offer vocab_size - 1).
        self.bin_centers = np.percentile(values, (edges[:-1] + edges[1:]) / 2)
        return self

    def tokenize(self, data: np.ndarray) -> np.ndarray:
        """Return the bin index of every value in ``data``.

        Raises:
            RuntimeError: If ``fit`` has not been called.
        """
        if self.breakpoints is None:
            raise RuntimeError("fit() must be called before tokenize()")
        return np.digitize(data, self.breakpoints)

    def detokenize(self, tokens: np.ndarray) -> np.ndarray:
        """Map token ids back to the representative value of their bin.

        Out-of-range ids are clipped to the first / last bin.

        Raises:
            RuntimeError: If ``fit`` has not been called.
        """
        if self.breakpoints is None:
            raise RuntimeError("fit() must be called before detokenize()")

        # Fall back to the edge lookup when breakpoints were assigned by hand
        # and no per-bin representatives exist.
        centers = getattr(self, "bin_centers", None)
        lookup = np.asarray(self.breakpoints if centers is None else centers)
        indices = np.asarray(tokens).clip(0, len(lookup) - 1)
        return lookup[indices]
class TimeSeriesTokenizer:
    """Split a 1-D series into fixed-length, regularly spaced windows.

    NOTE(review): this file defines another class with the same name (the
    quantile-binning variant). If both snippets are loaded into one module
    this definition shadows the earlier one -- consider renaming one of them.
    """

    def __init__(self, window_size: int = 512, stride: int = 128):
        """Store the window geometry.

        Args:
            window_size: Number of samples per window. Must be >= 1.
            stride: Offset between the starts of consecutive windows.
                Must be >= 1.

        Raises:
            ValueError: If ``window_size`` or ``stride`` is smaller than 1.
        """
        if window_size < 1:
            raise ValueError(f"window_size must be >= 1, got {window_size}")
        if stride < 1:
            raise ValueError(f"stride must be >= 1, got {stride}")
        self.window_size = window_size
        self.stride = stride

    def create_windows(self, series: np.ndarray) -> np.ndarray:
        """Return every complete window of ``series`` as a float64 matrix.

        Trailing samples that do not fill a whole window are dropped.

        Args:
            series: 1-D array-like of numeric values.

        Returns:
            Array of shape ``(n_windows, window_size)``. When the series is
            shorter than one window the result has zero rows.

        Raises:
            ValueError: If ``series`` is not one-dimensional.
        """
        values = np.asarray(series, dtype=np.float64)
        if values.ndim != 1:
            raise ValueError(
                f"series must be 1-D, got array with shape {values.shape}"
            )

        # Floor division goes negative for series much shorter than the
        # window, so clamp to zero and return an empty matrix consistently.
        n_windows = max(0, (len(values) - self.window_size) // self.stride + 1)

        # Row i holds values[i * stride : i * stride + window_size]. Build all
        # index rows at once instead of looping in Python.
        starts = np.arange(n_windows) * self.stride
        offsets = np.arange(self.window_size)
        return values[starts[:, None] + offsets]
# Example usage: slice a series into 512-sample windows (default stride).
# NOTE: `time_series_data` is not defined in this snippet -- presumably a 1-D
# numeric array supplied by the caller.
tokenizer = TimeSeriesTokenizer(window_size=512)
windows = tokenizer.create_windows(series=time_series_data)
LLM-Based Forecasting
import openai
from typing import List, Dict
class LLMForecaster:
    """Ask an OpenAI chat model for forecasts and pattern explanations.

    The series is sent as comma-separated text with two decimals per value;
    only the most recent values are included (100 for ``forecast``, 50 for
    ``explain_patterns``).
    """

    # Prompts are kept verbatim so the model sees the same instructions.
    _FORECAST_SYSTEM_PROMPT = (
        "You are a time series forecasting expert.\n"
        "Analyze the data and provide forecasts with confidence intervals.\n"
        'Return JSON with "forecast", "lower_bound", "upper_bound", '
        'and "explanation" fields.'
    )
    _EXPLAIN_SYSTEM_PROMPT = "Analyze time series patterns and provide insights."

    def __init__(self, api_key: str, model: str = "gpt-4o"):
        """Create the API client.

        Args:
            api_key: OpenAI API key.
            model: Chat model used for every request. ``forecast`` requests
                JSON mode (``response_format={"type": "json_object"}``), so
                the model must support it; the base ``gpt-4`` model rejects
                that parameter.
        """
        self.client = openai.OpenAI(api_key=api_key)
        self.model = model

    def forecast(
        self,
        historical_values: List[float],
        forecast_horizon: int = 12,
        frequency: str = "daily"
    ) -> Dict:
        """Request a multi-step forecast and return the parsed JSON reply.

        Args:
            historical_values: Observed values, oldest first. Only the last
                100 are sent.
            forecast_horizon: Number of future steps to request. Must be >= 1.
            frequency: Free-text description of the sampling frequency,
                inserted into the prompt.

        Returns:
            The model's JSON object decoded into a dict. The prompt asks for
            "forecast", "lower_bound", "upper_bound" and "explanation" keys,
            but the reply is not validated here.

        Raises:
            ValueError: If ``historical_values`` is empty, if
                ``forecast_horizon`` is smaller than 1, or if the reply is
                empty or not valid JSON (``json.JSONDecodeError`` is a
                ``ValueError`` subclass).
        """
        import json

        # Validate before spending an API call.
        if len(historical_values) == 0:
            raise ValueError("historical_values must not be empty")
        if forecast_horizon < 1:
            raise ValueError(
                f"forecast_horizon must be >= 1, got {forecast_horizon}"
            )

        values_str = ", ".join(f"{v:.2f}" for v in historical_values[-100:])
        user_prompt = (
            f"Historical {frequency} values: {values_str}\n"
            f"Provide {forecast_horizon}-step ahead forecast."
        )

        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self._FORECAST_SYSTEM_PROMPT},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.2,
            response_format={"type": "json_object"},
        )

        content = response.choices[0].message.content
        if not content:
            # e.g. a refusal or a filtered reply carries no text to parse.
            raise ValueError("model returned an empty response")
        return json.loads(content)

    def explain_patterns(self, values: List[float]) -> str:
        """Return the model's free-text analysis of the last 50 values.

        Args:
            values: Observed values, oldest first.

        Returns:
            The message content of the first completion choice.
        """
        values_str = ", ".join(f"{v:.2f}" for v in values[-50:])
        response = self.client.chat.completions.create(
            model=self.model,
            messages=[
                {"role": "system", "content": self._EXPLAIN_SYSTEM_PROMPT},
                {"role": "user", "content": f"Analyze these values: {values_str}"},
            ],
            temperature=0.3,
        )
        return response.choices[0].message.content
# Example usage: request a 7-step forecast from eight observations.
# "your-api-key" is a placeholder; the call below performs a real API request.
forecaster = LLMForecaster(api_key="your-api-key")
forecast = forecaster.forecast(
    forecast_horizon=7,
    historical_values=[100, 102, 105, 103, 108, 112, 110, 115],
)
print(f"Forecast: {forecast['forecast']}")
Traditional Time Series with Deep Learning
import torch
import torch.nn as nn
class TemporalFusionTransformer(nn.Module):
    """Single-layer self-attention forecaster.

    Pipeline: linear input projection -> add a learned per-position encoding
    -> one multi-head self-attention layer -> take the last time step ->
    two-layer MLP that emits ``forecast_horizon`` values.
    """

    def __init__(
        self,
        input_dim: int,
        hidden_dim: int = 128,
        num_heads: int = 4,
        forecast_horizon: int = 12,
        max_seq_len: int = 512
    ):
        """Build the layers.

        Args:
            input_dim: Number of features per time step.
            hidden_dim: Width of the projected representation.
            num_heads: Number of attention heads.
            forecast_horizon: Number of values produced per input sequence.
            max_seq_len: Longest sequence the learned positional table
                supports. The default of 512 keeps the parameter shape the
                same as before this argument existed.

        Raises:
            ValueError: If ``max_seq_len`` is smaller than 1.
        """
        super().__init__()
        if max_seq_len < 1:
            raise ValueError(f"max_seq_len must be >= 1, got {max_seq_len}")
        self.max_seq_len = max_seq_len

        self.input_projection = nn.Linear(input_dim, hidden_dim)
        # Learned positional table: one hidden_dim vector per time step.
        self.temporal_encoding = nn.Parameter(
            torch.randn(1, max_seq_len, hidden_dim)
        )
        self.attention = nn.MultiheadAttention(
            hidden_dim, num_heads, batch_first=True
        )
        self.forecast_head = nn.Sequential(
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, forecast_horizon)
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Forecast from a batch of sequences.

        Args:
            x: Tensor of shape ``(batch, seq_len, input_dim)``.

        Returns:
            Tensor of shape ``(batch, forecast_horizon)``.

        Raises:
            ValueError: If ``x`` is not 3-D, is an empty sequence, or is
                longer than the positional table.
        """
        if x.dim() != 3:
            raise ValueError(
                f"expected input of shape (batch, seq_len, input_dim), "
                f"got {tuple(x.shape)}"
            )
        seq_len = x.shape[1]
        # Read the limit from the parameter itself so checkpoints created
        # before max_seq_len existed keep working.
        limit = self.temporal_encoding.shape[1]
        if seq_len < 1 or seq_len > limit:
            raise ValueError(
                f"seq_len must be between 1 and {limit}, got {seq_len}"
            )

        projected = self.input_projection(x)
        encoded = projected + self.temporal_encoding[:, :seq_len, :]
        attended, _ = self.attention(encoded, encoded, encoded)
        # The last position summarizes the sequence for the forecast head.
        context = attended[:, -1, :]
        return self.forecast_head(context)
# Example usage: 10 input features per step, 24 forecast values per sequence.
# NOTE: `time_series_tensor` is not defined in this snippet -- presumably a
# (batch, seq_len, 10) float tensor supplied by the caller.
model = TemporalFusionTransformer(forecast_horizon=24, input_dim=10)
predictions = model(time_series_tensor)
Evaluation Metrics
import numpy as np
class TimeSeriesEvaluator:
    """Point-forecast accuracy metrics.

    Every method is static, accepts any array-like input (lists included) and
    returns a plain Python ``float``.
    """

    @staticmethod
    def mae(actual: np.ndarray, predicted: np.ndarray) -> float:
        """Return the mean absolute error between ``actual`` and ``predicted``."""
        errors = np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float)
        return float(np.mean(np.abs(errors)))

    @staticmethod
    def rmse(actual: np.ndarray, predicted: np.ndarray) -> float:
        """Return the root mean squared error between ``actual`` and ``predicted``."""
        errors = np.asarray(actual, dtype=float) - np.asarray(predicted, dtype=float)
        return float(np.sqrt(np.mean(errors ** 2)))

    @staticmethod
    def mape(actual: np.ndarray, predicted: np.ndarray) -> float:
        """Return the mean absolute percentage error, in percent.

        Points where ``actual`` is exactly zero are excluded, because the
        percentage error is undefined there and would turn the whole result
        into inf/nan. Returns ``nan`` when no non-zero actual value remains.
        """
        actual_arr, predicted_arr = np.broadcast_arrays(
            np.asarray(actual, dtype=float), np.asarray(predicted, dtype=float)
        )
        nonzero = actual_arr != 0
        if not nonzero.any():
            return float("nan")
        ratios = (actual_arr[nonzero] - predicted_arr[nonzero]) / actual_arr[nonzero]
        return float(np.mean(np.abs(ratios)) * 100)

    @staticmethod
    def mase(actual: np.ndarray, predicted: np.ndarray, seasonal_period: int = 1) -> float:
        """Return the mean absolute scaled error.

        The forecast MAE is divided by the MAE of a seasonal-naive forecast
        computed on ``actual`` itself (each value predicted by the value
        ``seasonal_period`` steps earlier).

        Args:
            actual: Observed values.
            predicted: Forecast values.
            seasonal_period: Lag of the naive forecast. Must be >= 1.

        Returns:
            The scaled error. For a series whose naive error is zero (for
            example a constant series) the result is ``inf``, or ``nan`` when
            the forecast error is zero as well.

        Raises:
            ValueError: If ``seasonal_period`` is smaller than 1 or ``actual``
                has no more than ``seasonal_period`` observations.
        """
        if seasonal_period < 1:
            raise ValueError(f"seasonal_period must be >= 1, got {seasonal_period}")
        actual_arr = np.asarray(actual, dtype=float)
        predicted_arr = np.asarray(predicted, dtype=float)
        if actual_arr.ndim == 0 or actual_arr.shape[0] <= seasonal_period:
            raise ValueError(
                "actual must contain more than seasonal_period observations"
            )

        naive_errors = np.abs(actual_arr[seasonal_period:] - actual_arr[:-seasonal_period])
        scale = float(np.mean(naive_errors))
        error = float(np.mean(np.abs(actual_arr - predicted_arr)))
        if scale == 0.0:
            # Same values IEEE division would give, without the numpy
            # divide-by-zero warning.
            return float("nan") if error == 0.0 else float("inf")
        return error / scale
# Example usage: print MAE and RMSE to four decimal places.
# NOTE: `actual` and `predicted` are not defined in this snippet -- presumably
# equal-length numeric arrays supplied by the caller.
# The metrics are static methods, so the instance is only a convenience.
evaluator = TimeSeriesEvaluator()
print(f"MAE: {evaluator.mae(actual, predicted):.4f}")
print(f"RMSE: {evaluator.rmse(actual, predicted):.4f}")
Best Practices
- Normalize time series before feeding to LLMs
- Include temporal context (day of week, month, etc.)
- Use appropriate window sizes for different frequencies
- Combine statistical methods with neural approaches
- Validate with walk-forward cross-validation
- Provide uncertainty estimates with predictions