Interview Question: "Explain the difference between valid time and transaction time. How do you query historical data? What are the benefits of system-versioned temporal tables?" — Asked at Oracle, SAP, Teradata for Data Architect roles
ℹ️
Difficulty: Advanced | Companies: Oracle, SAP, Teradata, Microsoft, IBM | Time: 60-75 minutes
Temporal Data Model
Temporal databases track data across time with two dimensions:
-- Create bitemporal employee table
CREATE TABLE employees_temporal (
emp_id INT,
name VARCHAR(100),
department VARCHAR(50),
salary DECIMAL(12,2),
-- Valid time: when the fact is true in reality
valid_start DATE,
valid_end DATE DEFAULT '9999-12-31',
-- Transaction time: when the fact was stored in database
txn_start TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
txn_end TIMESTAMP DEFAULT '9999-12-31 23:59:59',
PRIMARY KEY (emp_id, valid_start, txn_start)
);
-- Insert initial data
INSERT INTO employees_temporal VALUES
(1, 'Alice', 'Engineering', 100000.00, '2020-01-01', '9999-12-31', '2020-01-01 10:00:00', '9999-12-31 23:59:59'),
(2, 'Bob', 'Marketing', 85000.00, '2020-01-01', '9999-12-31', '2020-01-01 10:00:00', '9999-12-31 23:59:59'),
(3, 'Charlie', 'Engineering', 95000.00, '2020-01-01', '9999-12-31', '2020-01-01 10:00:00', '9999-12-31 23:59:59');
-- Update Alice's salary (creates new version)
UPDATE employees_temporal
SET valid_end = '2022-12-31', txn_end = CURRENT_TIMESTAMP
WHERE emp_id = 1 AND valid_end = '9999-12-31';
INSERT INTO employees_temporal VALUES
(1, 'Alice', 'Engineering', 120000.00, '2023-01-01', '9999-12-31', CURRENT_TIMESTAMP, '9999-12-31 23:59:59');
-- Update Bob's department
UPDATE employees_temporal
SET valid_end = '2023-06-30', txn_end = CURRENT_TIMESTAMP
WHERE emp_id = 2 AND valid_end = '9999-12-31';
INSERT INTO employees_temporal VALUES
(2, 'Bob', 'Sales', 90000.00, '2023-07-01', '9999-12-31', CURRENT_TIMESTAMP, '9999-12-31 23:59:59');
System-Versioned Temporal Tables
PostgreSQL doesn't have built-in system-versioning, but we can implement it:
-- Create audit log table
CREATE TABLE employees_audit (
audit_id SERIAL PRIMARY KEY,
emp_id INT,
name VARCHAR(100),
department VARCHAR(50),
salary DECIMAL(12,2),
valid_start DATE,
valid_end DATE,
operation CHAR(1), -- 'I'nsert, 'U'pdate, 'D'elete
changed_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
changed_by VARCHAR(100) DEFAULT CURRENT_USER
);
-- Create trigger function for auto-versioning
CREATE OR REPLACE FUNCTION temporal_versioning()
RETURNS TRIGGER AS $$
BEGIN
IF TG_OP = 'UPDATE' THEN
-- Close old version
INSERT INTO employees_audit
(emp_id, name, department, salary, valid_start, valid_end, operation)
VALUES
(OLD.emp_id, OLD.name, OLD.department, OLD.salary,
OLD.valid_start, OLD.valid_end, 'U');
-- Update valid_end of old record
NEW.valid_end := CURRENT_DATE;
OLD.valid_end := CURRENT_DATE;
RETURN OLD;
ELSIF TG_OP = 'DELETE' THEN
INSERT INTO employees_audit
(emp_id, name, department, salary, valid_start, valid_end, operation)
VALUES
(OLD.emp_id, OLD.name, OLD.department, OLD.salary,
OLD.valid_start, OLD.valid_end, 'D');
RETURN OLD;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER temporal_trigger
BEFORE UPDATE OR DELETE ON employees_temporal
FOR EACH ROW EXECUTE FUNCTION temporal_versioning();
Time Travel Queries
-- Query as of specific date
SELECT emp_id, name, department, salary
FROM employees_temporal
WHERE valid_start <= '2023-06-15'
AND valid_end > '2023-06-15'
AND txn_start <= '2023-06-15 12:00:00'
AND txn_end > '2023-06-15 12:00:00';
-- Query current state
SELECT emp_id, name, department, salary
FROM employees_temporal
WHERE valid_end = '9999-12-31'
AND txn_end = '9999-12-31 23:59:59';
-- Query history of specific employee
SELECT
emp_id,
name,
department,
salary,
valid_start,
valid_end,
txn_start,
EXTRACT(DAY FROM valid_end - valid_start) AS days_in_position
FROM employees_temporal
WHERE emp_id = 1
ORDER BY valid_start;
-- Find all changes during period
SELECT
emp_id,
name,
department,
salary,
valid_start,
valid_end,
operation,
changed_at,
changed_by
FROM employees_audit
WHERE changed_at BETWEEN '2023-01-01' AND '2023-12-31'
ORDER BY changed_at;
Time Travel Output:
| emp_id | name | department | salary | valid_start | valid_end | days_in_position |
|---|---|---|---|---|---|---|
| 1 | Alice | Engineering | 100000.00 | 2020-01-01 | 2022-12-31 | 1095 |
| 1 | Alice | Engineering | 120000.00 | 2023-01-01 | 9999-12-31 | 2922065 |
Bitemporal Query Patterns
-- Bitemporal range overlap query
-- Find all versions that were valid and stored during specific period
SELECT emp_id, name, department, salary
FROM employees_temporal
WHERE valid_start < '2023-12-31'
AND valid_end > '2023-01-01'
AND txn_start < '2023-12-31 23:59:59'
AND txn_end > '2023-01-01 00:00:00';
-- Bitemporal intersection
-- Find versions valid at time T1 that were stored at time T2
WITH query_time AS (
SELECT
'2023-06-15'::DATE AS valid_time,
'2023-07-01 12:00:00'::TIMESTAMP AS txn_time
)
SELECT e.emp_id, e.name, e.department, e.salary
FROM employees_temporal e, query_time q
WHERE e.valid_start <= q.valid_time
AND e.valid_end > q.valid_time
AND e.txn_start <= q.txn_time
AND e.txn_end > q.txn_time;
-- Temporal join: Match employees with their departments at specific time
CREATE TABLE departments_temporal (
dept_id INT,
dept_name VARCHAR(50),
budget DECIMAL(15,2),
valid_start DATE,
valid_end DATE DEFAULT '9999-12-31'
);
INSERT INTO departments_temporal VALUES
(1, 'Engineering', 500000.00, '2020-01-01', '9999-12-31'),
(2, 'Marketing', 300000.00, '2020-01-01', '9999-12-31'),
(3, 'Sales', 400000.00, '2023-07-01', '9999-12-31');
SELECT
e.emp_id,
e.name,
e.salary,
d.dept_name,
d.budget
FROM employees_temporal e
JOIN departments_temporal d ON e.department = d.dept_name
WHERE e.valid_start <= '2023-06-15'
AND e.valid_end > '2023-06-15'
AND d.valid_start <= '2023-06-15'
AND d.valid_end > '2023-06-15';
Period Predicates
The SQL standard defines temporal predicates:
-- Implement period predicates
CREATE OR REPLACE FUNCTION period_overlaps(
start1 DATE, end1 DATE,
start2 DATE, end2 DATE
) RETURNS BOOLEAN AS $$
BEGIN
RETURN start1 <= end2 AND start2 <= end1;
END;
$$ LANGUAGE plpgsql;
CREATE OR REPLACE FUNCTION period_precedes(
start1 DATE, end1 DATE,
start2 DATE, end2 DATE
) RETURNS BOOLEAN AS $$
BEGIN
RETURN end1 < start2;
END;
$$ LANGUAGE plpgsql;
-- Query using period predicates
SELECT
emp_id,
name,
valid_start,
valid_end,
CASE
WHEN period_precedes(valid_start, valid_end, '2023-01-01', '2023-12-31')
THEN 'Precedes query period'
WHEN valid_start <= '2023-12-31' AND '2023-01-01' <= valid_end
THEN 'Overlaps query period'
ELSE 'Succeeds query period'
END AS temporal_relationship
FROM employees_temporal
WHERE emp_id = 1
ORDER BY valid_start;
Temporal Aggregation
-- Aggregate over temporal periods
WITH continuous_periods AS (
SELECT
emp_id,
name,
salary,
valid_start,
valid_end,
-- Find gaps in time
LAG(valid_end) OVER (PARTITION BY emp_id ORDER BY valid_start) AS prev_end
FROM employees_temporal
)
SELECT
emp_id,
name,
MIN(valid_start) AS period_start,
MAX(valid_end) AS period_end,
COUNT(*) AS version_count,
AVG(salary) AS avg_salary,
SUM(valid_end - valid_start) AS total_days
FROM continuous_periods
GROUP BY emp_id, name
ORDER BY emp_id;
-- Temporal lead/lag analysis
SELECT
emp_id,
name,
salary,
valid_start,
valid_end,
LAG(salary) OVER (PARTITION BY emp_id ORDER BY valid_start) AS prev_salary,
salary - LAG(salary) OVER (PARTITION BY emp_id ORDER BY valid_start) AS salary_change,
ROUND(
(salary - LAG(salary) OVER (PARTITION BY emp_id ORDER BY valid_start)) * 100.0 /
NULLIF(LAG(salary) OVER (PARTITION BY emp_id ORDER BY valid_start), 0),
2
) AS pct_change
FROM employees_temporal
ORDER BY emp_id, valid_start;
Temporal Constraints
-- Ensure no overlapping valid times for same entity
CREATE OR REPLACE FUNCTION check_no_overlap()
RETURNS TRIGGER AS $$
BEGIN
IF EXISTS (
SELECT 1 FROM employees_temporal
WHERE emp_id = NEW.emp_id
AND valid_start < NEW.valid_end
AND valid_end > NEW.valid_start
AND txn_start != NEW.txn_start
) THEN
RAISE EXCEPTION 'Overlapping valid times for employee %', NEW.emp_id;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER prevent_overlap
BEFORE INSERT OR UPDATE ON employees_temporal
FOR EACH ROW EXECUTE FUNCTION check_no_overlap();
Mathematical Properties
For bitemporal intervals and :
The volume of a bitemporal region:
ℹ️
ISO 8601: Use ISO 8601 format for temporal data: 2023-06-15T12:30:00Z for timestamps, 2023-06-15 for dates.
Temporal Indexing
-- Index for temporal queries
CREATE INDEX idx_employees_temporal_valid
ON employees_temporal(valid_start, valid_end);
CREATE INDEX idx_employees_temporal_txn
ON employees_temporal(txn_start, txn_end);
-- Composite index for bitemporal queries
CREATE INDEX idx_employees_bitemporal
ON employees_temporal(emp_id, valid_start, valid_end, txn_start, txn_end);
-- GiST index for range queries
CREATE INDEX idx_employees_valid_range
ON employees_temporal USING gist(
tstzrange(valid_start::timestamp, valid_end::timestamp)
);
Temporal Report Patterns
-- Generate monthly snapshot report
WITH months AS (
SELECT generate_series(
'2020-01-01'::date,
'2024-01-01'::date,
'1 month'::interval
)::date AS month_start
)
SELECT
m.month_start,
e.emp_id,
e.name,
e.department,
e.salary
FROM months m
CROSS JOIN employees_temporal e
WHERE e.valid_start <= m.month_start
AND e.valid_end > m.month_start
ORDER BY m.month_start, e.emp_id;
-- Year-over-year comparison
SELECT
e1.emp_id,
e1.name,
e1.salary AS salary_2023,
e2.salary AS salary_2024,
e2.salary - e1.salary AS salary_change,
ROUND((e2.salary - e1.salary) * 100.0 / e1.salary, 2) AS pct_change
FROM employees_temporal e1
JOIN employees_temporal e2 ON e1.emp_id = e2.emp_id
WHERE e1.valid_start <= '2023-06-15'
AND e1.valid_end > '2023-06-15'
AND e2.valid_start <= '2024-06-15'
AND e2.valid_end > '2024-06-15';
⚠️
Storage Overhead: Temporal tables can grow significantly. Consider partitioning by time period and archiving old versions.