β° Temporal Analytics
Uber & Netflix Interview Deep Dive
π Interview Question
βΉοΈπ΄ Uber/Netflix Interview Question
"Design a temporal data model for tracking product price history. Implement: 1) SCD Type 2 for slow-changing attributes, 2) Time-travel queries to see data at any point in time, 3) Effective dating for historical analysis, 4) Bi-temporal modeling (valid time vs transaction time)."
Companies: Uber, Netflix | Difficulty: Hard | Time: 45 minutes
π Setup: Product Pricing History
-- Create product pricing history table
CREATE TABLE product_prices (
product_id INT,
product_name VARCHAR(200),
category VARCHAR(100),
price DECIMAL(10, 2),
currency VARCHAR(3) DEFAULT 'USD',
effective_date DATE,
expiration_date DATE,
is_current BOOLEAN DEFAULT true,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
-- Insert historical data
INSERT INTO product_prices (product_id, product_name, category, price, effective_date, expiration_date, is_current) VALUES
(1, 'Laptop Pro', 'Electronics', 1299.99, '2023-01-01', '2023-06-30', false),
(1, 'Laptop Pro', 'Electronics', 1199.99, '2023-07-01', '2023-12-31', false),
(1, 'Laptop Pro', 'Electronics', 1349.99, '2024-01-01', NULL, true),
(2, 'Wireless Mouse', 'Accessories', 49.99, '2023-01-01', '2023-09-30', false),
(2, 'Wireless Mouse', 'Accessories', 59.99, '2023-10-01', NULL, true),
(3, 'USB Cable', 'Accessories', 19.99, '2023-01-01', NULL, true);
π Part 1: SCD Type 2 Implementation
βΉοΈπ SCD Type 2
SCD Type 2 tracks full history by creating new records for each change. Key elements:
- effective_date: When this version became active
- expiration_date: When this version expired (NULL if current)
- is_current: Flag for current record
SCD Type 2 Table Structure
-- Enhanced SCD Type 2 table
CREATE TABLE products_scd2 (
surrogate_key SERIAL PRIMARY KEY,
product_id INT,
product_name VARCHAR(200),
category VARCHAR(100),
price DECIMAL(10, 2),
-- SCD Type 2 columns
effective_date TIMESTAMP DEFAULT NOW(),
expiration_date TIMESTAMP,
is_current BOOLEAN DEFAULT true,
version INT DEFAULT 1,
created_at TIMESTAMP DEFAULT NOW(),
updated_at TIMESTAMP DEFAULT NOW()
);
-- Insert initial records
INSERT INTO products_scd2 (product_id, product_name, category, price, effective_date, is_current, version) VALUES
(1, 'Laptop Pro', 'Electronics', 1299.99, '2023-01-01', true, 1),
(2, 'Wireless Mouse', 'Accessories', 49.99, '2023-01-01', true, 1),
(3, 'USB Cable', 'Accessories', 19.99, '2023-01-01', true, 1);
SCD Type 2 Update Procedure
-- Procedure to update SCD Type 2
CREATE OR REPLACE PROCEDURE update_scd2(
p_product_id INT,
p_new_name VARCHAR,
p_new_category VARCHAR,
p_new_price DECIMAL
)
LANGUAGE plpgsql
AS $$
DECLARE
v_current_record RECORD;
v_hash_diff CHAR(32);
BEGIN
-- Get current record
SELECT * INTO v_current_record
FROM products_scd2
WHERE product_id = p_product_id
AND is_current = true;
-- Calculate hash of new values
v_hash_diff := MD5(p_new_name || p_new_category || p_new_price::text);
-- Check if values changed
IF v_current_record IS NULL OR
MD5(v_current_record.product_name || v_current_record.category || v_current_record.price::text) != v_hash_diff THEN
-- Expire current record
UPDATE products_scd2
SET is_current = false,
expiration_date = NOW(),
updated_at = NOW()
WHERE product_id = p_product_id
AND is_current = true;
-- Insert new version
INSERT INTO products_scd2 (
product_id, product_name, category, price,
effective_date, is_current, version
)
VALUES (
p_product_id, p_new_name, p_new_category, p_new_price,
NOW(), true,
COALESCE(v_current_record.version, 0) + 1
);
RAISE NOTICE 'Updated product % to version %', p_product_id, COALESCE(v_current_record.version, 0) + 1;
ELSE
RAISE NOTICE 'No changes detected for product %', p_product_id;
END IF;
END;
$$;
-- Example update
CALL update_scd2(1, 'Laptop Pro', 'Electronics', 1399.99);
β° Part 2: Time-Travel Queries
-- Query data as of a specific date
SELECT *
FROM products_scd2
WHERE product_id = 1
AND effective_date <= '2023-07-15'
AND (expiration_date > '2023-07-15' OR expiration_date IS NULL);
-- Query all versions of a product
SELECT
product_id,
product_name,
price,
effective_date,
expiration_date,
version,
CASE
WHEN is_current THEN 'Current'
ELSE 'Historical'
END AS status
FROM products_scd2
WHERE product_id = 1
ORDER BY effective_date;
-- Find price at any point in time
CREATE OR REPLACE FUNCTION get_price_at_time(
p_product_id INT,
p_point_in_time TIMESTAMP
)
RETURNS TABLE (
product_name VARCHAR,
price DECIMAL,
effective_date TIMESTAMP
)
LANGUAGE sql
AS $$
SELECT
product_name,
price,
effective_date
FROM products_scd2
WHERE product_id = p_product_id
AND effective_date <= p_point_in_time
AND (expiration_date > p_point_in_time OR expiration_date IS NULL)
ORDER BY effective_date DESC
LIMIT 1;
$$;
-- Usage
SELECT * FROM get_price_at_time(1, '2023-08-15');
π Part 3: Bi-Temporal Modeling
βΉοΈπ Bi-Temporal
Bi-temporal tracks two time dimensions:
- Valid time: When the data was true in the real world
- Transaction time: When the data was recorded in the database
-- Bi-temporal table
CREATE TABLE products_bitemporal (
product_id INT,
product_name VARCHAR(200),
price DECIMAL(10, 2),
-- Valid time: when this was true in reality
valid_from TIMESTAMP,
valid_to TIMESTAMP,
-- Transaction time: when this was recorded
transaction_from TIMESTAMP DEFAULT NOW(),
transaction_to TIMESTAMP,
is_current_valid BOOLEAN DEFAULT true,
is_current_transaction BOOLEAN DEFAULT true
);
-- Insert with bi-temporal data
INSERT INTO products_bitemporal (product_id, product_name, price, valid_from, valid_to, transaction_from) VALUES
(1, 'Laptop Pro', 1299.99, '2023-01-01', '2023-06-30', '2023-01-01'),
(1, 'Laptop Pro', 1199.99, '2023-07-01', '2023-12-31', '2023-07-01'),
(1, 'Laptop Pro', 1349.99, '2024-01-01', NULL, '2024-01-01');
-- Query: What did we think was true on a specific date?
SELECT *
FROM products_bitemporal
WHERE product_id = 1
AND valid_from <= '2023-08-15'
AND (valid_to > '2023-08-15' OR valid_to IS NULL)
AND transaction_from <= '2023-08-15'
AND (transaction_to > '2023-08-15' OR transaction_to IS NULL);
-- Query: What is the current state?
SELECT *
FROM products_bitemporal
WHERE product_id = 1
AND is_current_valid = true
AND is_current_transaction = true;
π Part 4: Time-Series Aggregations
-- Daily price changes
WITH price_changes AS (
SELECT
product_id,
product_name,
effective_date,
price,
LAG(price) OVER (
PARTITION BY product_id
ORDER BY effective_date
) AS previous_price
FROM products_scd2
WHERE is_current = true OR expiration_date IS NOT NULL
)
SELECT
product_id,
product_name,
effective_date,
price,
previous_price,
price - previous_price AS price_change,
ROUND(
(price - previous_price) * 100.0 / NULLIF(previous_price, 0),
2
) AS pct_change
FROM price_changes
WHERE previous_price IS NOT NULL
ORDER BY product_id, effective_date;
Price Duration Analysis
-- Calculate how long each price was in effect
SELECT
product_id,
product_name,
price,
effective_date,
COALESCE(expiration_date, NOW()) AS ended_date,
COALESCE(expiration_date, NOW())::date - effective_date::date AS days_at_price,
version
FROM products_scd2
ORDER BY product_id, effective_date;
π Part 5: Temporal Joins
-- Join product prices with sales at specific point in time
WITH sales_data AS (
SELECT
product_id,
sale_date,
quantity,
amount
FROM sales
)
SELECT
s.product_id,
s.sale_date,
s.quantity,
s.amount,
pp.price AS price_at_sale,
pp.product_name
FROM sales_data s
JOIN products_scd2 pp
ON s.product_id = pp.product_id
AND s.sale_date >= pp.effective_date
AND (s.sale_date < pp.expiration_date OR pp.expiration_date IS NULL)
WHERE pp.is_current = true OR pp.expiration_date IS NOT NULL;
π― Quiz Section
π Best Practices for Interviews
π‘β Temporal Data Best Practices
1. Always Include Timestamps:
-- Add effective/expiration dates
effective_date TIMESTAMP DEFAULT NOW(),
expiration_date TIMESTAMP,
is_current BOOLEAN DEFAULT true
2. Create Indexes for Time Queries:
-- Index for point-in-time queries
CREATE INDEX idx_effective ON products(effective_date, expiration_date);
CREATE INDEX idx_current ON products(is_current) WHERE is_current = true;
3. Use Surrogate Keys:
-- Natural keys may change; surrogates are stable
surrogate_key SERIAL PRIMARY KEY,
product_id INT, -- Natural key (may change)
4. Automate SCD Updates:
-- Use stored procedures for consistent SCD handling
CALL update_scd2(product_id, new_values...);
5. Consider Partitioning:
-- Partition by effective_date for large temporal tables
CREATE TABLE prices (
...
) PARTITION BY RANGE (effective_date);
β οΈβ οΈ Common Pitfalls
- Missing indexes: Slow point-in-time queries
- Overlapping date ranges: Data integrity issues
- Forgetting NULL handling: Current records have NULL expiration
- Not tracking transaction time: Can't audit when changes occurred
- Large history tables: Need archival strategy