🎉 75% of content is free forever — Unlock Premium from $10/mo →
CW
Search courses…
💼 Servicesℹ️ About✉️ ContactView Pricing Plansfrom $10

CTE Patterns: Lateral Joins, Apply, Unnest, Generate Series

Advanced SQLCTE Patterns⭐ Premium

Advertisement

Interview Question: "Explain the difference between LATERAL JOIN and CROSS APPLY. When would you use GENERATE_SERIES vs recursive CTE? How does UNNEST work with arrays?" — Asked at Amazon, Google, Netflix for Senior Data Engineer roles

ℹ️

Difficulty: Advanced | Companies: Amazon, Google, Netflix, Uber, Stripe | Time: 45-60 minutes

LATERAL Join Fundamentals

LATERAL allows subqueries to reference preceding FROM items:

LATERAL:Subquery can reference outer table\text{LATERAL}: \text{Subquery can reference } \text{outer} \text{ table}
-- Create sample data
CREATE TABLE departments (
    dept_id INT PRIMARY KEY,
    dept_name VARCHAR(100)
);

CREATE TABLE employees (
    emp_id INT PRIMARY KEY,
    name VARCHAR(100),
    dept_id INT,
    salary DECIMAL(12,2)
);

INSERT INTO departments VALUES
(1, 'Engineering'),
(2, 'Marketing'),
(3, 'Sales');

INSERT INTO employees VALUES
(1, 'Alice', 1, 120000),
(2, 'Bob', 1, 110000),
(3, 'Charlie', 1, 100000),
(4, 'Diana', 2, 95000),
(5, 'Eve', 2, 90000),
(6, 'Frank', 3, 85000),
(7, 'Grace', 3, 80000),
(8, 'Heidi', 3, 75000);

-- LATERAL: Get top 2 employees per department
SELECT 
    d.dept_name,
    e.name,
    e.salary
FROM departments d
CROSS JOIN LATERAL (
    SELECT name, salary
    FROM employees
    WHERE dept_id = d.dept_id
    ORDER BY salary DESC
    LIMIT 2
) e
ORDER BY d.dept_name, e.salary DESC;

-- LATERAL with array
SELECT 
    d.dept_name,
    e.top_employees
FROM departments d,
LATERAL (
    SELECT ARRAY_AGG(name ORDER BY salary DESC) AS top_employees
    FROM employees
    WHERE dept_id = d.dept_id
) e;

Output:

dept_namenamesalary
EngineeringAlice120000
EngineeringBob110000
MarketingDiana95000
MarketingEve90000
SalesFrank85000
SalesGrace80000

CROSS APPLY (SQL Server)

-- SQL Server syntax (PostgreSQL uses LATERAL)
SELECT 
    d.dept_name,
    e.name,
    e.salary
FROM departments d
CROSS APPLY (
    SELECT TOP 2 name, salary
    FROM employees
    WHERE dept_id = d.dept_id
    ORDER BY salary DESC
) e;

-- OUTER APPLY (like LEFT JOIN LATERAL)
SELECT 
    d.dept_name,
    e.name,
    e.salary
FROM departments d
OUTER APPLY (
    SELECT TOP 1 name, salary
    FROM employees
    WHERE dept_id = d.dept_id
        AND salary > 100000
) e;

UNNEST with Arrays

-- Create table with array column
CREATE TABLE user_preferences (
    user_id INT PRIMARY KEY,
    name VARCHAR(100),
    interests TEXT[],
    scores INT[]
);

INSERT INTO user_preferences VALUES
(1, 'Alice', ARRAY['hiking', 'reading', 'coding'], ARRAY[90, 85, 95]),
(2, 'Bob', ARRAY['gaming', 'cooking'], ARRAY[80, 70]),
(3, 'Charlie', ARRAY['travel', 'photography', 'music', 'sports'], ARRAY[88, 75, 92, 85]);

-- UNNEST array to rows
SELECT 
    name,
    UNNEST(interests) AS interest
FROM user_preferences
ORDER BY name, interest;

-- UNNEST with ordinality
SELECT 
    name,
    interest,
    position
FROM user_preferences,
LATERAL UNNEST(interests) WITH ORDINALITY AS t(interest, position)
ORDER BY name, position;

-- UNNEST multiple arrays simultaneously
SELECT 
    name,
    interest,
    score
FROM user_preferences,
LATERAL UNNEST(interests, scores) AS t(interest, score)
ORDER BY name, score DESC;

Output:

nameinterestscore
Alicecoding95
Alicehiking90
Alicereading85
Charliemusic92
Charlietravel88
Charliesports85
Charliephotography75
Bobgaming80
Bobcooking70

GENERATE_SERIES Patterns

-- Generate date series
SELECT 
    generate_series(
        '2024-01-01'::date,
        '2024-12-31'::date,
        '1 day'::interval
    )::date AS date_series;

-- Generate number series with step
SELECT 
    generate_series(1, 100, 5) AS number_series;

-- Fill gaps in time series
WITH date_series AS (
    SELECT generate_series(
        '2024-01-01'::date,
        '2024-01-10'::date,
        '1 day'::interval
    )::date AS sale_date
)
SELECT 
    ds.sale_date,
    COALESCE(s.amount, 0) AS amount
FROM date_series ds
LEFT JOIN sales s ON ds.sale_date = s.sale_date
ORDER BY ds.sale_date;

-- Generate hourly intervals
SELECT 
    generate_series(
        '2024-01-01 00:00:00'::timestamp,
        '2024-01-01 23:00:00'::timestamp,
        '1 hour'::interval
    ) AS hour_series;

Advanced CTE Patterns

-- Recursive CTE with LATERAL
WITH RECURSIVE org_chart AS (
    SELECT 
        emp_id,
        name,
        1 AS level,
        ARRAY[name] AS path
    FROM employees
    WHERE emp_id = 1
    
    UNION ALL
    
    SELECT 
        e.emp_id,
        e.name,
        oc.level + 1,
        oc.path || e.name
    FROM org_chart oc
    CROSS JOIN LATERAL (
        SELECT emp_id, name
        FROM employees
        WHERE manager_id = oc.emp_id
    ) e
    WHERE oc.level < 5
)
SELECT * FROM org_chart;

-- CTE with window functions
WITH ranked_employees AS (
    SELECT 
        dept_id,
        name,
        salary,
        RANK() OVER (PARTITION BY dept_id ORDER BY salary DESC) AS rank
    FROM employees
)
SELECT 
    d.dept_name,
    r.name,
    r.salary,
    r.rank
FROM ranked_employees r
JOIN departments d ON r.dept_id = d.dept_id
WHERE r.rank <= 2
ORDER BY d.dept_name, r.rank;

JSONB with LATERAL

-- Create table with JSONB
CREATE TABLE orders (
    order_id SERIAL PRIMARY KEY,
    customer_id INT,
    items JSONB
);

INSERT INTO orders (customer_id, items) VALUES
(1, '[{"product": "A", "qty": 2, "price": 10}, {"product": "B", "qty": 1, "price": 20}]'),
(2, '[{"product": "C", "qty": 3, "price": 15}]'),
(3, '[{"product": "A", "qty": 1, "price": 10}, {"product": "C", "qty": 2, "price": 15}]');

-- Unnest JSON array with LATERAL
SELECT 
    o.order_id,
    item->>'product' AS product,
    (item->>'qty')::int AS quantity,
    (item->>'price')::decimal AS price
FROM orders o,
LATERAL jsonb_array_elements(o.items) AS item
ORDER BY o.order_id;

-- Aggregate JSON items
SELECT 
    o.order_id,
    SUM((item->>'qty')::int * (item->>'price')::decimal) AS total
FROM orders o,
LATERAL jsonb_array_elements(o.items) AS item
GROUP BY o.order_id;

Mathematical Properties

For a LATERAL join with nn outer rows and mm inner rows:

Complexity=O(n×m)\text{Complexity} = O(n \times m)

For GENERATE_SERIES with range [a,b][a, b] and step ss:

Rows Generated=bas+1\text{Rows Generated} = \left\lfloor \frac{b - a}{s} \right\rfloor + 1

ℹ️

Performance Tip: LATERAL joins are powerful but can be expensive. Ensure the inner query is indexed and returns few rows per outer row.

Time Series Gap Filling

-- Create time series with gaps
CREATE TABLE metrics (
    metric_time TIMESTAMP,
    value DECIMAL(10,2)
);

INSERT INTO metrics VALUES
('2024-01-01 00:00:00', 100),
('2024-01-01 01:00:00', 110),
('2024-01-01 03:00:00', 95),  -- Gap at hour 2
('2024-01-01 05:00:00', 105); -- Gap at hour 4

-- Fill gaps with interpolation
WITH time_series AS (
    SELECT generate_series(
        '2024-01-01 00:00:00'::timestamp,
        '2024-01-01 05:00:00'::timestamp,
        '1 hour'::interval
    ) AS ts
)
SELECT 
    ts.ts AS metric_time,
    COALESCE(m.value, 
        LAG(m.value) OVER (ORDER BY ts.ts) + 
        (ts.ts - LAG(ts.ts) OVER (ORDER BY ts.ts))::interval / 
        (LEAD(ts.ts) OVER (ORDER BY ts.ts) - LAG(ts.ts) OVER (ORDER BY ts.ts))::interval *
        (LEAD(m.value) OVER (ORDER BY ts.ts) - LAG(m.value) OVER (ORDER BY ts.ts))
    ) AS interpolated_value
FROM time_series ts
LEFT JOIN metrics m ON ts.ts = m.metric_time
ORDER BY ts.ts;

⚠️

Common Pitfall: LATERAL with correlated subqueries can cause N+1 query problems. Always check execution plans.

Advertisement