JSON Processing in SQL
Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber
PostgreSQL JSON Operators
-- Basic JSON extraction operators
SELECT
id,
data->>'name' AS name, -- Text extraction
data->'address'->>'city' AS city, -- Nested extraction
data->'preferences'->'notifications'->>'email' AS email_pref,
(data->>'age')::INT AS age, -- Type casting
jsonb_array_length(data->'tags') AS tag_count
FROM users;
βΉοΈ
Key Insight: Use -> for JSON object (returns JSON), ->> for text extraction. For nested access, chain operators. PostgreSQL's jsonb type is preferred over json for better performance and indexing.
JSON Path Expressions
-- Advanced JSON path queries
SELECT
id,
data @? '$.orders[*]?(@.amount > 1000)' AS has_large_orders,
jsonb_path_query_array(data, '$.tags[*]') AS all_tags,
jsonb_path_query_first(data, '$.orders[0].items[0].name') AS first_item,
jsonb_path_query(data, '$.orders[*]?(@.status == "pending")') AS pending_orders
FROM users;
BigQuery JSON Functions
-- BigQuery JSON extraction
SELECT
id,
JSON_EXTRACT_SCALAR(data, '$.name') AS name,
JSON_EXTRACT_SCALAR(data, '$.address.city') AS city,
JSON_EXTRACT(data, '$.tags') AS tags_json,
JSON_EXTRACT_ARRAY(data, '$.orders') AS orders_array,
ARRAY(
SELECT JSON_EXTRACT_SCALAR(item, '$.name')
FROM UNNEST(JSON_EXTRACT_ARRAY(data, '$.tags')) AS item
) AS tag_names
FROM `project.dataset.users`;
JSON Aggregation
-- Aggregate rows into JSON array
SELECT
department_id,
jsonb_agg(
jsonb_build_object(
'employee_id', employee_id,
'name', name,
'salary', salary
)
) AS employees,
jsonb_agg(name ORDER BY salary DESC) AS names_by_salary
FROM employees
GROUP BY department_id;
-- Aggregate into JSON object
SELECT
jsonb_object_agg(
employee_id::TEXT,
jsonb_build_object('name', name, 'salary', salary)
) AS employee_map
FROM employees
WHERE department_id = 1;
JSON Transformation
-- Transform JSON structure
SELECT
id,
jsonb_build_object(
'full_name', data->>'first_name' || ' ' || data->>'last_name',
'contact', jsonb_build_object(
'email', data->>'email',
'phone', data->'phone_numbers'->0
),
'order_count', jsonb_array_length(data->'orders'),
'total_spent', (
SELECT SUM((item->>'amount')::NUMERIC)
FROM jsonb_array_elements(data->'orders') AS item
)
) AS transformed_data
FROM users;
β οΈ
Performance Tip: Use jsonb instead of json in PostgreSQL. jsonb is stored in a decomposed binary format that supports GIN indexing and is significantly faster for queries.
JSON Indexing
-- GIN index for general JSON queries
CREATE INDEX idx_users_data ON users USING GIN (data);
-- Specific path index
CREATE INDEX idx_users_email ON users ((data->>'email'));
-- Partial index for specific values
CREATE INDEX idx_active_users ON users ((data->>'status'))
WHERE data->>'status' = 'active';
-- Composite index on JSON fields
CREATE INDEX idx_users_name_email ON users (
(data->>'last_name'),
(data->>'email')
);
JSON Array Processing
-- Unnest JSON array
SELECT
u.id,
u.data->>'name' AS user_name,
tag.value AS tag
FROM users u,
LATERAL jsonb_array_elements_text(u.data->'tags') AS tag(value);
-- Filter JSON array elements
SELECT
u.id,
jsonb_path_query_array(
u.data->'orders',
'$[*]?(@.amount > 100)'
) AS large_orders
FROM users u;
-- Aggregate JSON array values
SELECT
u.id,
(SELECT SUM((item->>'amount')::NUMERIC)
FROM jsonb_array_elements(u.data->'orders') AS item
WHERE item->>'status' = 'completed') AS total_completed
FROM users u;
JSON Merge and Patch
-- Merge two JSON objects
SELECT
jsonb_merge_patch(
'{"name": "John", "age": 30}'::jsonb,
'{"age": 31, "city": "NYC"}'::jsonb
) AS merged_result;
-- Result: {"name": "John", "age": 31, "city": "NYC"}
-- Patch with null removal
SELECT
jsonb_merge_patch(
'{"a": 1, "b": 2, "c": 3}'::jsonb,
'{"b": null, "d": 4}'::jsonb
) AS patched_result;
-- Result: {"a": 1, "c": 3, "d": 4}
JSON in UPDATE Statements
-- Update specific JSON path
UPDATE users
SET data = jsonb_set(
data,
'{address, city}',
'"San Francisco"'
)
WHERE id = 101;
-- Add new key to JSON object
UPDATE users
SET data = data || '{"newsletter_subscribed": true}'::jsonb
WHERE data->>'status' = 'active';
-- Remove key from JSON object
UPDATE users
SET data = data - 'temp_field'
WHERE data ? 'temp_field';
-- Update array element
UPDATE users
SET data = jsonb_set(
data,
'{preferences, 0}',
'"new_value"'
)
WHERE id = 101;
JSON Search Queries
-- Check if key exists
SELECT * FROM users WHERE data ? 'email';
-- Check if any array element matches
SELECT * FROM users WHERE data ?| ARRAY['email', 'phone'];
-- Check all array elements match
SELECT * FROM users WHERE data ?& ARRAY['email', 'phone', 'address'];
-- Full-text search within JSON
SELECT * FROM users
WHERE to_tsvector(data->>'description') @@ to_tsquery('english', 'SQL & interview');
Complex JSON Aggregation
-- Build nested JSON from multiple tables
SELECT
jsonb_build_object(
'customer', jsonb_build_object(
'id', c.customer_id,
'name', c.customer_name,
'email', c.email
),
'orders', (
SELECT jsonb_agg(
jsonb_build_object(
'order_id', o.order_id,
'date', o.order_date,
'total', o.total_amount,
'items', (
SELECT jsonb_agg(
jsonb_build_object(
'product', p.product_name,
'quantity', oi.quantity,
'price', oi.unit_price
)
)
FROM order_items oi
INNER JOIN products p ON oi.product_id = p.product_id
WHERE oi.order_id = o.order_id
)
)
)
FROM orders o
WHERE o.customer_id = c.customer_id
)
) AS customer_data
FROM customers c
WHERE c.customer_id = 101;
Follow-Up Questions
- What's the difference between
jsonandjsonbin PostgreSQL? - How would you index a JSON column for efficient range queries?
- Explain the performance implications of using JSON vs normalized tables.
- How do you handle schema evolution in JSON documents?
- What's the best approach for querying deeply nested JSON structures?
- How would you validate JSON data against a schema in SQL?