Interview Question: "What's the difference between JSON and JSONB in PostgreSQL? How do you index JSON fields? Explain JSON_TABLE and its use cases." — Asked at MongoDB, Couchbase, Elastic for Senior Data Engineer roles
ℹ️
Difficulty: Advanced | Companies: MongoDB, Couchbase, Elastic, Stripe, Shopify | Time: 45-60 minutes
JSON Data Types
PostgreSQL offers two JSON types:
-- Create table with JSON data
CREATE TABLE user_events (
event_id SERIAL PRIMARY KEY,
user_id INT,
event_type VARCHAR(50),
event_data JSONB,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Insert JSON data
INSERT INTO user_events (user_id, event_type, event_data) VALUES
(1, 'purchase', '{
"product_id": 123,
"product_name": "Laptop",
"price": 999.99,
"quantity": 1,
"payment": {
"method": "credit_card",
"last_four": "4242",
"currency": "USD"
},
"shipping": {
"address": {
"street": "123 Main St",
"city": "San Francisco",
"state": "CA",
"zip": "94102"
},
"method": "express",
"cost": 15.00
},
"tags": ["electronics", "computers", "sale"]
}'),
(2, 'page_view', '{
"page": "/products",
"referrer": "google.com",
"duration_seconds": 45,
"device": {
"type": "mobile",
"os": "iOS",
"browser": "Safari"
},
"location": {
"country": "US",
"city": "New York"
}
}'),
(3, 'signup', '{
"method": "email",
"plan": "premium",
"features": ["analytics", "api_access", "priority_support"],
"referral": {
"source": "friend",
"code": "FRIEND2024"
}
}');
JSON Path Expressions
PostgreSQL uses JSONB path expressions:
-- Basic JSON access operators
SELECT
event_data -> 'product_id' AS product_id_json,
event_data ->> 'product_id' AS product_id_text,
event_data -> 'payment' ->> 'method' AS payment_method,
event_data #>> '{shipping,address,city}' AS city
FROM user_events
WHERE event_type = 'purchase';
-- JSON path expressions (PostgreSQL 12+)
SELECT
event_data @? '$.payment.method ? (@ == "credit_card")' AS is_credit_card,
event_data @@ '$.price > 500' AS is_expensive,
event_data ->> '$.tags[0]' AS first_tag,
event_data ->> '$.tags[*]' AS all_tags
FROM user_events
WHERE event_type = 'purchase';
Output:
| product_id_json | product_id_text | payment_method | city |
|---|---|---|---|
| 123 | 123 | credit_card | San Francisco |
JSON_TABLE Equivalent
PostgreSQL doesn't have JSON_TABLE (Oracle/MySQL feature), but we can emulate it:
-- Emulate JSON_TABLE with LATERAL JOIN
SELECT
u.event_id,
u.user_id,
p.product_id,
p.product_name,
p.price,
p.quantity
FROM user_events u
CROSS JOIN LATERAL jsonb_to_recordset(u.event_data->'items') AS p(
product_id INT,
product_name TEXT,
price DECIMAL,
quantity INT
)
WHERE u.event_type = 'purchase';
-- Unnest JSON arrays
SELECT
event_id,
user_id,
jsonb_array_elements_text(event_data->'tags') AS tag
FROM user_events
WHERE event_type = 'purchase';
-- With ordinality for array position
SELECT
event_id,
user_id,
t.tag,
t.ordinality AS position
FROM user_events,
LATERAL jsonb_array_elements_text(event_data->'tags') WITH ORDINALITY AS t(tag, ordinality)
WHERE event_type = 'purchase';
Output:
| event_id | user_id | tag | position |
|---|---|---|---|
| 1 | 1 | electronics | 1 |
| 1 | 1 | computers | 2 |
| 1 | 1 | sale | 3 |
JSON Aggregation
-- Aggregate rows into JSON array
SELECT
event_type,
jsonb_agg(event_data) AS events
FROM user_events
GROUP BY event_type;
-- Aggregate into JSON object
SELECT
user_id,
jsonb_object_agg(event_type, event_data) AS user_events
FROM user_events
GROUP BY user_id;
-- Build JSON from scratch
SELECT
jsonb_build_object(
'user_id', user_id,
'event_count', COUNT(*),
'event_types', jsonb_agg(DISTINCT event_type)
) AS user_summary
FROM user_events
GROUP BY user_id;
Output:
{
"user_id": 1,
"event_count": 1,
"event_types": ["purchase"]
}
JSONB Indexing
-- GIN index for containment queries
CREATE INDEX idx_events_gin ON user_events USING gin(event_data);
-- Specific path index
CREATE INDEX idx_events_product ON user_events USING btree((event_data->>'product_id'));
-- Expression index for common queries
CREATE INDEX idx_events_price ON user_events USING btree(
((event_data->>'price')::decimal)
) WHERE event_type = 'purchase';
-- Check index usage
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM user_events
WHERE event_data @> '{"event_type": "purchase"}';
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM user_events
WHERE event_data->>'product_id' = '123';
-- Compare query plans
EXPLAIN (ANALYZE, BUFFERS)
SELECT * FROM user_events
WHERE event_data->'payment'->>'method' = 'credit_card';
JSON Transformation Functions
-- Convert JSON to record
SELECT *
FROM jsonb_to_record(event_data) AS x(
product_id INT,
product_name TEXT,
price DECIMAL
)
WHERE event_type = 'purchase';
-- Convert JSON array to rows
SELECT *
FROM jsonb_to_recordset('[{"id": 1, "name": "A"}, {"id": 2, "name": "B"}]') AS x(
id INT,
name TEXT
);
-- Flatten nested JSON
SELECT
event_id,
event_data->>'user_id' AS user_id,
event_data->'shipping'->'address'->>'street' AS street,
event_data->'shipping'->'address'->>'city' AS city
FROM user_events
WHERE event_type = 'purchase';
-- Pretty print JSON
SELECT jsonb_pretty(event_data)
FROM user_events
WHERE event_id = 1;
JSON Query Patterns
-- Filter by nested value
SELECT * FROM user_events
WHERE event_data @> '{
"payment": {"method": "credit_card"}
}';
-- Array contains
SELECT * FROM user_events
WHERE event_data->'tags' @> '["electronics"]';
-- Multiple conditions
SELECT * FROM user_events
WHERE event_data->>'event_type' = 'purchase'
AND (event_data->>'price')::decimal > 500
AND event_data->'shipping'->>'method' = 'express';
-- Regex on JSON values
SELECT * FROM user_events
WHERE event_data->>'product_name' ~* 'laptop';
-- Exists check
SELECT * FROM user_events
WHERE event_data ? 'payment';
-- Any key exists
SELECT * FROM user_events
WHERE event_data ?| ARRAY['payment', 'shipping'];
-- All keys exist
SELECT * FROM user_events
WHERE event_data ?& ARRAY['payment', 'shipping'];
JSON Modification
-- Add field
UPDATE user_events
SET event_data = event_data || '{"processed": true}'::jsonb
WHERE event_id = 1;
-- Remove field
UPDATE user_events
SET event_data = event_data - 'tags'
WHERE event_id = 1;
-- Set nested field
UPDATE user_events
SET event_data = jsonb_set(
event_data,
'{payment,currency}',
'"EUR"'
)
WHERE event_id = 1;
-- Delete nested field
UPDATE user_events
SET event_data = event_data #- '{shipping,address}'
WHERE event_id = 1;
-- Merge JSON objects
UPDATE user_events
SET event_data = event_data || jsonb_build_object(
'metadata', jsonb_build_object(
'updated_at', NOW(),
'version', (event_data->>'version')::int + 1
)
)
WHERE event_id = 1;
JSON Statistics and Analysis
-- Count distinct values in JSON
SELECT
event_type,
jsonb_object_keys(event_data) AS key,
COUNT(*) AS occurrences
FROM user_events
GROUP BY event_type, key
ORDER BY event_type, occurrences DESC;
-- Extract and aggregate
SELECT
event_type,
AVG((event_data->>'price')::decimal) AS avg_price,
SUM((event_data->>'price')::decimal) AS total_revenue
FROM user_events
WHERE event_data ? 'price'
GROUP BY event_type;
-- JSON histogram
SELECT
width_bucket(
(event_data->>'price')::decimal,
0, 2000, 10
) AS price_bucket,
COUNT(*) AS frequency
FROM user_events
WHERE event_data ? 'price'
GROUP BY price_bucket
ORDER BY price_bucket;
JSON Performance Optimization
-- Check JSONB storage size
SELECT
event_id,
pg_column_size(event_data) AS bytes,
pg_size_pretty(pg_column_size(event_data)) AS size,
jsonb_object_keys(event_data)::text[] AS keys
FROM user_events;
-- JSONB compression
SELECT
event_id,
pg_column_size(event_data) AS original_size,
pg_column_size(to_jsonb(event_data)) AS compressed_size
FROM user_events;
-- Avoid JSON in hot paths
-- Instead of:
-- SELECT event_data->>'field' FROM events WHERE event_data->>'field' = 'value';
-- Use:
-- SELECT field FROM events WHERE field = 'value';
Mathematical Formulas
JSON path evaluation cost:
GIN index lookup cost:
JSON containment check:
ℹ️
Pro Tip: Use JSONB for query performance, JSON for storage. Convert between them with ::jsonb and ::json.
JSON Schema Validation
-- Create validation function
CREATE OR REPLACE FUNCTION validate_event_data()
RETURNS TRIGGER AS $$
BEGIN
-- Check required fields
IF NOT (NEW.event_data ? 'event_type') THEN
RAISE EXCEPTION 'Missing event_type field';
END IF;
-- Validate specific types
IF NEW.event_type = 'purchase' THEN
IF NOT (NEW.event_data ? 'product_id' AND
jsonb_typeof(NEW.event_data->'product_id') = 'number') THEN
RAISE EXCEPTION 'product_id must be a number';
END IF;
IF NOT (NEW.event_data ? 'price' AND
(NEW.event_data->>'price')::decimal > 0) THEN
RAISE EXCEPTION 'price must be positive';
END IF;
END IF;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
CREATE TRIGGER validate_event
BEFORE INSERT OR UPDATE ON user_events
FOR EACH ROW EXECUTE FUNCTION validate_event_data();
⚠️
JSONB Pitfalls: JSONB doesn't preserve duplicate keys. Use JSON type if you need to preserve duplicates.