πŸŽ‰ 75% of content is free forever β€” Unlock Premium from $10/mo β†’
CW
Search courses…
πŸ’Ό Servicesℹ️ Aboutβœ‰οΈ ContactView Pricing Plansfrom $10

JSON Processing in SQL

Advanced SQLSemi-Structured Data⭐ Premium

Advertisement

JSON Processing in SQL

Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber

PostgreSQL JSON Operators

-- Basic JSON extraction operators
SELECT
  id,
  data->>'name' AS name,           -- Text extraction
  data->'address'->>'city' AS city, -- Nested extraction
  data->'preferences'->'notifications'->>'email' AS email_pref,
  (data->>'age')::INT AS age,       -- Type casting
  jsonb_array_length(data->'tags') AS tag_count
FROM users;

ℹ️

Key Insight: Use -> for JSON object (returns JSON), ->> for text extraction. For nested access, chain operators. PostgreSQL's jsonb type is preferred over json for better performance and indexing.

JSON Path Expressions

-- Advanced JSON path queries
SELECT
  id,
  data @? '$.orders[*]?(@.amount > 1000)' AS has_large_orders,
  jsonb_path_query_array(data, '$.tags[*]') AS all_tags,
  jsonb_path_query_first(data, '$.orders[0].items[0].name') AS first_item,
  jsonb_path_query(data, '$.orders[*]?(@.status == "pending")') AS pending_orders
FROM users;

BigQuery JSON Functions

-- BigQuery JSON extraction
SELECT
  id,
  JSON_EXTRACT_SCALAR(data, '$.name') AS name,
  JSON_EXTRACT_SCALAR(data, '$.address.city') AS city,
  JSON_EXTRACT(data, '$.tags') AS tags_json,
  JSON_EXTRACT_ARRAY(data, '$.orders') AS orders_array,
  ARRAY(
    SELECT JSON_EXTRACT_SCALAR(item, '$.name')
    FROM UNNEST(JSON_EXTRACT_ARRAY(data, '$.tags')) AS item
  ) AS tag_names
FROM `project.dataset.users`;

JSON Aggregation

-- Aggregate rows into JSON array
SELECT
  department_id,
  jsonb_agg(
    jsonb_build_object(
      'employee_id', employee_id,
      'name', name,
      'salary', salary
    )
  ) AS employees,
  jsonb_agg(name ORDER BY salary DESC) AS names_by_salary
FROM employees
GROUP BY department_id;

-- Aggregate into JSON object
SELECT
  jsonb_object_agg(
    employee_id::TEXT,
    jsonb_build_object('name', name, 'salary', salary)
  ) AS employee_map
FROM employees
WHERE department_id = 1;

JSON Transformation

-- Transform JSON structure
SELECT
  id,
  jsonb_build_object(
    'full_name', data->>'first_name' || ' ' || data->>'last_name',
    'contact', jsonb_build_object(
      'email', data->>'email',
      'phone', data->'phone_numbers'->0
    ),
    'order_count', jsonb_array_length(data->'orders'),
    'total_spent', (
      SELECT SUM((item->>'amount')::NUMERIC)
      FROM jsonb_array_elements(data->'orders') AS item
    )
  ) AS transformed_data
FROM users;

⚠️

Performance Tip: Use jsonb instead of json in PostgreSQL. jsonb is stored in a decomposed binary format that supports GIN indexing and is significantly faster for queries.

JSON Indexing

-- GIN index for general JSON queries
CREATE INDEX idx_users_data ON users USING GIN (data);

-- Specific path index
CREATE INDEX idx_users_email ON users ((data->>'email'));

-- Partial index for specific values
CREATE INDEX idx_active_users ON users ((data->>'status'))
WHERE data->>'status' = 'active';

-- Composite index on JSON fields
CREATE INDEX idx_users_name_email ON users (
  (data->>'last_name'),
  (data->>'email')
);

JSON Array Processing

-- Unnest JSON array
SELECT
  u.id,
  u.data->>'name' AS user_name,
  tag.value AS tag
FROM users u,
LATERAL jsonb_array_elements_text(u.data->'tags') AS tag(value);

-- Filter JSON array elements
SELECT
  u.id,
  jsonb_path_query_array(
    u.data->'orders',
    '$[*]?(@.amount > 100)'
  ) AS large_orders
FROM users u;

-- Aggregate JSON array values
SELECT
  u.id,
  (SELECT SUM((item->>'amount')::NUMERIC)
   FROM jsonb_array_elements(u.data->'orders') AS item
   WHERE item->>'status' = 'completed') AS total_completed
FROM users u;

JSON Merge and Patch

-- Merge two JSON objects
SELECT
  jsonb_merge_patch(
    '{"name": "John", "age": 30}'::jsonb,
    '{"age": 31, "city": "NYC"}'::jsonb
  ) AS merged_result;
-- Result: {"name": "John", "age": 31, "city": "NYC"}

-- Patch with null removal
SELECT
  jsonb_merge_patch(
    '{"a": 1, "b": 2, "c": 3}'::jsonb,
    '{"b": null, "d": 4}'::jsonb
  ) AS patched_result;
-- Result: {"a": 1, "c": 3, "d": 4}

JSON in UPDATE Statements

-- Update specific JSON path
UPDATE users
SET data = jsonb_set(
  data,
  '{address, city}',
  '"San Francisco"'
)
WHERE id = 101;

-- Add new key to JSON object
UPDATE users
SET data = data || '{"newsletter_subscribed": true}'::jsonb
WHERE data->>'status' = 'active';

-- Remove key from JSON object
UPDATE users
SET data = data - 'temp_field'
WHERE data ? 'temp_field';

-- Update array element
UPDATE users
SET data = jsonb_set(
  data,
  '{preferences, 0}',
  '"new_value"'
)
WHERE id = 101;

JSON Search Queries

-- Check if key exists
SELECT * FROM users WHERE data ? 'email';

-- Check if any array element matches
SELECT * FROM users WHERE data ?| ARRAY['email', 'phone'];

-- Check all array elements match
SELECT * FROM users WHERE data ?& ARRAY['email', 'phone', 'address'];

-- Full-text search within JSON
SELECT * FROM users
WHERE to_tsvector(data->>'description') @@ to_tsquery('english', 'SQL & interview');

Complex JSON Aggregation

-- Build nested JSON from multiple tables
SELECT
  jsonb_build_object(
    'customer', jsonb_build_object(
      'id', c.customer_id,
      'name', c.customer_name,
      'email', c.email
    ),
    'orders', (
      SELECT jsonb_agg(
        jsonb_build_object(
          'order_id', o.order_id,
          'date', o.order_date,
          'total', o.total_amount,
          'items', (
            SELECT jsonb_agg(
              jsonb_build_object(
                'product', p.product_name,
                'quantity', oi.quantity,
                'price', oi.unit_price
              )
            )
            FROM order_items oi
            INNER JOIN products p ON oi.product_id = p.product_id
            WHERE oi.order_id = o.order_id
          )
        )
      )
      FROM orders o
      WHERE o.customer_id = c.customer_id
    )
  ) AS customer_data
FROM customers c
WHERE c.customer_id = 101;

Follow-Up Questions

  1. What's the difference between json and jsonb in PostgreSQL?
  2. How would you index a JSON column for efficient range queries?
  3. Explain the performance implications of using JSON vs normalized tables.
  4. How do you handle schema evolution in JSON documents?
  5. What's the best approach for querying deeply nested JSON structures?
  6. How would you validate JSON data against a schema in SQL?

Advertisement