Array & Nested Data Types
Difficulty: Hard | Companies: Google, Amazon, Meta, Netflix, Uber
PostgreSQL Array Fundamentals
-- Array column operations
SELECT
id,
tags,
array_length(tags, 1) AS tag_count,
tags[1] AS first_tag,
array_append(tags, 'new_tag') AS with_new_tag,
array_remove(tags, 'deprecated') AS without_deprecated,
array_cat(tags, ARRAY['extra']) AS concatenated,
array_agg(DISTINCT tag) AS unique_tags
FROM posts
GROUP BY id, tags;
βΉοΈ
Key Insight: PostgreSQL arrays are 1-indexed. Use array_length() for count, array_append()/array_remove() for modification, and unnest() to expand arrays into rows.
Unnesting Arrays
-- Expand array to rows
SELECT
post_id,
unnest(tags) AS tag
FROM posts;
-- With ordinality for position tracking
SELECT
post_id,
tag,
ordinality AS position
FROM posts,
LATERAL unnest(tags) WITH ORDINALITY AS t(tag, ordinality);
-- Multiple arrays in parallel
SELECT
post_id,
t.tag,
s.score
FROM posts,
LATERAL unnest(tags) AS t(tag),
LATERAL unnest(scores) AS s(score)
WHERE array_length(tags, 1) = array_length(scores, 1);
Array Aggregation
-- Aggregate values into array
SELECT
department_id,
array_agg(employee_name ORDER BY salary DESC) AS employees_by_salary,
array_agg(DISTINCT skill) AS unique_skills,
array_remove(
array_agg(skill),
NULL
) AS all_skills
FROM employees
GROUP BY department_id;
-- Aggregate with filtering
SELECT
department_id,
array_agg(employee_name) FILTER (WHERE salary > 80000) AS high_earners
FROM employees
GROUP BY department_id;
Array Searching
-- Check if array contains element
SELECT * FROM posts WHERE 'sql' = ANY(tags);
-- Check if array contains all elements
SELECT * FROM posts WHERE tags @> ARRAY['sql', 'interview'];
-- Check if arrays overlap
SELECT * FROM posts WHERE tags && ARRAY['sql', 'python'];
-- Array position
SELECT
post_id,
array_position(tags, 'sql') AS sql_position
FROM posts
WHERE 'sql' = ANY(tags);
β οΈ
Indexing Tip: Create a GIN index for array containment queries: CREATE INDEX idx_tags ON posts USING GIN (tags);. This significantly speeds up @> and && operations.
Array Transformation
-- Transform array elements
SELECT
post_id,
array(SELECT upper(unnest(tags))) AS uppercase_tags,
array(SELECT DISTINCT unnest(tags) ORDER BY 1) AS sorted_unique_tags,
array(
SELECT unnest(tags) WHERE length(unnest(tags)) > 3
) AS long_tags
FROM posts;
-- Array slicing
SELECT
post_id,
tags[1:3] AS first_three_tags,
tags[2:] AS skip_first_tag,
tags[:2] AS first_two_tags
FROM posts;
BigQuery Array Functions
-- BigQuery ARRAY operations
SELECT
id,
ARRAY_LENGTH(tags) AS tag_count,
tags[OFFSET(0)] AS first_tag,
ARRAY_CONCAT(tags, ['new_tag']) AS with_new_tag,
ARRAY(
SELECT DISTINCT tag
FROM UNNEST(tags) AS tag
ORDER BY tag
) AS unique_sorted_tags
FROM `project.dataset.posts`;
-- Flatten nested arrays
SELECT
post_id,
tag
FROM `project.dataset.posts`,
UNNEST(tags) AS tag;
-- ARRAY_AGG with filtering
SELECT
department_id,
ARRAY_AGG(name ORDER BY salary DESC LIMIT 3) AS top_3_earners
FROM employees
GROUP BY department_id;
Array Comparisons
-- Compare arrays
SELECT
post_id,
tags,
CASE
WHEN tags = ARRAY['sql', 'interview'] THEN 'exact match'
WHEN tags @> ARRAY['sql'] THEN 'contains sql'
WHEN tags && ARRAY['sql', 'python'] THEN 'overlap'
ELSE 'no match'
END AS match_type
FROM posts;
-- Array equality with ordering
SELECT
post_id,
tags,
ARRAY(SELECT unnest(tags) ORDER BY 1) AS sorted_tags
FROM posts
WHERE ARRAY(SELECT unnest(tags) ORDER BY 1) =
ARRAY(SELECT unnest(ARRAY['sql', 'interview']) ORDER BY 1);
Multidimensional Arrays
-- 2D array operations (matrix)
CREATE TABLE matrix_data (
id SERIAL PRIMARY KEY,
matrix INT[][]
);
INSERT INTO matrix_data (matrix) VALUES
(ARRAY[[1, 2, 3], [4, 5, 6], [7, 8, 9]]);
SELECT
id,
matrix[1][2] AS element, -- Access element
array_length(matrix, 1) AS rows,
array_length(matrix, 2) AS columns
FROM matrix_data;
Array Window Functions
-- Window functions with arrays
SELECT
post_id,
tags,
array_agg(tag) OVER (
ORDER BY created_at
ROWS BETWEEN 2 PRECEDING AND CURRENT ROW
) AS recent_tags
FROM posts,
LATERAL unnest(tags) AS t(tag);
Array Aggregation with GROUPING
-- Group and aggregate arrays
SELECT
department_id,
array_agg(DISTINCT skill) AS all_skills,
array_length(array_agg(DISTINCT skill), 1) AS skill_count,
(
SELECT array_agg(s ORDER BY s)
FROM unnest(array_agg(DISTINCT skill)) AS s
WHERE length(s) > 5
) AS long_skills
FROM employee_skills
GROUP BY department_id;
Array Search and Replace
-- Search and replace in arrays
SELECT
post_id,
array(
SELECT CASE
WHEN tag = 'js' THEN 'javascript'
WHEN tag = 'ts' THEN 'typescript'
ELSE tag
END
FROM unnest(tags) AS tag
) AS replaced_tags
FROM posts;
-- Filter array elements
SELECT
post_id,
array(
SELECT tag
FROM unnest(tags) AS tag
WHERE length(tag) > 3
) AS filtered_tags
FROM posts;
Follow-Up Questions
- When should you use arrays vs normalized tables for storing collections?
- How do you create and use custom array types in PostgreSQL?
- What's the performance impact of GIN indexes on array columns?
- How would you implement array intersection and difference operations?
- Explain the difference between
unnest()andLATERAL unnest(). - How do you handle NULL values in array operations?