π Advanced SQL JOINs
Meta & Apple Interview Deep Dive
π Interview Question
βΉοΈπ΄ Meta/Apple Interview Question
"Given a social media database with users, posts, comments, and likes tables, write queries using different JOIN types to: 1) Find users who have never posted, 2) Find the most liked post per user, 3) Find users who liked their own posts, 4) Create a user activity feed combining all interactions."
Companies: Meta, Apple | Difficulty: Medium-Hard | Time: 35 minutes
π Setup: Social Media Schema
CREATE TABLE users (
user_id SERIAL PRIMARY KEY,
username VARCHAR(50) UNIQUE,
email VARCHAR(100),
full_name VARCHAR(100),
created_at TIMESTAMP DEFAULT NOW(),
is_verified BOOLEAN DEFAULT false
);
CREATE TABLE posts (
post_id SERIAL PRIMARY KEY,
user_id INT REFERENCES users(user_id),
content TEXT,
media_url VARCHAR(500),
post_type VARCHAR(20) DEFAULT 'text',
created_at TIMESTAMP DEFAULT NOW(),
is_public BOOLEAN DEFAULT true
);
CREATE TABLE comments (
comment_id SERIAL PRIMARY KEY,
post_id INT REFERENCES posts(post_id),
user_id INT REFERENCES users(user_id),
content TEXT,
created_at TIMESTAMP DEFAULT NOW()
);
CREATE TABLE likes (
like_id SERIAL PRIMARY KEY,
post_id INT REFERENCES posts(post_id),
user_id INT REFERENCES users(user_id),
created_at TIMESTAMP DEFAULT NOW(),
UNIQUE(post_id, user_id)
);
CREATE TABLE follows (
follower_id INT REFERENCES users(user_id),
following_id INT REFERENCES users(user_id),
created_at TIMESTAMP DEFAULT NOW(),
PRIMARY KEY (follower_id, following_id)
);
-- Insert sample data
INSERT INTO users (username, email, full_name, created_at, is_verified) VALUES
('alice', 'alice@example.com', 'Alice Johnson', '2020-01-15', true),
('bob', 'bob@example.com', 'Bob Smith', '2020-02-20', true),
('charlie', 'charlie@example.com', 'Charlie Brown', '2020-03-10', false),
('diana', 'diana@example.com', 'Diana Ross', '2020-04-05', false),
('eve', 'eve@example.com', 'Eve Davis', '2020-05-01', true),
('frank', 'frank@example.com', 'Frank Wilson', '2020-06-15', false),
('grace', 'grace@example.com', 'Grace Lee', '2020-07-20', true);
INSERT INTO posts (user_id, content, post_type, created_at) VALUES
(1, 'Hello world! My first post.', 'text', '2021-01-01'),
(1, 'Check out this photo!', 'photo', '2021-02-15'),
(2, 'Working on a new project.', 'text', '2021-01-10'),
(2, 'Great day today!', 'text', '2021-03-01'),
(3, 'Learning SQL is fun!', 'text', '2021-02-20'),
(5, 'Travel adventures!', 'photo', '2021-04-01'),
(5, 'Code review tips', 'text', '2021-04-15'),
(6, 'Coffee and code', 'photo', '2021-05-01');
INSERT INTO comments (post_id, user_id, content, created_at) VALUES
(1, 2, 'Welcome!', '2021-01-01'),
(1, 3, 'Nice post!', '2021-01-02'),
(2, 4, 'Beautiful photo!', '2021-02-15'),
(3, 1, 'What project?', '2021-01-10'),
(5, 1, 'I agree!', '2021-02-20'),
(6, 2, 'Looks amazing!', '2021-04-01'),
(6, 3, 'Where is this?', '2021-04-01');
INSERT INTO likes (post_id, user_id, created_at) VALUES
(1, 2, '2021-01-01'), (1, 3, '2021-01-01'), (1, 4, '2021-01-02'),
(2, 3, '2021-02-15'), (2, 4, '2021-02-15'), (2, 5, '2021-02-16'),
(3, 1, '2021-01-10'), (3, 4, '2021-01-11'),
(5, 1, '2021-02-20'), (5, 2, '2021-02-20'),
(6, 2, '2021-04-01'), (6, 3, '2021-04-01'), (6, 4, '2021-04-02'),
(7, 1, '2021-04-15'), (7, 3, '2021-04-15');
INSERT INTO follows (follower_id, following_id) VALUES
(1, 2), (1, 3), (1, 4),
(2, 1), (2, 3),
(3, 1), (3, 2), (3, 4),
(4, 1), (4, 5),
(5, 1), (5, 2), (5, 6),
(6, 5);
π Part 1: LEFT JOIN (Finding Missing Records)
Users Who Never Posted
-- LEFT JOIN to find users without posts
SELECT
u.user_id,
u.username,
u.full_name,
u.created_at AS member_since,
COUNT(p.post_id) AS post_count
FROM users u
LEFT JOIN posts p ON u.user_id = p.user_id
GROUP BY u.user_id, u.username, u.full_name, u.created_at
HAVING COUNT(p.post_id) = 0;
-- Alternative using NOT EXISTS (often faster)
SELECT u.*
FROM users u
WHERE NOT EXISTS (
SELECT 1 FROM posts p WHERE p.user_id = u.user_id
);
-- Alternative using LEFT JOIN + NULL check
SELECT u.*
FROM users u
LEFT JOIN posts p ON u.user_id = p.user_id
WHERE p.post_id IS NULL;
π‘π‘ LEFT JOIN vs NOT EXISTS
Both approaches find missing records, but:
LEFT JOIN ... IS NULL: Can be faster when posts table has proper indexesNOT EXISTS: Often clearer semantically and can be optimized differentlyNOT IN: Avoid due to NULL handling issues
π Part 2: Self JOIN
Most Liked Post Per User
-- Self JOIN to compare likes within same table
SELECT
p.user_id,
u.username,
p.post_id,
p.content,
p.post_type,
COUNT(l.like_id) AS like_count
FROM posts p
LEFT JOIN likes l ON p.post_id = l.post_id
JOIN users u ON p.user_id = u.user_id
GROUP BY p.user_id, u.username, p.post_id, p.content, p.post_type
HAVING COUNT(l.like_id) = (
SELECT COUNT(l2.like_id)
FROM posts p2
LEFT JOIN likes l2 ON p2.post_id = l2.post_id
WHERE p2.user_id = p.user_id
GROUP BY p2.post_id
ORDER BY COUNT(l2.like_id) DESC
LIMIT 1
)
ORDER BY like_count DESC;
-- Window function approach (more efficient)
WITH post_likes AS (
SELECT
p.user_id,
u.username,
p.post_id,
p.content,
COUNT(l.like_id) AS like_count,
ROW_NUMBER() OVER (
PARTITION BY p.user_id
ORDER BY COUNT(l.like_id) DESC
) AS rank
FROM posts p
LEFT JOIN likes l ON p.post_id = l.post_id
JOIN users u ON p.user_id = u.user_id
GROUP BY p.user_id, u.username, p.post_id, p.content
)
SELECT *
FROM post_likes
WHERE rank = 1
ORDER BY like_count DESC;
Users Who Liked Their Own Posts
-- Self JOIN: Find users who liked their own posts
SELECT
u.username,
p.post_id,
p.content,
l.created_at AS liked_at
FROM likes l
JOIN posts p ON l.post_id = p.post_id
JOIN users u ON l.user_id = u.user_id
WHERE l.user_id = p.user_id -- Self-referencing condition
ORDER BY u.username, p.post_id;
-- Alternative using EXISTS
SELECT
u.username,
p.post_id,
p.content
FROM users u
JOIN posts p ON u.user_id = p.user_id
WHERE EXISTS (
SELECT 1
FROM likes l
WHERE l.post_id = p.post_id
AND l.user_id = u.user_id
);
π Part 3: FULL OUTER JOIN
User Activity Comparison
-- Find all users and their activity counts (including zero activity)
SELECT
COALESCE(u.username, 'Unknown') AS username,
COALESCE(uc.post_count, 0) AS posts,
COALESCE(cc.comment_count, 0) AS comments,
COALESCE(lc.like_count, 0) AS likes,
COALESCE(fc.followers, 0) AS followers,
COALESCE(fc.following, 0) AS following
FROM users u
FULL OUTER JOIN (
SELECT user_id, COUNT(*) AS post_count
FROM posts GROUP BY user_id
) uc ON u.user_id = uc.user_id
FULL OUTER JOIN (
SELECT user_id, COUNT(*) AS comment_count
FROM comments GROUP BY user_id
) cc ON u.user_id = cc.user_id
FULL OUTER JOIN (
SELECT user_id, COUNT(*) AS like_count
FROM likes GROUP BY user_id
) lc ON u.user_id = lc.user_id
FULL OUTER JOIN (
SELECT
follower_id AS user_id,
COUNT(DISTINCT following_id) AS following,
COUNT(DISTINCT follower_id) FILTER (WHERE follower_id = following_id) AS followers
FROM follows
GROUP BY follower_id
) fc ON u.user_id = fc.user_id
ORDER BY username;
β‘ Part 4: LATERAL JOIN
βΉοΈπ LATERAL JOIN
LATERAL JOIN allows the right side of a JOIN to reference columns from the left side. It's like a correlated subquery in FROM clause. Essential for "top N per group" problems.
Top 2 Comments Per Post
-- LATERAL JOIN for top N per group
SELECT
p.post_id,
p.content AS post_content,
u.username AS post_author,
c.top_comments
FROM posts p
JOIN users u ON p.user_id = u.user_id
CROSS JOIN LATERAL (
SELECT
ARRAY_AGG(
ROW(c2.comment_id, c2.content, u2.username)::comment_info
ORDER BY c2.created_at DESC
) AS top_comments
FROM comments c2
JOIN users u2 ON c2.user_id = u2.user_id
WHERE c2.post_id = p.post_id
ORDER BY c2.created_at DESC
LIMIT 3
) c
ORDER BY p.post_id;
-- Simpler LATERAL example: Get latest 2 comments per post
SELECT
p.post_id,
p.content,
lc.latest_comments
FROM posts p
CROSS JOIN LATERAL (
SELECT
ARRAY_AGG(
json_build_object(
'comment_id', c.comment_id,
'author', u.username,
'content', c.content,
'created_at', c.created_at
)
) AS latest_comments
FROM comments c
JOIN users u ON c.user_id = u.user_id
WHERE c.post_id = p.post_id
ORDER BY c.created_at DESC
LIMIT 2
) lc;
Unnest with LATERAL
-- LATERAL with generate_series for expanding arrays
SELECT
u.user_id,
u.username,
day.date::date AS activity_date
FROM users u
CROSS JOIN LATERAL (
SELECT generate_series(
u.created_at::date,
LEAST(u.created_at::date + INTERVAL '7 days', CURRENT_DATE),
'1 day'::interval
) AS date
) day
WHERE u.user_id = 1
ORDER BY day.date;
βοΈ Part 5: CROSS JOIN
Generate All Combinations
-- CROSS JOIN to generate date x user matrix
SELECT
d.date,
u.username,
COALESCECOUNT(p.post_id) AS posts_that_day
FROM (
SELECT generate_series(
'2021-01-01'::date,
'2021-01-07'::date,
'1 day'::interval
)::date AS date
) d
CROSS JOIN users u
LEFT JOIN posts p ON DATE(p.created_at) = d.date AND p.user_id = u.user_id
GROUP BY d.date, u.username
ORDER BY d.date, u.username;
-- Product combinations
CREATE TABLE colors (color VARCHAR(20));
CREATE TABLE sizes (size VARCHAR(10));
INSERT INTO colors VALUES ('Red'), ('Blue'), ('Green');
INSERT INTO sizes VALUES ('S'), ('M'), ('L'), ('XL');
-- Generate all color x size combinations
SELECT
c.color,
s.size,
CONCAT(c.color, ' - ', s.size) AS product_name
FROM colors c
CROSS JOIN sizes s
ORDER BY c.color, s.size;
πΏ Part 6: NATURAL JOIN
β οΈβ οΈ NATURAL JOIN Warning
NATURAL JOIN automatically joins on all columns with the same name. This is dangerous because:
- Schema changes can silently change join behavior
- It's implicit and harder to read
- It can produce unexpected results with common column names (user_id, id, etc.)
-- DANGEROUS: NATURAL JOIN
SELECT * FROM posts NATURAL JOIN users;
-- Joins on user_id (common column name)
-- SAFE: Explicit JOIN
SELECT *
FROM posts p
JOIN users u ON p.user_id = u.user_id;
-- When NATURAL JOIN might be acceptable (same schema)
CREATE TABLE orders_2020 (order_id INT, customer_id INT, amount DECIMAL);
CREATE TABLE orders_2021 (order_id INT, customer_id INT, amount DECIMAL);
-- This is safer because tables have identical structure
SELECT * FROM orders_2020 NATURAL JOIN orders_2021;
π Part 7: Complex Multi-Join Scenarios
User Activity Feed
-- Activity feed combining all interactions
WITH user_activity AS (
-- Posts
SELECT
p.user_id,
u.username,
'post' AS activity_type,
p.post_id AS reference_id,
p.content AS activity_content,
p.created_at,
0 AS engagement_count
FROM posts p
JOIN users u ON p.user_id = u.user_id
UNION ALL
-- Comments
SELECT
c.user_id,
u.username,
'comment' AS activity_type,
c.post_id AS reference_id,
c.content AS activity_content,
c.created_at,
(SELECT COUNT(*) FROM likes l WHERE l.post_id = c.post_id) AS engagement_count
FROM comments c
JOIN users u ON c.user_id = u.user_id
UNION ALL
-- Likes
SELECT
l.user_id,
u.username,
'like' AS activity_type,
l.post_id AS reference_id,
'liked a post' AS activity_content,
l.created_at,
0 AS engagement_count
FROM likes l
JOIN users u ON l.user_id = u.user_id
),
activity_with_rank AS (
SELECT
*,
ROW_NUMBER() OVER (
PARTITION BY user_id
ORDER BY created_at DESC
) AS activity_rank
FROM user_activity
)
SELECT
username,
activity_type,
reference_id,
activity_content,
created_at,
engagement_count
FROM activity_with_rank
WHERE activity_rank <= 10
ORDER BY username, created_at DESC;
Engagement Metrics
-- Comprehensive engagement metrics using multiple JOINs
SELECT
u.username,
COUNT(DISTINCT p.post_id) AS total_posts,
COUNT(DISTINCT c.comment_id) AS total_comments,
COUNT(DISTINCT l.like_id) AS total_likes,
COUNT(DISTINCT f1.following_id) AS following_count,
COUNT(DISTINCT f2.follower_id) AS follower_count,
ROUND(
(COUNT(DISTINCT l.like_id) + COUNT(DISTINCT c.comment_id))::DECIMAL /
NULLIF(COUNT(DISTINCT p.post_id), 0),
2
) AS avg_engagement_per_post
FROM users u
LEFT JOIN posts p ON u.user_id = p.user_id
LEFT JOIN comments c ON u.user_id = c.user_id
LEFT JOIN likes l ON u.user_id = l.user_id
LEFT JOIN follows f1 ON u.user_id = f1.follower_id
LEFT JOIN follows f2 ON u.user_id = f2.following_id
GROUP BY u.user_id, u.username
ORDER BY total_posts DESC;
β±οΈ Complexity Analysis
| JOIN Type | Time Complexity | Space Complexity | Use Case |
|---|---|---|---|
| INNER JOIN | O(n * m) worst | O(min(n,m)) | Matching records |
| LEFT JOIN | O(n * m) worst | O(n) | All left + matches |
| FULL JOIN | O(n * m) worst | O(n + m) | All records both |
| CROSS JOIN | O(n * m) | O(n * m) | Cartesian product |
| LATERAL | Varies | Depends on subquery | Top N per group |
| Self JOIN | O(n * m) worst | O(n) | Self-referencing |
π― Quiz Section
π Best Practices for Interviews
π‘β JOIN Best Practices
1. Always Specify Join Condition Explicitly:
-- BAD: NATURAL JOIN
SELECT * FROM posts NATURAL JOIN users;
-- GOOD: Explicit condition
SELECT * FROM posts p JOIN users u ON p.user_id = u.user_id;
2. Use Table Aliases for Readability:
-- BAD
SELECT users.username, posts.content FROM users JOIN posts ON users.user_id = posts.user_id;
-- GOOD
SELECT u.username, p.content FROM users u JOIN posts p ON u.user_id = p.user_id;
3. Consider Index Usage:
-- Ensure join columns are indexed
CREATE INDEX idx_posts_user_id ON posts(user_id);
CREATE INDEX idx_likes_post_id ON likes(post_id);
4. Watch for Duplicate Rows:
-- One-to-many JOIN can multiply rows
SELECT u.username, COUNT(p.post_id)
FROM users u
JOIN posts p ON u.user_id = p.user_id
GROUP BY u.user_id, u.username;
5. Use LATERAL for Top-N Per Group:
-- Instead of complex window functions
SELECT p.*, lc.top_comments
FROM posts p
CROSS JOIN LATERAL (
SELECT ARRAY_AGG(c.*) AS top_comments
FROM comments c WHERE c.post_id = p.post_id
LIMIT 3
) lc;
π Common Follow-Up Questions
- "When would you use a FULL JOIN?" β Comparing two datasets, finding differences
- "How do you optimize multi-table JOINs?" β Index join columns, filter early, reduce result set
- "What is the difference between INNER and CROSS JOIN?" β INNER has ON condition, CROSS is Cartesian
- "How do you handle many-to-many relationships?" β Junction/bridge table with two JOINs
β οΈβ οΈ Interview Tip
When writing JOIN queries in interviews, always:
- Identify the relationship type (1:1, 1:N, M:N)
- Choose the appropriate JOIN type
- Verify with sample data that results are correct
- Consider NULL handling edge cases