π Complex Analytical Queries
Meta & Uber Interview Deep Dive
π Interview Question
βΉοΈπ΄ Meta/Uber Interview Question
"Analyze user engagement for a social media platform. Write queries to: 1) Calculate monthly retention cohorts, 2) Identify power users vs churned users, 3) Build a conversion funnel from signup to first post, 4) Calculate rolling 7-day active users."
Companies: Meta, Uber | Difficulty: Hard | Time: 50 minutes
π Setup: Social Media Schema
CREATE TABLE users (
user_id SERIAL PRIMARY KEY,
signup_date DATE,
signup_source VARCHAR(50),
country VARCHAR(50),
age_group VARCHAR(20)
);
CREATE TABLE daily_activity (
user_id INT,
activity_date DATE,
posts_created INT DEFAULT 0,
comments_made INT DEFAULT 0,
likes_given INT DEFAULT 0,
minutes_active DECIMAL(5, 2),
PRIMARY KEY (user_id, activity_date)
);
CREATE TABLE posts (
post_id SERIAL PRIMARY KEY,
user_id INT,
created_at TIMESTAMP,
post_type VARCHAR(20),
engagement_score DECIMAL(5, 2)
);
CREATE TABLE onboarding_events (
user_id INT,
event_type VARCHAR(50),
event_timestamp TIMESTAMP,
completed BOOLEAN DEFAULT false
);
-- Insert sample data
INSERT INTO users (signup_date, signup_source, country, age_group)
SELECT
(DATE '2023-01-01' + (random() * 365)::INT),
CASE (random() * 3)::INT
WHEN 0 THEN 'organic'
WHEN 1 THEN 'paid_ad'
WHEN 2 THEN 'referral'
ELSE 'social_media'
END,
CASE (random() * 4)::INT
WHEN 0 THEN 'USA'
WHEN 1 THEN 'UK'
WHEN 2 THEN 'Canada'
ELSE 'Germany'
END,
CASE (random() * 3)::INT
WHEN 0 THEN '18-24'
WHEN 1 THEN '25-34'
ELSE '35+'
END
FROM generate_series(1, 100000);
-- Insert daily activity
INSERT INTO daily_activity (user_id, activity_date, posts_created, comments_made, likes_given, minutes_active)
SELECT
(random() * 99999 + 1)::INT,
DATE '2023-01-01' + (random() * 365)::INT,
(random() * 5)::INT,
(random() * 20)::INT,
(random() * 50)::INT,
(random() * 120)::DECIMAL(5,2)
FROM generate_series(1, 5000000);
π Part 1: Cohort Analysis
βΉοΈπ Cohort Analysis
Cohort analysis groups users by a shared characteristic (signup date, acquisition source) and tracks their behavior over time. Essential for understanding retention and engagement patterns.
Monthly Retention Cohort
-- Monthly retention cohort analysis
WITH cohort_base AS (
-- Step 1: Define cohorts by signup month
SELECT
user_id,
DATE_TRUNC('month', signup_date)::DATE AS cohort_month
FROM users
),
activity_with_cohort AS (
-- Step 2: Join with activity to get activity month
SELECT
cb.user_id,
cb.cohort_month,
DATE_TRUNC('month', da.activity_date)::DATE AS activity_month
FROM cohort_base cb
JOIN daily_activity da ON cb.user_id = da.user_id
GROUP BY cb.user_id, cb.cohort_month, DATE_TRUNC('month', da.activity_date)
),
cohort_size AS (
-- Step 3: Count cohort sizes
SELECT
cohort_month,
COUNT(DISTINCT user_id) AS cohort_users
FROM cohort_base
GROUP BY cohort_month
),
retention AS (
-- Step 4: Calculate retention
SELECT
awc.cohort_month,
awc.activity_month,
EXTRACT(MONTH FROM AGE(awc.activity_month, awc.cohort_month))::INT AS months_since_signup,
COUNT(DISTINCT awc.user_id) AS active_users
FROM activity_with_cohort awc
GROUP BY awc.cohort_month, awc.activity_month
)
SELECT
r.cohort_month,
cs.cohort_users,
r.months_since_signup,
r.active_users,
ROUND(r.active_users * 100.0 / cs.cohort_users, 2) AS retention_rate
FROM retention r
JOIN cohort_size cs ON r.cohort_month = cs.cohort_month
ORDER BY r.cohort_month, r.months_since_signup;
Cohort Pivot Table
-- Create retention pivot table
WITH cohort_data AS (
SELECT
DATE_TRUNC('month', u.signup_date)::DATE AS cohort_month,
EXTRACT(MONTH FROM AGE(DATE_TRUNC('month', da.activity_date), DATE_TRUNC('month', u.signup_date)))::INT AS months_since,
COUNT(DISTINCT u.user_id) AS active_users
FROM users u
JOIN daily_activity da ON u.user_id = da.user_id
GROUP BY 1, 2
),
cohort_sizes AS (
SELECT
DATE_TRUNC('month', signup_date)::DATE AS cohort_month,
COUNT(*) AS size
FROM users
GROUP BY 1
)
SELECT
cd.cohort_month,
cs.size AS cohort_size,
ROUND(MAX(CASE WHEN cd.months_since = 0 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 0",
ROUND(MAX(CASE WHEN cd.months_since = 1 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 1",
ROUND(MAX(CASE WHEN cd.months_since = 2 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 2",
ROUND(MAX(CASE WHEN cd.months_since = 3 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 3",
ROUND(MAX(CASE WHEN cd.months_since = 6 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 6",
ROUND(MAX(CASE WHEN cd.months_since = 12 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 12"
FROM cohort_data cd
JOIN cohort_sizes cs ON cd.cohort_month = cs.cohort_month
GROUP BY cd.cohort_month, cs.size
ORDER BY cd.cohort_month;
π Part 2: User Classification (Power Users vs Churned)
-- Classify users based on activity patterns
WITH user_stats AS (
SELECT
u.user_id,
u.signup_date,
COUNT(DISTINCT da.activity_date) AS active_days,
SUM(da.posts_created) AS total_posts,
SUM(da.comments_made) AS total_comments,
SUM(da.likes_given) AS total_likes,
SUM(da.minutes_active) AS total_minutes,
MAX(da.activity_date) AS last_active_date,
MIN(da.activity_date) AS first_active_date
FROM users u
LEFT JOIN daily_activity da ON u.user_id = da.user_id
GROUP BY u.user_id, u.signup_date
),
user_classification AS (
SELECT
*,
CURRENT_DATE - last_active_date AS days_since_last_active,
CURRENT_DATE - signup_date AS account_age_days,
CASE
WHEN active_days IS NULL THEN 'Never Active'
WHEN CURRENT_DATE - last_active_date > 90 THEN 'Churned'
WHEN CURRENT_DATE - last_active_date > 30 THEN 'At Risk'
WHEN total_posts > 100 AND active_days > 200 THEN 'Power User'
WHEN total_posts > 50 AND active_days > 100 THEN 'Engaged'
WHEN active_days > 30 THEN 'Regular'
ELSE 'Casual'
END AS user_segment,
CASE
WHEN total_minutes / NULLIF(active_days, 0) > 60 THEN 'High Intensity'
WHEN total_minutes / NULLIF(active_days, 0) > 30 THEN 'Medium Intensity'
ELSE 'Low Intensity'
END AS usage_intensity
FROM user_stats
)
SELECT
user_segment,
usage_intensity,
COUNT(*) AS user_count,
ROUND(AVG(account_age_days), 0) AS avg_account_age,
ROUND(AVG(active_days), 1) AS avg_active_days,
ROUND(AVG(total_posts), 1) AS avg_posts,
ROUND(AVG(total_minutes), 1) AS avg_minutes
FROM user_classification
GROUP BY user_segment, usage_intensity
ORDER BY user_count DESC;
π Part 3: Conversion Funnel Analysis
-- Conversion funnel from signup to engagement
WITH funnel_events AS (
SELECT
u.user_id,
u.signup_date,
MAX(CASE WHEN oe.event_type = 'profile_created' THEN 1 ELSE 0 END) AS profile_created,
MAX(CASE WHEN oe.event_type = 'first_friend_added' THEN 1 ELSE 0 END) AS friend_added,
MAX(CASE WHEN oe.event_type = 'first_post_created' THEN 1 ELSE 0 END) AS post_created,
MAX(CASE WHEN oe.event_type = 'first_comment' THEN 1 ELSE 0 END) AS first_comment,
MAX(CASE WHEN oe.event_type = 'first_like' THEN 1 ELSE 0 END) AS first_like
FROM users u
LEFT JOIN onboarding_events oe ON u.user_id = oe.user_id
GROUP BY u.user_id, u.signup_date
),
funnel_counts AS (
SELECT
COUNT(*) AS total_signups,
SUM(profile_created) AS profile_created_count,
SUM(friend_added) AS friend_added_count,
SUM(post_created) AS post_created_count,
SUM(first_comment) AS first_comment_count,
SUM(first_like) AS first_like_count
FROM funnel_events
)
SELECT
'Total Signups' AS stage,
total_signups AS count,
100.0 AS conversion_rate
FROM funnel_counts
UNION ALL
SELECT
'Profile Created' AS stage,
profile_created_count,
ROUND(profile_created_count * 100.0 / total_signups, 2)
FROM funnel_counts
UNION ALL
SELECT
'Friend Added' AS stage,
friend_added_count,
ROUND(friend_added_count * 100.0 / total_signups, 2)
FROM funnel_counts
UNION ALL
SELECT
'Post Created' AS stage,
post_created_count,
ROUND(post_created_count * 100.0 / total_signups, 2)
FROM funnel_counts
UNION ALL
SELECT
'First Comment' AS stage,
first_comment_count,
ROUND(first_comment_count * 100.0 / total_signups, 2)
FROM funnel_counts
UNION ALL
SELECT
'First Like' AS stage,
first_like_count,
ROUND(first_like_count * 100.0 / total_signups, 2)
FROM funnel_counts;
Funnel by Acquisition Source
-- Conversion funnel by signup source
WITH funnel AS (
SELECT
u.signup_source,
COUNT(DISTINCT u.user_id) AS signups,
COUNT(DISTINCT CASE WHEN oe.event_type = 'first_post_created' THEN u.user_id END) AS post_creators,
COUNT(DISTINCT CASE WHEN da.user_id IS NOT NULL THEN u.user_id END) AS active_users
FROM users u
LEFT JOIN onboarding_events oe ON u.user_id = oe.user_id AND oe.event_type = 'first_post_created'
LEFT JOIN daily_activity da ON u.user_id = da.user_id
GROUP BY u.signup_source
)
SELECT
signup_source,
signups,
post_creators,
active_users,
ROUND(post_creators * 100.0 / signups, 2) AS post_creation_rate,
ROUND(active_users * 100.0 / signups, 2) AS activation_rate
FROM funnel
ORDER BY activation_rate DESC;
π Part 4: Rolling 7-Day Active Users (DAU/MAU)
-- Rolling 7-day active users
WITH daily_users AS (
SELECT
activity_date,
COUNT(DISTINCT user_id) AS dau
FROM daily_activity
GROUP BY activity_date
)
SELECT
activity_date,
dau,
AVG(dau) OVER (
ORDER BY activity_date
ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
) AS rolling_7d_avg,
AVG(dau) OVER (
ORDER BY activity_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
) AS rolling_30d_avg,
dau * 1.0 / NULLIF(AVG(dau) OVER (
ORDER BY activity_date
ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
), 0) AS mau_ratio
FROM daily_users
ORDER BY activity_date;
DAU/MAU Stickiness
-- Calculate stickiness (DAU/MAU ratio)
WITH monthly_active AS (
SELECT
user_id,
DATE_TRUNC('month', activity_date)::DATE AS month,
COUNT(DISTINCT activity_date) AS active_days
FROM daily_activity
GROUP BY user_id, DATE_TRUNC('month', activity_date)
),
daily_active AS (
SELECT
activity_date,
user_id
FROM daily_activity
),
stickiness AS (
SELECT
da.activity_date,
COUNT(DISTINCT da.user_id) AS dau,
(SELECT COUNT(DISTINCT user_id)
FROM monthly_active
WHERE month = DATE_TRUNC('month', da.activity_date)) AS mau
FROM daily_active da
GROUP BY da.activity_date
)
SELECT
activity_date,
dau,
mau,
ROUND(dau * 100.0 / NULLIF(mau, 0), 2) AS stickiness_pct
FROM stickiness
WHERE activity_date >= '2023-06-01'
ORDER BY activity_date;
π Part 5: Percentile and Distribution Analysis
-- User engagement distribution
WITH user_engagement AS (
SELECT
user_id,
SUM(minutes_active) AS total_minutes,
SUM(posts_created) AS total_posts,
SUM(comments_made) AS total_comments,
COUNT(DISTINCT activity_date) AS active_days
FROM daily_activity
GROUP BY user_id
)
SELECT
user_id,
total_minutes,
total_posts,
active_days,
NTILE(10) OVER (ORDER BY total_minutes) AS engagement_decile,
PERCENT_RANK() OVER (ORDER BY total_minutes) AS percentile_rank,
CUME_DIST() OVER (ORDER BY total_minutes) AS cumulative_distribution,
CASE
WHEN PERCENT_RANK() OVER (ORDER BY total_minutes) >= 0.9 THEN 'Top 10%'
WHEN PERCENT_RANK() OVER (ORDER BY total_minutes) >= 0.75 THEN 'Top 25%'
WHEN PERCENT_RANK() OVER (ORDER BY total_minutes) >= 0.5 THEN 'Top 50%'
ELSE 'Bottom 50%'
END AS user_tier
FROM user_engagement
ORDER BY total_minutes DESC
LIMIT 100;
π Part 6: Growth Metrics
-- Month-over-month growth
WITH monthly_metrics AS (
SELECT
DATE_TRUNC('month', signup_date)::DATE AS signup_month,
COUNT(*) AS new_users,
SUM(SUM(COUNT(*))) OVER (ORDER BY DATE_TRUNC('month', signup_date)) AS cumulative_users
FROM users
GROUP BY DATE_TRUNC('month', signup_date)
)
SELECT
signup_month,
new_users,
cumulative_users,
ROUND(
(new_users - LAG(new_users) OVER (ORDER BY signup_month)) * 100.0 /
NULLIF(LAG(new_users) OVER (ORDER BY signup_month), 0),
2
) AS growth_rate_pct
FROM monthly_metrics
ORDER BY signup_month;
Net Revenue Retention
-- Calculate net revenue retention by cohort
WITH monthly_revenue AS (
SELECT
u.user_id,
DATE_TRUNC('month', u.signup_date)::DATE AS cohort_month,
DATE_TRUNC('month', da.activity_date)::DATE AS activity_month,
SUM(da.posts_created * 0.01 + da.comments_made * 0.005 + da.likes_given * 0.001) AS estimated_revenue
FROM users u
JOIN daily_activity da ON u.user_id = da.user_id
GROUP BY 1, 2, 3
),
cohort_revenue AS (
SELECT
cohort_month,
activity_month,
EXTRACT(MONTH FROM AGE(activity_month, cohort_month))::INT AS month_number,
SUM(estimated_revenue) AS total_revenue,
COUNT(DISTINCT user_id) AS active_paying_users
FROM monthly_revenue
GROUP BY 1, 2, 3
)
SELECT
cohort_month,
month_number,
total_revenue,
active_paying_users,
SUM(total_revenue) OVER (
PARTITION BY cohort_month
ORDER BY month_number
) AS cumulative_revenue
FROM cohort_revenue
ORDER BY cohort_month, month_number;
π― Quiz Section
π Best Practices for Interviews
π‘β Analytical Query Best Practices
1. Define Metrics Clearly:
-- Document metric definitions
-- DAU: Users with any activity on a given day
-- MAU: Users with any activity in a calendar month
-- Retention: Users active N months after signup
2. Handle Edge Cases:
-- Always handle NULLs and zero divisions
ROUND(count * 100.0 / NULLIF(total, 0), 2) AS percentage
-- Handle users with no activity
COALESCE(SUM(posts), 0) AS total_posts
3. Use CTEs for Complex Calculations:
-- Break complex queries into logical steps
WITH
step1_raw_data AS (...),
step2_aggregated AS (...),
step3_calculated AS (...)
SELECT * FROM step3_calculated;
4. Validate Results:
-- Sanity check: sum of parts should equal total
SELECT
SUM(active_users) AS total_active,
(SELECT COUNT(DISTINCT user_id) FROM daily_activity) AS actual_distinct
FROM monthly_active;
β οΈβ οΈ Common Pitfalls
- Survivorship bias: Only analyzing users who completed signup
- Time zone issues: Mixing UTC and local dates
- Double counting: Same user appearing multiple times
- Missing data: Users with no activity vs users who never signed up