πŸŽ‰ 75% of content is free forever β€” Unlock Premium from $10/mo β†’
CW
Search courses…
πŸ’Ό Servicesℹ️ Aboutβœ‰οΈ ContactView Pricing Plansfrom $10

Topic: Complex Analytical Queries for FAANG Interviews

SQL AdvancedAnalytical Queries⭐ Premium

Advertisement

πŸ“Š Complex Analytical Queries

Meta & Uber Interview Deep Dive

🏒 Meta🏒 Uber⚑ Difficulty: Hard⏱️ 50 min

πŸ“‹ Interview Question

β„ΉοΈπŸ”΄ Meta/Uber Interview Question

"Analyze user engagement for a social media platform. Write queries to: 1) Calculate monthly retention cohorts, 2) Identify power users vs churned users, 3) Build a conversion funnel from signup to first post, 4) Calculate rolling 7-day active users."

Companies: Meta, Uber | Difficulty: Hard | Time: 50 minutes

πŸ“Š Setup: Social Media Schema

CREATE TABLE users (
    user_id SERIAL PRIMARY KEY,
    signup_date DATE,
    signup_source VARCHAR(50),
    country VARCHAR(50),
    age_group VARCHAR(20)
);

CREATE TABLE daily_activity (
    user_id INT,
    activity_date DATE,
    posts_created INT DEFAULT 0,
    comments_made INT DEFAULT 0,
    likes_given INT DEFAULT 0,
    minutes_active DECIMAL(5, 2),
    PRIMARY KEY (user_id, activity_date)
);

CREATE TABLE posts (
    post_id SERIAL PRIMARY KEY,
    user_id INT,
    created_at TIMESTAMP,
    post_type VARCHAR(20),
    engagement_score DECIMAL(5, 2)
);

CREATE TABLE onboarding_events (
    user_id INT,
    event_type VARCHAR(50),
    event_timestamp TIMESTAMP,
    completed BOOLEAN DEFAULT false
);

-- Insert sample data
INSERT INTO users (signup_date, signup_source, country, age_group)
SELECT
    (DATE '2023-01-01' + (random() * 365)::INT),
    CASE (random() * 3)::INT
        WHEN 0 THEN 'organic'
        WHEN 1 THEN 'paid_ad'
        WHEN 2 THEN 'referral'
        ELSE 'social_media'
    END,
    CASE (random() * 4)::INT
        WHEN 0 THEN 'USA'
        WHEN 1 THEN 'UK'
        WHEN 2 THEN 'Canada'
        ELSE 'Germany'
    END,
    CASE (random() * 3)::INT
        WHEN 0 THEN '18-24'
        WHEN 1 THEN '25-34'
        ELSE '35+'
    END
FROM generate_series(1, 100000);

-- Insert daily activity
INSERT INTO daily_activity (user_id, activity_date, posts_created, comments_made, likes_given, minutes_active)
SELECT
    (random() * 99999 + 1)::INT,
    DATE '2023-01-01' + (random() * 365)::INT,
    (random() * 5)::INT,
    (random() * 20)::INT,
    (random() * 50)::INT,
    (random() * 120)::DECIMAL(5,2)
FROM generate_series(1, 5000000);

πŸ“… Part 1: Cohort Analysis

β„ΉοΈπŸ” Cohort Analysis

Cohort analysis groups users by a shared characteristic (signup date, acquisition source) and tracks their behavior over time. Essential for understanding retention and engagement patterns.

Monthly Retention Cohort

-- Monthly retention cohort analysis
WITH cohort_base AS (
    -- Step 1: Define cohorts by signup month
    SELECT
        user_id,
        DATE_TRUNC('month', signup_date)::DATE AS cohort_month
    FROM users
),
activity_with_cohort AS (
    -- Step 2: Join with activity to get activity month
    SELECT
        cb.user_id,
        cb.cohort_month,
        DATE_TRUNC('month', da.activity_date)::DATE AS activity_month
    FROM cohort_base cb
    JOIN daily_activity da ON cb.user_id = da.user_id
    GROUP BY cb.user_id, cb.cohort_month, DATE_TRUNC('month', da.activity_date)
),
cohort_size AS (
    -- Step 3: Count cohort sizes
    SELECT
        cohort_month,
        COUNT(DISTINCT user_id) AS cohort_users
    FROM cohort_base
    GROUP BY cohort_month
),
retention AS (
    -- Step 4: Calculate retention
    SELECT
        awc.cohort_month,
        awc.activity_month,
        EXTRACT(MONTH FROM AGE(awc.activity_month, awc.cohort_month))::INT AS months_since_signup,
        COUNT(DISTINCT awc.user_id) AS active_users
    FROM activity_with_cohort awc
    GROUP BY awc.cohort_month, awc.activity_month
)
SELECT
    r.cohort_month,
    cs.cohort_users,
    r.months_since_signup,
    r.active_users,
    ROUND(r.active_users * 100.0 / cs.cohort_users, 2) AS retention_rate
FROM retention r
JOIN cohort_size cs ON r.cohort_month = cs.cohort_month
ORDER BY r.cohort_month, r.months_since_signup;

Cohort Pivot Table

-- Create retention pivot table
WITH cohort_data AS (
    SELECT
        DATE_TRUNC('month', u.signup_date)::DATE AS cohort_month,
        EXTRACT(MONTH FROM AGE(DATE_TRUNC('month', da.activity_date), DATE_TRUNC('month', u.signup_date)))::INT AS months_since,
        COUNT(DISTINCT u.user_id) AS active_users
    FROM users u
    JOIN daily_activity da ON u.user_id = da.user_id
    GROUP BY 1, 2
),
cohort_sizes AS (
    SELECT
        DATE_TRUNC('month', signup_date)::DATE AS cohort_month,
        COUNT(*) AS size
    FROM users
    GROUP BY 1
)
SELECT
    cd.cohort_month,
    cs.size AS cohort_size,
    ROUND(MAX(CASE WHEN cd.months_since = 0 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 0",
    ROUND(MAX(CASE WHEN cd.months_since = 1 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 1",
    ROUND(MAX(CASE WHEN cd.months_since = 2 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 2",
    ROUND(MAX(CASE WHEN cd.months_since = 3 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 3",
    ROUND(MAX(CASE WHEN cd.months_since = 6 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 6",
    ROUND(MAX(CASE WHEN cd.months_since = 12 THEN cd.active_users * 100.0 / cs.size END), 1) AS "Month 12"
FROM cohort_data cd
JOIN cohort_sizes cs ON cd.cohort_month = cs.cohort_month
GROUP BY cd.cohort_month, cs.size
ORDER BY cd.cohort_month;

πŸ”„ Part 2: User Classification (Power Users vs Churned)

-- Classify users based on activity patterns
WITH user_stats AS (
    SELECT
        u.user_id,
        u.signup_date,
        COUNT(DISTINCT da.activity_date) AS active_days,
        SUM(da.posts_created) AS total_posts,
        SUM(da.comments_made) AS total_comments,
        SUM(da.likes_given) AS total_likes,
        SUM(da.minutes_active) AS total_minutes,
        MAX(da.activity_date) AS last_active_date,
        MIN(da.activity_date) AS first_active_date
    FROM users u
    LEFT JOIN daily_activity da ON u.user_id = da.user_id
    GROUP BY u.user_id, u.signup_date
),
user_classification AS (
    SELECT
        *,
        CURRENT_DATE - last_active_date AS days_since_last_active,
        CURRENT_DATE - signup_date AS account_age_days,
        CASE
            WHEN active_days IS NULL THEN 'Never Active'
            WHEN CURRENT_DATE - last_active_date > 90 THEN 'Churned'
            WHEN CURRENT_DATE - last_active_date > 30 THEN 'At Risk'
            WHEN total_posts > 100 AND active_days > 200 THEN 'Power User'
            WHEN total_posts > 50 AND active_days > 100 THEN 'Engaged'
            WHEN active_days > 30 THEN 'Regular'
            ELSE 'Casual'
        END AS user_segment,
        CASE
            WHEN total_minutes / NULLIF(active_days, 0) > 60 THEN 'High Intensity'
            WHEN total_minutes / NULLIF(active_days, 0) > 30 THEN 'Medium Intensity'
            ELSE 'Low Intensity'
        END AS usage_intensity
    FROM user_stats
)
SELECT
    user_segment,
    usage_intensity,
    COUNT(*) AS user_count,
    ROUND(AVG(account_age_days), 0) AS avg_account_age,
    ROUND(AVG(active_days), 1) AS avg_active_days,
    ROUND(AVG(total_posts), 1) AS avg_posts,
    ROUND(AVG(total_minutes), 1) AS avg_minutes
FROM user_classification
GROUP BY user_segment, usage_intensity
ORDER BY user_count DESC;

πŸ”€ Part 3: Conversion Funnel Analysis

-- Conversion funnel from signup to engagement
WITH funnel_events AS (
    SELECT
        u.user_id,
        u.signup_date,
        MAX(CASE WHEN oe.event_type = 'profile_created' THEN 1 ELSE 0 END) AS profile_created,
        MAX(CASE WHEN oe.event_type = 'first_friend_added' THEN 1 ELSE 0 END) AS friend_added,
        MAX(CASE WHEN oe.event_type = 'first_post_created' THEN 1 ELSE 0 END) AS post_created,
        MAX(CASE WHEN oe.event_type = 'first_comment' THEN 1 ELSE 0 END) AS first_comment,
        MAX(CASE WHEN oe.event_type = 'first_like' THEN 1 ELSE 0 END) AS first_like
    FROM users u
    LEFT JOIN onboarding_events oe ON u.user_id = oe.user_id
    GROUP BY u.user_id, u.signup_date
),
funnel_counts AS (
    SELECT
        COUNT(*) AS total_signups,
        SUM(profile_created) AS profile_created_count,
        SUM(friend_added) AS friend_added_count,
        SUM(post_created) AS post_created_count,
        SUM(first_comment) AS first_comment_count,
        SUM(first_like) AS first_like_count
    FROM funnel_events
)
SELECT
    'Total Signups' AS stage,
    total_signups AS count,
    100.0 AS conversion_rate
FROM funnel_counts

UNION ALL

SELECT
    'Profile Created' AS stage,
    profile_created_count,
    ROUND(profile_created_count * 100.0 / total_signups, 2)
FROM funnel_counts

UNION ALL

SELECT
    'Friend Added' AS stage,
    friend_added_count,
    ROUND(friend_added_count * 100.0 / total_signups, 2)
FROM funnel_counts

UNION ALL

SELECT
    'Post Created' AS stage,
    post_created_count,
    ROUND(post_created_count * 100.0 / total_signups, 2)
FROM funnel_counts

UNION ALL

SELECT
    'First Comment' AS stage,
    first_comment_count,
    ROUND(first_comment_count * 100.0 / total_signups, 2)
FROM funnel_counts

UNION ALL

SELECT
    'First Like' AS stage,
    first_like_count,
    ROUND(first_like_count * 100.0 / total_signups, 2)
FROM funnel_counts;

Funnel by Acquisition Source

-- Conversion funnel by signup source
WITH funnel AS (
    SELECT
        u.signup_source,
        COUNT(DISTINCT u.user_id) AS signups,
        COUNT(DISTINCT CASE WHEN oe.event_type = 'first_post_created' THEN u.user_id END) AS post_creators,
        COUNT(DISTINCT CASE WHEN da.user_id IS NOT NULL THEN u.user_id END) AS active_users
    FROM users u
    LEFT JOIN onboarding_events oe ON u.user_id = oe.user_id AND oe.event_type = 'first_post_created'
    LEFT JOIN daily_activity da ON u.user_id = da.user_id
    GROUP BY u.signup_source
)
SELECT
    signup_source,
    signups,
    post_creators,
    active_users,
    ROUND(post_creators * 100.0 / signups, 2) AS post_creation_rate,
    ROUND(active_users * 100.0 / signups, 2) AS activation_rate
FROM funnel
ORDER BY activation_rate DESC;

πŸ“ˆ Part 4: Rolling 7-Day Active Users (DAU/MAU)

-- Rolling 7-day active users
WITH daily_users AS (
    SELECT
        activity_date,
        COUNT(DISTINCT user_id) AS dau
    FROM daily_activity
    GROUP BY activity_date
)
SELECT
    activity_date,
    dau,
    AVG(dau) OVER (
        ORDER BY activity_date
        ROWS BETWEEN 6 PRECEDING AND CURRENT ROW
    ) AS rolling_7d_avg,
    AVG(dau) OVER (
        ORDER BY activity_date
        ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
    ) AS rolling_30d_avg,
    dau * 1.0 / NULLIF(AVG(dau) OVER (
        ORDER BY activity_date
        ROWS BETWEEN 29 PRECEDING AND CURRENT ROW
    ), 0) AS mau_ratio
FROM daily_users
ORDER BY activity_date;

DAU/MAU Stickiness

-- Calculate stickiness (DAU/MAU ratio)
WITH monthly_active AS (
    SELECT
        user_id,
        DATE_TRUNC('month', activity_date)::DATE AS month,
        COUNT(DISTINCT activity_date) AS active_days
    FROM daily_activity
    GROUP BY user_id, DATE_TRUNC('month', activity_date)
),
daily_active AS (
    SELECT
        activity_date,
        user_id
    FROM daily_activity
),
stickiness AS (
    SELECT
        da.activity_date,
        COUNT(DISTINCT da.user_id) AS dau,
        (SELECT COUNT(DISTINCT user_id)
         FROM monthly_active
         WHERE month = DATE_TRUNC('month', da.activity_date)) AS mau
    FROM daily_active da
    GROUP BY da.activity_date
)
SELECT
    activity_date,
    dau,
    mau,
    ROUND(dau * 100.0 / NULLIF(mau, 0), 2) AS stickiness_pct
FROM stickiness
WHERE activity_date >= '2023-06-01'
ORDER BY activity_date;

πŸ† Part 5: Percentile and Distribution Analysis

-- User engagement distribution
WITH user_engagement AS (
    SELECT
        user_id,
        SUM(minutes_active) AS total_minutes,
        SUM(posts_created) AS total_posts,
        SUM(comments_made) AS total_comments,
        COUNT(DISTINCT activity_date) AS active_days
    FROM daily_activity
    GROUP BY user_id
)
SELECT
    user_id,
    total_minutes,
    total_posts,
    active_days,
    NTILE(10) OVER (ORDER BY total_minutes) AS engagement_decile,
    PERCENT_RANK() OVER (ORDER BY total_minutes) AS percentile_rank,
    CUME_DIST() OVER (ORDER BY total_minutes) AS cumulative_distribution,
    CASE
        WHEN PERCENT_RANK() OVER (ORDER BY total_minutes) >= 0.9 THEN 'Top 10%'
        WHEN PERCENT_RANK() OVER (ORDER BY total_minutes) >= 0.75 THEN 'Top 25%'
        WHEN PERCENT_RANK() OVER (ORDER BY total_minutes) >= 0.5 THEN 'Top 50%'
        ELSE 'Bottom 50%'
    END AS user_tier
FROM user_engagement
ORDER BY total_minutes DESC
LIMIT 100;

πŸ“Š Part 6: Growth Metrics

-- Month-over-month growth
WITH monthly_metrics AS (
    SELECT
        DATE_TRUNC('month', signup_date)::DATE AS signup_month,
        COUNT(*) AS new_users,
        SUM(SUM(COUNT(*))) OVER (ORDER BY DATE_TRUNC('month', signup_date)) AS cumulative_users
    FROM users
    GROUP BY DATE_TRUNC('month', signup_date)
)
SELECT
    signup_month,
    new_users,
    cumulative_users,
    ROUND(
        (new_users - LAG(new_users) OVER (ORDER BY signup_month)) * 100.0 /
        NULLIF(LAG(new_users) OVER (ORDER BY signup_month), 0),
        2
    ) AS growth_rate_pct
FROM monthly_metrics
ORDER BY signup_month;

Net Revenue Retention

-- Calculate net revenue retention by cohort
WITH monthly_revenue AS (
    SELECT
        u.user_id,
        DATE_TRUNC('month', u.signup_date)::DATE AS cohort_month,
        DATE_TRUNC('month', da.activity_date)::DATE AS activity_month,
        SUM(da.posts_created * 0.01 + da.comments_made * 0.005 + da.likes_given * 0.001) AS estimated_revenue
    FROM users u
    JOIN daily_activity da ON u.user_id = da.user_id
    GROUP BY 1, 2, 3
),
cohort_revenue AS (
    SELECT
        cohort_month,
        activity_month,
        EXTRACT(MONTH FROM AGE(activity_month, cohort_month))::INT AS month_number,
        SUM(estimated_revenue) AS total_revenue,
        COUNT(DISTINCT user_id) AS active_paying_users
    FROM monthly_revenue
    GROUP BY 1, 2, 3
)
SELECT
    cohort_month,
    month_number,
    total_revenue,
    active_paying_users,
    SUM(total_revenue) OVER (
        PARTITION BY cohort_month
        ORDER BY month_number
    ) AS cumulative_revenue
FROM cohort_revenue
ORDER BY cohort_month, month_number;

🎯 Quiz Section

πŸ† Best Practices for Interviews

πŸ’‘βœ… Analytical Query Best Practices

1. Define Metrics Clearly:

-- Document metric definitions
-- DAU: Users with any activity on a given day
-- MAU: Users with any activity in a calendar month
-- Retention: Users active N months after signup

2. Handle Edge Cases:

-- Always handle NULLs and zero divisions
ROUND(count * 100.0 / NULLIF(total, 0), 2) AS percentage

-- Handle users with no activity
COALESCE(SUM(posts), 0) AS total_posts

3. Use CTEs for Complex Calculations:

-- Break complex queries into logical steps
WITH
step1_raw_data AS (...),
step2_aggregated AS (...),
step3_calculated AS (...)
SELECT * FROM step3_calculated;

4. Validate Results:

-- Sanity check: sum of parts should equal total
SELECT
    SUM(active_users) AS total_active,
    (SELECT COUNT(DISTINCT user_id) FROM daily_activity) AS actual_distinct
FROM monthly_active;

⚠️⚠️ Common Pitfalls

  1. Survivorship bias: Only analyzing users who completed signup
  2. Time zone issues: Mixing UTC and local dates
  3. Double counting: Same user appearing multiple times
  4. Missing data: Users with no activity vs users who never signed up

Advertisement