CW

Snowflake Notebooks

Free Lesson

Advertisement

Snowflake Notebooks

Snowflake Notebooks provide an interactive environment for data exploration, analysis, and visualization directly within the Snowflake platform.

Creating Notebooks

SQL Cells

-- SQL cell for data exploration:
-- column inventory of the PRODUCTION schema, one row per column,
-- listed table by table in each table's ordinal column order.
SELECT
  cols.table_name,
  cols.column_name,
  cols.data_type,
  cols.is_nullable
FROM INFORMATION_SCHEMA.COLUMNS AS cols
WHERE cols.table_schema = 'PRODUCTION'
ORDER BY
  cols.table_name,
  cols.ordinal_position;

-- Aggregate query: per-day order count, revenue and average order value
-- for orders dated within the last month (relative to the current date).
SELECT
  o.order_date,
  COUNT(*)      AS order_count,
  SUM(o.amount) AS total_revenue,
  AVG(o.amount) AS avg_order_value
FROM orders AS o
WHERE o.order_date >= DATEADD('month', -1, CURRENT_DATE())
GROUP BY o.order_date
ORDER BY o.order_date;

Python Cells

# Python cell using Snowpark
from snowflake.snowpark.context import get_active_session
from snowflake.snowpark.functions import avg, col, count
# Imported under an alias so Python's builtin sum() is not shadowed for the
# rest of the notebook (names defined in one cell stay visible in later cells).
from snowflake.snowpark.functions import sum as sum_

# Reuse the session the notebook is already running in instead of opening a
# second connection from a named connection profile.
session = get_active_session()

# Query data: orders placed on or after 2024-01-01
df = session.table("orders").filter(col("order_date") >= "2024-01-01")

# Perform analysis: one row per order_date with count, revenue and average
results = df.group_by("order_date").agg(
    count("*").alias("order_count"),
    sum_("amount").alias("total_revenue"),
    avg("amount").alias("avg_order_value")
).order_by("order_date").collect()

# Display results (collect() returns a list of Row objects)
print(results)

Cell Types

SQL Cells

-- Query with parameters:
-- orders whose order_date lies between the start_date and end_date
-- placeholders, inclusive on both ends.
SELECT o.*
FROM orders AS o
WHERE o.order_date >= '{{start_date}}'
  AND o.order_date <= '{{end_date}}';

-- Use macros: date_filter(column_name, days_back) expands to a predicate that
-- keeps rows whose column_name is on or after the current date minus days_back days.
-- NOTE(review): macro definitions need a Jinja-rendering layer; confirm that the
-- target SQL cell supports macro blocks and not only variable substitution.
{% macro date_filter(column_name, days_back) %}
  {{column_name}} >= DATEADD('day', -{{days_back}}, CURRENT_DATE())
{% endmacro %}

-- Apply macro: with these arguments the call renders to
--   order_date >= DATEADD('day', -30, CURRENT_DATE())
SELECT * FROM orders
WHERE {{ date_filter('order_date', 30) }};

Python Cells

# Python cell with visualization
import pandas as pd
import matplotlib.pyplot as plt

# Query data: daily order count and revenue for the last three months
query = """
SELECT
  order_date,
  COUNT(*) as order_count,
  SUM(amount) as total_revenue
FROM orders
WHERE order_date >= DATEADD('month', -3, CURRENT_DATE())
GROUP BY order_date
ORDER BY order_date
"""

# Execute query and load the result straight into pandas.
# to_pandas() labels the columns with the result-set names (ORDER_DATE,
# TOTAL_REVENUE, ...), which the plotting code below relies on; building a
# DataFrame from collect()'s Row list does not guarantee those labels.
# `session` is the Snowpark session created in an earlier cell.
df: pd.DataFrame = session.sql(query).to_pandas()

# Create visualization: revenue over time as a line chart
plt.figure(figsize=(12, 6))
plt.plot(df['ORDER_DATE'], df['TOTAL_REVENUE'])
plt.title('Revenue Trend')
plt.xlabel('Date')
plt.ylabel('Revenue')
plt.xticks(rotation=45)  # tilt date labels so they do not overlap
plt.tight_layout()
plt.show()

Markdown Cells

# Analysis Title

## Overview
This notebook analyzes customer behavior and revenue trends.

## Key Findings
- Revenue increased 15% month-over-month
- Top 10 customers contribute 40% of revenue
- Mobile orders grew 25% in Q1

## Methodology
1. Data extracted from production tables
2. Aggregated by time period
3. Visualized trends and patterns

Use Markdown cells to document your analysis, methodology, and findings. This makes notebooks self-documenting and easier to collaborate on with team members.

Visualization Capabilities

Built-in Charts

-- Create chart from query results:
-- total sales and order count per category, highest-selling category first.
SELECT
  category,
  SUM(amount) AS total_sales,
  COUNT(*)    AS order_count
FROM orders AS o
INNER JOIN products AS p
  ON o.product_id = p.product_id
GROUP BY category
ORDER BY total_sales DESC;

-- The notebook will auto-generate a bar chart

Custom Visualizations

# Create custom visualization
import plotly.express as px

# Query data: total sales for every (region, product category) pair
sales_sql = """
  SELECT
    region,
    product_category,
    SUM(amount) as total_sales
  FROM orders
  JOIN products ON orders.product_id = products.product_id
  GROUP BY region, product_category
"""
df = session.sql(sales_sql).to_pandas()

# Create interactive chart: regions on the x axis, one bar per category
# within each region (barmode='group' places them side by side).
chart_options = dict(
    x='REGION',
    y='TOTAL_SALES',
    color='PRODUCT_CATEGORY',
    title='Sales by Region and Category',
    barmode='group',
)
fig = px.bar(df, **chart_options)
fig.show()

Collaboration Features

Sharing Notebooks

-- Share notebook with team
-- Use the notebook UI to:
-- 1. Set sharing permissions
-- 2. Add collaborators
-- 3. Configure access levels

-- Create shared notebook.
-- Only COMMENT is set here: WITH SAMPLE_DATA is not one of the parameters
-- listed for CREATE NOTEBOOK, so the statement would be rejected with it.
-- OR REPLACE overwrites any existing notebook of the same name.
CREATE OR REPLACE NOTEBOOK shared_analysis
  COMMENT = 'Collaborative analysis for Q1 results';

Version Control

# Check notebook version
import json

# Notebook metadata: descriptive fields recorded for this notebook revision
notebook_info = dict(
    name="customer_analysis",
    version="1.0",
    author="data_team",
    created_at="2024-01-15",
    last_modified="2024-01-20",
)

# Save version: serialize first, then overwrite notebook_versions.json
serialized = json.dumps(notebook_info, indent=2)
with open('notebook_versions.json', 'w') as f:
    f.write(serialized)

Advanced Features

Parameterized Notebooks

-- Define parameters at the top: change these three values to re-run the
-- analysis for a different date window or region.
-- NOTE(review): set tags need a Jinja-rendering layer; confirm the target
-- SQL cell supports them and not only variable substitution.
{% set start_date = '2024-01-01' %}
{% set end_date = '2024-12-31' %}
{% set region = 'US' %}

-- Use parameters in queries: orders for the chosen region whose order_date
-- falls inside the window, inclusive on both ends.
SELECT o.*
FROM orders AS o
WHERE o.region = '{{region}}'
  AND o.order_date >= '{{start_date}}'
  AND o.order_date <= '{{end_date}}';

Scheduled Execution

-- Create task to execute notebook logic every day at 06:00 America/New_York.
-- Parameters are listed in the order given by the CREATE TASK reference
-- (WAREHOUSE before SCHEDULE), with the warehouse written as an identifier.
-- notebook_procedure() must already exist; to run a notebook object directly,
-- Snowflake also provides the EXECUTE NOTEBOOK command.
CREATE OR REPLACE TASK daily_analysis
  WAREHOUSE = COMPUTE_WH
  SCHEDULE = 'USING CRON 0 6 * * * America/New_York'
AS
  CALL notebook_procedure();

-- A newly created task starts out suspended; resume it so the schedule fires.
ALTER TASK daily_analysis RESUME;

-- Store notebook results.
-- IF NOT EXISTS keeps previously stored results when this cell is re-run;
-- CREATE OR REPLACE would drop the table and its history every time.
CREATE TABLE IF NOT EXISTS notebook_results (
  -- Generated surrogate id, so writers do not have to supply one.
  -- Snowflake records PRIMARY KEY on standard tables but does not enforce uniqueness.
  result_id INTEGER AUTOINCREMENT PRIMARY KEY,
  notebook_name STRING,
  execution_date TIMESTAMP_NTZ,  -- no time zone stored with the value
  result_data VARIANT            -- semi-structured result payload
);

Snowpark Integration

# Advanced Snowpark usage
from snowflake.snowpark.context import get_active_session
# datediff and current_date are used below and must be imported explicitly;
# without them the transformation fails with NameError.
from snowflake.snowpark.functions import col, current_date, datediff, lit, when
from snowflake.snowpark.types import StructType, StructField, StringType, IntegerType

# Reuse the session the notebook is already running in
session = get_active_session()

# Complex transformation: add two derived columns to the orders table
df = (
    session.table("orders")
    # Bucket each order by amount: > 1000 HIGH, > 500 MEDIUM, otherwise LOW.
    # Branches are evaluated in order, so MEDIUM means 500 < amount <= 1000.
    .with_column(
        "order_category",
        when(col("amount") > 1000, lit("HIGH"))
        .when(col("amount") > 500, lit("MEDIUM"))
        .otherwise(lit("LOW"))
    )
    # Days elapsed between the order date and the current date
    .with_column(
        "days_since_order",
        datediff("day", col("order_date"), current_date())
    )
)

# Show results
df.show()

Snowpark enables complex data transformations using Python. Use it for advanced analytics, machine learning preprocessing, and data quality checks within notebooks.

Notebook Best Practices

| Practice | Implementation | Benefit |
|---|---|---|
| Document | Use Markdown cells | Self-documenting analysis |
| Parameterize | Use template variables | Reusable notebooks |
| Modularize | Break into logical sections | Maintainable code |
| Visualize | Use charts and graphs | Better insights |
| Collaborate | Share and comment | Team productivity |
| Version | Track changes | Audit trail |

-- Create notebook template: a notebook object used as the starting point
-- for standard analyses (OR REPLACE overwrites an existing one of that name).
CREATE OR REPLACE NOTEBOOK analysis_template COMMENT = 'Template for standard analysis';

-- Add standard sections
-- 1. Data exploration
-- 2. Quality checks
-- 3. Analysis
-- 4. Visualization
-- 5. Conclusions

Common Use Cases

Data Exploration

-- Quick data profile of the orders table in a single row:
-- row count, distinct customers, order-date range and mean order amount.
SELECT
  COUNT(*)                      AS total_rows,
  COUNT(DISTINCT o.customer_id) AS unique_customers,
  MIN(o.order_date)             AS min_date,
  MAX(o.order_date)             AS max_date,
  AVG(o.amount)                 AS avg_amount
FROM orders AS o;

Ad-hoc Analysis

# Quick analysis: weekly order count and revenue.
# Grouping and ordering are spelled out by expression/alias rather than by
# column position, so editing the SELECT list cannot silently change them.
df = session.sql("""
  SELECT
    DATE_TRUNC('week', order_date) AS week,
    COUNT(*) AS orders,
    SUM(amount) AS revenue
  FROM orders
  GROUP BY DATE_TRUNC('week', order_date)
  ORDER BY week
""").to_pandas()

# Display trends (to_string() prints every row without truncation)
print(df.to_string())

Reporting

-- Create report dataset: one row per customer with order count, lifetime
-- spend and a spend-based tier (> 10000 VIP, > 5000 Premium, else Standard).
CREATE OR REPLACE TABLE report_data AS
WITH customer_summary AS (
  -- LEFT JOIN keeps customers that have never ordered.
  SELECT
    c.customer_id,
    c.customer_name,
    COUNT(o.order_id) AS total_orders,
    -- SUM over zero matched rows is NULL; report those customers as 0 spend
    -- so the column agrees with total_orders = 0 and sorts predictably.
    COALESCE(SUM(o.amount), 0) AS total_spend
  FROM customers AS c
  LEFT JOIN orders AS o
    ON c.customer_id = o.customer_id
  GROUP BY c.customer_id, c.customer_name
)
SELECT
  customer_id,
  customer_name,
  total_orders,
  total_spend,
  CASE
    WHEN total_spend > 10000 THEN 'VIP'
    WHEN total_spend > 5000 THEN 'Premium'
    ELSE 'Standard'
  END AS customer_tier
FROM customer_summary;

-- Export to presentation: highest-spending customers first.
-- Columns are listed explicitly so the export layout does not change if
-- report_data gains columns. NULLS LAST keeps any customer without a spend
-- figure at the bottom; Snowflake's default for DESC puts NULLs first.
SELECT
  customer_id,
  customer_name,
  total_orders,
  total_spend,
  customer_tier
FROM report_data
ORDER BY total_spend DESC NULLS LAST;

Key Takeaways:

  • Snowflake Notebooks combine SQL, Python, and Markdown
  • Built-in visualizations enable interactive data exploration
  • Collaboration features support team-based analysis
  • Parameterized notebooks enable reusable templates
  • Snowpark integration enables advanced Python analytics
  • Best practices include documentation and modularization

Advertisement

Need Expert Snowflake Help?

Get personalized warehouse optimization, data modeling, or Snowflake platform consulting.

Advertisement