Static vs Interactive: The Decision Framework
The choice between static and interactive visualization is a cognitive design decision that affects how viewers process information.
Plotly Express: The Grammar of Interactive Graphics
Scatter Plots
import plotly.express as px
import plotly.graph_objects as go
import plotly.subplots as sp
import pandas as pd
import numpy as np
# Bubble chart of the Gapminder data for a single year: GDP per capita on a
# log-scaled x-axis, life expectancy on y, bubble area driven by population.
df = px.data.gapminder()
snapshot_2007 = df.query("year == 2007")
axis_labels = {
    "gdpPercap": "GDP per Capita (USD, log scale)",
    "lifeExp": "Life Expectancy (years)",
}
fig = px.scatter(
    snapshot_2007,
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    size_max=60,
    color="continent",
    hover_name="country",
    log_x=True,
    labels=axis_labels,
    title="GDP per Capita vs Life Expectancy (2007)",
)
# Apply the clean white theme and a uniform font to the whole figure.
fig.update_layout(template="plotly_white", font={"family": "Arial", "size": 12})
fig.show()
Line Charts
# Time series of life expectancy for one country; `df_canada` is reused by
# later snippets, so the name is kept.
df_canada = px.data.gapminder().query("country == 'Canada'")
line_labels = {"year": "Year", "lifeExp": "Life Expectancy (years)"}
fig = px.line(
    df_canada,
    x="year",
    y="lifeExp",
    markers=True,
    labels=line_labels,
    template="plotly_white",
    title="Life Expectancy in Canada Over Time",
)
# Thicken the line and enlarge the markers so individual data points stand out.
fig.update_traces(
    line={"color": "#2E86AB", "width": 3},
    marker={"size": 8},
)
fig.show()
Bar Charts
# Grouped bar chart of the *average* total bill per day, split by sex.
df_tips = px.data.tips()
# px.bar draws one bar mark per input row, so passing the raw tips table would
# not display averages. Aggregate to one mean value per (day, sex) first so
# the bars and their text labels match the chart title.
avg_bill = df_tips.groupby(["day", "sex"], as_index=False)["total_bill"].mean()
fig = px.bar(
    avg_bill, x="day", y="total_bill", color="sex",
    barmode="group",
    title="Average Total Bill by Day and Gender",
    text_auto=".2f",
    # groupby sorts the day labels alphabetically; restore weekday order.
    category_orders={"day": ["Thur", "Fri", "Sat", "Sun"]},
    template="plotly_white"
)
fig.show()
Histograms and Distribution Plots
# Overlaid, density-normalised histograms of the bill amount for each meal
# time, with a rug plot in the margin showing the individual observations.
histogram_options = {
    "x": "total_bill",
    "color": "time",
    "nbins": 30,
    "opacity": 0.7,
    "histnorm": "probability density",
    "marginal": "rug",
    "title": "Distribution of Total Bills (Lunch vs Dinner)",
    "template": "plotly_white",
}
fig = px.histogram(df_tips, **histogram_options)
fig.show()
Kernel Density Estimator
$$\hat{f}_h(x) = \frac{1}{nh} \sum_{i=1}^{n} K\!\left(\frac{x - x_i}{h}\right)$$
Here,
- $K$ = kernel function (typically Gaussian)
- $h$ = bandwidth (smoothing parameter)
- $n$ = number of data points
Box and Violin Plots
# Both distribution plots share the same data mapping; only the plot-specific
# options and the title differ.
per_day_options = {
    "x": "day",
    "y": "total_bill",
    "color": "day",
    "points": "all",
    "template": "plotly_white",
}

# Notched box plot with every observation drawn next to its box.
fig = px.box(
    df_tips,
    notched=True,
    title="Total Bill Distribution by Day",
    **per_day_options,
)
fig.show()

# Violin plot with an embedded box plot inside each violin.
fig = px.violin(
    df_tips,
    box=True,
    title="Violin Plot of Total Bills by Day",
    **per_day_options,
)
fig.show()
Silverman's Rule of Thumb for Bandwidth
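$$h = 0.9 \, \min\!\left(\hat{\sigma}, \frac{\mathrm{IQR}}{1.34}\right) n^{-1/5}$$
Here,
- $\hat{\sigma}$ = sample standard deviation
- $\mathrm{IQR}$ = interquartile range
- $n$ = sample size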
Subplots: Multi-Panel Layouts
from plotly.subplots import make_subplots
# 2x2 grid: scatter (top-left), bar (top-right), histogram (bottom-left),
# and one box per day (bottom-right).
panel_titles = ("Scatter", "Bar", "Histogram", "Box")
fig = make_subplots(
    rows=2,
    cols=2,
    subplot_titles=panel_titles,
    horizontal_spacing=0.12,
    vertical_spacing=0.15,
)

# Top-left: tip against total bill.
scatter_trace = go.Scatter(
    x=df_tips["total_bill"],
    y=df_tips["tip"],
    mode="markers",
    marker={"size": 6, "opacity": 0.6},
    name="Scatter",
)
fig.add_trace(scatter_trace, row=1, col=1)

# Top-right: mean total bill per day.
day_means = df_tips.groupby("day")["total_bill"].mean().reset_index()
bar_trace = go.Bar(
    x=day_means["day"],
    y=day_means["total_bill"],
    name="Bar",
    marker_color="#2E86AB",
)
fig.add_trace(bar_trace, row=1, col=2)

# Bottom-left: distribution of total bills.
histogram_trace = go.Histogram(
    x=df_tips["total_bill"],
    nbinsx=20,
    name="Histogram",
    marker_color="#A23B72",
)
fig.add_trace(histogram_trace, row=2, col=1)

# Bottom-right: one box trace per day, all added to the same panel.
for day in df_tips["day"].unique():
    bills_for_day = df_tips.loc[df_tips["day"] == day, "total_bill"]
    fig.add_trace(go.Box(y=bills_for_day, name=day), row=2, col=2)

fig.update_layout(
    title_text="Multi-Panel Dashboard",
    height=700,
    width=900,
    template="plotly_white",
    showlegend=False,
)
fig.show()
Secondary Y-Axes
# Dual-axis chart: monthly revenue as a line on the primary y-axis and order
# counts as bars on the secondary y-axis.
# Seed the generator so the synthetic figures are identical on every run,
# matching the seeded map example elsewhere in this tutorial.
np.random.seed(42)
months = pd.date_range("2024-01-01", periods=12, freq="MS")
revenue = np.random.uniform(50000, 120000, 12)
orders = np.random.poisson(200, 12)

# A single-cell subplot grid whose only cell is given a secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])
fig.add_trace(
    go.Scatter(x=months, y=revenue, name="Revenue ($)",
               line=dict(color="#2E86AB", width=3), mode="lines+markers"),
    secondary_y=False
)
# Semi-transparent bars so the revenue line remains readable where they overlap.
fig.add_trace(
    go.Bar(x=months, y=orders, name="Orders",
           marker_color="rgba(162, 59, 114, 0.5)"),
    secondary_y=True
)
fig.update_yaxes(title_text="Revenue ($)", secondary_y=False)
fig.update_yaxes(title_text="Number of Orders", secondary_y=True)
fig.update_layout(title="Revenue and Orders Over Time", template="plotly_white")
fig.show()
Statistical Visualizations
Correlation Heatmaps
# Pairwise correlations of the numeric tip columns, drawn as an annotated
# heatmap on a diverging colour scale pinned to the full [-1, 1] range.
numeric_cols = ["total_bill", "tip", "size"]
corr_matrix = df_tips.loc[:, numeric_cols].corr()
fig = px.imshow(
    corr_matrix,
    zmin=-1,
    zmax=1,
    color_continuous_scale="RdBu_r",
    text_auto=".3f",
    title="Correlation Matrix Heatmap",
)
fig.show()
Pearson Correlation Coefficient
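$$r = \frac{\sum_{i=1}^{n} (x_i - \bar{x})(y_i - \bar{y})}{\sqrt{\sum_{i=1}^{n} (x_i - \bar{x})^2}\,\sqrt{\sum_{i=1}^{n} (y_i - \bar{y})^2}}$$
Here,
- $\bar{x}, \bar{y}$ = sample means
- $n$ = number of paired observations
- $r$ = correlation coefficient (−1 to +1)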
Geographical Maps
# Choropleth Map
# Countries are matched through the ISO alpha-3 codes in the `iso_alpha`
# column and shaded by life expectancy.
gapminder = px.data.gapminder()
df_2007 = gapminder.query("year == 2007")
fig = px.choropleth(
    df_2007,
    locations="iso_alpha",
    color="lifeExp",
    color_continuous_scale=px.colors.sequential.Viridis,
    hover_name="country",
    title="World Life Expectancy (2007)",
)
# Drop the frame around the map but keep the coastlines.
fig.update_layout(geo={"showframe": False, "showcoastlines": True})
fig.show()
# Scatter Mapbox
# Synthetic points scattered around a fixed centre coordinate. The random
# draws happen in the order lat -> lon -> value, which determines the data
# produced for the fixed seed.
np.random.seed(42)
n_points = 200
latitudes = 40.7128 + np.random.normal(0, 0.05, n_points)
longitudes = -74.0060 + np.random.normal(0, 0.05, n_points)
values = np.random.exponential(50, n_points)
df_map = pd.DataFrame({"lat": latitudes, "lon": longitudes, "value": values})

# Marker colour and marker size both encode the same `value` column.
fig = px.scatter_mapbox(
    df_map,
    lat="lat",
    lon="lon",
    color="value",
    size="value",
    size_max=15,
    color_continuous_scale="Viridis",
    zoom=11,
    mapbox_style="carto-positron",
)
fig.show()
Animated Charts
# Animated bubble chart: one frame per year, with each country tracked across
# frames through `animation_group`.
df = px.data.gapminder()
# Fixed axis ranges keep the axes from rescaling between frames.
fixed_ranges = {"range_x": [100, 100000], "range_y": [25, 90]}
fig = px.scatter(
    df,
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    size_max=55,
    color="continent",
    hover_name="country",
    log_x=True,
    animation_frame="year",
    animation_group="country",
    title="Global Development Over Time (1952-2007)",
    **fixed_ranges,
)
fig.show()
Dashboard-Ready: Updatemenus and Sliders
# Dropdown filter
# One scatter trace per continent plus a dropdown that toggles which traces
# are visible.
fig = go.Figure()
continents = list(df["continent"].unique())
# The only continent drawn when the figure first renders; the others start
# hidden but remain clickable in the legend.
default_continent = "Asia"

for continent in continents:
    cdf = df[df["continent"] == continent]
    fig.add_trace(go.Scatter(
        x=cdf["gdpPercap"], y=cdf["lifeExp"],
        mode="markers", marker=dict(size=cdf["pop"] / 2e7, opacity=0.6),
        name=continent,
        visible=True if continent == default_continent else "legendonly"
    ))

# Button 0 shows every trace; buttons 1..N each show exactly one continent.
show_all_button = dict(
    label="All Continents", method="update",
    args=[{"visible": [True] * len(continents)}]
)
single_continent_buttons = [
    dict(label=selected, method="update",
         args=[{"visible": [name == selected for name in continents]}])
    for selected in continents
]

fig.update_layout(
    updatemenus=[dict(
        buttons=[show_all_button] + single_continent_buttons,
        # Without this the dropdown defaults to button 0 ("All Continents")
        # even though only the default continent is visible. The +1 skips
        # the "All Continents" entry.
        active=continents.index(default_continent) + 1,
        direction="down", showactive=True, x=0.17, y=1.15
    )],
    template="plotly_white", height=550
)
fig.show()
# Range slider
# Filled line chart with range-selector buttons and a draggable range slider.
fig = go.Figure()
# The x-axis below is declared as type="date". Raw integer years on a date
# axis are read as milliseconds since the epoch, which would collapse every
# point onto early 1970 and break the 10Y/20Y buttons. Convert each year to a
# real timestamp (1 January of that year) first.
year_dates = pd.to_datetime(df_canada["year"].astype(str), format="%Y")
fig.add_trace(go.Scatter(
    x=year_dates, y=df_canada["lifeExp"],
    mode="lines+markers", line=dict(color="#2E86AB", width=3),
    fill="tozeroy", fillcolor="rgba(46,134,171,0.2)"
))
fig.update_layout(
    xaxis=dict(
        # Quick-zoom buttons counting back from the end of the data.
        rangeselector=dict(buttons=[
            dict(count=10, label="10Y", step="year", stepmode="backward"),
            dict(count=20, label="20Y", step="year", stepmode="backward"),
            dict(step="all", label="All")
        ]),
        rangeslider=dict(visible=True), type="date"
    ),
    title="Life Expectancy with Range Slider", template="plotly_white"
)
fig.show()
Customizing Themes and Export
# Built-in templates
import plotly.io as pio
# Make the built-in white theme the default for figures created from here on.
pio.templates.default = "plotly_white"
# Custom template
# House style: centred Helvetica title, white backgrounds, and a fixed
# five-colour palette used for successive traces.
custom_template = go.layout.Template(
    layout=go.Layout(
        title=dict(font=dict(family="Helvetica", size=18, color="#1a1a2e"),
                   x=0.5, xanchor="center"),
        font=dict(family="Helvetica", size=12, color="#1a1a2e"),
        plot_bgcolor="white", paper_bgcolor="white",
        colorway=["#2E86AB", "#A23B72", "#F18F01", "#C73E1D", "#3B1F2B"]
    )
)
# Register the template under a name; otherwise it is built but never usable.
# Once registered it can be selected with template="custom", or layered on a
# built-in theme with template="plotly_white+custom". The default set above
# is left unchanged.
pio.templates["custom"] = custom_template
# Export
# Both static exports share the same logical canvas size.
canvas_size = {"width": 800, "height": 500}
# Raster: scale=3 multiplies the pixel dimensions (2400x1500 px), which is
# roughly 300 DPI when printed 8 inches wide.
fig.write_image("figure.png", scale=3, **canvas_size)
# Vector: resolution-independent output.
fig.write_image("figure.svg", **canvas_size)
# Standalone: Plotly.js is embedded in the HTML file itself.
fig.write_html("interactive.html", include_plotlyjs=True)
File Size Optimization
A standalone HTML with include_plotlyjs=True bundles ~3.5 MB Plotly.js. For multiple charts, use include_plotlyjs='cdn' to load once from CDN, reducing per-chart overhead to ~5 KB.
Library Comparison
| Criterion | Matplotlib | Seaborn | Plotly |
|---|---|---|---|
| Best for | Fine-grained control | Statistical plots | Interactive dashboards |
| Interactivity | None | None | Native |
| Statistical models | Manual | Built-in | Limited |
| 3D plotting | Axes3D | Limited | scatter_3d |
| Animation | FuncAnimation | Limited | animation_frame |
| Export quality | Excellent | Excellent | Good (kaleido) |
| Browser embedding | No | No | Yes |
Key Takeaways
Summary: Advanced Visualization
- Interactive visualization is a cognitive design choice — use Plotly when stakeholders need to explore data; use matplotlib when precision and reproducibility matter
- Plotly Express provides the highest-level API — a single function call generates a fully interactive figure with hover, zoom, and legends
- `make_subplots` is the foundation for multi-panel dashboards — combine `specs` with `secondary_y` for complex layouts
- Statistical visualizations (marginal distributions, correlation heatmaps, pair plots) are built into Plotly Express
- Animations via `animation_frame` create self-contained temporal visualizations without external video tools
- Export to HTML for web embedding; export to PNG/SVG via `kaleido` for print and publications at 300+ DPI
- The matplotlib vs plotly decision depends on your medium (print vs web), audience, and need for interactivity
Practice Exercises
- Create an interactive scatter plot using `px.data.iris()` with marginal distributions and a trendline
- Build a 2x3 subplot dashboard using the `px.data.gapminder()` dataset
- Create the animated Gapminder bubble chart with custom colorscale and range slider
- Create a masked correlation heatmap (upper triangle only) with annotations
- Create a choropleth map colored by GDP per capita with scatter_mapbox overlay