Functions, Lambda and Comprehensions
Functions are the building blocks of modular, reusable code. Comprehensions enable concise, Pythonic data transformations.
Function Basics
def greet(name: str, greeting: str = "Hello") -> str:
    """Return a greeting string."""
    # Salutation first, then the name, closed with an exclamation mark.
    message = f"{greeting}, {name}!"
    return message


# Calling with and without the optional salutation
greet("Alice")  # "Hello, Alice!"
greet("Bob", "Hi")  # "Hi, Bob!"

# Introspection: the docstring and the annotations live on the function object
greet.__doc__  # "Return a greeting string."
greet.__annotations__  # {'name': <class 'str'>, 'greeting': <class 'str'>, 'return': <class 'str'>}
*args and **kwargs
def flexible(*args, **kwargs):
    """Print the arguments the function was called with.

    Surplus positional arguments arrive packed in the tuple ``args``;
    surplus keyword arguments arrive packed in the dict ``kwargs``.
    """
    print(f"Positional: {args}")  # tuple of positional values
    print(f"Keyword: {kwargs}")  # dict mapping keyword name -> value


flexible(1, 2, 3, name="Alice", age=30)
# Output:
# Positional: (1, 2, 3)
# Keyword: {'name': 'Alice', 'age': 30}
# Real data science use case
def create_model(model_type, *args, **kwargs):
    """Instantiate a classifier selected by a short type code.

    Args:
        model_type: ``"rf"`` for scikit-learn's ``RandomForestClassifier``
            or ``"xgb"`` for XGBoost's ``XGBClassifier``.
        *args: Positional arguments forwarded to the model constructor.
        **kwargs: Keyword arguments forwarded to the model constructor.

    Returns:
        The newly constructed model instance.

    Raises:
        ValueError: If ``model_type`` is not one of the supported codes.
    """
    # The imports are deferred so that only the library that is actually
    # requested has to be installed.
    if model_type == "rf":
        from sklearn.ensemble import RandomForestClassifier

        return RandomForestClassifier(*args, **kwargs)
    if model_type == "xgb":
        from xgboost import XGBClassifier

        return XGBClassifier(*args, **kwargs)
    # An unknown code used to fall through and silently return None, which
    # only surfaced later as an AttributeError far from the real mistake.
    raise ValueError(
        f"Unknown model_type {model_type!r}; expected 'rf' or 'xgb'"
    )
model = create_model("rf", n_estimators=100, max_depth=5, random_state=42)
Scope and Closures
# LEGB Rule: Local → Enclosing → Global → Built-in
x = "global"  # module-level binding (the "G" in LEGB)


def outer():
    """Print ``x`` from a nested and an enclosing scope to show shadowing."""
    x = "enclosing"  # shadows the module-level x inside outer()

    def inner():
        x = "local"  # shadows the enclosing x, but only inside inner()
        print(x)  # "local"

    inner()
    print(x)  # "enclosing" -- the assignment in inner() did not leak out
# Closure: inner function captures enclosing scope
def make_multiplier(n):
    """Return a function that multiplies its argument by ``n``."""

    def multiplier(x):
        # ``n`` is not a local here: it is read from make_multiplier's scope,
        # which the returned function keeps alive.
        return x * n

    return multiplier


double, triple = make_multiplier(2), make_multiplier(3)
double(5)  # 10
triple(5)  # 15
# Nonlocal keyword
def counter():
    """Return a function that yields 1, 2, 3, ... on successive calls."""
    count = 0

    def increment():
        # Without ``nonlocal`` the assignment below would create a new local
        # variable instead of updating counter()'s ``count``.
        nonlocal count
        count = count + 1
        return count

    return increment


c = counter()
c()  # 1
c()  # 2
Higher-Order Functions
from functools import reduce
# map
nums = [1, 2, 3, 4, 5]

# map: apply a function to every element
squared = list(map(lambda n: n**2, nums))  # [1, 4, 9, 16, 25]

# filter: keep only the elements for which the predicate is true
evens = list(filter(lambda n: n % 2 == 0, nums))  # [2, 4]

# reduce: fold the sequence into one value, left to right
total = reduce(lambda acc, n: acc + n, nums)  # 15
product = reduce(lambda acc, n: acc * n, nums)  # 120

# sorted with key: order (name, grade) pairs by grade, highest first
students = [("Alice", 95), ("Bob", 88), ("Charlie", 92)]
by_grade = sorted(students, key=lambda pair: pair[1], reverse=True)
# [("Alice", 95), ("Charlie", 92), ("Bob", 88)]
Lambda Expressions
# Lambda: anonymous single-expression functions
# NOTE: binding a lambda to a name (as in the next two lines) is shown only
# for illustration; PEP 8 recommends a ``def`` statement whenever the
# function needs a name.
square = lambda x: x ** 2
add = lambda a, b: a + b
# Useful in callbacks and data transformations
data = [{"name": "Alice", "score": 95}, {"name": "Bob", "score": 88}]
# The key function extracts each record's "score"; sorted() orders ascending.
sorted_data = sorted(data, key=lambda d: d["score"])
# Immediately invoked: the lambda is defined and called in one expression
result = (lambda x, y: x + y)(3, 4) # 7
List Comprehensions
# Basic syntax: [expression for item in iterable if condition]
# Squares of even numbers (filter clause at the end)
squares = [n * n for n in range(10) if n % 2 == 0]
# [0, 4, 16, 36, 64]

# Nested comprehension: the inner list builds one row, the outer one stacks rows
matrix = [[3 * row + col + 1 for col in range(3)] for row in range(3)]
# [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

# Two ``for`` clauses read left to right, like nested loops
flat = [cell for line in matrix for cell in line]
# [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Conditional expression (ternary) in the output position
labels = ["odd" if n % 2 else "even" for n in range(5)]
# ["even", "odd", "even", "odd", "even"]
Dictionary Comprehensions
# {key_expr: value_expr for item in iterable if condition}
# Invert dictionary (if two keys shared a value, the later one would win)
original = {"a": 1, "b": 2, "c": 3}
inverted = {v: k for k, v in original.items()}
# {1: "a", 2: "b", 3: "c"}

# Filter and transform
scores = {"Alice": 95, "Bob": 67, "Charlie": 88, "Diana": 42}
honors = {name: score for name, score in scores.items() if score >= 80}
# {"Alice": 95, "Charlie": 88}

# Character frequency
from collections import Counter

text = "hello world"
# Counter tallies every character in a single pass and remembers first-seen
# order. Calling text.count(ch) for each ch in set(text) would rescan the
# whole string per distinct character and yield keys in arbitrary set order.
freq = {ch: n for ch, n in Counter(text).items() if ch != " "}
# {'h': 1, 'e': 1, 'l': 3, 'o': 2, 'w': 1, 'r': 1, 'd': 1}
Set Comprehensions
# {expr for item in iterable if condition}
words = ["hello", "world", "python", "hello", "world"]

# Distinct word lengths: duplicates collapse because the result is a set
unique_lengths = {len(word) for word in words}
# {5, 6}

# Unique characters across every word
all_chars = {letter for word in words for letter in word}
# {'h', 'e', 'l', 'o', 'w', 'r', 'd', 'p', 'y', 't', 'n'}
Generator Expressions and Functions
# Generator expression: lazy evaluation, memory efficient
squares_gen = (x**2 for x in range(1000000))
next(squares_gen)  # 0
next(squares_gen)  # 1


# Generator function
def fibonacci():
    """Yield the Fibonacci numbers 0, 1, 1, 2, 3, ... without end.

    The generator never terminates on its own; the caller decides how many
    values to pull with ``next()``.
    """
    a, b = 0, 1
    while True:
        yield a
        a, b = b, a + b


fib = fibonacci()
# First 10 Fibonacci numbers
fib_nums = [next(fib) for _ in range(10)]
# [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
# Generator for large datasets (memory efficient)
def read_large_file(file_path, encoding="utf-8"):
    """Lazily yield the lines of a text file, one at a time.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding used to decode the file. Defaults to UTF-8
            so the result does not depend on the platform's locale default.

    Yields:
        Each line with leading and trailing whitespace (including the
        newline) removed.
    """
    # The file stays open only while the generator is being consumed; the
    # ``with`` block closes it when iteration finishes or the generator is
    # closed.
    with open(file_path, "r", encoding=encoding) as f:
        for line in f:
            yield line.strip()
# Process line by line without loading entire file
for line in read_large_file("huge_dataset.csv"):
process(line)
Decorators
import time
from functools import wraps
def timer(func):
    """Decorator that prints how long each call to ``func`` takes.

    The wrapped function's return value is passed through unchanged, and
    ``functools.wraps`` keeps its name and docstring on the wrapper.
    """

    @wraps(func)
    def wrapper(*args, **kwargs):
        began = time.perf_counter()
        outcome = func(*args, **kwargs)
        duration = time.perf_counter() - began
        print(f"{func.__name__} took {duration:.4f}s")
        return outcome

    return wrapper


@timer
def slow_function():
    """Sleep for one second, then return ``"done"``."""
    time.sleep(1)
    return "done"
# Memoization decorator
def memoize(func):
    """Cache the results of ``func`` keyed by its call arguments.

    Args:
        func: The function to wrap. Every argument it is called with must be
            hashable, because the arguments form the cache key.

    Returns:
        A wrapper that calls ``func`` only the first time a given
        combination of arguments is seen and returns the stored result
        afterwards. The cache is unbounded and lives as long as the wrapper.
    """
    cache = {}

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Keyword arguments are part of the key; sorting makes the key
        # independent of the order in which they were passed.
        key = (args, tuple(sorted(kwargs.items())))
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]

    return wrapper


@memoize
def fibonacci(n):
    """Return the n-th Fibonacci number (fibonacci(0) == 0)."""
    if n < 2:
        return n
    # The recursive calls go through the memoized wrapper, so each value of
    # n is computed only once.
    return fibonacci(n - 1) + fibonacci(n - 2)


fibonacci(100)  # Computed instantly
Comprehension vs Map/Filter
# Equivalent operations
nums = [1, 2, 3, 4, 5]

# List comprehension (preferred for readability): filter and transform in one
result = [n**2 for n in nums if n % 2 == 0]

# map/filter (functional style): filter runs first, map squares what is left
result = list(map(lambda n: n**2, filter(lambda n: n % 2 == 0, nums)))
# Performance: comprehensions are generally faster due to
# reduced function call overhead
Practical Data Science Examples
# Data cleaning with comprehensions
raw_data = [" Alice ", "bob", " CHARLIE ", "diana "]
# Trim surrounding whitespace, then normalise the capitalisation
cleaned = [entry.strip().title() for entry in raw_data]
# ["Alice", "Bob", "Charlie", "Diana"]

# Feature engineering: copy each record and add a derived ratio column
records = [{"age": 25, "income": 50000}, {"age": 35, "income": 80000}]
features = [
    {**rec, "income_per_age": rec["income"] / rec["age"]}
    for rec in records
]
# Aggregation
from collections import defaultdict

# Group values by key. A plain loop is used because appending is a side
# effect; a list comprehension here would only build a throwaway list of None.
grouped = defaultdict(list)
for k, v in [("A", 1), ("B", 2), ("A", 3)]:
    grouped[k].append(v)
# {"A": [1, 3], "B": [2]}
Summary
- Use lambda for short, throwaway functions
- Prefer comprehensions over map/filter for readability
- Use generators for memory-efficient processing of large data
- Apply decorators for cross-cutting concerns (timing, logging, caching)
- Master *args/**kwargs for flexible function signatures