CW

Functions, Lambda and Comprehensions

Module 1: Introduction & Python Basics — Free Lesson

Advertisement

Functions, Lambda and Comprehensions

Functions are the building blocks of modular, reusable code. Comprehensions enable concise, Pythonic data transformations.

Function Basics

def greet(name: str, greeting: str = "Hello") -> str:
    """Return a greeting string."""
    # The docstring is kept verbatim because it is echoed via greet.__doc__ below.
    message = f"{greeting}, {name}!"
    return message


# Call with the default greeting, then override it by keyword.
greet("Alice")                  # "Hello, Alice!"
greet("Bob", greeting="Hi")     # "Hi, Bob!"

# Function metadata is available at runtime.
greet.__doc__                   # "Return a greeting string."
greet.__annotations__           # {'name': <class 'str'>, 'greeting': <class 'str'>, 'return': <class 'str'>}

*args and **kwargs

def flexible(*args, **kwargs):
    """Print the received positional arguments (a tuple) and keyword arguments (a dict)."""
    positional_line = f"Positional: {args}"
    print(positional_line)
    keyword_line = f"Keyword: {kwargs}"
    print(keyword_line)


flexible(1, 2, 3, name="Alice", age=30)
# Output:
#   Positional: (1, 2, 3)
#   Keyword: {'name': 'Alice', 'age': 30}

# Real data science use case
def create_model(model_type, *args, **kwargs):
    """Build a classifier selected by a short string code.

    Args:
        model_type: "rf" for scikit-learn's RandomForestClassifier or
            "xgb" for XGBoost's XGBClassifier.
        *args: Positional arguments forwarded unchanged to the constructor.
        **kwargs: Keyword arguments forwarded unchanged to the constructor.

    Returns:
        A newly constructed classifier instance.

    Raises:
        ValueError: If ``model_type`` is not one of the supported codes.
    """
    # The imports stay inside the branches so that only the library actually
    # selected has to be installed.
    if model_type == "rf":
        from sklearn.ensemble import RandomForestClassifier
        return RandomForestClassifier(*args, **kwargs)
    if model_type == "xgb":
        from xgboost import XGBClassifier
        return XGBClassifier(*args, **kwargs)
    # An unrecognized code used to fall through and silently return None.
    raise ValueError(
        f"Unknown model_type {model_type!r}; expected 'rf' or 'xgb'"
    )

# The keyword arguments are passed straight through to RandomForestClassifier.
model = create_model("rf", n_estimators=100, max_depth=5, random_state=42)

Scope and Closures

# LEGB Rule: Local → Enclosing → Global → Built-in
# Name lookup tries each scope in that order and stops at the first match.
x = "global"  # module-level binding (the "G" in LEGB)

def outer():
    """Show that each nested scope binds its own, independent ``x``."""
    x = "enclosing"  # local to outer(); this is the enclosing scope for inner()

    def inner():
        """Bind a third ``x`` that exists only inside inner()."""
        x = "local"  # shadows the enclosing and the global ``x``
        print(x)  # "local"

    inner()
    print(x)  # "enclosing" (the assignment inside inner() did not rebind this x)

# NOTE: outer() is not called in this snippet; call outer() to see the two prints.

# Closure: inner function captures enclosing scope
def make_multiplier(n):
    """Return a one-argument function that multiplies its input by ``n``."""
    def multiplier(x):
        # ``n`` is looked up in the enclosing make_multiplier() call each time.
        scaled = x * n
        return scaled
    return multiplier


# Every call to make_multiplier() creates a closure with its own ``n``.
double, triple = make_multiplier(2), make_multiplier(3)
double(5)   # 10
triple(5)   # 15

# Nonlocal keyword
def counter():
    """Return a function that returns 1, 2, 3, ... on successive calls."""
    count = 0

    def increment():
        # ``nonlocal`` makes the assignment below rebind counter()'s ``count``
        # instead of creating a new variable local to increment().
        nonlocal count
        count = count + 1
        return count

    return increment


c = counter()
c()  # 1
c()  # 2

Higher-Order Functions

map(func, iter): applies a function to each element. Example: [1, 2, 3] → map(sq) → [1, 4, 9]
filter(func, iter): keeps the elements for which func is True. Example: [1, 2, 3, 4, 5] → filter(even) → [2, 4]
reduce(func, iter): reduces to a single value via accumulation. Example: [1, 2, 3, 4] → reduce(add) → 10
sorted(iter, key): sorts using a custom key function. Example: [3, 1, 4, 1, 5] → sorted(reverse=True) → [5, 4, 3, 1, 1]

from functools import reduce

# map: apply a function to every element
nums = [1, 2, 3, 4, 5]
squared = list(map(lambda n: n ** 2, nums))
# [1, 4, 9, 16, 25]

# filter: keep only the elements for which the predicate is true
evens = list(filter(lambda n: n % 2 == 0, nums))
# [2, 4]

# reduce: fold the sequence into a single value, left to right
total = reduce(lambda acc, n: acc + n, nums)      # 15
product = reduce(lambda acc, n: acc * n, nums)    # 120

# sorted with key: order by the grade (second tuple field), highest first
students = [("Alice", 95), ("Bob", 88), ("Charlie", 92)]
by_grade = sorted(students, key=lambda student: student[1], reverse=True)
# [("Alice", 95), ("Charlie", 92), ("Bob", 88)]

Lambda Expressions

# Lambda: anonymous single-expression functions
# NOTE: PEP 8 recommends a ``def`` statement when a lambda is bound to a name;
# the two assignments below exist only to show the syntax.
square = lambda x: x ** 2
add = lambda a, b: a + b

# Useful in callbacks and data transformations
data = [{"name": "Alice", "score": 95}, {"name": "Bob", "score": 88}]
# Sort the records by their "score" value (ascending, which is sorted()'s default).
sorted_data = sorted(data, key=lambda d: d["score"])
# [{"name": "Bob", "score": 88}, {"name": "Alice", "score": 95}]

# Immediately invoked
result = (lambda x, y: x + y)(3, 4)  # 7

List Comprehensions

# Basic syntax: [expression for item in iterable if condition]

# Squares of even numbers (the trailing ``if`` filters the items)
squares = [n ** 2 for n in range(10) if not n % 2]
# [0, 4, 16, 36, 64]

# Nested loops: the outer comprehension builds rows, the inner one builds cells
matrix = [[3 * row + col + 1 for col in range(3)] for row in range(3)]
# [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
flat = [cell for row in matrix for cell in row]
# [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Conditional expression (ternary): chooses the value, does not filter
labels = ["odd" if n % 2 else "even" for n in range(5)]
# ["even", "odd", "even", "odd", "even"]

Dictionary Comprehensions

# {key_expr: value_expr for item in iterable if condition}

# Invert dictionary: swap each key with its value
original = {"a": 1, "b": 2, "c": 3}
inverted = {number: letter for letter, number in original.items()}
# {1: "a", 2: "b", 3: "c"}

# Filter and transform: keep only the entries scoring 80 or more
scores = {"Alice": 95, "Bob": 67, "Charlie": 88, "Diana": 42}
honors = {
    student: points
    for student, points in scores.items()
    if points >= 80
}
# {"Alice": 95, "Charlie": 88}

# Character frequency
text = "hello world"
# dict.fromkeys() de-duplicates like set() but keeps first-seen order, so the
# result is built in the same order on every run (set iteration order is
# arbitrary and can change between runs).
freq = {ch: text.count(ch) for ch in dict.fromkeys(text) if ch != " "}
# {'h': 1, 'e': 1, 'l': 3, 'o': 2, 'w': 1, 'r': 1, 'd': 1}

Set Comprehensions

# {expr for item in iterable if condition}

words = ["hello", "world", "python", "hello", "world"]
unique_lengths = {len(word) for word in words}
# {5, 6}

# Unique characters (sets are unordered, so the display order may differ)
all_chars = {letter for word in words for letter in word}
# {'h', 'e', 'l', 'o', 'w', 'r', 'd', 'p', 'y', 't', 'n'}

Generator Expressions and Functions

# Generator expression: each value is computed on demand, so the million
# squares are never all held in memory at once
squares_gen = (n ** 2 for n in range(1_000_000))
next(squares_gen)  # 0
next(squares_gen)  # 1

# Generator function
def fibonacci():
    """Yield the Fibonacci sequence 0, 1, 1, 2, 3, ... without end.

    The loop never terminates on its own, so callers must bound consumption
    themselves (for example with next() or itertools.islice).
    """
    a, b = 0, 1
    while True:
        yield a
        # Advance the pair: the next value is the sum of the current two.
        a, b = b, a + b

fib = fibonacci()
# First 10 Fibonacci numbers
fib_nums = [next(fib) for _ in range(10)]
# [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]

# Generator for large datasets (memory efficient)
def read_large_file(file_path, *, encoding=None):
    """Lazily yield each line of a text file with surrounding whitespace removed.

    Args:
        file_path: Path of the file to read.
        encoding: Text encoding passed to open(). The default, None, keeps the
            previous behavior of using the platform's default encoding; pass
            e.g. "utf-8" to read the file the same way on every machine.

    Yields:
        One line at a time, stripped of leading and trailing whitespace
        (including the newline).
    """
    # The file is opened when iteration starts and closed by the ``with``
    # block once the generator is exhausted or closed.
    with open(file_path, "r", encoding=encoding) as f:
        for line in f:
            yield line.strip()

# Process line by line without loading entire file
# NOTE: process() is not defined anywhere in this snippet; presumably it is a
# placeholder for your own per-line handler (and "huge_dataset.csv" for your
# own input path).
for line in read_large_file("huge_dataset.csv"):
    process(line)

Decorators

import time
from functools import wraps

def timer(func):
    """Decorator that prints how long each call to ``func`` takes.

    After every call the elapsed time (measured with time.perf_counter) is
    printed as "<name> took <seconds>s". The measurement sits in a ``finally``
    clause, so a call that raises is still reported before its exception
    propagates to the caller.
    """
    @wraps(func)  # keep func's __name__ and __doc__ on the wrapper
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = time.perf_counter() - start
            print(f"{func.__name__} took {elapsed:.4f}s")
    return wrapper

@timer
def slow_function():
    """Sleep for one second, then return "done" (a stand-in for slow work)."""
    time.sleep(1)
    return "done"

# Memoization decorator
def memoize(func):
    """Decorator that caches ``func``'s results, keyed by its call arguments.

    Repeated calls with the same arguments return the stored result instead
    of calling ``func`` again. Positional and keyword arguments are both
    supported; every argument value must be hashable. The cache is unbounded
    and lives as long as the decorated function.

    NOTE: outside of a lesson, functools.lru_cache (or functools.cache on
    Python 3.9+) is the ready-made standard-library equivalent.
    """
    cache = {}

    @wraps(func)
    def wrapper(*args, **kwargs):
        # Sort the keyword items so f(a=1, b=2) and f(b=2, a=1) share one entry.
        key = (args, tuple(sorted(kwargs.items())))
        if key not in cache:
            cache[key] = func(*args, **kwargs)
        return cache[key]
    return wrapper

# NOTE: this rebinds the name ``fibonacci``, replacing the generator function
# of the same name defined earlier in this file.
@memoize
def fibonacci(n):
    """Return the n-th Fibonacci number (fibonacci(0) == 0, fibonacci(1) == 1)."""
    if n < 2:
        return n
    # The recursive calls go through the memoized wrapper, so each n is
    # computed only once.
    return fibonacci(n - 1) + fibonacci(n - 2)

fibonacci(100)  # Computed instantly

Comprehension vs Map/Filter

# Equivalent operations: both forms below produce [4, 16]
nums = [1, 2, 3, 4, 5]

# List comprehension (preferred for readability)
result = [n ** 2 for n in nums if n % 2 == 0]

# map/filter (functional style): filter the evens first, then square them
result = list(
    map(
        lambda n: n ** 2,
        filter(lambda n: n % 2 == 0, nums),
    )
)

# Performance: comprehensions are generally faster due to
# reduced function call overhead

Practical Data Science Examples

# Data cleaning with comprehensions: trim whitespace, then title-case each name
raw_data = ["  Alice ", "bob", " CHARLIE ", "diana  "]
cleaned = [entry.strip().title() for entry in raw_data]
# ["Alice", "Bob", "Charlie", "Diana"]

# Feature engineering: copy each record and add a derived ratio field
records = [{"age": 25, "income": 50000}, {"age": 35, "income": 80000}]
features = [
    dict(rec, income_per_age=rec["income"] / rec["age"])
    for rec in records
]

# Aggregation
from collections import defaultdict
grouped = defaultdict(list)
# A plain loop is used because append() is a side effect; a list comprehension
# here would only build a throwaway list of None values.
for group_key, item in [("A", 1), ("B", 2), ("A", 3)]:
    grouped[group_key].append(item)
# {"A": [1, 3], "B": [2]}

Summary

  • Use lambda for short, throwaway functions
  • Prefer comprehensions over map/filter for readability
  • Use generators for memory-efficient processing of large data
  • Apply decorators for cross-cutting concerns (timing, logging, caching)
  • Master *args/**kwargs for flexible function signatures

Advertisement

Need Expert Data Science Help?

Get personalized tutoring, project support, or professional consulting.

Advertisement