Generators & Iterators in Python
Difficulty: Medium | Companies: Google, Meta, Amazon, Netflix, Stripe
Iterator Protocol Deep Dive
Custom Iterator Implementation
class FibonacciIterator:
"""Custom iterator implementing iterator protocol."""
def __init__(self, max_count: int):
self.max_count = max_count
self.current = 0
self.a = 0
self.b = 1
def __iter__(self):
return self
def __next__(self):
if self.current >= self.max_count:
raise StopIteration
self.current += 1
if self.current == 1:
return self.a
elif self.current == 2:
return self.b
self.a, self.b = self.b, self.a + self.b
return self.b
# Usage
fib = FibonacciIterator(10)
print(list(fib)) # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
# Reset by creating new iterator
for num in FibonacciIterator(5):
print(num, end=" ") # 0 1 1 2 3
βΉοΈ
Iterators are stateful - once exhausted, you must create a new instance. Use iter() function with iterables for fresh iterations.
Iterator vs Iterable Distinction
class NumberRange:
"""Iterable - implements __iter__ returning new iterator."""
def __init__(self, start: int, end: int, step: int = 1):
self.start = start
self.end = end
self.step = step
def __iter__(self):
"""Return new iterator instance each time."""
return NumberRangeIterator(self)
def __contains__(self, value):
"""Support 'in' operator."""
if self.step > 0:
return self.start <= value < self.end and (value - self.start) % self.step == 0
else:
return self.end < value <= self.start and (self.start - value) % (-self.step) == 0
def __len__(self):
"""Support len() function."""
return max(0, (self.end - self.start + self.step - 1) // self.step)
class NumberRangeIterator:
"""Iterator - implements __next__."""
def __init__(self, iterable: NumberRange):
self.current = iterable.start
self.end = iterable.end
self.step = iterable.step
def __iter__(self):
return self
def __next__(self):
if (self.step > 0 and self.current >= self.end) or \
(self.step < 0 and self.current <= self.end):
raise StopIteration
value = self.current
self.current += self.step
return value
# Usage
numbers = NumberRange(0, 10, 2)
print(list(numbers)) # [0, 2, 4, 6, 8]
print(4 in numbers) # True
print(len(numbers)) # 5
Generator Functions
Basic Generator Patterns
def countdown(n: int):
"""Simple generator counting down."""
print("Starting countdown")
while n > 0:
yield n
n -= 1
print("Liftoff!")
# Usage
for i in countdown(5):
print(i, end=" ")
# Output: 5 4 3 2 1 Liftoff!
def fibonacci(limit: int):
"""Generator yielding Fibonacci numbers."""
a, b = 0, 1
count = 0
while count < limit:
yield a
a, b = b, a + b
count += 1
def read_large_file(file_path: str):
"""Memory-efficient file reading."""
with open(file_path, 'r') as file:
for line in file:
yield line.strip()
# Generator expression (like list comprehension but lazy)
squares = (x**2 for x in range(1000000))
print(next(squares)) # 0
print(next(squares)) # 1
Generator Chaining (Pipeline)
def integers():
"""Generate all integers starting from 1."""
n = 1
while True:
yield n
n += 1
def square numbers):
"""Square each input number."""
for n in numbers:
yield n ** 2
def take(n: int, iterable):
"""Take first n items from iterable."""
for i, item in enumerate(iterable):
if i >= n:
break
yield item
# Pipeline composition
pipeline = take(10, square(integers()))
print(list(pipeline)) # [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]
# Using itertools for pipeline composition
from itertools import islice, chain, starmap
def pipeline_with_itertools():
"""Alternative using itertools."""
nums = integers()
squares = map(lambda x: x**2, nums)
first_10 = islice(squares, 10)
return list(first_10)
β οΈ
Generators can only be iterated once. If you need multiple passes, convert to a list or implement a reusable iterator.
Advanced Generator Patterns
Generator with Send (Coroutines)
def average_calculator():
"""Coroutine that calculates running average using send()."""
total = 0.0
count = 0
average = None
while True:
value = yield average
if value is None:
break
total += value
count += 1
average = total / count
# Usage
calc = average_calculator()
next(calc) # Prime the coroutine
print(calc.send(10)) # 10.0
print(calc.send(20)) # 15.0
print(calc.send(30)) # 20.0
def data_pipeline():
"""Data processing pipeline using send."""
results = []
while True:
data = yield results
if data is None:
break
# Process data
processed = [x * 2 for x in data if x > 0]
results = processed
# Usage
pipeline = data_pipeline()
next(pipeline)
print(pipeline.send([1, -2, 3, -4, 5])) # [2, 6, 10]
Generator with Return Value
from functools import reduce
def accumulate_and_sum():
"""Generator that accumulates values and returns final sum."""
total = 0
count = 0
try:
while True:
value = yield
total += value
count += 1
except GeneratorExit:
return {"total": total, "count": count, "average": total / count if count else 0}
def collect_generator_results():
"""Collect results from generator with return value."""
gen = accumulate_and_sum()
next(gen)
gen.send(10)
gen.send(20)
gen.send(30)
try:
gen.close()
except StopIteration as e:
result = e.value
print(f"Total: {result['total']}, Count: {result['count']}")
collect_generator_results() # Total: 60, Count: 3
Itertools Power Tools
Grouping and Accumulating
from itertools import (
groupby, accumulate, chain, compress,
islice, takewhile, dropwhile, starmap
)
from operator import mul, itemgetter
# Group consecutive elements
data = [("A", 1), ("A", 2), ("B", 3), ("B", 4), ("A", 5)]
for key, group in groupby(data, key=itemgetter(0)):
items = list(group)
print(f"{key}: {items}")
# Running totals
numbers = [1, 2, 3, 4, 5]
print(list(accumulate(numbers))) # [1, 3, 6, 10, 15]
print(list(accumulate(numbers, mul))) # [1, 2, 6, 24, 120]
# Chain multiple iterables
list1 = [1, 2, 3]
list2 = [4, 5, 6]
list3 = [7, 8, 9]
combined = list(chain(list1, list2, list3))
print(combined) # [1, 2, 3, 4, 5, 6, 7, 8, 9]
# Select elements based on mask
data = ["a", "b", "c", "d", "e"]
mask = [True, False, True, False, True]
print(list(compress(data, mask))) # ['a', 'c', 'e']
# Take while condition is true
numbers = [1, 2, 3, 4, 5, 1, 2]
print(list(takewhile(lambda x: x < 4, numbers))) # [1, 2, 3]
# Drop while condition is true
print(list(dropwhile(lambda x: x < 4, numbers))) # [4, 5, 1, 2]
Combinatorics and Permutations
from itertools import permutations, combinations, combinations_with_replacement, product
# All permutations
items = ['A', 'B', 'C']
print(list(permutations(items, 2)))
# [('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]
# All combinations
print(list(combinations(items, 2)))
# [('A', 'B'), ('A', 'C'), ('B', 'C')]
# Combinations with replacement
print(list(combinations_with_replacement(items, 2)))
# [('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]
# Cartesian product
colors = ['red', 'blue']
sizes = ['S', 'M', 'L']
print(list(product(colors, sizes)))
# [('red', 'S'), ('red', 'M'), ('red', 'L'), ('blue', 'S'), ('blue', 'M'), ('blue', 'L')]
def powerset(iterable):
"""Generate all subsets of iterable."""
from itertools import chain
s = list(iterable)
return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))
print(list(powerset([1, 2, 3])))
# [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)]
Memory-Efficient Patterns
Lazy Data Processing Pipeline
import csv
from typing import Iterator, Dict, Any
from pathlib import Path
def read_csv_lazy(filepath: Path) -> Iterator[Dict[str, str]]:
"""Read CSV file lazily - one row at a time."""
with open(filepath, 'r') as f:
reader = csv.DictReader(f)
for row in reader:
yield row
def filter_rows(rows: Iterator[Dict], condition: callable) -> Iterator[Dict]:
"""Filter rows based on condition."""
for row in rows:
if condition(row):
yield row
def transform_rows(rows: Iterator[Dict], transform: callable) -> Iterator[Dict]:
"""Transform each row."""
for row in rows:
yield transform(row)
def batch_rows(rows: Iterator[Dict], batch_size: int) -> Iterator[list]:
"""Group rows into batches."""
batch = []
for row in rows:
batch.append(row)
if len(batch) >= batch_size:
yield batch
batch = []
if batch:
yield batch
# Complete pipeline - processes gigabyte files with constant memory
def process_large_csv(filepath: Path):
"""Process large CSV with memory-efficient pipeline."""
rows = read_csv_lazy(filepath)
filtered = filter_rows(rows, lambda r: int(r['age']) >= 18)
transformed = transform_rows(filtered, lambda r: {
'name': r['name'].upper(),
'email': r['email'].lower(),
'age': int(r['age'])
})
for batch in batch_rows(transformed, 1000):
# Process batch (e.g., insert into database)
process_batch(batch)
def process_batch(batch: list):
"""Process a batch of rows."""
print(f"Processing batch of {len(batch)} rows")
Circular Buffer Generator
from typing import Iterator, TypeVar, Generic
T = TypeVar('T')
class CircularBuffer(Generic[T]):
"""Fixed-size circular buffer with generator-based iteration."""
def __init__(self, capacity: int):
self.capacity = capacity
self.buffer: list[T] = []
self.head = 0
self.size = 0
def add(self, item: T) -> None:
"""Add item to buffer, overwriting oldest if full."""
if self.size < self.capacity:
self.buffer.append(item)
self.size += 1
else:
self.buffer[self.head] = item
self.head = (self.head + 1) % self.capacity
def iterate(self) -> Iterator[T]:
"""Generator yielding items in order from oldest to newest."""
if self.size < self.capacity:
for i in range(self.size):
yield self.buffer[i]
else:
for i in range(self.capacity):
idx = (self.head + i) % self.capacity
yield self.buffer[idx]
def recent(self, n: int) -> Iterator[T]:
"""Generator yielding n most recent items."""
start = max(0, self.size - n)
for i in range(start, self.size):
idx = (self.head + i) % self.capacity if self.size == self.capacity else i
yield self.buffer[idx]
# Usage
buffer = CircularBuffer[int](5)
for i in range(10):
buffer.add(i)
print(list(buffer.iterate())) # [5, 6, 7, 8, 9]
print(list(buffer.recent(3))) # [7, 8, 9]
βΉοΈ
Generators are ideal for representing infinite sequences, processing large files, and building data pipelines where you don't need all data in memory at once.
Follow-Up Questions
-
Explain the difference between a generator function and a generator expression.
-
How does Python's garbage collector handle generators that are not fully consumed?
-
When would you use a list comprehension over a generator expression?
-
How can you make a generator that can be restarted after exhaustion?
-
Explain the concept of generator delegation using
yield from.