πŸŽ‰ 75% of content is free forever β€” Unlock Premium from $10/mo β†’
CW
Search courses…
πŸ’Ό Servicesℹ️ Aboutβœ‰οΈ ContactView Pricing Plansfrom $10

Generators & Iterators in Python

Python InterviewIteration Patterns⭐ Premium

Advertisement

Generators & Iterators in Python

Difficulty: Medium | Companies: Google, Meta, Amazon, Netflix, Stripe

Iterator Protocol Deep Dive

Custom Iterator Implementation

class FibonacciIterator:
    """Custom iterator implementing iterator protocol."""
    
    def __init__(self, max_count: int):
        self.max_count = max_count
        self.current = 0
        self.a = 0
        self.b = 1
    
    def __iter__(self):
        return self
    
    def __next__(self):
        if self.current >= self.max_count:
            raise StopIteration
        
        self.current += 1
        if self.current == 1:
            return self.a
        elif self.current == 2:
            return self.b
        
        self.a, self.b = self.b, self.a + self.b
        return self.b

# Usage
fib = FibonacciIterator(10)
print(list(fib))  # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]

# Reset by creating new iterator
for num in FibonacciIterator(5):
    print(num, end=" ")  # 0 1 1 2 3

ℹ️

Iterators are stateful - once exhausted, you must create a new instance. Use iter() function with iterables for fresh iterations.

Iterator vs Iterable Distinction

class NumberRange:
    """Iterable - implements __iter__ returning new iterator."""
    
    def __init__(self, start: int, end: int, step: int = 1):
        self.start = start
        self.end = end
        self.step = step
    
    def __iter__(self):
        """Return new iterator instance each time."""
        return NumberRangeIterator(self)
    
    def __contains__(self, value):
        """Support 'in' operator."""
        if self.step > 0:
            return self.start <= value < self.end and (value - self.start) % self.step == 0
        else:
            return self.end < value <= self.start and (self.start - value) % (-self.step) == 0
    
    def __len__(self):
        """Support len() function."""
        return max(0, (self.end - self.start + self.step - 1) // self.step)

class NumberRangeIterator:
    """Iterator - implements __next__."""
    
    def __init__(self, iterable: NumberRange):
        self.current = iterable.start
        self.end = iterable.end
        self.step = iterable.step
    
    def __iter__(self):
        return self
    
    def __next__(self):
        if (self.step > 0 and self.current >= self.end) or \
           (self.step < 0 and self.current <= self.end):
            raise StopIteration
        
        value = self.current
        self.current += self.step
        return value

# Usage
numbers = NumberRange(0, 10, 2)
print(list(numbers))  # [0, 2, 4, 6, 8]
print(4 in numbers)   # True
print(len(numbers))   # 5

Generator Functions

Basic Generator Patterns

def countdown(n: int):
    """Simple generator counting down."""
    print("Starting countdown")
    while n > 0:
        yield n
        n -= 1
    print("Liftoff!")

# Usage
for i in countdown(5):
    print(i, end=" ")
# Output: 5 4 3 2 1 Liftoff!

def fibonacci(limit: int):
    """Generator yielding Fibonacci numbers."""
    a, b = 0, 1
    count = 0
    while count < limit:
        yield a
        a, b = b, a + b
        count += 1

def read_large_file(file_path: str):
    """Memory-efficient file reading."""
    with open(file_path, 'r') as file:
        for line in file:
            yield line.strip()

# Generator expression (like list comprehension but lazy)
squares = (x**2 for x in range(1000000))
print(next(squares))  # 0
print(next(squares))  # 1

Generator Chaining (Pipeline)

def integers():
    """Generate all integers starting from 1."""
    n = 1
    while True:
        yield n
        n += 1

def square numbers):
    """Square each input number."""
    for n in numbers:
        yield n ** 2

def take(n: int, iterable):
    """Take first n items from iterable."""
    for i, item in enumerate(iterable):
        if i >= n:
            break
        yield item

# Pipeline composition
pipeline = take(10, square(integers()))
print(list(pipeline))  # [1, 4, 9, 16, 25, 36, 49, 64, 81, 100]

# Using itertools for pipeline composition
from itertools import islice, chain, starmap

def pipeline_with_itertools():
    """Alternative using itertools."""
    nums = integers()
    squares = map(lambda x: x**2, nums)
    first_10 = islice(squares, 10)
    return list(first_10)

⚠️

Generators can only be iterated once. If you need multiple passes, convert to a list or implement a reusable iterator.

Advanced Generator Patterns

Generator with Send (Coroutines)

def average_calculator():
    """Coroutine that calculates running average using send()."""
    total = 0.0
    count = 0
    average = None
    
    while True:
        value = yield average
        if value is None:
            break
        
        total += value
        count += 1
        average = total / count

# Usage
calc = average_calculator()
next(calc)  # Prime the coroutine
print(calc.send(10))   # 10.0
print(calc.send(20))   # 15.0
print(calc.send(30))   # 20.0

def data_pipeline():
    """Data processing pipeline using send."""
    results = []
    while True:
        data = yield results
        if data is None:
            break
        
        # Process data
        processed = [x * 2 for x in data if x > 0]
        results = processed

# Usage
pipeline = data_pipeline()
next(pipeline)
print(pipeline.send([1, -2, 3, -4, 5]))  # [2, 6, 10]

Generator with Return Value

from functools import reduce

def accumulate_and_sum():
    """Generator that accumulates values and returns final sum."""
    total = 0
    count = 0
    try:
        while True:
            value = yield
            total += value
            count += 1
    except GeneratorExit:
        return {"total": total, "count": count, "average": total / count if count else 0}

def collect_generator_results():
    """Collect results from generator with return value."""
    gen = accumulate_and_sum()
    next(gen)
    
    gen.send(10)
    gen.send(20)
    gen.send(30)
    
    try:
        gen.close()
    except StopIteration as e:
        result = e.value
        print(f"Total: {result['total']}, Count: {result['count']}")

collect_generator_results()  # Total: 60, Count: 3

Itertools Power Tools

Grouping and Accumulating

from itertools import (
    groupby, accumulate, chain, compress,
    islice, takewhile, dropwhile, starmap
)
from operator import mul, itemgetter

# Group consecutive elements
data = [("A", 1), ("A", 2), ("B", 3), ("B", 4), ("A", 5)]
for key, group in groupby(data, key=itemgetter(0)):
    items = list(group)
    print(f"{key}: {items}")

# Running totals
numbers = [1, 2, 3, 4, 5]
print(list(accumulate(numbers)))  # [1, 3, 6, 10, 15]
print(list(accumulate(numbers, mul)))  # [1, 2, 6, 24, 120]

# Chain multiple iterables
list1 = [1, 2, 3]
list2 = [4, 5, 6]
list3 = [7, 8, 9]
combined = list(chain(list1, list2, list3))
print(combined)  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# Select elements based on mask
data = ["a", "b", "c", "d", "e"]
mask = [True, False, True, False, True]
print(list(compress(data, mask)))  # ['a', 'c', 'e']

# Take while condition is true
numbers = [1, 2, 3, 4, 5, 1, 2]
print(list(takewhile(lambda x: x < 4, numbers)))  # [1, 2, 3]

# Drop while condition is true
print(list(dropwhile(lambda x: x < 4, numbers)))  # [4, 5, 1, 2]

Combinatorics and Permutations

from itertools import permutations, combinations, combinations_with_replacement, product

# All permutations
items = ['A', 'B', 'C']
print(list(permutations(items, 2)))
# [('A', 'B'), ('A', 'C'), ('B', 'A'), ('B', 'C'), ('C', 'A'), ('C', 'B')]

# All combinations
print(list(combinations(items, 2)))
# [('A', 'B'), ('A', 'C'), ('B', 'C')]

# Combinations with replacement
print(list(combinations_with_replacement(items, 2)))
# [('A', 'A'), ('A', 'B'), ('A', 'C'), ('B', 'B'), ('B', 'C'), ('C', 'C')]

# Cartesian product
colors = ['red', 'blue']
sizes = ['S', 'M', 'L']
print(list(product(colors, sizes)))
# [('red', 'S'), ('red', 'M'), ('red', 'L'), ('blue', 'S'), ('blue', 'M'), ('blue', 'L')]

def powerset(iterable):
    """Generate all subsets of iterable."""
    from itertools import chain
    s = list(iterable)
    return chain.from_iterable(combinations(s, r) for r in range(len(s) + 1))

print(list(powerset([1, 2, 3])))
# [(), (1,), (2,), (3,), (1, 2), (1, 3), (2, 3), (1, 2, 3)]

Memory-Efficient Patterns

Lazy Data Processing Pipeline

import csv
from typing import Iterator, Dict, Any
from pathlib import Path

def read_csv_lazy(filepath: Path) -> Iterator[Dict[str, str]]:
    """Read CSV file lazily - one row at a time."""
    with open(filepath, 'r') as f:
        reader = csv.DictReader(f)
        for row in reader:
            yield row

def filter_rows(rows: Iterator[Dict], condition: callable) -> Iterator[Dict]:
    """Filter rows based on condition."""
    for row in rows:
        if condition(row):
            yield row

def transform_rows(rows: Iterator[Dict], transform: callable) -> Iterator[Dict]:
    """Transform each row."""
    for row in rows:
        yield transform(row)

def batch_rows(rows: Iterator[Dict], batch_size: int) -> Iterator[list]:
    """Group rows into batches."""
    batch = []
    for row in rows:
        batch.append(row)
        if len(batch) >= batch_size:
            yield batch
            batch = []
    if batch:
        yield batch

# Complete pipeline - processes gigabyte files with constant memory
def process_large_csv(filepath: Path):
    """Process large CSV with memory-efficient pipeline."""
    rows = read_csv_lazy(filepath)
    filtered = filter_rows(rows, lambda r: int(r['age']) >= 18)
    transformed = transform_rows(filtered, lambda r: {
        'name': r['name'].upper(),
        'email': r['email'].lower(),
        'age': int(r['age'])
    })
    
    for batch in batch_rows(transformed, 1000):
        # Process batch (e.g., insert into database)
        process_batch(batch)

def process_batch(batch: list):
    """Process a batch of rows."""
    print(f"Processing batch of {len(batch)} rows")

Circular Buffer Generator

from typing import Iterator, TypeVar, Generic

T = TypeVar('T')

class CircularBuffer(Generic[T]):
    """Fixed-size circular buffer with generator-based iteration."""
    
    def __init__(self, capacity: int):
        self.capacity = capacity
        self.buffer: list[T] = []
        self.head = 0
        self.size = 0
    
    def add(self, item: T) -> None:
        """Add item to buffer, overwriting oldest if full."""
        if self.size < self.capacity:
            self.buffer.append(item)
            self.size += 1
        else:
            self.buffer[self.head] = item
            self.head = (self.head + 1) % self.capacity
    
    def iterate(self) -> Iterator[T]:
        """Generator yielding items in order from oldest to newest."""
        if self.size < self.capacity:
            for i in range(self.size):
                yield self.buffer[i]
        else:
            for i in range(self.capacity):
                idx = (self.head + i) % self.capacity
                yield self.buffer[idx]
    
    def recent(self, n: int) -> Iterator[T]:
        """Generator yielding n most recent items."""
        start = max(0, self.size - n)
        for i in range(start, self.size):
            idx = (self.head + i) % self.capacity if self.size == self.capacity else i
            yield self.buffer[idx]

# Usage
buffer = CircularBuffer[int](5)
for i in range(10):
    buffer.add(i)

print(list(buffer.iterate()))  # [5, 6, 7, 8, 9]
print(list(buffer.recent(3)))  # [7, 8, 9]

ℹ️

Generators are ideal for representing infinite sequences, processing large files, and building data pipelines where you don't need all data in memory at once.

Follow-Up Questions

  1. Explain the difference between a generator function and a generator expression.

  2. How does Python's garbage collector handle generators that are not fully consumed?

  3. When would you use a list comprehension over a generator expression?

  4. How can you make a generator that can be restarted after exhaustion?

  5. Explain the concept of generator delegation using yield from.

Advertisement