Python Iterators — The Iterator Protocol Explained
Iteration is fundamental to Python. Every for loop uses iterators under the hood. Understanding the iterator protocol gives you power over custom iteration.
Learning Objectives
- Understand the iterator protocol (`__iter__` and `__next__`)
- Create custom iterators with classes
- Use the `iter()` and `next()` built-in functions
- Implement infinite sequences safely
- Understand the difference between iterables and iterators
- Master memory-efficient iteration patterns
- Use itertools for advanced iteration patterns
The Iterator Protocol
# Every iterable has __iter__ that returns an iterator.
# Every iterator has __next__ that returns the next value.

# Built-in iterables
nums = [1, 2, 3]
it = iter(nums)  # Calls nums.__iter__()
print(next(it))  # 1
print(next(it))  # 2
print(next(it))  # 3
# next(it)  # Raises StopIteration — the list iterator is exhausted

# Strings are iterable (one character at a time)
s = "Hello"
it = iter(s)
print(next(it))  # H
print(next(it))  # e

# Dictionaries are iterable (iterate over keys)
d = {"a": 1, "b": 2}
for key in d:
    print(key, d[key])  # a 1, then b 2

# Tuples are iterable
t = (10, 20, 30)
it = iter(t)
print(next(it))  # 10

# Sets are iterable
s = {10, 20, 30}
it = iter(s)
print(next(it))  # Some element (order not guaranteed)

# Files are iterable (line by line); `with` closes the file afterwards
# with open('file.txt') as f:
#     for line in f:
#         print(line)
Iterable vs Iterator — Key Distinction
from collections.abc import Iterable, Iterator

# An iterable exposes __iter__() and hands out an iterator on request.
#   - It can be looped over any number of times.
#   - list, tuple, dict, set, str and range are all iterables.
# An iterator exposes __next__() and produces one value per call.
#   - It is good for a single pass only.
#   - Its own __iter__() simply returns itself.

# One list, two iterators: each keeps its own position.
nums = [1, 2, 3]
it1, it2 = iter(nums), iter(nums)
print(next(it1), next(it1), sep="\n")  # 1, then 2
print(next(it2))  # 1 — it2 starts from the beginning

# Sets, tuples and dicts hand out iterators the same way.
unique = {10, 20, 30}
it = unique.__iter__()
tup = (100, 200, 300)
it = tup.__iter__()
d = {"x": 1, "y": 2}
it = d.__iter__()

# Asking an iterator for an iterator gives back the very same object.
print(it.__iter__() is it)  # True

# Type checking against the abstract base classes
print(isinstance(nums, Iterable), isinstance(nums, Iterator), sep="\n")  # True, then False
print(isinstance(it1, Iterable), isinstance(it1, Iterator), sep="\n")  # True, then True
Iterable vs Iterator Reference Table
| Feature | Iterable | Iterator |
|---|---|---|
| Protocol method | `__iter__()` | `__next__()` (plus `__iter__()` returning self) |
| Can create multiple iterators | Yes | No (returns self) |
| Can be used in for loop | Yes | Yes |
| `iter()` call behavior | Returns new iterator | Returns self |
| State tracking | No | Yes (current position) |
| Examples | list, tuple, dict, set | generator, file, iter(list) |
Custom Iterator Classes
class CountDown:
    """Countdown iterator — single-use.

    Yields ``start``, ``start - 1``, ... down to and including 0, then
    raises StopIteration. The object is its own iterator, so once it is
    exhausted it stays exhausted.
    """

    def __init__(self, start: int) -> None:
        # Next value to hand out; decremented after every __next__ call.
        self.current = start

    def __iter__(self) -> "CountDown":
        # An iterator returns itself from __iter__.
        return self

    def __next__(self) -> int:
        # 0 is still yielded; only values below 0 end the iteration.
        if self.current < 0:
            raise StopIteration
        value = self.current
        self.current -= 1
        return value
# Usage
for num in CountDown(5):
    print(num, end=" ")  # 5 4 3 2 1 0
print()  # end the line started by end=" "

# Can only iterate once
cd = CountDown(3)
print(list(cd))  # [3, 2, 1, 0]
print(list(cd))  # [] — exhausted!

# Manual iteration
cd = CountDown(3)
print(next(cd))  # 3
print(next(cd))  # 2
print(next(cd))  # 1
print(next(cd))  # 0
# next(cd)  # StopIteration
Separate Iterable and Iterator (Reusable)
class NumberRange:
    """Reusable iterable — creates a new iterator each time.

    Only the bounds are stored here; every call to ``__iter__`` builds a
    fresh NumberRangeIterator from them, so the same NumberRange can be
    looped over repeatedly.
    """

    def __init__(self, start: int, end: int) -> None:
        self.start = start
        self.end = end

    def __iter__(self):
        # A new iterator per call keeps position state out of this object.
        return NumberRangeIterator(self.start, self.end)
class NumberRangeIterator:
    """Single-pass iterator yielding ``start``, ``start + 1``, ... up to ``end - 1``."""

    def __init__(self, start: int, end: int) -> None:
        # Next value to yield; advanced by one on every __next__ call.
        self.current = start
        self.end = end

    def __iter__(self) -> "NumberRangeIterator":
        return self

    def __next__(self) -> int:
        # ``end`` is exclusive: stop as soon as the cursor reaches it.
        if self.current >= self.end:
            raise StopIteration
        value = self.current
        self.current += 1
        return value
# The same NumberRange object can be walked as often as needed
nums = NumberRange(0, 5)
print([*nums])  # [0, 1, 2, 3, 4]
print([*nums])  # [0, 1, 2, 3, 4] — a second pass still works

# Several iterators over one NumberRange stay independent
nums = NumberRange(0, 5)
it1, it2 = iter(nums), iter(nums)
print(next(it1), next(it1), sep="\n")  # 0, then 1
print(next(it2))  # 0 — it2 keeps its own position
print([*it1])  # [2, 3, 4]
print([*it2])  # [1, 2, 3, 4]
Bidirectional Iterator
class BidirectionalRange:
    """Iterable over ``start`` .. ``end - 1`` whose iterators can change direction."""

    def __init__(self, start, end):
        self.start = start
        self.end = end

    def __iter__(self):
        return BidirectionalIterator(self.start, self.end)


class BidirectionalIterator:
    """Iterator over ``start`` .. ``end - 1`` that can go forward or backward.

    ``current`` always holds the value the next ``__next__`` call will
    return in the active direction. Calling ``reverse()`` continues from
    the neighbour of the value returned last, so that value is not
    repeated.
    """

    def __init__(self, start, end):
        # The lower bound must be stored: the backward branch of __next__
        # compares against it.
        self.start = start
        self.current = start
        self.end = end
        self.direction = 1  # 1 = forward, -1 = backward

    def __iter__(self):
        return self

    def __next__(self):
        # Forward runs stop at the exclusive upper bound ...
        if self.direction == 1 and self.current >= self.end:
            raise StopIteration
        # ... backward runs stop once the cursor drops below the lower bound.
        if self.direction == -1 and self.current < self.start:
            raise StopIteration
        value = self.current
        self.current += self.direction
        return value

    def reverse(self):
        """Flip the direction of travel."""
        self.direction *= -1
        # The cursor sits one step past the last returned value in the old
        # direction. Move it two steps in the new direction: one back onto
        # that value, one more onto its neighbour.
        self.current += 2 * self.direction


# Usage
br = BidirectionalRange(0, 5)
it = iter(br)
print(list(it))  # [0, 1, 2, 3, 4]
it.reverse()
print(list(it))  # [3, 2, 1, 0]
The iter() and next() Functions
# iter() converts an iterable to an iterator
nums = [1, 2, 3]
it = iter(nums)

# next() with a default value (avoids StopIteration)
print(next(it, "default"))  # 1
print(next(it, "default"))  # 2
print(next(it, "default"))  # 3
print(next(it, "default"))  # default — returned instead of raising

# Two-argument form of iter(): callable + sentinel
# Calls the callable repeatedly until the sentinel value is returned
counter = [0]  # one-element list so read_counter() can update it in place


def read_counter():
    """Increment the shared counter and return its new value."""
    counter[0] += 1
    return counter[0]


# Read until the value reaches 5 (the sentinel itself is not yielded)
values = iter(read_counter, 5)
print(list(values))  # [1, 2, 3, 4]

# Useful for reading files line by line
# with open('data.txt') as f:
#     for line in iter(f.readline, ''):  # Read until empty string
#         process(line)

# Reading from a socket (named `sock` to avoid shadowing the socket module)
# for data in iter(lambda: sock.recv(1024), b''):
#     process(data)
StopIteration Exception
# StopIteration is how Python signals end of iteration
# for loops catch it automatically

# Manual iteration with StopIteration handling
nums = [1, 2, 3]
it = iter(nums)
try:
    while True:
        val = next(it)
        print(val)
except StopIteration:
    print("Iteration complete")


# StopIteration in generators
def gen():
    """Yield 1 and 2, then finish."""
    yield 1
    yield 2
    return  # Raises StopIteration with no value


g = gen()
print(next(g))  # 1
print(next(g))  # 2
# next(g)  # StopIteration


# StopIteration with return value
def gen_with_value():
    """Yield 1 and 2, then finish with a return value."""
    yield 1
    yield 2
    return "done"  # Value available as e.value


g = gen_with_value()
next(g)
next(g)
try:
    next(g)
except StopIteration as e:
    print(e.value)  # done


# StopIteration propagation
def outer():
    """Delegate to inner() and pass its return value on to the caller."""
    # `yield from` evaluates to inner()'s return value; returning it here
    # makes it the value of outer()'s own StopIteration.
    result = yield from inner()
    return result


def inner():
    """Yield 1 and 2, then return "result"."""
    yield 1
    yield 2
    return "result"  # Propagates through yield from
Infinite Iterators from itertools
from itertools import count, cycle, repeat

# count(start=0, step=1) — Infinite counter
for i in count(10):
    if i > 15:
        break
    print(i, end=" ")  # 10 11 12 13 14 15
print()  # end the line started by end=" "

# count with step
for i in count(0, 2):
    if i > 10:
        break
    print(i, end=" ")  # 0 2 4 6 8 10
print()

# cycle(iterable) — Infinite cycling
colors = cycle(["red", "green", "blue"])
for _ in range(6):
    print(next(colors), end=" ")  # red green blue red green blue
print()

# repeat(value, times=None) — Repeat value
ones = repeat(1, 5)
print(list(ones))  # [1, 1, 1, 1, 1]

# Infinite repeat
inf_ones = repeat(1)  # No times limit
print(next(inf_ones))  # 1
print(next(inf_ones))  # 1
# Custom infinite iterator
def fibonacci():
    """Yield the Fibonacci numbers 0, 1, 1, 2, 3, ... without end."""
    a, b = 0, 1
    while True:
        yield a
        a, b = b, a + b


# Take first 10 fibonacci numbers
fib = fibonacci()
first_ten = [next(fib) for _ in range(10)]
print(first_ten)  # [0, 1, 1, 2, 3, 5, 8, 13, 21, 34]
# Infinite prime generator
from itertools import islice


def primes():
    """Infinite prime number generator.

    Tests each odd candidate by trial division against the primes found
    so far, stopping once a prime's square exceeds the candidate.
    """
    yield 2
    candidate = 3
    found = [2]
    while True:
        is_prime = True
        for p in found:
            if p * p > candidate:
                # No divisor up to sqrt(candidate): it is prime.
                break
            if candidate % p == 0:
                is_prime = False
                break
        if is_prime:
            found.append(candidate)
            yield candidate
        candidate += 2  # even numbers above 2 are never prime


# Take first 10 primes
print(list(islice(primes(), 10)))  # [2, 3, 5, 7, 11, 13, 17, 19, 23, 29]
Infinite Iterators Reference
| Function | Description | Example |
|---|---|---|
| count(start, step) | Infinite count | count(10, 2) -> 10, 12, 14, ... |
| cycle(iterable) | Infinite cycling | cycle([1,2,3]) -> 1, 2, 3, 1, 2, ... |
| repeat(value, times) | Repeat value (forever if times is omitted) | repeat(5, 3) -> 5, 5, 5 |
Consuming Iterators
# list() consumes the entire iterator
nums = [1, 2, 3]
print(list(iter(nums)))  # [1, 2, 3]

# enumerate, zip, map, filter all return iterators
scores = [95, 87, 91, 78, 85]
for i, score in enumerate(scores):
    print(f"Student {i}: {score}")

# zip returns an iterator
names = ["Alice", "Bob"]
ages = [30, 25]
for name, age in zip(names, ages):
    print(f"{name}: {age}")

# map returns an iterator
squared = map(lambda x: x**2, range(5))
print(list(squared))  # [0, 1, 4, 9, 16]

# filter returns an iterator
evens = filter(lambda x: x % 2 == 0, range(10))
print(list(evens))  # [0, 2, 4, 6, 8]

# sum, min, max consume iterators
print(sum(range(10)))  # 45
print(min(range(10)))  # 0
print(max(range(10)))  # 9

# any and all consume iterators (and stop at the first decisive item)
print(any(x > 5 for x in range(10)))  # True
print(all(x > 0 for x in range(10)))  # False
itertools for Advanced Iteration
from itertools import (
    accumulate,
    chain,  # chain.from_iterable is an attribute of chain, not a separate import
    combinations,
    compress,
    count,
    dropwhile,
    filterfalse,
    groupby,
    islice,
    permutations,
    product,
    starmap,
    takewhile,
    zip_longest,
)

# === chain() — Concatenate iterables ===
a = [1, 2, 3]
b = [4, 5, 6]
c = [7, 8, 9]
print(list(chain(a, b, c)))  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

# chain.from_iterable — Flatten one level
nested = [[1, 2], [3, 4], [5, 6]]
print(list(chain.from_iterable(nested)))  # [1, 2, 3, 4, 5, 6]

# === compress() — Filter by boolean selector ===
data = ['a', 'b', 'c', 'd', 'e']
selectors = [1, 0, 1, 0, 1]
print(list(compress(data, selectors)))  # ['a', 'c', 'e']

# === filterfalse() — Opposite of filter ===
print(list(filterfalse(lambda x: x % 2 == 0, range(10))))  # [1, 3, 5, 7, 9]

# === islice() — Slice an iterator ===
print(list(islice(count(), 5)))  # [0, 1, 2, 3, 4]
print(list(islice(count(), 2, 8)))  # [2, 3, 4, 5, 6, 7]
print(list(islice(count(), 0, 10, 2)))  # [0, 2, 4, 6, 8]

# === zip_longest() — Zip with fillvalue for unequal lengths ===
a = [1, 2, 3]
b = ['a', 'b']
print(list(zip_longest(a, b, fillvalue='-')))
# [(1, 'a'), (2, 'b'), (3, '-')]

# === takewhile() — Take while condition is true ===
print(list(takewhile(lambda x: x < 5, count())))  # [0, 1, 2, 3, 4]
print(list(takewhile(lambda x: x < 5, [1, 3, 5, 2, 4])))  # [1, 3]

# === dropwhile() — Drop while condition is true ===
print(list(dropwhile(lambda x: x < 5, range(10))))  # [5, 6, 7, 8, 9]
print(list(dropwhile(lambda x: x < 5, [1, 3, 5, 2, 4])))  # [5, 2, 4]

# === starmap() — Apply function to argument tuples ===
pairs = [(2, 3), (4, 5), (6, 7)]
print(list(starmap(pow, pairs)))  # [8, 1024, 279936]

# === accumulate() — Running totals ===
print(list(accumulate([1, 2, 3, 4, 5])))  # [1, 3, 6, 10, 15]
print(list(accumulate([1, 2, 3, 4, 5], max)))  # [1, 2, 3, 4, 5]
print(list(accumulate([1, 2, 3, 4, 5], lambda a, b: a * b)))  # [1, 2, 6, 24, 120]

# === product() — Cartesian product ===
print(list(product([1, 2], ['a', 'b'])))
# [(1, 'a'), (1, 'b'), (2, 'a'), (2, 'b')]

# === combinations() — Combinations of length r ===
print(list(combinations([1, 2, 3, 4], 2)))
# [(1, 2), (1, 3), (1, 4), (2, 3), (2, 4), (3, 4)]

# === permutations() — Permutations of length r ===
print(list(permutations([1, 2, 3], 2)))
# [(1, 2), (1, 3), (2, 1), (2, 3), (3, 1), (3, 2)]

# === groupby() — Group consecutive elements ===
data = [('a', 1), ('a', 2), ('b', 3), ('b', 4), ('a', 5)]
for key, group in groupby(data, key=lambda x: x[0]):
    print(key, list(group))
# a [('a', 1), ('a', 2)]
# b [('b', 3), ('b', 4)]
# a [('a', 5)]

# Important: groupby groups CONSECUTIVE elements
# Sort first (by the same key) if grouping all:
data.sort(key=lambda x: x[0])
for key, group in groupby(data, key=lambda x: x[0]):
    print(key, list(group))
# a [('a', 1), ('a', 2), ('a', 5)]
# b [('b', 3), ('b', 4)]
itertools Reference Table
| Function | Description | Example |
|---|---|---|
| chain(a, b) | Concatenate iterables | chain([1,2], [3,4]) -> 1, 2, 3, 4 |
| chain.from_iterable(nested) | Flatten one level | [[1,2],[3,4]] -> [1,2,3,4] |
| compress(data, selectors) | Filter by booleans | [a,c,e] from [a,b,c,d,e] with [1,0,1,0,1] |
| filterfalse(pred, iter) | Keep items where pred is false | [1,3,5,7,9] from 0..9 with pred "is even" |
| islice(iter, start, stop, step) | Slice iterator | islice(count(), 5) -> [0,1,2,3,4] |
| zip_longest(a, b, fillvalue) | Zip unequal lengths | [(1,'a'),(2,'b'),(3,'-')] |
| takewhile(pred, iter) | Take while true | [0,1,2,3,4] while <5 |
| dropwhile(pred, iter) | Drop while true | [5,6,7,8,9] from 0..9 while <5 |
| starmap(func, iter) | Map with unpacked args | pow(2,3)=8 from (2,3) |
| accumulate(iter, func) | Running totals | [1,3,6,10,15] from [1,2,3,4,5] |
| product(a, b) | Cartesian product | [(1,'a'),(1,'b'),(2,'a'),(2,'b')] |
| combinations(iter, r) | Combinations | [(1,2),(1,3),(2,3)] from [1,2,3] r=2 |
| permutations(iter, r) | Permutations | [(1,2),(1,3),(2,1),(2,3),(3,1),(3,2)] from [1,2,3] r=2 |
| groupby(iter, key) | Group consecutive | yields (key, group) pairs, one per consecutive run: ('a', ...), ('b', ...), ('a', ...) |
Memory Efficiency
import sys
from itertools import chain, islice  # used by the comparison table below

# Lists store all elements in memory
list_comp = [x ** 2 for x in range(1_000_000)]
# Roughly 8 MB on 64-bit CPython. sys.getsizeof is shallow: this counts the
# list's own pointer array, not the int objects it refers to.
print(sys.getsizeof(list_comp))

# Iterators/generators store almost nothing
gen_exp = (x ** 2 for x in range(1_000_000))
print(sys.getsizeof(gen_exp))  # a couple of hundred bytes (varies by version)


# Lazy evaluation — compute on demand
def read_large_file(path):
    """Yield the stripped lines of the file at ``path`` one at a time."""
    with open(path, 'r') as f:
        for line in f:  # Reads one line at a time
            yield line.strip()


# Process a huge file while holding only the current line
# for line in read_large_file('huge.log'):
#     process(line)

# Memory comparison for common patterns
patterns = {
    "list_comp": [x for x in range(10000)],
    "gen_exp": (x for x in range(10000)),
    "map_obj": map(lambda x: x, range(10000)),
    "filter_obj": filter(lambda x: True, range(10000)),
    "chain_obj": chain([1], [2], [3]),
    "islice_obj": islice(range(10000), 100),
}
for name, obj in patterns.items():
    print(f"{name}: {sys.getsizeof(obj)} bytes")
| Approach | Memory | Speed | Reusable |
|---|---|---|---|
| List comprehension | High | Fast | Yes |
| Generator expression | Minimal | Lazy | No (single pass) |
| Iterator object | Minimal | Lazy | Depends on class |
| itertools functions | Minimal | Lazy | No (single pass) |
| map() / filter() | Minimal | Lazy | No (single pass) |
Real-World: Data Stream Iterator
import csv
from io import StringIO
class CSVStreamParser:
    """Parse CSV text and hand the rows out in fixed-size chunks.

    ``data`` is the complete CSV text (header row first); ``chunk_size``
    is the maximum number of rows per yielded list. Iterating yields
    lists of row dicts as produced by ``csv.DictReader``; the last chunk
    may be shorter. Each ``__iter__`` call re-parses ``data``, so the
    parser can be iterated more than once.
    """

    def __init__(self, data, chunk_size=100):
        self.data = data
        self.chunk_size = chunk_size

    def __iter__(self):
        reader = csv.DictReader(StringIO(self.data))
        chunk = []
        for row in reader:
            chunk.append(row)
            if len(chunk) >= self.chunk_size:
                yield chunk
                chunk = []  # start a fresh list; the yielded one belongs to the caller
        if chunk:
            # Leftover rows that did not fill a whole chunk.
            yield chunk
# Usage
csv_data = """name,age,city
Alice,30,NYC
Bob,25,LA
Charlie,35,Chicago
Diana,28,Boston"""
parser = CSVStreamParser(csv_data, chunk_size=2)
for chunk in parser:
    print(chunk)
# [{'name': 'Alice', 'age': '30', 'city': 'NYC'}, {'name': 'Bob', ...}]
# [{'name': 'Charlie', ...}, {'name': 'Diana', ...}]
# Real-world: Paginated API iterator
class PaginatedAPI:
    """Iterate over all pages of a paginated API.

    ``fetch_func`` is called as ``fetch_func(page=..., page_size=...)``
    with pages numbered from 1. Iteration yields the items of each page
    in turn and ends at the first page that comes back empty (falsy).
    """

    def __init__(self, fetch_func, page_size=100):
        self.fetch_func = fetch_func
        self.page_size = page_size

    def __iter__(self):
        page = 1
        while True:
            data = self.fetch_func(page=page, page_size=self.page_size)
            if not data:
                # An empty page marks the end of the result set.
                break
            yield from data
            page += 1


# Usage:
# api = PaginatedAPI(fetch_users, page_size=50)
# for user in api:
#     process(user)
# Real-world: Line-by-line log processor
class LogProcessor:
    """Process log files line by line without loading the entire file.

    ``filters`` is an optional list of callables; each receives the
    stripped line and a line is yielded only if every filter returns a
    truthy value. Yielded items are dicts with ``line_num`` (1-based
    position in the file, blank lines included in the count) and
    ``content`` (the stripped line).
    """

    def __init__(self, filepath, filters=None):
        self.filepath = filepath
        self.filters = filters or []

    def __iter__(self):
        with open(self.filepath, 'r') as f:
            for line_num, line in enumerate(f, 1):
                line = line.strip()
                if not line:
                    continue  # skip blank lines
                # `accept` rather than `f`, so the file handle name is not shadowed.
                if all(accept(line) for accept in self.filters):
                    yield {
                        'line_num': line_num,
                        'content': line
                    }
Common Mistakes
from itertools import count  # needed by the Mistake 4 example

# Mistake 1: Iterators are consumed once
it = iter([1, 2, 3])
list(it)  # [1, 2, 3]
list(it)  # [] — empty!
# Fix: recreate iterator or use list
items = [1, 2, 3]
list(iter(items))  # Works each time

# Mistake 2: Modifying collection during iteration
nums = [1, 2, 3]
# for n in nums:
#     nums.remove(n)  # Silently skips elements — nums ends up as [2]
# Fix: iterate over copy
for n in list(nums):
    nums.remove(n)

# Mistake 3: Forgetting StopIteration handling
it = iter([1, 2, 3])
try:
    while True:
        val = next(it)
        print(val)
except StopIteration:
    pass  # expected: the iterator simply ran out
# Fix: use for loop or next with default
for val in iter([1, 2, 3]):
    print(val)

# Mistake 4: Infinite iterator without break
# for i in count(0):  # Runs forever!
#     print(i)
# Fix: always have a break condition
for i in count(0):
    if i >= 100:
        break
    print(i)  # stand-in for real per-item work

# Mistake 5: Using list operations on iterators
it = iter([1, 2, 3])
# it[0]  # TypeError: 'list_iterator' object is not subscriptable
# len(it)  # TypeError: object of type 'list_iterator' has no len()
# Fix: convert to list first
lst = list(it)
print(lst[0])  # 1


# Mistake 6: Not priming generators before send()
def my_gen():
    """Receive one value via send() and yield it back in a message."""
    value = yield
    yield f"Got: {value}"


g = my_gen()
# g.send("hello")  # TypeError: can't send non-None value to a just-started generator
next(g)  # Prime the generator
g.send("hello")  # Works: "Got: hello"

# Mistake 7: Assuming iterator protocol returns new iterator
it = iter([1, 2, 3])
it2 = iter(it)  # Returns the same iterator!
print(it is it2)  # True
Key Takeaways
- Iterators implement `__iter__` and `__next__` — the protocol Python uses for for loops
- Generators are the easiest way to create iterators — no class needed
- Iterators are consumed once — they are not reusable; for repeated passes, use an iterable class whose `__iter__` creates a new iterator each time
- `StopIteration` signals the end of iteration — for loops catch it automatically
- Use `itertools` for advanced iteration patterns — chain, islice, groupby, etc.
- Iterables can create multiple independent iterators; iterators return themselves from `__iter__`
- Use `next(iterator, default)` to avoid StopIteration exceptions — returns default instead of raising
- `iter(callable, sentinel)` creates iterators from functions — calls until sentinel returned
- Iterator objects are memory-efficient — they don't store all elements in memory
- Infinite iterators (count, cycle, repeat) are powerful but always require a stopping condition (a break, islice, or takewhile)