Generators let you iterate without loading everything into memory. Here's how they work.

The Problem

# Loads all data into memory at once
def get_all_numbers(n):
    """Return the numbers 0..n-1 as one fully materialized list (eager)."""
    return list(range(n))
 
numbers = get_all_numbers(10_000_000)  # Uses ~400MB

The Solution: Generators

# Generates values on demand
def get_numbers(n):
    """Lazily yield the numbers 0..n-1, one at a time."""
    yield from range(n)
 
numbers = get_numbers(10_000_000)  # Uses almost no memory

The yield keyword makes this a generator function. Calling it doesn't run the body — it returns a generator object. Each time you ask for the next value, the body runs until it hits yield, pauses there, and hands that value back; it resumes from the same spot on the next request.

Using Generators

gen = get_numbers(5)
 
print(next(gen))  # 0
print(next(gen))  # 1
print(next(gen))  # 2
 
# Or iterate
for num in get_numbers(5):
    print(num)

Generator Expressions

Like list comprehensions, but lazy:

# List comprehension - creates list in memory
squares = [x**2 for x in range(1000000)]
 
# Generator expression - creates values on demand
squares = (x**2 for x in range(1000000))

Note the parentheses instead of brackets.

When to Use Generators

Large Data

def read_large_file(path):
    """Yield each line of *path* with surrounding whitespace stripped.

    The file stays open only while the generator is being consumed.
    """
    with open(path) as handle:
        yield from map(str.strip, handle)
 
# Process line by line, never load whole file
for line in read_large_file("huge.csv"):
    process(line)

Infinite Sequences

def count_forever():
    """Yield 0, 1, 2, ... without end — consume with islice or break."""
    value = -1
    while True:
        value += 1
        yield value
 
# Take only what you need
from itertools import islice
first_100 = list(islice(count_forever(), 100))

Pipeline Processing

def read_lines(path):
    """Yield raw lines (newline included) from *path*, lazily."""
    with open(path) as handle:
        yield from handle
 
def parse_json(lines):
    """Decode each incoming line as a JSON document, lazily."""
    yield from map(json.loads, lines)
 
def filter_active(records):
    """Pass through only records whose "active" value is truthy."""
    yield from (rec for rec in records if rec.get("active"))
 
# Chain them together
pipeline = filter_active(parse_json(read_lines("data.jsonl")))
 
for record in pipeline:
    process(record)

The Iterator Protocol

Under the hood, generators implement the iterator protocol (__iter__ and __next__). Here's the same behavior written by hand as a class:

class Counter:
    """Hand-written iterator that yields 0 .. max_count-1.

    Demonstrates the iterator protocol (__iter__/__next__) that
    generators implement for you automatically.
    """

    def __init__(self, max_count):
        self.max = max_count
        self.current = 0

    def __iter__(self):
        # An iterator is its own iterable.
        return self

    def __next__(self):
        value = self.current
        if value >= self.max:
            raise StopIteration
        self.current = value + 1
        return value
 
for num in Counter(5):
    print(num)  # 0, 1, 2, 3, 4

Generators are simpler:

def counter(max_count):
    """Yield 0 .. max_count-1 — the generator form of a manual iterator."""
    yield from range(max_count)

yield from

Delegate to another generator:

def flatten(nested):
    """Yield the leaves of an arbitrarily nested list, left to right."""
    for element in nested:
        if not isinstance(element, list):
            yield element
        else:
            # Recurse and delegate the sub-generator's values.
            yield from flatten(element)
 
list(flatten([1, [2, 3], [4, [5, 6]]]))
# [1, 2, 3, 4, 5, 6]

Sending Values

Generators can receive values:

def accumulator():
    """Running-total coroutine: send(x) adds x and yields the new total.

    Prime it with next() before the first send().
    """
    total = 0
    while True:
        received = yield total
        if received is None:
            # Plain next() — re-yield the current total unchanged.
            continue
        total += received
 
acc = accumulator()
next(acc)      # Start the generator, returns 0
acc.send(10)   # Returns 10
acc.send(5)    # Returns 15
acc.send(25)   # Returns 40

Common Patterns

Batch Processing

def batches(iterable, size):
    """Yield lists of up to *size* items; the final batch may be short."""
    chunk = []
    for element in iterable:
        chunk.append(element)
        if len(chunk) < size:
            continue
        yield chunk
        chunk = []
    # Flush any leftover partial batch.
    if chunk:
        yield chunk
 
for batch in batches(range(10), 3):
    print(batch)
# [0, 1, 2]
# [3, 4, 5]
# [6, 7, 8]
# [9]

Sliding Window

from collections import deque
 
def sliding_window(iterable, size):
    """Yield consecutive overlapping tuples of length *size*.

    Yields nothing when the iterable has fewer than *size* items.
    The original primed the window with bare next(it) calls, so a
    short input let StopIteration escape the generator body, which
    PEP 479 converts into a RuntimeError. Filling the window inside
    the single loop avoids that and simplifies the code.
    """
    window = deque(maxlen=size)
    for item in iterable:
        window.append(item)
        # deque(maxlen=size) drops the oldest item automatically,
        # so the window starts sliding once it is full.
        if len(window) == size:
            yield tuple(window)
 
list(sliding_window([1, 2, 3, 4, 5], 3))
# [(1, 2, 3), (2, 3, 4), (3, 4, 5)]

itertools

The standard library has powerful generator tools:

from itertools import (
    count,      # Infinite counter
    cycle,      # Infinite repetition
    repeat,     # Repeat value
    chain,      # Concatenate iterables
    islice,     # Slice an iterator
    takewhile,  # Take while condition
    dropwhile,  # Drop while condition
    groupby,    # Group consecutive items
)
 
# Examples
list(islice(count(10), 5))        # [10, 11, 12, 13, 14]
list(islice(cycle([1, 2]), 5))    # [1, 2, 1, 2, 1]
list(chain([1, 2], [3, 4]))       # [1, 2, 3, 4]

Memory Comparison

import sys
 
# List - stores all values
list_nums = [i for i in range(1000)]
print(sys.getsizeof(list_nums))  # ~8856 bytes
 
# Generator - stores only the function
gen_nums = (i for i in range(1000))
print(sys.getsizeof(gen_nums))   # ~112 bytes

Gotchas

One-Time Use

gen = (x for x in range(5))
list(gen)  # [0, 1, 2, 3, 4]
list(gen)  # [] - exhausted!

No Length

gen = (x for x in range(5))
len(gen)  # TypeError

No Indexing

gen = (x for x in range(5))
gen[0]  # TypeError

When Not to Use

  • When you need to access items multiple times
  • When you need random access
  • When you need to know the length upfront
  • When the data easily fits in memory

Use generators for streaming, pipelines, and memory efficiency.

React to this post: