The heapq module implements a min-heap. Use it for priority queues, finding top-N elements, and merge operations.

Heap Basics

import heapq
 
# Create a heap from a list
nums = [3, 1, 4, 1, 5, 9, 2, 6]
heapq.heapify(nums)
print(nums)  # [1, 1, 2, 6, 5, 9, 4, 3]
 
# Smallest element is always at index 0
print(nums[0])  # 1

Push and Pop

import heapq
 
heap = []
 
# Push items
heapq.heappush(heap, 3)
heapq.heappush(heap, 1)
heapq.heappush(heap, 4)
 
# Pop smallest
smallest = heapq.heappop(heap)
print(smallest)  # 1
 
# Push and pop in one operation (more efficient)
result = heapq.heappushpop(heap, 2)  # Push 2, pop smallest
 
# Pop and push in one operation
result = heapq.heapreplace(heap, 5)  # Pop smallest, push 5

Finding N Largest/Smallest

import heapq
 
nums = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3]
 
# Smallest 3
print(heapq.nsmallest(3, nums))  # [1, 1, 2]
 
# Largest 3
print(heapq.nlargest(3, nums))   # [9, 6, 5]
 
# With key function
people = [
    {'name': 'Alice', 'age': 30},
    {'name': 'Bob', 'age': 25},
    {'name': 'Carol', 'age': 35},
]
youngest = heapq.nsmallest(2, people, key=lambda x: x['age'])

Performance guide:

  • For N=1: use min() or max()
  • For small N: use nsmallest/nlargest
  • For large N (close to len): use sorted()[:N]

Max Heap

Python only has min-heap. For max-heap, negate values:

import heapq
 
# Max heap by negating
max_heap = []
for val in [3, 1, 4, 1, 5]:
    heapq.heappush(max_heap, -val)
 
# Pop largest
largest = -heapq.heappop(max_heap)
print(largest)  # 5

Priority Queue Pattern

import heapq
from dataclasses import dataclass, field
from typing import Any
 
@dataclass(order=True)
class PrioritizedItem:
    priority: int
    item: Any = field(compare=False)
 
class PriorityQueue:
    def __init__(self):
        self._heap = []
    
    def push(self, item, priority):
        heapq.heappush(self._heap, PrioritizedItem(priority, item))
    
    def pop(self):
        return heapq.heappop(self._heap).item
    
    def __bool__(self):
        return bool(self._heap)
 
# Usage
pq = PriorityQueue()
pq.push("low priority task", priority=10)
pq.push("high priority task", priority=1)
pq.push("medium task", priority=5)
 
while pq:
    print(pq.pop())
# high priority task
# medium task
# low priority task

Merging Sorted Iterables

import heapq
 
list1 = [1, 3, 5, 7]
list2 = [2, 4, 6, 8]
list3 = [0, 9]
 
# Merge maintaining sort order
merged = list(heapq.merge(list1, list2, list3))
print(merged)  # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
 
# Memory efficient - uses iterators
for item in heapq.merge(list1, list2, list3):
    print(item)

Practical Examples

Task Scheduler

import heapq
from datetime import datetime, timedelta
 
class TaskScheduler:
    def __init__(self):
        self._tasks = []
        self._counter = 0
    
    def schedule(self, task: str, run_at: datetime):
        # Counter ensures FIFO for same time
        heapq.heappush(self._tasks, (run_at, self._counter, task))
        self._counter += 1
    
    def get_next(self):
        if self._tasks:
            run_at, _, task = heapq.heappop(self._tasks)
            return run_at, task
        return None
 
scheduler = TaskScheduler()
now = datetime.now()
scheduler.schedule("Send email", now + timedelta(hours=1))
scheduler.schedule("Backup", now + timedelta(minutes=30))
scheduler.schedule("Report", now + timedelta(hours=2))
 
# Get tasks in order
while task := scheduler.get_next():
    print(task)

Running Median

import heapq
 
class RunningMedian:
    def __init__(self):
        self.low = []   # Max heap (negated)
        self.high = []  # Min heap
    
    def add(self, num):
        # Add to appropriate heap
        if not self.low or num <= -self.low[0]:
            heapq.heappush(self.low, -num)
        else:
            heapq.heappush(self.high, num)
        
        # Rebalance
        if len(self.low) > len(self.high) + 1:
            heapq.heappush(self.high, -heapq.heappop(self.low))
        elif len(self.high) > len(self.low):
            heapq.heappush(self.low, -heapq.heappop(self.high))
    
    def median(self):
        if len(self.low) > len(self.high):
            return -self.low[0]
        return (-self.low[0] + self.high[0]) / 2
 
rm = RunningMedian()
for num in [2, 1, 5, 7, 2, 0, 5]:
    rm.add(num)
    print(f"Added {num}, median: {rm.median()}")

Dijkstra's Algorithm

import heapq
from collections import defaultdict
 
def dijkstra(graph, start):
    """Find shortest paths from start to all nodes."""
    distances = {start: 0}
    heap = [(0, start)]
    
    while heap:
        dist, node = heapq.heappop(heap)
        
        if dist > distances.get(node, float('inf')):
            continue
        
        for neighbor, weight in graph[node]:
            new_dist = dist + weight
            if new_dist < distances.get(neighbor, float('inf')):
                distances[neighbor] = new_dist
                heapq.heappush(heap, (new_dist, neighbor))
    
    return distances

Quick Reference

import heapq
 
# Transform list into heap
heapq.heapify(x)
 
# Push item
heapq.heappush(heap, item)
 
# Pop smallest
heapq.heappop(heap)
 
# Push then pop (efficient)
heapq.heappushpop(heap, item)
 
# Pop then push (efficient)
heapq.heapreplace(heap, item)
 
# N smallest/largest
heapq.nsmallest(n, iterable, key=None)
heapq.nlargest(n, iterable, key=None)
 
# Merge sorted iterables
heapq.merge(*iterables, key=None, reverse=False)
OperationTime Complexity
heapifyO(n)
heappushO(log n)
heappopO(log n)
nsmallest(k)O(n log k)
nlargest(k)O(n log k)

heapq is the foundation for priority queues in Python. Simple interface, efficient operations.

React to this post: