The heapq module implements a min-heap. Use it for priority queues, finding top-N elements, and merge operations.
Heap Basics
import heapq
# Create a heap from a list
nums = [3, 1, 4, 1, 5, 9, 2, 6]
heapq.heapify(nums)
print(nums) # [1, 1, 2, 6, 5, 9, 4, 3]
# Smallest element is always at index 0
print(nums[0]) # 1Push and Pop
import heapq
heap = []
# Push items
heapq.heappush(heap, 3)
heapq.heappush(heap, 1)
heapq.heappush(heap, 4)
# Pop smallest
smallest = heapq.heappop(heap)
print(smallest) # 1
# Push and pop in one operation (more efficient)
result = heapq.heappushpop(heap, 2) # Push 2, pop smallest
# Pop and push in one operation
result = heapq.heapreplace(heap, 5) # Pop smallest, push 5Finding N Largest/Smallest
import heapq
nums = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3]
# Smallest 3
print(heapq.nsmallest(3, nums)) # [1, 1, 2]
# Largest 3
print(heapq.nlargest(3, nums)) # [9, 6, 5]
# With key function
people = [
{'name': 'Alice', 'age': 30},
{'name': 'Bob', 'age': 25},
{'name': 'Carol', 'age': 35},
]
youngest = heapq.nsmallest(2, people, key=lambda x: x['age'])Performance guide:
- For N=1: use
min()ormax() - For small N: use
nsmallest/nlargest - For large N (close to len): use
sorted()[:N]
Max Heap
Python only has min-heap. For max-heap, negate values:
import heapq
# Max heap by negating
max_heap = []
for val in [3, 1, 4, 1, 5]:
heapq.heappush(max_heap, -val)
# Pop largest
largest = -heapq.heappop(max_heap)
print(largest) # 5Priority Queue Pattern
import heapq
from dataclasses import dataclass, field
from typing import Any
@dataclass(order=True)
class PrioritizedItem:
priority: int
item: Any = field(compare=False)
class PriorityQueue:
def __init__(self):
self._heap = []
def push(self, item, priority):
heapq.heappush(self._heap, PrioritizedItem(priority, item))
def pop(self):
return heapq.heappop(self._heap).item
def __bool__(self):
return bool(self._heap)
# Usage
pq = PriorityQueue()
pq.push("low priority task", priority=10)
pq.push("high priority task", priority=1)
pq.push("medium task", priority=5)
while pq:
print(pq.pop())
# high priority task
# medium task
# low priority taskMerging Sorted Iterables
import heapq
list1 = [1, 3, 5, 7]
list2 = [2, 4, 6, 8]
list3 = [0, 9]
# Merge maintaining sort order
merged = list(heapq.merge(list1, list2, list3))
print(merged) # [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
# Memory efficient - uses iterators
for item in heapq.merge(list1, list2, list3):
print(item)Practical Examples
Task Scheduler
import heapq
from datetime import datetime, timedelta
class TaskScheduler:
def __init__(self):
self._tasks = []
self._counter = 0
def schedule(self, task: str, run_at: datetime):
# Counter ensures FIFO for same time
heapq.heappush(self._tasks, (run_at, self._counter, task))
self._counter += 1
def get_next(self):
if self._tasks:
run_at, _, task = heapq.heappop(self._tasks)
return run_at, task
return None
scheduler = TaskScheduler()
now = datetime.now()
scheduler.schedule("Send email", now + timedelta(hours=1))
scheduler.schedule("Backup", now + timedelta(minutes=30))
scheduler.schedule("Report", now + timedelta(hours=2))
# Get tasks in order
while task := scheduler.get_next():
print(task)Running Median
import heapq
class RunningMedian:
def __init__(self):
self.low = [] # Max heap (negated)
self.high = [] # Min heap
def add(self, num):
# Add to appropriate heap
if not self.low or num <= -self.low[0]:
heapq.heappush(self.low, -num)
else:
heapq.heappush(self.high, num)
# Rebalance
if len(self.low) > len(self.high) + 1:
heapq.heappush(self.high, -heapq.heappop(self.low))
elif len(self.high) > len(self.low):
heapq.heappush(self.low, -heapq.heappop(self.high))
def median(self):
if len(self.low) > len(self.high):
return -self.low[0]
return (-self.low[0] + self.high[0]) / 2
rm = RunningMedian()
for num in [2, 1, 5, 7, 2, 0, 5]:
rm.add(num)
print(f"Added {num}, median: {rm.median()}")Dijkstra's Algorithm
import heapq
from collections import defaultdict
def dijkstra(graph, start):
"""Find shortest paths from start to all nodes."""
distances = {start: 0}
heap = [(0, start)]
while heap:
dist, node = heapq.heappop(heap)
if dist > distances.get(node, float('inf')):
continue
for neighbor, weight in graph[node]:
new_dist = dist + weight
if new_dist < distances.get(neighbor, float('inf')):
distances[neighbor] = new_dist
heapq.heappush(heap, (new_dist, neighbor))
return distancesQuick Reference
import heapq
# Transform list into heap
heapq.heapify(x)
# Push item
heapq.heappush(heap, item)
# Pop smallest
heapq.heappop(heap)
# Push then pop (efficient)
heapq.heappushpop(heap, item)
# Pop then push (efficient)
heapq.heapreplace(heap, item)
# N smallest/largest
heapq.nsmallest(n, iterable, key=None)
heapq.nlargest(n, iterable, key=None)
# Merge sorted iterables
heapq.merge(*iterables, key=None, reverse=False)| Operation | Time Complexity |
|---|---|
heapify | O(n) |
heappush | O(log n) |
heappop | O(log n) |
nsmallest(k) | O(n log k) |
nlargest(k) | O(n log k) |
heapq is the foundation for priority queues in Python. Simple interface, efficient operations.
React to this post: