The heapq module implements a min-heap priority queue. O(log n) push and pop—essential for scheduling, graph algorithms, and finding extremes in data.
Basic Heap Operations
import heapq
# Create heap from list
data = [5, 3, 8, 1, 9, 2]
heapq.heapify(data) # In-place, O(n)
print(data) # [1, 3, 2, 5, 9, 8]
# Push item
heapq.heappush(data, 0)
print(data[0]) # 0 (minimum)
# Pop smallest
smallest = heapq.heappop(data)
print(smallest) # 0
# Push and pop in one operation (more efficient)
result = heapq.heappushpop(data, 4) # Push 4, pop smallest
# Pop and push in one operation
result = heapq.heapreplace(data, 10) # Pop smallest, push 10Priority Queue
import heapq
from dataclasses import dataclass, field
from typing import Any
@dataclass(order=True)
class PrioritizedItem:
priority: int
item: Any = field(compare=False)
class PriorityQueue:
def __init__(self):
self._heap = []
def push(self, priority: int, item: Any):
heapq.heappush(self._heap, PrioritizedItem(priority, item))
def pop(self) -> Any:
return heapq.heappop(self._heap).item
def peek(self) -> Any:
return self._heap[0].item if self._heap else None
def __len__(self):
return len(self._heap)
# Usage
pq = PriorityQueue()
pq.push(3, "medium priority")
pq.push(1, "high priority")
pq.push(5, "low priority")
print(pq.pop()) # "high priority"
print(pq.pop()) # "medium priority"Max Heap
Python's heapq is a min-heap. For max-heap, negate values:
import heapq
class MaxHeap:
def __init__(self):
self._heap = []
def push(self, value):
heapq.heappush(self._heap, -value)
def pop(self):
return -heapq.heappop(self._heap)
def peek(self):
return -self._heap[0] if self._heap else None
# Usage
mh = MaxHeap()
mh.push(5)
mh.push(3)
mh.push(8)
print(mh.pop()) # 8 (largest)
print(mh.pop()) # 5Find K Largest/Smallest
import heapq
data = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
# K largest
largest = heapq.nlargest(3, data)
print(largest) # [9, 6, 5]
# K smallest
smallest = heapq.nsmallest(3, data)
print(smallest) # [1, 1, 2]
# With key function
words = ['python', 'go', 'javascript', 'c', 'rust']
longest = heapq.nlargest(3, words, key=len)
print(longest) # ['javascript', 'python', 'rust']Merge Sorted Iterables
import heapq
# Merge sorted sequences efficiently
list1 = [1, 4, 7, 10]
list2 = [2, 5, 8, 11]
list3 = [3, 6, 9, 12]
merged = list(heapq.merge(list1, list2, list3))
print(merged) # [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
# With key function
words = [['apple', 'cherry'], ['banana', 'date']]
merged = list(heapq.merge(*words, key=str.lower))Task Scheduler
import heapq
import time
from dataclasses import dataclass, field
from typing import Callable
@dataclass(order=True)
class ScheduledTask:
run_at: float
task: Callable = field(compare=False)
name: str = field(compare=False, default="")
class Scheduler:
def __init__(self):
self._tasks = []
def schedule(self, delay: float, task: Callable, name: str = ""):
run_at = time.time() + delay
heapq.heappush(self._tasks, ScheduledTask(run_at, task, name))
def run(self):
while self._tasks:
task = self._tasks[0]
now = time.time()
if task.run_at > now:
time.sleep(task.run_at - now)
heapq.heappop(self._tasks)
task.task()
# Usage
scheduler = Scheduler()
scheduler.schedule(2, lambda: print("Task 1"), "task1")
scheduler.schedule(1, lambda: print("Task 2"), "task2")
scheduler.schedule(3, lambda: print("Task 3"), "task3")
# scheduler.run() # Runs in order: Task 2, Task 1, Task 3Running Median
Track median of a stream:
import heapq
class RunningMedian:
"""Track median of a stream of numbers."""
def __init__(self):
self.small = [] # Max heap (negated)
self.large = [] # Min heap
def add(self, num: float):
# Add to max heap
heapq.heappush(self.small, -num)
# Balance: move largest from small to large
heapq.heappush(self.large, -heapq.heappop(self.small))
# Ensure small has >= elements than large
if len(self.large) > len(self.small):
heapq.heappush(self.small, -heapq.heappop(self.large))
def median(self) -> float:
if len(self.small) > len(self.large):
return -self.small[0]
return (-self.small[0] + self.large[0]) / 2
# Usage
rm = RunningMedian()
for num in [2, 1, 5, 7, 2, 0, 5]:
rm.add(num)
print(f"Added {num}, median: {rm.median()}")Dijkstra's Shortest Path
import heapq
from collections import defaultdict
def dijkstra(graph: dict, start: str) -> dict:
"""Find shortest paths from start to all nodes."""
distances = {start: 0}
heap = [(0, start)]
while heap:
dist, node = heapq.heappop(heap)
if dist > distances.get(node, float('inf')):
continue
for neighbor, weight in graph[node]:
new_dist = dist + weight
if new_dist < distances.get(neighbor, float('inf')):
distances[neighbor] = new_dist
heapq.heappush(heap, (new_dist, neighbor))
return distances
# Graph as adjacency list: node -> [(neighbor, weight), ...]
graph = {
'A': [('B', 1), ('C', 4)],
'B': [('C', 2), ('D', 5)],
'C': [('D', 1)],
'D': [],
}
print(dijkstra(graph, 'A')) # {'A': 0, 'B': 1, 'C': 3, 'D': 4}Top K Frequent Elements
import heapq
from collections import Counter
def top_k_frequent(items: list, k: int) -> list:
"""Find k most frequent elements."""
counts = Counter(items)
return heapq.nlargest(k, counts.keys(), key=counts.get)
words = ['apple', 'banana', 'apple', 'cherry', 'banana', 'apple']
print(top_k_frequent(words, 2)) # ['apple', 'banana']Bounded Priority Queue
Keep only top K items:
import heapq
class BoundedPriorityQueue:
"""Keep only the K largest items."""
def __init__(self, max_size: int):
self.max_size = max_size
self._heap = [] # Min heap
def push(self, item):
if len(self._heap) < self.max_size:
heapq.heappush(self._heap, item)
elif item > self._heap[0]:
heapq.heapreplace(self._heap, item)
def get_top_k(self) -> list:
return sorted(self._heap, reverse=True)
# Track top 3 scores
scores = BoundedPriorityQueue(3)
for score in [45, 82, 91, 23, 67, 88, 95]:
scores.push(score)
print(scores.get_top_k()) # [95, 91, 88]Merge K Sorted Lists
import heapq
def merge_k_sorted(lists: list[list]) -> list:
"""Merge k sorted lists into one sorted list."""
result = []
# Heap of (value, list_index, element_index)
heap = []
for i, lst in enumerate(lists):
if lst:
heapq.heappush(heap, (lst[0], i, 0))
while heap:
val, list_idx, elem_idx = heapq.heappop(heap)
result.append(val)
# Push next element from same list
if elem_idx + 1 < len(lists[list_idx]):
next_val = lists[list_idx][elem_idx + 1]
heapq.heappush(heap, (next_val, list_idx, elem_idx + 1))
return result
lists = [[1, 4, 7], [2, 5, 8], [3, 6, 9]]
print(merge_k_sorted(lists)) # [1, 2, 3, 4, 5, 6, 7, 8, 9]Time Complexity
| Operation | Complexity |
|---|---|
| heappush | O(log n) |
| heappop | O(log n) |
| heapify | O(n) |
| nlargest/nsmallest(k) | O(n log k) |
| heap[0] (peek) | O(1) |
When to use what:
K << N: Usenlargest/nsmallestK ≈ N: Usesorted()- Single min/max: Use
min()/max() - Repeated push/pop: Use heap directly
The heapq module powers efficient priority-based algorithms. Use it for scheduling, graph algorithms, streaming top-k, and any problem requiring ordered access to extremes.
React to this post: