The heapq module implements a min-heap priority queue. O(log n) push and pop—essential for scheduling, graph algorithms, and finding extremes in data.

Basic Heap Operations

import heapq
 
# Create heap from list
data = [5, 3, 8, 1, 9, 2]
heapq.heapify(data)  # In-place, O(n)
print(data)  # [1, 3, 2, 5, 9, 8]
 
# Push item
heapq.heappush(data, 0)
print(data[0])  # 0 (minimum)
 
# Pop smallest
smallest = heapq.heappop(data)
print(smallest)  # 0
 
# Push and pop in one operation (more efficient)
result = heapq.heappushpop(data, 4)  # Push 4, pop smallest
 
# Pop and push in one operation
result = heapq.heapreplace(data, 10)  # Pop smallest, push 10

Priority Queue

import heapq
from dataclasses import dataclass, field
from typing import Any
 
@dataclass(order=True)
class PrioritizedItem:
    priority: int
    item: Any = field(compare=False)
 
class PriorityQueue:
    def __init__(self):
        self._heap = []
    
    def push(self, priority: int, item: Any):
        heapq.heappush(self._heap, PrioritizedItem(priority, item))
    
    def pop(self) -> Any:
        return heapq.heappop(self._heap).item
    
    def peek(self) -> Any:
        return self._heap[0].item if self._heap else None
    
    def __len__(self):
        return len(self._heap)
 
# Usage
pq = PriorityQueue()
pq.push(3, "medium priority")
pq.push(1, "high priority")
pq.push(5, "low priority")
 
print(pq.pop())  # "high priority"
print(pq.pop())  # "medium priority"

Max Heap

Python's heapq is a min-heap. For max-heap, negate values:

import heapq
 
class MaxHeap:
    def __init__(self):
        self._heap = []
    
    def push(self, value):
        heapq.heappush(self._heap, -value)
    
    def pop(self):
        return -heapq.heappop(self._heap)
    
    def peek(self):
        return -self._heap[0] if self._heap else None
 
# Usage
mh = MaxHeap()
mh.push(5)
mh.push(3)
mh.push(8)
 
print(mh.pop())  # 8 (largest)
print(mh.pop())  # 5

Find K Largest/Smallest

import heapq
 
data = [3, 1, 4, 1, 5, 9, 2, 6, 5, 3, 5]
 
# K largest
largest = heapq.nlargest(3, data)
print(largest)  # [9, 6, 5]
 
# K smallest
smallest = heapq.nsmallest(3, data)
print(smallest)  # [1, 1, 2]
 
# With key function
words = ['python', 'go', 'javascript', 'c', 'rust']
longest = heapq.nlargest(3, words, key=len)
print(longest)  # ['javascript', 'python', 'rust']

Merge Sorted Iterables

import heapq
 
# Merge sorted sequences efficiently
list1 = [1, 4, 7, 10]
list2 = [2, 5, 8, 11]
list3 = [3, 6, 9, 12]
 
merged = list(heapq.merge(list1, list2, list3))
print(merged)  # [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]
 
# With key function
words = [['apple', 'cherry'], ['banana', 'date']]
merged = list(heapq.merge(*words, key=str.lower))

Task Scheduler

import heapq
import time
from dataclasses import dataclass, field
from typing import Callable
 
@dataclass(order=True)
class ScheduledTask:
    run_at: float
    task: Callable = field(compare=False)
    name: str = field(compare=False, default="")
 
class Scheduler:
    def __init__(self):
        self._tasks = []
    
    def schedule(self, delay: float, task: Callable, name: str = ""):
        run_at = time.time() + delay
        heapq.heappush(self._tasks, ScheduledTask(run_at, task, name))
    
    def run(self):
        while self._tasks:
            task = self._tasks[0]
            now = time.time()
            
            if task.run_at > now:
                time.sleep(task.run_at - now)
            
            heapq.heappop(self._tasks)
            task.task()
 
# Usage
scheduler = Scheduler()
scheduler.schedule(2, lambda: print("Task 1"), "task1")
scheduler.schedule(1, lambda: print("Task 2"), "task2")
scheduler.schedule(3, lambda: print("Task 3"), "task3")
# scheduler.run()  # Runs in order: Task 2, Task 1, Task 3

Running Median

Track median of a stream:

import heapq
 
class RunningMedian:
    """Track median of a stream of numbers."""
    
    def __init__(self):
        self.small = []  # Max heap (negated)
        self.large = []  # Min heap
    
    def add(self, num: float):
        # Add to max heap
        heapq.heappush(self.small, -num)
        
        # Balance: move largest from small to large
        heapq.heappush(self.large, -heapq.heappop(self.small))
        
        # Ensure small has >= elements than large
        if len(self.large) > len(self.small):
            heapq.heappush(self.small, -heapq.heappop(self.large))
    
    def median(self) -> float:
        if len(self.small) > len(self.large):
            return -self.small[0]
        return (-self.small[0] + self.large[0]) / 2
 
# Usage
rm = RunningMedian()
for num in [2, 1, 5, 7, 2, 0, 5]:
    rm.add(num)
    print(f"Added {num}, median: {rm.median()}")

Dijkstra's Shortest Path

import heapq
from collections import defaultdict
 
def dijkstra(graph: dict, start: str) -> dict:
    """Find shortest paths from start to all nodes."""
    distances = {start: 0}
    heap = [(0, start)]
    
    while heap:
        dist, node = heapq.heappop(heap)
        
        if dist > distances.get(node, float('inf')):
            continue
        
        for neighbor, weight in graph[node]:
            new_dist = dist + weight
            if new_dist < distances.get(neighbor, float('inf')):
                distances[neighbor] = new_dist
                heapq.heappush(heap, (new_dist, neighbor))
    
    return distances
 
# Graph as adjacency list: node -> [(neighbor, weight), ...]
graph = {
    'A': [('B', 1), ('C', 4)],
    'B': [('C', 2), ('D', 5)],
    'C': [('D', 1)],
    'D': [],
}
 
print(dijkstra(graph, 'A'))  # {'A': 0, 'B': 1, 'C': 3, 'D': 4}

Top K Frequent Elements

import heapq
from collections import Counter
 
def top_k_frequent(items: list, k: int) -> list:
    """Find k most frequent elements."""
    counts = Counter(items)
    return heapq.nlargest(k, counts.keys(), key=counts.get)
 
words = ['apple', 'banana', 'apple', 'cherry', 'banana', 'apple']
print(top_k_frequent(words, 2))  # ['apple', 'banana']

Bounded Priority Queue

Keep only top K items:

import heapq
 
class BoundedPriorityQueue:
    """Keep only the K largest items."""
    
    def __init__(self, max_size: int):
        self.max_size = max_size
        self._heap = []  # Min heap
    
    def push(self, item):
        if len(self._heap) < self.max_size:
            heapq.heappush(self._heap, item)
        elif item > self._heap[0]:
            heapq.heapreplace(self._heap, item)
    
    def get_top_k(self) -> list:
        return sorted(self._heap, reverse=True)
 
# Track top 3 scores
scores = BoundedPriorityQueue(3)
for score in [45, 82, 91, 23, 67, 88, 95]:
    scores.push(score)
 
print(scores.get_top_k())  # [95, 91, 88]

Merge K Sorted Lists

import heapq
 
def merge_k_sorted(lists: list[list]) -> list:
    """Merge k sorted lists into one sorted list."""
    result = []
    # Heap of (value, list_index, element_index)
    heap = []
    
    for i, lst in enumerate(lists):
        if lst:
            heapq.heappush(heap, (lst[0], i, 0))
    
    while heap:
        val, list_idx, elem_idx = heapq.heappop(heap)
        result.append(val)
        
        # Push next element from same list
        if elem_idx + 1 < len(lists[list_idx]):
            next_val = lists[list_idx][elem_idx + 1]
            heapq.heappush(heap, (next_val, list_idx, elem_idx + 1))
    
    return result
 
lists = [[1, 4, 7], [2, 5, 8], [3, 6, 9]]
print(merge_k_sorted(lists))  # [1, 2, 3, 4, 5, 6, 7, 8, 9]

Time Complexity

OperationComplexity
heappushO(log n)
heappopO(log n)
heapifyO(n)
nlargest/nsmallest(k)O(n log k)
heap[0] (peek)O(1)

When to use what:

  • K << N: Use nlargest/nsmallest
  • K ≈ N: Use sorted()
  • Single min/max: Use min()/max()
  • Repeated push/pop: Use heap directly

The heapq module powers efficient priority-based algorithms. Use it for scheduling, graph algorithms, streaming top-k, and any problem requiring ordered access to extremes.

React to this post: