The collections module provides specialized container types beyond lists and dicts. Here's what you need to know.
Counter
Count hashable objects:
from collections import Counter
# Count elements
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
counts = Counter(words)
# Counter({'apple': 3, 'banana': 2, 'cherry': 1})
# Count characters
Counter("mississippi")
# Counter({'i': 4, 's': 4, 'p': 2, 'm': 1})
# Most common
counts.most_common(2) # [('apple', 3), ('banana', 2)]
# Total count (Python 3.10+)
counts.total() # 6
# Arithmetic
c1 = Counter(a=3, b=1)
c2 = Counter(a=1, b=2)
c1 + c2 # Counter({'a': 4, 'b': 3})
c1 - c2 # Counter({'a': 2})
# Update counts
counts.update(["apple", "date"])
defaultdict
Dict with default values for missing keys:
from collections import defaultdict
# List default
# Group the second element of each pair under its first element.
# defaultdict(list) creates the empty list on first access to a key.
groups = defaultdict(list)
for key, value in [("a", 1), ("b", 2), ("a", 3)]:
    groups[key].append(value)
# {'a': [1, 3], 'b': [2]}
# Int default (counting)
counts = defaultdict(int)
for word in words:
    # Missing keys start at int() == 0, so += 1 needs no existence check.
    counts[word] += 1
# Set default
seen = defaultdict(set)
seen["user1"].add("page1")
seen["user1"].add("page2")
# Custom default
def default_value():
    """Return a fresh default record for a missing key.

    A new dict (with its own "items" list) is built on every call, so
    entries created by the defaultdict never share mutable state.
    """
    return {"count": 0, "items": []}
data = defaultdict(default_value)
deque
Double-ended queue with O(1) operations on both ends:
from collections import deque
# Create
d = deque([1, 2, 3])
# Fixed size: kept under its own name so the walkthrough below keeps
# operating on the unbounded deque [1, 2, 3].
bounded = deque(maxlen=3)
# Add elements
d.append(4) # Right: [1, 2, 3, 4]
d.appendleft(0) # Left: [0, 1, 2, 3, 4]
# Remove elements
d.pop() # Right: returns 4
d.popleft() # Left: returns 0
# Rotate
d = deque([1, 2, 3, 4, 5])
d.rotate(2) # [4, 5, 1, 2, 3]
d.rotate(-2) # [1, 2, 3, 4, 5]
# Extend
d.extend([6, 7])
d.extendleft([0, -1]) # Note: reverses order
Use cases:
- Queues (FIFO)
- Sliding windows
- Recent history (with maxlen)
# Recent history
# Keep only the five most recent actions.
history = deque(maxlen=5)
for action in actions:
    history.append(action)
# Automatically discards oldest when full
namedtuple
Tuples with named fields:
from collections import namedtuple
# Define
Point = namedtuple("Point", ["x", "y"])
# Or: Point = namedtuple("Point", "x y")
# Create instances
p = Point(3, 4)
p = Point(x=3, y=4)
# Access
p.x # 3
p[0] # 3
x, y = p # Unpacking works
# Immutable
p.x = 5 # AttributeError
# Convert
p._asdict() # {'x': 3, 'y': 4}
p._replace(x=10) # Point(x=10, y=4) - new instance
# With defaults (Python 3.7+)
Point = namedtuple("Point", ["x", "y"], defaults=[0, 0])
Point() # Point(x=0, y=0)
OrderedDict
Dict that remembers insertion order:
from collections import OrderedDict
# Note: Regular dicts maintain order since Python 3.7
# OrderedDict is still useful for:
# 1. Move to end
od = OrderedDict([("a", 1), ("b", 2), ("c", 3)])
od.move_to_end("a") # {"b": 2, "c": 3, "a": 1}
od.move_to_end("c", last=False) # {"c": 3, "b": 2, "a": 1}
# 2. Pop from either end
od.popitem() # ("a", 1) - last
od.popitem(last=False) # ("c", 3) - first
# 3. Equality considers order
OrderedDict([("a", 1), ("b", 2)]) == OrderedDict([("b", 2), ("a", 1)])
# False (regular dicts would be True)
ChainMap
Combine multiple dicts:
from collections import ChainMap
defaults = {"color": "red", "size": "medium"}
user_prefs = {"color": "blue"}
runtime = {"debug": True}
config = ChainMap(runtime, user_prefs, defaults)
config["color"] # "blue" (first match wins)
config["size"] # "medium" (from defaults)
config["debug"] # True
# Useful for layered configuration
Practical Examples
Word frequency
from collections import Counter
text = "the quick brown fox jumps over the lazy dog"
word_freq = Counter(text.lower().split())
print(word_freq.most_common(3))
Grouping data
from collections import defaultdict
users = [
    {"name": "Alice", "dept": "Engineering"},
    {"name": "Bob", "dept": "Sales"},
    {"name": "Carol", "dept": "Engineering"},
]
# Collect user names per department; the list for a department is created
# automatically the first time that department is seen.
by_dept = defaultdict(list)
for user in users:
    by_dept[user["dept"]].append(user["name"])
# {'Engineering': ['Alice', 'Carol'], 'Sales': ['Bob']}
LRU cache (simple)
from collections import OrderedDict
class LRUCache:
    """Minimal least-recently-used cache backed by an OrderedDict.

    Entries are kept in recency order: the least recently used key sits at
    the front of the dict and the most recently used key at the end.
    """

    def __init__(self, capacity):
        """Create an empty cache that holds at most *capacity* entries."""
        self.cache = OrderedDict()
        self.capacity = capacity

    def get(self, key):
        """Return the value stored under *key*, or None if it is absent.

        A hit moves the key to the end, marking it most recently used.
        """
        if key in self.cache:
            self.cache.move_to_end(key)
            return self.cache[key]
        return None

    def put(self, key, value):
        """Store *value* under *key*, evicting the oldest entry if over capacity."""
        if key in self.cache:
            # Overwriting an existing key keeps its old position, so move it
            # to the end first to count the write as a use.
            self.cache.move_to_end(key)
        self.cache[key] = value
        if len(self.cache) > self.capacity:
            # last=False pops from the front: the least recently used entry.
            self.cache.popitem(last=False)


# Data class alternative
from collections import namedtuple
# Quick data containers
User = namedtuple("User", ["id", "name", "email"])
Point3D = namedtuple("Point3D", ["x", "y", "z"], defaults=[0, 0, 0])
# For more features, use dataclasses
Quick Reference
| Type | Use Case |
|---|---|
| Counter | Count occurrences |
| defaultdict | Dict with default factory |
| deque | Fast append/pop both ends |
| namedtuple | Lightweight immutable data |
| OrderedDict | Order-aware dict operations |
| ChainMap | Layered dict lookup |
The collections module covers common patterns that would otherwise require boilerplate code. Learn these and write cleaner Python.
React to this post: