You know @dataclass basics. Here are the patterns that make them powerful.
Field Factories
Never use mutable defaults directly:
from dataclasses import dataclass, field
# BAD: shared list across instances
@dataclass
class BadConfig:
# Deliberately wrong: @dataclass refuses a mutable list default and raises
# ValueError while the class is being created, so this class never exists.
items: list = [] # Error!
# GOOD: factory creates new list per instance
@dataclass
class Config:
items: list = field(default_factory=list)
metadata: dict = field(default_factory=dict)
Custom factories:
from dataclasses import dataclass, field
from datetime import datetime
def now() -> datetime:
    """Return the current local date and time as a naive ``datetime``.

    Intended for use as a ``default_factory`` so each instance gets its own
    timestamp at creation time.
    """
    return datetime.now()
@dataclass
class Event:
name: str
created_at: datetime = field(default_factory=now)
__post_init__
Run code after __init__:
from dataclasses import dataclass, field


@dataclass
class Rectangle:
    """Rectangle whose ``area`` is derived from its two sides.

    ``area`` is excluded from the generated ``__init__`` and is filled in by
    ``__post_init__`` instead.
    """

    width: float
    height: float
    area: float = field(init=False)  # Computed, not in __init__

    def __post_init__(self):
        """Compute ``area`` once ``width`` and ``height`` have been set."""
        self.area = self.width * self.height
r = Rectangle(3, 4)
print(r.area) # 12
Validation:
@dataclass
class User:
name: str
age: int
def __post_init__(self):
if self.age < 0:
raise ValueError("Age cannot be negative")
if not self.name.strip():
raise ValueError("Name cannot be empty")
InitVar: Init-Only Variables
Pass to __post_init__ but don't store:
from dataclasses import dataclass, field, InitVar
@dataclass
class Database:
    """Connection settings parsed from a ``"host:port"`` string.

    ``connection_string`` is an init-only variable: it is accepted by
    ``__init__`` and handed to ``__post_init__``, but not stored on the
    instance. ``host`` and ``port`` are not ``__init__`` parameters; they are
    set from the parsed string.

    Raises:
        ValueError: if the string has no ``:`` separator or the port is not
            an integer.
    """

    connection_string: InitVar[str]
    host: str = field(init=False)
    port: int = field(init=False)

    def __post_init__(self, connection_string: str):
        """Split ``connection_string`` into ``host`` and integer ``port``."""
        # Split on the last colon only, so a host that itself contains
        # colons still yields exactly two parts.
        host, port = connection_string.rsplit(":", 1)
        self.host = host
        self.port = int(port)
db = Database("localhost:5432")
print(db.host, db.port) # localhost 5432
# db.connection_string # AttributeError - not stored
Frozen: Immutable Dataclasses
from dataclasses import dataclass
@dataclass(frozen=True)
class Point:
    """Immutable 2-D point.

    ``frozen=True`` makes attribute assignment raise ``FrozenInstanceError``
    and, together with the default ``eq=True``, makes instances hashable so
    they can be used in sets and as dict keys.
    """

    x: float
    y: float
p = Point(1, 2)
# p.x = 3 # FrozenInstanceError
# Hashable (can use in sets/dict keys)
points = {Point(0, 0), Point(1, 1)}
Slots: Memory Efficiency
Python 3.10+:
from dataclasses import dataclass
@dataclass(slots=True)
class Particle:
x: float
y: float
velocity: float
# ~40% less memory per instance
# Slightly faster attribute access
# Cannot add arbitrary attributes
Field Options
from dataclasses import dataclass, field
@dataclass
class Record:
# Normal field
name: str
# Default value
count: int = 0
# Not in __init__
computed: str = field(init=False, default="")
# Not in repr
secret: str = field(repr=False, default="hidden")
# Not in comparison
cache: dict = field(compare=False, default_factory=dict)
# Not hashed (for frozen classes)
mutable_ref: list = field(hash=False, default_factory=list)
Inheritance
from dataclasses import dataclass
@dataclass
class Animal:
    """Base record; subclasses inherit ``name`` and ``age`` as leading fields."""

    name: str
    age: int
@dataclass
class Dog(Animal):
    """``Animal`` with a breed; inherited fields precede ``breed`` in ``__init__``."""

    breed: str
dog = Dog(name="Rex", age=5, breed="German Shepherd")
Gotcha: Parent fields with defaults before child fields without:
# BAD: TypeError
@dataclass
class Parent:
name: str = "default"
@dataclass
class Child(Parent):
# Inherited fields come first in the generated __init__, so a required field
# placed after Parent's defaulted `name` makes @dataclass raise TypeError
# while Child is being created.
age: int # Non-default after default - Error!
# GOOD: Use field() or reorder
@dataclass
class Parent:
    """Base class whose only field has a default value."""

    name: str = "default"
@dataclass
class Child(Parent):
age: int = 0 # Give default
KW Only (3.10+)
Force keyword arguments:
from dataclasses import dataclass
@dataclass(kw_only=True)
class Config:
host: str
port: int
debug: bool
# Must use keywords
Config(host="localhost", port=8080, debug=True)
# Config("localhost", 8080, True) # TypeError
Per-field:
from dataclasses import dataclass, field
@dataclass
class Request:
method: str
path: str
headers: dict = field(kw_only=True, default_factory=dict)
body: bytes = field(kw_only=True, default=b"")
Request("GET", "/api", headers={"Auth": "token"})
Comparison and Ordering
from dataclasses import dataclass
@dataclass(order=True)
class Version:
    """Three-part version number that sorts field by field.

    ``order=True`` generates the rich comparison methods; instances compare
    as the tuple ``(major, minor, patch)``.
    """

    major: int
    minor: int
    patch: int
v1 = Version(1, 0, 0)
v2 = Version(2, 0, 0)
print(v1 < v2) # True
print(sorted([v2, v1])) # [Version(major=1, minor=0, patch=0), Version(major=2, minor=0, patch=0)]
Custom sort key:
@dataclass(order=True)
class Task:
sort_index: int = field(init=False, repr=False)
priority: int
name: str
def __post_init__(self):
self.sort_index = -self.priority # Higher priority first
Replace (Functional Updates)
from dataclasses import dataclass, replace
@dataclass(frozen=True)
class User:
    """Immutable user record.

    Because instances are frozen, changes are made by building a modified
    copy with ``dataclasses.replace``.
    """

    name: str
    email: str
    active: bool
user = User("Alice", "alice@example.com", True)
updated = replace(user, active=False)
# New instance with one field changed
asdict and astuple
from dataclasses import dataclass, asdict, astuple
@dataclass
class Person:
    """Plain record used to demonstrate ``asdict`` and ``astuple``."""

    name: str
    age: int
p = Person("Alice", 30)
print(asdict(p)) # {'name': 'Alice', 'age': 30}
print(astuple(p)) # ('Alice', 30)
Pattern: Builder
from dataclasses import dataclass, field
@dataclass
class QueryBuilder:
table: str
columns: list = field(default_factory=lambda: ["*"])
conditions: list = field(default_factory=list)
limit: int | None = None
def select(self, *cols) -> "QueryBuilder":
return replace(self, columns=list(cols))
def where(self, condition: str) -> "QueryBuilder":
return replace(self, conditions=[*self.conditions, condition])
def take(self, n: int) -> "QueryBuilder":
return replace(self, limit=n)
def build(self) -> str:
sql = f"SELECT {', '.join(self.columns)} FROM {self.table}"
if self.conditions:
sql += f" WHERE {' AND '.join(self.conditions)}"
if self.limit:
sql += f" LIMIT {self.limit}"
return sql
query = (QueryBuilder("users")
.select("name", "email")
.where("active = true")
.take(10)
.build())
Quick Reference
@dataclass(
init=True, # Generate __init__
repr=True, # Generate __repr__
eq=True, # Generate __eq__
order=False, # Generate comparison methods
frozen=False, # Make immutable
slots=False, # Use __slots__ (3.10+)
kw_only=False, # Keyword-only args (3.10+)
)
class MyClass:
field: type = field(
default=..., # Default value
default_factory=..., # Factory for mutable defaults (mutually exclusive with default)
init=True, # Include in __init__
repr=True, # Include in __repr__
compare=True, # Include in comparisons
hash=None, # Include in __hash__
kw_only=False, # Keyword-only (3.10+)
)
Dataclasses eliminate boilerplate. These patterns eliminate the rest.
React to this post: