Data classes eliminate the boilerplate of writing classes that mainly hold data.
The Problem
class User:
def __init__(self, name, email, age):
self.name = name
self.email = email
self.age = age
def __repr__(self):
return f"User(name={self.name!r}, email={self.email!r}, age={self.age!r})"
def __eq__(self, other):
return (self.name, self.email, self.age) == (other.name, other.email, other.age)
That's a lot of code for a simple data container.
The Solution
from dataclasses import dataclass
@dataclass
class User:
name: str
email: str
age: int
That's it. You get __init__, __repr__, and __eq__ for free.
Basic Usage
from dataclasses import dataclass
@dataclass
class Point:
x: float
y: float
p1 = Point(1.0, 2.0)
p2 = Point(1.0, 2.0)
print(p1) # Point(x=1.0, y=2.0)
print(p1 == p2) # True
print(p1.x) # 1.0
Default Values
@dataclass
class User:
name: str
email: str
active: bool = True
login_count: int = 0
user = User("Owen", "owen@example.com")
print(user.active) # True
Note: Fields with defaults must come after fields without defaults; otherwise the class definition raises a TypeError.
Mutable Default Values
from dataclasses import dataclass, field
# Wrong - a mutable default would be shared across instances, so @dataclass rejects it with a ValueError
@dataclass
class BadContainer:
items: list = []
# Right - use field()
@dataclass
class Container:
items: list = field(default_factory=list)
Always use field(default_factory=...) for mutable defaults.
Field Options
from dataclasses import dataclass, field
@dataclass
class Product:
name: str
price: float
# Exclude from repr
internal_id: str = field(repr=False)
# Exclude from comparison
timestamp: float = field(compare=False)
# Exclude from init
computed: str = field(init=False)
def __post_init__(self):
self.computed = f"{self.name}-{self.internal_id}"
Frozen (Immutable)
@dataclass(frozen=True)
class Point:
x: float
y: float
p = Point(1.0, 2.0)
p.x = 3.0 # FrozenInstanceError!
Frozen dataclasses are hashable (provided all of their fields are hashable) and can be used in sets and as dict keys.
Ordering
@dataclass(order=True)
class Version:
major: int
minor: int
patch: int
versions = [Version(2, 0, 0), Version(1, 9, 0), Version(1, 10, 0)]
sorted(versions) # [Version(major=1, minor=9, patch=0), Version(major=1, minor=10, patch=0), Version(major=2, minor=0, patch=0)]
Instances are compared as tuples of their fields, in order of declaration.
Custom Sorting
@dataclass(order=True)
class Person:
sort_index: int = field(init=False, repr=False)
name: str
age: int
def __post_init__(self):
self.sort_index = self.age # Sort by age
Inheritance
@dataclass
class Animal:
name: str
age: int
@dataclass
class Dog(Animal):
breed: str
dog = Dog("Rex", 5, "German Shepherd")
Post-Init Processing
@dataclass
class Rectangle:
width: float
height: float
area: float = field(init=False)
def __post_init__(self):
self.area = self.width * self.height
rect = Rectangle(10, 5)
print(rect.area) # 50
Converting To/From Dict
from dataclasses import asdict, astuple
@dataclass
class User:
name: str
email: str
user = User("Owen", "owen@example.com")
# To dict
d = asdict(user) # {'name': 'Owen', 'email': 'owen@example.com'}
# To tuple
t = astuple(user) # ('Owen', 'owen@example.com')
# From dict
user = User(**d)
Comparison with Alternatives
NamedTuple
from typing import NamedTuple
class Point(NamedTuple):
x: float
y: float
# Immutable, tuple-like, lighter weight
# But less flexible than dataclasses
Pydantic
from pydantic import BaseModel
class User(BaseModel):
name: str
email: str
age: int
# Runtime validation, JSON serialization
# But heavier, external dependency

| Feature | dataclass | NamedTuple | Pydantic |
|---|---|---|---|
| Mutable | Yes | No | Yes |
| Validation | No | No | Yes |
| JSON | Manual | Manual | Built-in |
| Performance | Fast | Fastest | Slower |
| Dependency | stdlib | stdlib | External |
Slots
Python 3.10+ supports slots=True, which generates __slots__ for memory efficiency:
@dataclass(slots=True)
class Point:
x: float
y: float
A slotted dataclass uses less memory and has faster attribute access, but you can't add attributes to its instances dynamically.
KW-Only Fields
Python 3.10+:
@dataclass(kw_only=True)
class Config:
host: str
port: int
debug: bool = False
# Must use keyword arguments
config = Config(host="localhost", port=8080)
My Patterns
from dataclasses import dataclass, field
from datetime import datetime
@dataclass
class Event:
name: str
timestamp: datetime = field(default_factory=datetime.now)
tags: list[str] = field(default_factory=list)
def add_tag(self, tag: str) -> None:
self.tags.append(tag)
@dataclass(frozen=True)
class Coordinates:
lat: float
lon: float
Use dataclasses for simple data containers. Use Pydantic when you need validation.