Data classes eliminate the boilerplate of writing classes that mainly hold data.

The Problem

class User:
    """Hand-written data container holding a user's name, email, and age.

    Spells out by hand the ``__init__``, ``__repr__`` and ``__eq__`` methods
    that ``@dataclass`` would otherwise generate.
    """

    def __init__(self, name, email, age):
        self.name = name
        self.email = email
        self.age = age

    def __repr__(self):
        return f"User(name={self.name!r}, email={self.email!r}, age={self.age!r})"

    def __eq__(self, other):
        # Comparing against an unrelated object (``user == 5``, ``user == None``)
        # must not crash with AttributeError. Returning NotImplemented lets
        # Python fall back to the other operand / identity, so the result is
        # simply False. The dataclass-generated __eq__ applies the same
        # identical-class rule.
        if other.__class__ is not self.__class__:
            return NotImplemented
        return (self.name, self.email, self.age) == (other.name, other.email, other.age)

That's a lot of code for a simple data container.

The Solution

from dataclasses import dataclass
 
@dataclass
class User:
    # The annotated fields, in this order, become the __init__ parameters.
    name: str
    email: str
    age: int

That's it. You get __init__, __repr__, and __eq__ for free.

Basic Usage

from dataclasses import dataclass
 
@dataclass
class Point:
    # Both coordinates are required: neither field has a default.
    x: float
    y: float

# Two separate instances built from the same values.
p1 = Point(1.0, 2.0)
p2 = Point(1.0, 2.0)

print(p1)           # Point(x=1.0, y=2.0)  (generated __repr__)
print(p1 == p2)     # True  (generated __eq__ compares field values, not identity)
print(p1.x)         # 1.0

Default Values

@dataclass
class User:
    # Required fields (no default) are declared first.
    name: str
    email: str
    # Optional fields: the default applies when the argument is omitted.
    active: bool = True
    login_count: int = 0

# Only the required fields are passed; active and login_count take their defaults.
user = User("Owen", "owen@example.com")
print(user.active)  # True

Note: Fields with defaults must come after fields without; otherwise the class definition raises a TypeError.

Mutable Default Values

from dataclasses import dataclass, field
 
# Wrong - a bare mutable default would be shared across instances, so
# @dataclass refuses it: defining this class raises ValueError.
@dataclass
class BadContainer:
    items: list = []
 
# Right - use field(); default_factory is called for each new instance,
# so every Container gets its own fresh list.
@dataclass
class Container:
    items: list = field(default_factory=list)

Always use field(default_factory=...) for mutable defaults.

Field Options

from dataclasses import dataclass, field
 
@dataclass
class Product:
    name: str
    price: float
    # Exclude from repr (still a required __init__ argument)
    internal_id: str = field(repr=False)
    # Exclude from comparison: two products differing only here are still equal
    timestamp: float = field(compare=False)
    # Exclude from init: not a constructor parameter, assigned in __post_init__
    computed: str = field(init=False)
    
    def __post_init__(self):
        # Called by the generated __init__ after the other fields are set.
        self.computed = f"{self.name}-{self.internal_id}"

Frozen (Immutable)

@dataclass(frozen=True)
class Point:
    # frozen=True makes assigning to a field after construction an error.
    x: float
    y: float

p = Point(1.0, 2.0)
p.x = 3.0  # raises dataclasses.FrozenInstanceError

Frozen dataclasses are hashable (as long as every field value is itself hashable) and can be used in sets or as dict keys.

Ordering

@dataclass(order=True)
class Version:
    # order=True generates <, <=, >, >=; instances compare like the tuple
    # (major, minor, patch).
    major: int
    minor: int
    patch: int

versions = [Version(2, 0, 0), Version(1, 9, 0), Version(1, 10, 0)]
# Returns the versions ordered 1.9.0, 1.10.0, 2.0.0 (ints compare
# numerically, so minor 9 sorts before minor 10). Each element prints as
# Version(major=..., minor=..., patch=...).
sorted(versions)

Comparison uses fields in order of declaration.

Custom Sorting

@dataclass(order=True)
class Person:
    # Declared first so it is compared first. init=False keeps it out of the
    # constructor; repr=False keeps it out of the printed form.
    sort_index: int = field(init=False, repr=False)
    # Ties on sort_index fall through to name, then age.
    name: str
    age: int
    
    def __post_init__(self):
        # Set once at construction; not refreshed if age is reassigned later.
        self.sort_index = self.age  # Sort by age

Inheritance

@dataclass
class Animal:
    # Base dataclass; its fields are inherited by dataclass subclasses.
    name: str
    age: int
 
@dataclass
class Dog(Animal):
    # Added after the inherited fields, so the constructor order is
    # name, age, breed.
    breed: str

dog = Dog("Rex", 5, "German Shepherd")

Post-Init Processing

@dataclass
class Rectangle:
    width: float
    height: float
    # Derived value: not a constructor argument, filled in by __post_init__.
    area: float = field(init=False)
    
    def __post_init__(self):
        # Computed once at construction; not updated if width or height
        # is reassigned afterwards.
        self.area = self.width * self.height

rect = Rectangle(10, 5)
print(rect.area)  # 50

Converting To/From Dict

from dataclasses import asdict, astuple
 
@dataclass
class User:
    name: str
    email: str

user = User("Owen", "owen@example.com")

# To dict (nested dataclasses are converted recursively)
d = asdict(user)  # {'name': 'Owen', 'email': 'owen@example.com'}

# To tuple (values in field-declaration order)
t = astuple(user)  # ('Owen', 'owen@example.com')

# From dict: keys must match the field names. This works for flat classes
# like this one; nested dataclasses would arrive as plain dicts.
user = User(**d)

Comparison with Alternatives

NamedTuple

from typing import NamedTuple
 
class Point(NamedTuple):
    # Fields use the same annotation syntax as a dataclass.
    x: float
    y: float

# Immutable, tuple-like, lighter weight
# But less flexible than dataclasses

Pydantic

from pydantic import BaseModel
 
class User(BaseModel):
    # Same annotated-field syntax as the dataclass version, but the class
    # subclasses pydantic's BaseModel instead of using a decorator.
    name: str
    email: str
    age: int

# Runtime validation, JSON serialization
# But heavier, external dependency
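
| Feature     | dataclass | NamedTuple | Pydantic |
|-------------|-----------|------------|----------|
| Mutable     | Yes       | No         | Yes      |
| Validation  | No        | No         | Yes      |
| JSON        | Manual    | Manual     | Built-in |
| Performance | Fast      | Fastest    | Slower   |
| Dependency  | stdlib    | stdlib     | External |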

Slots

Since Python 3.10, @dataclass accepts slots=True, which generates __slots__ for memory efficiency:

@dataclass(slots=True)
class Point:
    # slots=True makes the decorator generate __slots__ from these fields.
    x: float
    y: float

Slotted instances use less memory and have faster attribute access, but you can't add attributes beyond the declared fields.

KW-Only Fields

Python 3.10+:

@dataclass(kw_only=True)
class Config:
    # kw_only=True makes every field a keyword-only __init__ parameter.
    host: str
    port: int
    debug: bool = False

# Must use keyword arguments; positional arguments raise TypeError
config = Config(host="localhost", port=8080)

My Patterns

from dataclasses import dataclass, field
from datetime import datetime
 
@dataclass
class Event:
    name: str
    # datetime.now is called when each Event is created, not at class
    # definition. It returns a naive local time (no tzinfo).
    timestamp: datetime = field(default_factory=datetime.now)
    # default_factory gives each Event its own list rather than a shared one.
    tags: list[str] = field(default_factory=list)
    
    def add_tag(self, tag: str) -> None:
        # Mutates this instance's tag list in place.
        self.tags.append(tag)
 
@dataclass(frozen=True)
class Coordinates:
    # Immutable value object: fields cannot be reassigned after construction.
    lat: float
    lon: float

Use dataclasses for simple data containers. Use Pydantic when you need validation.

React to this post: