JSON is everywhere. Here's how to handle it properly in Python.

The Basics

import json
 
# Parse JSON string to Python
data = json.loads('{"name": "Owen", "age": 25}')
print(data["name"])  # Owen
 
# Convert Python to JSON string
json_string = json.dumps({"name": "Owen", "age": 25})
print(json_string)  # {"name": "Owen", "age": 25}

Reading JSON Files

# Load from file
with open("data.json") as f:
    data = json.load(f)
 
# Write to file
with open("output.json", "w") as f:
    json.dump(data, f)

Note: loads/dumps for strings, load/dump for files.

Pretty Printing

data = {"users": [{"name": "Owen"}, {"name": "Alex"}]}
 
# Compact (default)
json.dumps(data)
# {"users": [{"name": "Owen"}, {"name": "Alex"}]}
 
# Pretty
json.dumps(data, indent=2)
# {
#   "users": [
#     {"name": "Owen"},
#     {"name": "Alex"}
#   ]
# }
 
# Sorted keys
json.dumps(data, indent=2, sort_keys=True)

Handling Dates

JSON doesn't have a date type. You need to serialize manually:

from datetime import datetime
 
data = {"created": datetime.now()}
 
# This fails
json.dumps(data)  # TypeError: Object of type datetime is not JSON serializable
 
# Solution 1: Convert to string first
data = {"created": datetime.now().isoformat()}
json.dumps(data)  # Works
 
# Solution 2: Custom encoder
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes datetime objects as ISO-8601 strings."""

    def default(self, obj):
        # Anything that isn't a datetime falls through to the base class,
        # which raises TypeError for unserializable objects.
        if not isinstance(obj, datetime):
            return super().default(obj)
        return obj.isoformat()
 
json.dumps(data, cls=DateTimeEncoder)

Custom Serialization

For your own classes:

from dataclasses import dataclass, asdict
 
@dataclass
class User:
    """A user record holding a display name and an email address."""

    name: str
    email: str
 
user = User("Owen", "owen@example.com")
 
# Convert dataclass to dict, then to JSON
json.dumps(asdict(user))

Or with a custom encoder:

class User:
    """A user with a name and email, serializable through to_dict()."""

    def __init__(self, name, email):
        self.name = name
        self.email = email

    def to_dict(self):
        """Return a plain dict representation suitable for json.dumps."""
        return dict(name=self.name, email=self.email)
 
class CustomEncoder(json.JSONEncoder):
    """Encoder that delegates to an object's to_dict() hook when it has one."""

    def default(self, obj):
        # Objects without the hook get the base-class behavior (TypeError).
        if not hasattr(obj, "to_dict"):
            return super().default(obj)
        return obj.to_dict()
 
user = User("Owen", "owen@example.com")
json.dumps(user, cls=CustomEncoder)

Custom Deserialization

Parse JSON into custom objects:

def user_decoder(dct):
    """object_hook: promote dicts carrying both 'name' and 'email' to User.

    Any other dict is returned unchanged so nested objects still parse.
    """
    looks_like_user = "name" in dct and "email" in dct
    if not looks_like_user:
        return dct
    return User(dct["name"], dct["email"])
 
json_str = '{"name": "Owen", "email": "owen@example.com"}'
user = json.loads(json_str, object_hook=user_decoder)

Validation with Pydantic

For robust validation, use Pydantic:

from pydantic import BaseModel, EmailStr
 
# Pydantic model: fields are validated and coerced on construction and by
# model_validate_json(); invalid input raises a ValidationError.
class User(BaseModel):
    name: str
    email: EmailStr  # NOTE: EmailStr requires the optional email-validator extra
    age: int
 
# Parse and validate
json_str = '{"name": "Owen", "email": "owen@example.com", "age": 25}'
user = User.model_validate_json(json_str)
 
# Serialize back
user.model_dump_json()

Pydantic gives you:

  • Type validation
  • Clear error messages
  • Automatic conversion
  • IDE autocomplete

Handling Large Files

Don't load huge files into memory:

import ijson
 
# Stream parse large JSON
with open("huge.json", "rb") as f:
    for item in ijson.items(f, "users.item"):
        process(item)

Or use json.JSONDecoder's raw_decode to split a stream of concatenated JSON documents:

decoder = json.JSONDecoder()
buffer = ""
 
with open("data.json") as f:
    for line in f:
        buffer += line
        try:
            obj, idx = decoder.raw_decode(buffer)
            process(obj)
            buffer = buffer[idx:].lstrip()
        except json.JSONDecodeError:
            continue

JSON Lines (JSONL)

One JSON object per line:

# Read JSONL
with open("data.jsonl") as f:
    for line in f:
        item = json.loads(line)
        process(item)
 
# Write JSONL
with open("output.jsonl", "w") as f:
    for item in items:
        f.write(json.dumps(item) + "\n")

Good for log files and streaming data.

Error Handling

import json
 
try:
    data = json.loads(maybe_json)
except json.JSONDecodeError as e:
    print(f"Invalid JSON: {e.msg} at line {e.lineno}")

Common Gotchas

NaN and Infinity

# Standard JSON doesn't support these, but Python allows them by default
json.dumps(float("nan"))  # 'NaN' (non-standard JSON; allow_nan=True is the default)
 
# Enforce strict, standards-compliant JSON
json.dumps(float("nan"), allow_nan=False)  # ValueError

Unicode

# Default: ASCII-safe
json.dumps({"name": "Müller"})  # {"name": "M\u00fcller"}
 
# Allow unicode
json.dumps({"name": "Müller"}, ensure_ascii=False)  # {"name": "Müller"}

Bytes

# JSON expects strings
data = {"content": b"bytes"}
json.dumps(data)  # TypeError
 
# Encode first
data = {"content": b"bytes".decode("utf-8")}

Performance Tips

# orjson is much faster
import orjson
 
data = orjson.loads(json_bytes)
json_bytes = orjson.dumps(data)
 
# ujson is also fast
import ujson
 
data = ujson.loads(json_string)

For high-throughput applications, these are 3-10x faster than stdlib.

My Patterns

import json
from pathlib import Path
 
def load_json(path: Path) -> dict:
    """Load a JSON file, raising a ValueError that names the offending file.

    Args:
        path: File to read; decoded as UTF-8.

    Returns:
        The parsed document (typically a dict).

    Raises:
        ValueError: If the file contains invalid JSON. The original
            JSONDecodeError is chained so line/column info is preserved.
    """
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        # "from e" keeps the decode error (with lineno/colno) in the traceback
        # instead of showing it as "during handling of the above exception".
        raise ValueError(f"Invalid JSON in {path}: {e}") from e
 
def save_json(path: Path, data: dict, pretty: bool = True) -> None:
    """Save *data* as JSON at *path*, overwriting any existing file.

    Args:
        path: Destination file.
        data: JSON-serializable object.
        pretty: If True (default), indent with 2 spaces; otherwise compact.
    """
    indent = 2 if pretty else None
    # Explicit UTF-8 is required here: ensure_ascii=False emits raw non-ASCII
    # characters, and write_text's locale-dependent default encoding could
    # raise UnicodeEncodeError (e.g. cp1252 on Windows).
    path.write_text(
        json.dumps(data, indent=indent, ensure_ascii=False),
        encoding="utf-8",
    )

Keep it simple. Add complexity only when needed.

React to this post: