JSON is everywhere. Here's how to handle it properly in Python.
The Basics
import json
# Parse JSON string to Python
data = json.loads('{"name": "Owen", "age": 25}')
print(data["name"]) # Owen
# Convert Python to JSON string
json_string = json.dumps({"name": "Owen", "age": 25})
print(json_string) # {"name": "Owen", "age": 25}
Reading JSON Files
# Load from file
with open("data.json") as f:
data = json.load(f)
# Write to file
with open("output.json", "w") as f:
json.dump(data, f)
Note: loads/dumps for strings, load/dump for files.
Pretty Printing
data = {"users": [{"name": "Owen"}, {"name": "Alex"}]}
# Compact (default)
json.dumps(data)
# {"users": [{"name": "Owen"}, {"name": "Alex"}]}
# Pretty
json.dumps(data, indent=2)
# {
# "users": [
# {"name": "Owen"},
# {"name": "Alex"}
# ]
# }
# Sorted keys
json.dumps(data, indent=2, sort_keys=True)
Handling Dates
JSON doesn't have a date type. You need to serialize manually:
from datetime import datetime
data = {"created": datetime.now()}
# This fails
json.dumps(data) # TypeError: Object of type datetime is not JSON serializable
# Solution 1: Convert to string first
data = {"created": datetime.now().isoformat()}
json.dumps(data) # Works
# Solution 2: Custom encoder
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes datetime objects as ISO-8601 strings."""

    def default(self, o):
        # Anything that isn't a datetime falls through to the base class,
        # which raises the standard TypeError for unserializable types.
        if not isinstance(o, datetime):
            return super().default(o)
        return o.isoformat()
json.dumps(data, cls=DateTimeEncoder)
Custom Serialization
For your own classes:
from dataclasses import dataclass, asdict
@dataclass
class User:
    """Plain data record for a user.

    The dataclass decorator generates __init__/__repr__/__eq__; use
    dataclasses.asdict() to turn an instance into a dict for json.dumps.
    """

    name: str   # display name
    email: str  # contact email address
user = User("Owen", "owen@example.com")
# Convert dataclass to dict, then to JSON
json.dumps(asdict(user))
Or with a custom encoder:
class User:
    """User record that knows how to present itself as a JSON-ready dict."""

    def __init__(self, name, email):
        self.name = name
        self.email = email

    def to_dict(self):
        """Return a plain-dict view of this user for JSON encoding."""
        return dict(name=self.name, email=self.email)
class CustomEncoder(json.JSONEncoder):
    """Encoder that delegates to an object's to_dict() method when present."""

    def default(self, o):
        # Objects without a to_dict hook get the base-class behavior,
        # i.e. the standard "not JSON serializable" TypeError.
        if not hasattr(o, "to_dict"):
            return super().default(o)
        return o.to_dict()
user = User("Owen", "owen@example.com")
json.dumps(user, cls=CustomEncoder)
Custom Deserialization
Parse JSON into custom objects:
def user_decoder(dct):
    """object_hook for json.loads: build a User from objects with name+email.

    Any decoded JSON object lacking either key is passed through unchanged.
    """
    looks_like_user = "name" in dct and "email" in dct
    if not looks_like_user:
        return dct
    return User(dct["name"], dct["email"])
json_str = '{"name": "Owen", "email": "owen@example.com"}'
user = json.loads(json_str, object_hook=user_decoder)
Validation with Pydantic
For robust validation, use Pydantic:
from pydantic import BaseModel, EmailStr
class User(BaseModel):
    # Pydantic model: model_validate_json parses and validates in one step,
    # raising a ValidationError with a clear message on bad input.
    name: str
    email: EmailStr  # validates email format (needs the email-validator extra)
    age: int
# Parse and validate
json_str = '{"name": "Owen", "email": "owen@example.com", "age": 25}'
user = User.model_validate_json(json_str)
# Serialize back
user.model_dump_json()
Pydantic gives you:
- Type validation
- Clear error messages
- Automatic conversion
- IDE autocomplete
Handling Large Files
Don't load huge files into memory:
import ijson
# Stream parse large JSON
with open("huge.json", "rb") as f:
for item in ijson.items(f, "users.item"):
process(item)
Or use json.JSONDecoder for streaming:
decoder = json.JSONDecoder()
buffer = ""
with open("data.json") as f:
for line in f:
buffer += line
try:
obj, idx = decoder.raw_decode(buffer)
process(obj)
buffer = buffer[idx:].lstrip()
except json.JSONDecodeError:
continue
JSON Lines (JSONL)
One JSON object per line:
# Read JSONL
with open("data.jsonl") as f:
for line in f:
item = json.loads(line)
process(item)
# Write JSONL
with open("output.jsonl", "w") as f:
for item in items:
f.write(json.dumps(item) + "\n")
Good for log files and streaming data.
Error Handling
import json
try:
data = json.loads(maybe_json)
except json.JSONDecodeError as e:
print(f"Invalid JSON: {e.msg} at line {e.lineno}")
Common Gotchas
NaN and Infinity
# Standard JSON doesn't support these, but Python's encoder
# emits them by default as the non-standard tokens NaN/Infinity
json.dumps(float("nan")) # "NaN" (allow_nan=True is the default)
# Enforce strict, interoperable JSON instead
json.dumps(float("nan"), allow_nan=False) # Raises ValueError
Unicode
# Default: ASCII-safe
json.dumps({"name": "Müller"}) # {"name": "M\u00fcller"}
# Allow unicode
json.dumps({"name": "Müller"}, ensure_ascii=False) # {"name": "Müller"}
Bytes
# JSON expects strings
data = {"content": b"bytes"}
json.dumps(data) # TypeError
# Encode first
data = {"content": b"bytes".decode("utf-8")}
Performance Tips
# orjson is much faster
import orjson
data = orjson.loads(json_bytes)
json_bytes = orjson.dumps(data)
# ujson is also fast
import ujson
data = ujson.loads(json_string)
For high-throughput applications, these are 3-10x faster than stdlib.
My Patterns
import json
from pathlib import Path
def load_json(path: Path) -> dict:
    """Load a JSON file, raising ValueError naming the file on bad JSON.

    Args:
        path: Location of the JSON document (read as UTF-8, the encoding
            the JSON spec mandates, instead of the platform default).

    Returns:
        The decoded document (a dict for JSON objects; JSON also permits
        top-level arrays/scalars, which are returned as-is).

    Raises:
        ValueError: If the file contains invalid JSON. The original
            JSONDecodeError is chained as the cause so line/column
            details survive in the traceback.
        OSError: If the file cannot be read.
    """
    try:
        return json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as e:
        # `from e` preserves the decoder's diagnostics for callers/logs.
        raise ValueError(f"Invalid JSON in {path}: {e}") from e
def save_json(path: Path, data: dict, pretty: bool = True) -> None:
"""Save data as JSON."""
indent = 2 if pretty else None
path.write_text(json.dumps(data, indent=indent, ensure_ascii=False))
Keep it simple. Add complexity only when needed.
React to this post: