pickle serializes Python objects to bytes. Here's how to use it safely.
Basic Usage
import pickle

# Save object: pickle files must be opened in binary mode ("wb").
data = {"users": ["Alice", "Bob"], "count": 2}
with open("data.pkl", "wb") as f:
    pickle.dump(data, f)

# Load object: read it back in binary mode ("rb").
# Only load pickle files that you created yourself.
with open("data.pkl", "rb") as f:
    loaded = pickle.load(f)

print(loaded)  # {'users': ['Alice', 'Bob'], 'count': 2}

# Bytes Operations
# To bytes
data = [1, 2, 3]
pickled = pickle.dumps(data)
# From bytes
unpickled = pickle.loads(pickled)
What Can Be Pickled
# ✓ Built-in types
pickle.dumps(42)
pickle.dumps("hello")
pickle.dumps([1, 2, 3])
pickle.dumps({"key": "value"})
pickle.dumps((1, 2, 3))
pickle.dumps({1, 2, 3})
# ✓ Classes and instances
class User:
    """Minimal user-defined class used to show that plain instances pickle.

    Pickle stores the instance's ``__dict__`` plus a reference to the class,
    so the class must be importable under the same name when loading.
    """

    def __init__(self, name):
        self.name = name
pickle.dumps(User("Alice"))
# ✓ Functions (by reference)
pickle.dumps(len)
# ✗ Cannot pickle
pickle.dumps(lambda x: x) # Lambdas
pickle.dumps(open("file.txt")) # File objects
pickle.dumps(lock) # Threading locks
Protocols
# Protocol versions (higher = more efficient)
pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
# Protocol 0: ASCII (readable)
# Protocol 1: Old binary
# Protocol 2: Python 2.3+
# Protocol 3: Python 3.0+
# Protocol 4: Python 3.4+ (default in 3.8–3.13)
# Protocol 5: Python 3.8+ (out-of-band buffers; default in 3.14+)
# Check default
print(pickle.DEFAULT_PROTOCOL)
Custom Pickling
class Connection:
    """Object holding an unpicklable socket, made picklable via state hooks.

    The socket is dropped when pickling and re-opened when unpickling.
    ``connect`` is expected to be provided by the surrounding code.
    """

    def __init__(self, host):
        self.host = host
        self._socket = connect(host)  # Can't pickle

    def __getstate__(self):
        """Called when pickling: return the instance state without the socket."""
        state = self.__dict__.copy()  # copy so the live object keeps its socket
        # pop() instead of del: tolerate instances whose socket was never set
        state.pop("_socket", None)
        return state

    def __setstate__(self, state):
        """Called when unpickling: restore attributes, then reconnect."""
        self.__dict__.update(state)
        self._socket = connect(self.host)


# Using __reduce__
class ComplexObject:
    """Object that tells pickle how to rebuild it through ``__reduce__``."""

    def __init__(self, data):
        self.data = data

    def __reduce__(self):
        """Return ``(callable, args)``; unpickling calls ``callable(*args)``."""
        return (self.__class__, (self.data,))


# Security Warning
⚠️ Never unpickle untrusted data!
# DANGER: This can execute arbitrary code
pickle.loads(untrusted_bytes) # Don't do this!
Malicious pickles can:
- Execute shell commands
- Delete files
- Open network connections
- Run any Python code
Safer Alternatives
# For simple data, use JSON
import json
json.dumps({"key": "value"})
# For configuration, use TOML/YAML
import tomllib # Python 3.11+
# For data exchange, use structured formats
# - Protocol Buffers
# - MessagePack
# - Apache Avro
If You Must Use pickle
# Only load from trusted sources
# Verify file integrity with hashes
import hashlib
def save_secure(obj, path, hash_path):
    """Pickle *obj* to *path* and write its SHA-256 hex digest to *hash_path*.

    The digest is computed over the exact bytes written, so a later load can
    detect a corrupted or truncated pickle file.

    Args:
        obj: Any picklable object.
        path: Destination of the pickle file.
        hash_path: Destination of the text file holding the hex digest.
    """
    # Local import: Path is not imported anywhere before this point.
    from pathlib import Path

    data = pickle.dumps(obj)
    Path(path).write_bytes(data)
    hash_val = hashlib.sha256(data).hexdigest()
    Path(hash_path).write_text(hash_val)
def load_secure(path, hash_path):
    """Unpickle *path* after checking it against the digest in *hash_path*.

    NOTE: a hash stored next to the data only detects accidental corruption.
    Anyone who can rewrite the pickle can rewrite the hash file too, so this
    does NOT make untrusted files safe to load. For tamper protection keep
    the digest somewhere the attacker cannot write, or use a keyed HMAC.

    Args:
        path: Pickle file to load.
        hash_path: Text file containing the expected SHA-256 hex digest.

    Returns:
        The unpickled object.

    Raises:
        ValueError: If the file's digest does not match the stored one.
    """
    import hmac
    from pathlib import Path  # Path is not imported before this point

    data = Path(path).read_bytes()
    # strip(): tolerate a trailing newline added by editors or shell tools
    expected = Path(hash_path).read_text().strip()
    actual = hashlib.sha256(data).hexdigest()
    # Compare as bytes so compare_digest accepts any text in the hash file.
    if not hmac.compare_digest(actual.encode(), expected.encode()):
        raise ValueError("File integrity check failed")
    return pickle.loads(data)


# Common Use Cases
Caching
from pathlib import Path
import pickle
import hashlib
def cached(func):
    """Decorator that caches *func*'s results as pickle files in ``.cache/``.

    The cache key is an MD5 digest (used only as a file name, not for
    security) of the function's module, qualified name and arguments, so
    same-named functions in different modules do not share entries and the
    order of keyword arguments does not matter. Arguments and results must
    be picklable.

    NOTE: cached results are loaded with ``pickle.loads``; only use this
    when the cache directory is writable by trusted code alone.
    """
    import functools

    cache_dir = Path(".cache")
    cache_dir.mkdir(exist_ok=True)

    @functools.wraps(func)  # keep the wrapped function's name and docstring
    def wrapper(*args, **kwargs):
        key_material = (
            func.__module__,
            func.__qualname__,
            args,
            tuple(sorted(kwargs.items())),  # order-independent kwargs
        )
        key = hashlib.md5(pickle.dumps(key_material)).hexdigest()
        cache_file = cache_dir / f"{key}.pkl"
        if cache_file.exists():
            return pickle.loads(cache_file.read_bytes())

        result = func(*args, **kwargs)
        # Write to a temp file and rename, so an interrupted write never
        # leaves a truncated cache entry behind.
        cache_dir.mkdir(exist_ok=True)
        tmp_file = cache_file.with_name(cache_file.name + ".tmp")
        tmp_file.write_bytes(pickle.dumps(result))
        tmp_file.replace(cache_file)
        return result

    return wrapper
@cached
def expensive_computation(n):
    """Placeholder for a slow function whose results are cached on disk."""
    # ... (compute `result` from n here)
    return result


# Saving ML Models
import pickle

# Save model (`model` is your trained estimator)
with open("model.pkl", "wb") as f:
    pickle.dump(model, f)

# Load model: only load model files from sources you trust
with open("model.pkl", "rb") as f:
    model = pickle.load(f)

# Better: Use joblib for large numpy arrays
import joblib

joblib.dump(model, "model.joblib")
model = joblib.load("model.joblib")

# Session State
class Session:
    """Dictionary of session data persisted to a pickle file.

    NOTE: ``load`` unpickles the file at ``path``; point it only at files
    written by this class in a location untrusted users cannot modify.
    """

    def __init__(self, path):
        self.path = Path(path)
        self.data = {}

    def save(self):
        """Write ``self.data`` to ``self.path`` atomically."""
        # Write a sibling temp file and rename it over the target, so a
        # crash mid-write cannot leave a truncated session file.
        tmp_path = self.path.with_name(self.path.name + ".tmp")
        tmp_path.write_bytes(pickle.dumps(self.data))
        tmp_path.replace(self.path)

    def load(self):
        """Replace ``self.data`` with the saved state, if a file exists."""
        try:
            raw = self.path.read_bytes()
        except FileNotFoundError:
            return  # nothing saved yet: keep the current data
        self.data = pickle.loads(raw)


# Performance Tips
# Use highest protocol for speed and size
pickle.dump(data, f, protocol=pickle.HIGHEST_PROTOCOL)
# For large objects, consider:
# - joblib (efficient for numpy)
# - cloudpickle (for lambdas and closures)
# - dill (more object types)
Quick Reference
import pickle

# Save to file
with open("data.pkl", "wb") as f:
    pickle.dump(obj, f)

# Load from file (trusted files only)
with open("data.pkl", "rb") as f:
    obj = pickle.load(f)

# To/from bytes
data = pickle.dumps(obj)
obj = pickle.loads(data)

# With protocol (needs an open binary file, so reopen it here)
with open("data.pkl", "wb") as f:
    pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL)
# Custom pickling
class MyClass:
    """Template showing the two hooks that customize pickling."""

    def __getstate__(self):
        """Return the state to pickle.

        A copy is returned so that callers who edit the state (for example
        to drop unpicklable attributes) do not mutate the live instance.
        """
        return self.__dict__.copy()

    def __setstate__(self, state):
        """Restore the attributes from *state* when unpickling."""
        self.__dict__.update(state)


# Use pickle for caching and temporary storage. Never unpickle untrusted data. For data exchange, prefer JSON or other structured formats.
React to this post: