The tarfile module handles TAR archives, the Unix standard for bundling files. It supports gzip, bzip2, and xz compression.
Reading TAR Files
import tarfile
# List contents
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
for member in tar.getmembers():
print(f"{member.name} ({member.size} bytes)")
# Simple list
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
print(tar.getnames())
Extracting Files
import tarfile
# Extract all
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
tar.extractall('output_dir')
# Extract single file
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
tar.extract('file.txt', 'output_dir')
# Extract with filter (Python 3.12+ security)
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
tar.extractall('output_dir', filter='data')
Reading File Contents
import tarfile
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
# Get file object
f = tar.extractfile('file.txt')
if f:
content = f.read().decode('utf-8')
print(content)
Creating TAR Files
import tarfile
# Uncompressed TAR
with tarfile.open('archive.tar', 'w') as tar:
tar.add('file1.txt')
tar.add('file2.txt')
tar.add('folder/')
# With gzip compression
with tarfile.open('archive.tar.gz', 'w:gz') as tar:
tar.add('file.txt')
# With bzip2 compression
with tarfile.open('archive.tar.bz2', 'w:bz2') as tar:
tar.add('file.txt')
# With xz compression (best ratio)
with tarfile.open('archive.tar.xz', 'w:xz') as tar:
tar.add('file.txt')
Rename Files in Archive
import tarfile
with tarfile.open('archive.tar.gz', 'w:gz') as tar:
# Add with different name
tar.add('local/path/file.txt', arcname='renamed.txt')
# Add directory with different name
tar.add('src/', arcname='source/')
Filter Files
import tarfile
def exclude_pyc(tarinfo):
    """Filter callback for ``TarFile.add``: drop ``.pyc`` entries, keep the rest.

    Returns ``None`` (which tells ``add`` to leave the entry out) when the
    member name ends with ``.pyc``; otherwise returns the ``tarinfo`` object
    unchanged.
    """
    is_bytecode = tarinfo.name.endswith('.pyc')
    return None if is_bytecode else tarinfo
with tarfile.open('archive.tar.gz', 'w:gz') as tar:
tar.add('project/', filter=exclude_pyc)
Add String Content
import tarfile
import io
with tarfile.open('archive.tar.gz', 'w:gz') as tar:
# Create TarInfo
data = b"File content here"
info = tarfile.TarInfo(name='generated.txt')
info.size = len(data)
# Add from bytes
tar.addfile(info, io.BytesIO(data))
Open Modes
import tarfile
# Read modes
'r' # Auto-detect compression
'r:' # No compression
'r:gz' # Gzip
'r:bz2' # Bzip2
'r:xz' # XZ/LZMA
# Write modes
'w' # No compression
'w:gz' # Gzip
'w:bz2' # Bzip2
'w:xz' # XZ/LZMA
# Append (uncompressed only)
'a' # Append to existing
Practical Examples
Backup with Exclusions
import tarfile
import os
from datetime import datetime
def backup_project(source_dir, backup_dir):
    """Create a timestamped, gzip-compressed backup of a project directory.

    The archive is written to ``backup_dir`` as
    ``backup_YYYYmmdd_HHMMSS.tar.gz`` and stores the contents of
    ``source_dir`` under a top-level ``backup/`` folder. Any file or
    directory whose name is exactly ``.git``, ``__pycache__``,
    ``node_modules`` or ``.env`` is left out (directories together with
    everything below them).

    Args:
        source_dir: Directory to archive.
        backup_dir: Directory that receives the archive file.

    Returns:
        Path of the archive that was written.
    """
    excluded_names = {'.git', '__pycache__', 'node_modules', '.env'}
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_path = os.path.join(backup_dir, f'backup_{timestamp}.tar.gz')

    def exclude(tarinfo):
        # Compare whole path components. A substring test would also drop
        # unrelated entries such as ".gitignore", ".github/" or
        # "my.environment". Archive member names use "/" as the separator.
        if excluded_names.intersection(tarinfo.name.split('/')):
            return None
        return tarinfo

    with tarfile.open(backup_path, 'w:gz') as tar:
        tar.add(source_dir, arcname='backup', filter=exclude)
    return backup_path


# Stream TAR to Response
import tarfile
import io
def create_tar_response(files):
    """Build a gzip-compressed TAR archive in memory and return its bytes.

    Args:
        files: Mapping of archive member name to content. ``str`` content is
            encoded as UTF-8; bytes-like content (``bytes``, ``bytearray``,
            ``memoryview``) is stored unchanged.

    Returns:
        The complete ``.tar.gz`` archive as a ``bytes`` object, e.g. for use
        as an HTTP response body.
    """
    buffer = io.BytesIO()
    with tarfile.open(fileobj=buffer, mode='w:gz') as tar:
        for name, content in files.items():
            if isinstance(content, str):
                data = content.encode('utf-8')
            else:
                data = bytes(content)
            info = tarfile.TarInfo(name=name)
            # addfile() copies exactly info.size bytes from the file object.
            info.size = len(data)
            tar.addfile(info, io.BytesIO(data))
    # The archive is only complete once the with-block has closed the tar
    # stream. getvalue() returns the whole buffer regardless of the current
    # position, so no seek(0) is needed.
    return buffer.getvalue()


# Extract Specific Files
import tarfile
def extract_matching(tar_path, pattern, output_dir):
    """Extract only the archive members whose name matches a glob pattern.

    Args:
        tar_path: Path to the TAR archive; compression is auto-detected.
        pattern: ``fnmatch``-style pattern tested against each member's full
            archive path. ``*`` also matches ``/``, so ``*.py`` matches
            ``pkg/mod.py``.
        output_dir: Directory the matching members are extracted into.

    Raises:
        ValueError: If a matching member's path would resolve outside
            ``output_dir``.
    """
    import fnmatch
    import os

    root = os.path.abspath(output_dir)
    # Member names come from the archive, so treat them as untrusted. Use the
    # stdlib "data" extraction filter wherever this Python version offers it.
    extract_kwargs = {'filter': 'data'} if hasattr(tarfile, 'data_filter') else {}

    with tarfile.open(tar_path, 'r:*') as tar:
        for member in tar.getmembers():
            if not fnmatch.fnmatch(member.name, pattern):
                continue
            # Explicit containment check so that "../" names are rejected the
            # same way on every Python version, before anything is written.
            target = os.path.abspath(os.path.join(root, member.name))
            if os.path.commonpath([root, target]) != root:
                raise ValueError(f"Path traversal detected: {member.name}")
            tar.extract(member, output_dir, **extract_kwargs)
            print(f"Extracted: {member.name}")
extract_matching('archive.tar.gz', '*.py', './extracted')
Safe Extraction
import tarfile
import os
def safe_extract(tar_path, output_dir):
    """Extract a whole archive after checking that nothing escapes output_dir.

    Every member path, and the target of every symbolic or hard link member,
    is resolved lexically against ``output_dir`` before anything is written.
    If any of them falls outside, nothing is extracted.

    Args:
        tar_path: Path to the TAR archive; compression is auto-detected.
        output_dir: Directory to extract into.

    Raises:
        ValueError: If a member path or link target would resolve outside
            ``output_dir``.
    """
    output_dir = os.path.abspath(output_dir)

    def is_inside(path):
        # commonpath() compares whole path components. A plain
        # str.startswith() prefix test would wrongly accept a sibling such
        # as "/dest_evil" when the destination is "/dest".
        resolved = os.path.abspath(path)
        return os.path.commonpath([output_dir, resolved]) == output_dir

    with tarfile.open(tar_path, 'r:*') as tar:
        members = tar.getmembers()
        for member in members:
            member_path = os.path.join(output_dir, member.name)
            if not is_inside(member_path):
                raise ValueError(f"Path traversal detected: {member.name}")

            # A link that points outside the destination lets a later member
            # be written through it, so link targets are validated as well.
            if member.issym():
                # Symlink targets are relative to the link's own directory.
                link_target = os.path.join(
                    os.path.dirname(member_path), member.linkname
                )
            elif member.islnk():
                # Hard link targets are relative to the archive root.
                link_target = os.path.join(output_dir, member.linkname)
            else:
                continue
            if not is_inside(link_target):
                raise ValueError(
                    f"Path traversal detected: {member.name} -> {member.linkname}"
                )

        # Only reached when every member passed the checks above.
        tar.extractall(output_dir, members=members)


# TarInfo Attributes
import tarfile
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
for info in tar.getmembers():
print(f"Name: {info.name}")
print(f"Size: {info.size}")
print(f"Mode: {oct(info.mode)}")
print(f"UID/GID: {info.uid}/{info.gid}")
print(f"Is file: {info.isfile()}")
print(f"Is dir: {info.isdir()}")
print(f"Is link: {info.issym()}")
print()
Quick Reference
import tarfile
# Open
tar = tarfile.open('file.tar.gz', 'r:gz')
tar = tarfile.open('file.tar.gz', 'w:gz')
# Read operations
tar.getnames() # List file names
tar.getmembers() # List TarInfo objects
tar.extractfile(name) # Get file object
tar.extract(name, path) # Extract one file
tar.extractall(path) # Extract all
# Write operations
tar.add(path) # Add file/directory
tar.add(path, arcname='new') # Add with different name
tar.addfile(tarinfo, fileobj) # Add from TarInfo + data
# Always close
tar.close()
# Or use context manager
with tarfile.open(...) as tar:
...
| Mode | Extension | Compression |
|---|---|---|
| w: | .tar | None |
| w:gz | .tar.gz | Gzip |
| w:bz2 | .tar.bz2 | Bzip2 |
| w:xz | .tar.xz | XZ/LZMA |
tarfile is the Unix archiving standard in Python. Use it for backups, deployment packages, and cross-platform file bundles.
React to this post: