The struct module converts between Python values and C structs represented as bytes. Essential for binary protocols, file formats, and low-level data.
Basic Pack and Unpack
import struct
# Pack Python values into bytes
data = struct.pack("ihf", 1, 2, 3.0)
print(data) # b'\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00@@'
# Unpack bytes into Python values
values = struct.unpack("ihf", data)
print(values) # (1, 2, 3.0)
Format Characters
| Format | C Type | Python Type | Size |
|---|---|---|---|
| x | pad byte | no value | 1 |
| c | char | bytes (len 1) | 1 |
| b | signed char | int | 1 |
| B | unsigned char | int | 1 |
| ? | _Bool | bool | 1 |
| h | short | int | 2 |
| H | unsigned short | int | 2 |
| i | int | int | 4 |
| I | unsigned int | int | 4 |
| l | long | int | 4 |
| L | unsigned long | int | 4 |
| q | long long | int | 8 |
| Q | unsigned long long | int | 8 |
| f | float | float | 4 |
| d | double | float | 8 |
| s | char[] | bytes | |
| p | char[] | bytes | |
Byte Order
import struct
value = 0x12345678
# Native byte order (default)
struct.pack("I", value)
# Little-endian
struct.pack("<I", value) # b'xV4\x12'
# Big-endian (network order)
struct.pack(">I", value) # b'\x124Vx'
# Network order (big-endian)
struct.pack("!I", value) # b'\x124Vx'
| Prefix | Byte Order |
|---|---|
| @ | Native byte order, size, and alignment (default) |
| = | Native byte order, standard size, no alignment padding |
| < | Little-endian |
| > | Big-endian |
| ! | Network (big-endian) |
Strings and Bytes
import struct
# Fixed-length string
data = struct.pack("10s", b"hello")
print(data) # b'hello\x00\x00\x00\x00\x00'
# Unpack string
text = struct.unpack("10s", data)[0]
print(text.rstrip(b"\x00")) # b'hello'
# Pascal string (length-prefixed).
# The count in "Np" is the total number of bytes stored, including the
# leading length byte, so five bytes of text need "6p". With "5p" the value
# would be silently truncated to b"hell".
data = struct.pack("6p", b"hello")  # b'\x05hello'
name = struct.unpack("6p", data)[0]  # b'hello'

# Repetition
import struct
# Pack 3 integers
data = struct.pack("3i", 1, 2, 3)
values = struct.unpack("3i", data)
print(values) # (1, 2, 3)
# Pack array
numbers = [1, 2, 3, 4, 5]
data = struct.pack(f"{len(numbers)}i", *numbers)
Struct Objects
import struct
# Pre-compile format for efficiency
header_struct = struct.Struct(">HHI")
# Use compiled struct
data = header_struct.pack(1, 2, 3)
values = header_struct.unpack(data)
print(header_struct.size) # 8 bytes
calcsize
import struct
# Get size of packed data
size = struct.calcsize("ihf")
print(size) # 12 bytes
# With byte order
size = struct.calcsize("<ihf")
print(size) # 10 bytes (standard sizes, no alignment padding)
Padding and Alignment
import struct
# Native alignment (default)
struct.calcsize("@ci") # May be 8 (with padding)
# No padding
struct.calcsize("=ci") # 5 bytes
# Explicit padding
struct.pack("cxxxxi", b"A", 42) # 4 explicit pad bytes; native mode may still add alignment padding before the int
unpack_from and pack_into
import struct
# Unpack from offset
data = b"\x00\x00\x01\x00\x02\x00"
values = struct.unpack_from("<HH", data, offset=2)
print(values) # (1, 2)
# Pack into buffer
buffer = bytearray(10)
struct.pack_into("<HH", buffer, 2, 1, 2)
print(buffer) # bytearray(b'\x00\x00\x01\x00\x02\x00\x00\x00\x00\x00')
iter_unpack
import struct
# Unpack repeated structures: iter_unpack yields one tuple per
# fixed-size chunk of the buffer.
data = struct.pack("3i", 1, 2, 3) + struct.pack("3i", 4, 5, 6)
for values in struct.iter_unpack("3i", data):
    print(values)
# Output:
# (1, 2, 3)
# (4, 5, 6)

# Reading Binary Files
import struct
# Read fixed-format file
def read_header(file):
    """Read the fixed 12-byte header from ``file`` and return it as a dict.

    The header is big-endian: a 4-byte magic tag followed by two unsigned
    32-bit integers (version and record count).

    Args:
        file: Binary file-like object positioned at the start of the header.

    Returns:
        A dict with the keys "magic" (bytes), "version" (int) and
        "count" (int).

    Raises:
        struct.error: If fewer than 12 bytes could be read.
    """
    raw_header = file.read(12)
    unpacked = struct.unpack(">4sII", raw_header)
    return dict(zip(("magic", "version", "count"), unpacked))
# Read records
def read_records(file, count):
    """Read ``count`` fixed-size records from ``file``.

    Each record is big-endian: an unsigned 32-bit id, a 32-bit float value
    and a 32-byte NUL-padded name.

    Args:
        file: Binary file-like object positioned at the first record.
        count: Number of records to read.

    Returns:
        A list of dicts with the keys "id", "value" and "name"; the name has
        its trailing NUL bytes removed and is decoded to ``str``.

    Raises:
        struct.error: If a record could not be read in full.
    """
    layout = struct.Struct(">If32s")

    def parse_record():
        # One read per record, so records are consumed strictly in order.
        rec_id, rec_value, raw_name = layout.unpack(file.read(layout.size))
        return {
            "id": rec_id,
            "value": rec_value,
            "name": raw_name.rstrip(b"\x00").decode(),
        }

    return [parse_record() for _ in range(count)]


# Writing Binary Files
import struct
def write_binary_file(path, records):
    """Write ``records`` to ``path`` as a header followed by fixed-size records.

    The file is big-endian. The header is the magic b"DATA", version 1 and
    the record count. Each record is an unsigned 32-bit id, a 32-bit float
    value and a 32-byte NUL-padded UTF-8 name.

    Args:
        path: Destination file path; the file is created or overwritten.
        records: Sequence of mappings with the keys "id", "value" and "name".

    Raises:
        struct.error: If an id or value does not fit its field.
    """
    header_struct = struct.Struct(">4sII")
    record_struct = struct.Struct(">If32s")
    with open(path, "wb") as f:
        # Write header
        f.write(header_struct.pack(b"DATA", 1, len(records)))
        # Write records
        for rec in records:
            # Cut the encoded name at 32 bytes, then drop any multi-byte
            # UTF-8 character the cut split in half, so the stored name
            # always decodes cleanly when read back.
            name_bytes = (
                rec["name"]
                .encode("utf-8")[:32]
                .decode("utf-8", errors="ignore")
                .encode("utf-8")
            )
            # The "32s" field pads short values with NUL bytes on its own.
            f.write(record_struct.pack(rec["id"], rec["value"], name_bytes))


# Network Protocols
import struct
import socket
def create_packet(msg_type, payload):
    """Build a packet laid out as [type:1][length:2][payload:n].

    The header is in network byte order: one unsigned byte for the message
    type and an unsigned 16-bit payload length.

    Args:
        msg_type: Message type, 0-255.
        payload: Payload bytes; at most 65535 bytes fit the length field.

    Returns:
        The header followed by the payload, as bytes.
    """
    return struct.pack("!BH", msg_type, len(payload)) + payload
def parse_packet(data):
    """Split a packet into its message type and payload.

    Args:
        data: Bytes starting with the 3-byte header (type, then a 16-bit
            big-endian length), followed by the payload.

    Returns:
        A ``(msg_type, payload)`` tuple. The payload is whatever bytes are
        present after the header, up to the declared length.
    """
    header_len = struct.calcsize("!BH")  # 1-byte type + 2-byte length
    kind, declared_len = struct.unpack("!BH", data[:header_len])
    body_end = header_len + declared_len
    return kind, data[header_len:body_end]
# TCP example
def send_message(sock, msg_type, payload):
    """Frame ``payload`` with create_packet() and send all of it on ``sock``."""
    sock.sendall(create_packet(msg_type, payload))
def _recv_exact(sock, size):
    """Read exactly ``size`` bytes from ``sock``, looping over short reads."""
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            # An empty result means the peer closed the connection.
            raise ConnectionError(
                f"Connection closed with {remaining} of {size} bytes unread"
            )
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)


def recv_message(sock):
    """Receive one [type:1][length:2][payload:n] message from ``sock``.

    ``sock.recv(n)`` may return fewer than ``n`` bytes on a stream socket,
    so both the header and the payload are read in a loop until complete.

    Args:
        sock: Connected socket-like object with a ``recv(size)`` method.

    Returns:
        A ``(msg_type, payload)`` tuple.

    Raises:
        ConnectionError: If the peer closes the connection before a full
            message has arrived.
    """
    header = _recv_exact(sock, 3)
    msg_type, length = struct.unpack("!BH", header)
    payload = _recv_exact(sock, length)
    return msg_type, payload


# BMP Header Example
import struct
def read_bmp_header(path):
    """Read basic image properties from a BMP file's headers.

    Args:
        path: Path of the BMP file.

    Returns:
        A dict with the keys "width", "height", "bpp" (bits per pixel) and
        "data_offset" (byte offset of the pixel data). The height is stored
        as a signed value (negative for top-down bitmaps in the BMP format),
        so its absolute value is returned.

    Raises:
        ValueError: If the file does not start with b"BM", or if its DIB
            header is smaller than 16 bytes (the legacy 12-byte
            BITMAPCOREHEADER stores 16-bit dimensions and is not supported).
        struct.error: If the file ends before the headers are complete.
    """
    with open(path, "rb") as f:
        # BMP file header (14 bytes): magic, file size, two reserved words,
        # pixel data offset.
        if f.read(2) != b"BM":
            raise ValueError("Not a BMP file")
        _file_size, _, _, data_offset = struct.unpack("<IHHI", f.read(12))
        # DIB header (40 bytes for BITMAPINFOHEADER); it starts with its
        # own size.
        header_size = struct.unpack("<I", f.read(4))[0]
        if header_size < 16:
            # The fields below assume 32-bit width and height.
            raise ValueError(f"Unsupported DIB header size: {header_size}")
        width, height = struct.unpack("<ii", f.read(8))
        _planes, bpp = struct.unpack("<HH", f.read(4))
    return {
        "width": width,
        "height": abs(height),
        "bpp": bpp,
        "data_offset": data_offset,
    }


# Common Patterns
import struct
class BinaryReader:
    """Sequential cursor over an in-memory bytes buffer.

    ``offset`` tracks the current read position and advances with every
    successful read.
    """

    def __init__(self, data: bytes):
        self.data = data
        self.offset = 0

    def read(self, fmt: str):
        """Unpack ``fmt`` at the current offset and advance past it.

        Returns the bare value when the format yields exactly one item,
        otherwise the tuple of values.
        """
        unpacked = struct.unpack_from(fmt, self.data, self.offset)
        self.offset += struct.calcsize(fmt)
        if len(unpacked) == 1:
            return unpacked[0]
        return unpacked

    def read_string(self, length: int) -> str:
        """Read ``length`` bytes, strip trailing NULs, and decode to ``str``."""
        start = self.offset
        self.offset = start + length
        return self.data[start:self.offset].rstrip(b"\x00").decode()
# Usage
reader = BinaryReader(data)
magic = reader.read(">I")
name = reader.read_string(32)
value = reader.read(">f")
Best Practices
# Always specify byte order for portability
struct.pack(">I", value) # Not struct.pack("I", value)
# Use network byte order for protocols
struct.pack("!I", value)
# Pre-compile frequently used formats
MY_STRUCT = struct.Struct(">HHI")
# Handle strings carefully
name_bytes = name.encode()[:32].ljust(32, b"\x00")
# Use calcsize for buffer allocation
buffer = bytearray(struct.calcsize(fmt))
struct is essential for binary formats and protocols. Use it when you need precise control over byte layout and cross-platform compatibility.
React to this post: