The struct module converts between Python values and C structs represented as bytes. It is essential for working with binary protocols, file formats, and other low-level data.

Basic Pack and Unpack

import struct
 
# Pack Python values into bytes (no prefix = native byte order, size and alignment)
data = struct.pack("ihf", 1, 2, 3.0)
# Output shown for a typical little-endian platform; the two zero bytes after
# the short are alignment padding so the float starts on a 4-byte boundary.
print(data)  # b'\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00@@'
 
# Unpack bytes into Python values (the result is always a tuple)
values = struct.unpack("ihf", data)
print(values)  # (1, 2, 3.0)

Format Characters

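| Format | C Type             | Python Type   | Standard Size |
|--------|--------------------|---------------|---------------|
| x      | pad byte           | no value      | 1             |
| c      | char               | bytes (len 1) | 1             |
| b      | signed char        | int           | 1             |
| B      | unsigned char      | int           | 1             |
| ?      | _Bool              | bool          | 1             |
| h      | short              | int           | 2             |
| H      | unsigned short     | int           | 2             |
| i      | int                | int           | 4             |
| I      | unsigned int       | int           | 4             |
| l      | long               | int           | 4             |
| L      | unsigned long      | int           | 4             |
| q      | long long          | int           | 8             |
| Q      | unsigned long long | int           | 8             |
| f      | float              | float         | 4             |
| d      | double             | float         | 8             |
| s      | char[]             | bytes         | —             |
| p      | char[]             | bytes         | —             |

Sizes are the standard sizes used with the =, <, > and ! prefixes. In native mode (@, the default) sizes follow the platform's C compiler; for example, long is 8 bytes on most 64-bit Unix systems.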

Byte Order

import struct
 
value = 0x12345678
 
# Native byte order (default) -- the resulting bytes depend on the host CPU
struct.pack("I", value)
 
# Little-endian: least significant byte first
struct.pack("<I", value)  # b'xV4\x12'
 
# Big-endian (network order): most significant byte first
struct.pack(">I", value)  # b'\x124Vx'
 
# Network order ("!" produces the same bytes as ">")
struct.pack("!I", value)  # b'\x124Vx'

| Prefix | Byte Order            |
|--------|-----------------------|
| @      | Native (default)      |
| =      | Native, standard size |
| <      | Little-endian         |
| >      | Big-endian            |
| !      | Network (big-endian)  |

Strings and Bytes

import struct
 
# Fixed-length string: "10s" always occupies 10 bytes; shorter input is
# padded with null bytes.
data = struct.pack("10s", b"hello")
print(data)  # b'hello\x00\x00\x00\x00\x00'
 
# Unpack string (the padding comes back too, so strip it)
text = struct.unpack("10s", data)[0]
print(text.rstrip(b"\x00"))  # b'hello'
 
# Pascal string (length-prefixed). The count includes the leading length
# byte, so a 5-byte payload needs "6p"; "5p" would silently store b'hell'.
data = struct.pack("6p", b"hello")  # b'\x05hello'
name = struct.unpack("6p", data)[0]  # b'hello'

Repetition

import struct
 
# Pack 3 integers ("3i" is shorthand for "iii")
data = struct.pack("3i", 1, 2, 3)
values = struct.unpack("3i", data)
print(values)  # (1, 2, 3)
 
# Pack array: derive the repeat count from the list length and spread the
# list into positional arguments
numbers = [1, 2, 3, 4, 5]
data = struct.pack(f"{len(numbers)}i", *numbers)

Struct Objects

import struct
 
# Pre-compile format for efficiency
header_struct = struct.Struct(">HHI")
 
# Use compiled struct: its pack/unpack methods take no format argument
data = header_struct.pack(1, 2, 3)
values = header_struct.unpack(data)
print(header_struct.size)  # 8 bytes (2 + 2 + 4)

calcsize

import struct
 
# Get size of packed data (native mode: includes alignment padding before "f")
size = struct.calcsize("ihf")
print(size)  # 12 bytes on typical platforms
 
# With an explicit byte order, standard sizes are used and no padding is added
size = struct.calcsize("<ihf")
print(size)  # 10 bytes (4 + 2 + 4)

Padding and Alignment

import struct
 
# Native alignment (default)
struct.calcsize("@ci")  # May be 8 (with padding)
 
# No padding
struct.calcsize("=ci")  # 5 bytes
 
# Explicit padding: each "x" is one pad byte.
# NOTE: this format has no prefix, so it is still in native mode and the
# platform may add alignment padding before "i" on top of the explicit bytes.
struct.pack("cxxxxi", b"A", 42)  # 4 explicit pad bytes

unpack_from and pack_into

import struct
 
# Unpack from offset: skip the first 2 bytes, then decode two little-endian
# unsigned shorts
data = b"\x00\x00\x01\x00\x02\x00"
values = struct.unpack_from("<HH", data, offset=2)
print(values)  # (1, 2)
 
# Pack into buffer: write in place starting at byte offset 2; the other
# bytes of the buffer are left untouched
buffer = bytearray(10)
struct.pack_into("<HH", buffer, 2, 1, 2)
print(buffer)  # bytearray(b'\x00\x00\x01\x00\x02\x00\x00\x00\x00\x00')

iter_unpack

import struct
 
# Unpack repeated structures: each iteration yields one tuple per "3i" chunk.
# The buffer length must be a multiple of the format's size.
data = struct.pack("3i", 1, 2, 3) + struct.pack("3i", 4, 5, 6)
for values in struct.iter_unpack("3i", data):
    print(values)
# (1, 2, 3)
# (4, 5, 6)

Reading Binary Files

import struct
 
# Read fixed-format file
def read_header(file):
    """Read the 12-byte file header and return its fields as a dict.

    The header is big-endian: a 4-byte magic tag followed by two unsigned
    32-bit integers (version and record count).
    """
    raw = file.read(12)
    fields = struct.unpack(">4sII", raw)
    return dict(zip(("magic", "version", "count"), fields))
 
# Read records
def read_records(file, count):
    """Read *count* fixed-size records from *file* and return them as dicts.

    Each record is big-endian: an unsigned 32-bit id, a 32-bit float value
    and a 32-byte name field whose trailing null padding is stripped before
    decoding.
    """
    layout = struct.Struct(">If32s")

    def parse_one():
        rec_id, rec_value, raw_name = layout.unpack(file.read(layout.size))
        return {
            "id": rec_id,
            "value": rec_value,
            "name": raw_name.rstrip(b"\x00").decode(),
        }

    return [parse_one() for _ in range(count)]

Writing Binary Files

import struct
 
def write_binary_file(path, records):
    """Write *records* to *path* in a simple big-endian binary format.

    The file starts with a 12-byte header (magic b"DATA", version 1, record
    count) followed by one 40-byte record per entry: unsigned 32-bit id,
    32-bit float value and a 32-byte null-padded UTF-8 name.

    Args:
        path: Destination file path; the file is created or overwritten.
        records: Sequence of dicts with "id", "value" and "name" keys.
    """
    header_struct = struct.Struct(">4sII")
    record_struct = struct.Struct(">If32s")

    with open(path, "wb") as f:
        # Write header
        f.write(header_struct.pack(b"DATA", 1, len(records)))

        # Write records
        for rec in records:
            name_bytes = rec["name"].encode()[:32]
            # Cutting at a fixed byte count can split a multi-byte UTF-8
            # character; drop the dangling partial sequence so the stored
            # field can be decoded again when the file is read back.
            name_bytes = name_bytes.decode("utf-8", "ignore").encode()
            name_bytes = name_bytes.ljust(32, b"\x00")
            f.write(record_struct.pack(rec["id"], rec["value"], name_bytes))

Network Protocols

import struct
import socket
 
def create_packet(msg_type, payload):
    """Build a packet laid out as [type:1][length:2][payload:n].

    The header is packed in network byte order; the length field holds the
    payload size in bytes.
    """
    return struct.pack("!BH", msg_type, len(payload)) + payload
 
def parse_packet(data):
    """Split a packet into its message type and payload.

    The first three bytes are the network-order header (1-byte type, 2-byte
    payload length); the payload is the slice of that length that follows.
    """
    header_len = 3
    msg_type, length = struct.unpack("!BH", data[:header_len])
    body_end = header_len + length
    return msg_type, data[header_len:body_end]
 
# TCP example
def send_message(sock, msg_type, payload):
    """Frame *payload* with create_packet and send the whole packet on *sock*."""
    sock.sendall(create_packet(msg_type, payload))
 
def _recv_exactly(sock, size):
    """Read exactly *size* bytes from *sock*, looping over short reads."""
    chunks = []
    remaining = size
    while remaining > 0:
        chunk = sock.recv(remaining)
        if not chunk:
            # recv() returning b"" means the peer closed the connection.
            raise ConnectionError(
                f"connection closed with {remaining} of {size} bytes missing"
            )
        chunks.append(chunk)
        remaining -= len(chunk)
    return b"".join(chunks)


def recv_message(sock):
    """Receive one [type:1][length:2][payload:n] message from *sock*.

    A single recv() call may return fewer bytes than requested on a stream
    socket, so both the header and the payload are read in a loop until
    complete.

    Returns:
        A (msg_type, payload) tuple.

    Raises:
        ConnectionError: If the peer closes the connection before a full
            message has arrived.
    """
    header = _recv_exactly(sock, 3)
    msg_type, length = struct.unpack("!BH", header)
    payload = _recv_exactly(sock, length)
    return msg_type, payload

BMP Header Example

import struct
 
def read_bmp_header(path):
    """Read the headers of a BMP file and return its basic image properties.

    Returns a dict with "width", "height" (as an absolute value), "bpp" and
    "data_offset".

    Raises:
        ValueError: If the file does not start with the b"BM" signature.
    """
    with open(path, "rb") as bmp:
        # The 14-byte file header opens with the 2-byte signature.
        if bmp.read(2) != b"BM":
            raise ValueError("Not a BMP file")

        # Remaining 12 bytes of the file header; only the last field is kept.
        _file_size, _unused_a, _unused_b, data_offset = struct.unpack(
            "<IHHI", bmp.read(12)
        )

        # Leading fields of the DIB header (40 bytes for BITMAPINFOHEADER).
        (_dib_size,) = struct.unpack("<I", bmp.read(4))
        width, height = struct.unpack("<ii", bmp.read(8))
        _planes, bpp = struct.unpack("<HH", bmp.read(4))

    return dict(
        width=width,
        height=abs(height),
        bpp=bpp,
        data_offset=data_offset,
    )

Common Patterns

import struct
 
class BinaryReader:
    """Sequential reader that decodes values from a bytes buffer.

    Keeps a cursor (``offset``) into ``data`` and advances it after every
    read.
    """

    def __init__(self, data: bytes):
        self.data = data
        self.offset = 0

    def read(self, fmt: str):
        """Decode *fmt* at the cursor and advance past it.

        Returns the bare value when the format yields exactly one item,
        otherwise the tuple of values.
        """
        start = self.offset
        fields = struct.unpack_from(fmt, self.data, start)
        self.offset = start + struct.calcsize(fmt)
        if len(fields) == 1:
            return fields[0]
        return fields

    def read_string(self, length: int) -> str:
        """Read *length* bytes, strip trailing null bytes and decode to str."""
        end = self.offset + length
        raw = self.data[self.offset:end]
        self.offset = end
        return raw.rstrip(b"\x00").decode()
 
# Usage -- `data` is not defined in this snippet; it is assumed to be a
# bytes object obtained elsewhere (for example, read from a file).
reader = BinaryReader(data)
magic = reader.read(">I")  # one big-endian unsigned 32-bit value
name = reader.read_string(32)  # 32-byte field, trailing nulls stripped
value = reader.read(">f")  # one big-endian 32-bit float

Best Practices

# NOTE: `value`, `name` and `fmt` below are placeholders, not defined here.

# Always specify byte order for portability
struct.pack(">I", value)  # Not struct.pack("I", value)
 
# Use network byte order for protocols
struct.pack("!I", value)
 
# Pre-compile frequently used formats
MY_STRUCT = struct.Struct(">HHI")
 
# Handle strings carefully: encode, truncate to the field width, then pad.
# Slicing encoded bytes can cut a multi-byte UTF-8 character in half, so
# check the result if names may contain non-ASCII text.
name_bytes = name.encode()[:32].ljust(32, b"\x00")
 
# Use calcsize for buffer allocation
buffer = bytearray(struct.calcsize(fmt))

struct is essential for binary formats and protocols. Use it when you need precise control over byte layout and cross-platform compatibility.

React to this post: