Python struct: Packing and Unpacking Binary Data

The struct module converts between Python values and C-style binary data. It is essential for working with binary protocols, file formats, and interop with C code.

Basic Packing and Unpacking

import struct
 
# Pack Python values to bytes (no prefix = native byte order, size, and alignment)
data = struct.pack('ihf', 42, 1000, 3.14)
print(data)  # b'*\x00\x00\x00\xe8\x03\x00\x00\xc3\xf5H@' (little-endian host; 2 pad bytes follow the short)
print(len(data))  # 12 bytes: 4 (int) + 2 (short) + 2 (alignment padding) + 4 (float)

# Unpack bytes to Python values
values = struct.unpack('ihf', data)
print(values)  # (42, 1000, 3.140000104904175) - 3.14 was rounded to single precision

Format Characters

import struct
 
# Integer types (each value shown is the minimum or maximum the code accepts)
struct.pack('b', -128)      # signed char (1 byte)
struct.pack('B', 255)       # unsigned char (1 byte)
struct.pack('h', -32768)    # short (2 bytes)
struct.pack('H', 65535)     # unsigned short (2 bytes)
struct.pack('i', -2147483648)  # int (4 bytes)
struct.pack('I', 4294967295)   # unsigned int (4 bytes)
struct.pack('q', -9223372036854775808)  # long long (8 bytes)
struct.pack('Q', 18446744073709551615)  # unsigned long long (8 bytes)

# Floating point
struct.pack('f', 3.14)      # float (4 bytes, single precision)
struct.pack('d', 3.14159265358979)  # double (8 bytes)

# Other
struct.pack('?', True)      # bool (1 byte)
struct.pack('c', b'A')      # char (1 byte); takes a bytes object of length 1
struct.pack('5s', b'hello') # fixed-length bytes field (5 bytes); takes bytes, not str
struct.pack('x')            # pad byte; consumes no argument and packs as b'\x00'

Byte Order

import struct
 
value = 0x12345678

# Native byte order (system dependent); also uses native sizes and alignment
struct.pack('I', value)

# Little-endian (x86, ARM): least significant byte first
struct.pack('<I', value)  # b'xV4\x12'

# Big-endian (network, some file formats): most significant byte first
struct.pack('>I', value)  # b'\x124Vx'

# Network order (big-endian); '!' produces the same bytes as '>'
struct.pack('!I', value)  # b'\x124Vx'

Reading Binary Files

import struct
from pathlib import Path
 
def read_bmp_header(path: str) -> dict:
    """Read the 14-byte BMP file header.

    Only the header bytes are read from disk, so the cost does not grow
    with the size of the image.

    Args:
        path: Location of the BMP file.

    Returns:
        A dict with the keys 'signature' (2 raw bytes, b'BM' for a Windows
        bitmap), 'file_size', 'reserved1', 'reserved2' and 'data_offset'.

    Raises:
        struct.error: If the file holds fewer than 14 bytes.
    """
    # Little-endian with no padding: 2s + I + H + H + I = 14 bytes.
    layout = struct.Struct('<2sIHHI')

    with open(path, 'rb') as f:
        raw = f.read(layout.size)

    signature, file_size, reserved1, reserved2, data_offset = layout.unpack(raw)

    return {
        'signature': signature,      # bytes, e.g. b'BM' (not validated here)
        'file_size': file_size,
        'reserved1': reserved1,
        'reserved2': reserved2,
        'data_offset': data_offset,  # byte offset of the pixel data
    }
 
# Read DIB header
def read_dib_header(path: str) -> dict:
    """Read the 40-byte DIB header that follows the 14-byte BMP file header.

    Only bytes 14..53 are read from disk rather than the whole image. The
    layout parsed here is the 40-byte BITMAPINFOHEADER form; 'header_size'
    is returned as stored and is not checked against 40.

    Args:
        path: Location of the BMP file.

    Returns:
        A dict with the keys 'header_size', 'width', 'height', 'planes',
        'bits_per_pixel', 'compression' and 'image_size'.

    Raises:
        struct.error: If fewer than 40 bytes are available at offset 14.
    """
    # 3 x 4 + 2 x 2 + 6 x 4 = 40 bytes, little-endian, no padding.
    layout = struct.Struct('<IiiHHIIiiII')

    with open(path, 'rb') as f:
        f.seek(14)  # skip the BMP file header
        raw = f.read(layout.size)

    # The last four values (resolution and palette counts in a
    # BITMAPINFOHEADER) are parsed but not part of the returned dict.
    (header_size, width, height, planes, bits_per_pixel,
     compression, image_size, *_unused) = layout.unpack(raw)

    return {
        'header_size': header_size,
        'width': width,
        'height': height,    # signed: may be negative in the file
        'planes': planes,
        'bits_per_pixel': bits_per_pixel,
        'compression': compression,
        'image_size': image_size,
    }

Writing Binary Files

import struct
 
def write_wav_header(f, sample_rate: int, channels: int, 
                     bits_per_sample: int, data_size: int):
    """Write a 44-byte PCM WAV header to *f*.

    The header is assembled from its three parts (RIFF descriptor, 'fmt '
    subchunk, 'data' subchunk header) and written with a single call.

    Args:
        f: Binary file-like object; only its write() method is used.
        sample_rate: Samples per second.
        channels: Number of audio channels.
        bits_per_sample: Bits in one sample of one channel.
        data_size: Size in bytes of the audio data that will follow.

    Raises:
        struct.error: If a value does not fit its fixed-width field.
    """
    bits_per_frame = channels * bits_per_sample
    block_align = bits_per_frame // 8
    byte_rate = sample_rate * bits_per_frame // 8

    # RIFF size field counts everything after itself: 36 header bytes + data.
    riff_chunk = struct.pack('<4sI4s', b'RIFF', 36 + data_size, b'WAVE')

    fmt_chunk = struct.pack(
        '<4sIHHIIHH',
        b'fmt ',
        16,   # size of the fmt payload that follows
        1,    # audio format code for PCM
        channels,
        sample_rate,
        byte_rate,
        block_align,
        bits_per_sample,
    )

    data_chunk_header = struct.pack('<4sI', b'data', data_size)

    f.write(riff_chunk + fmt_chunk + data_chunk_header)
 
# Create WAV file
with open('output.wav', 'wb') as f:
    # 8-bit WAV PCM samples are unsigned, so silence is the midpoint 0x80
    # (0x00 would be full negative deflection, not silence).
    # 44100 one-byte mono samples at 44100 Hz = 1 second.
    audio_data = b'\x80' * 44100  # 1 second of silence
    write_wav_header(f, 44100, 1, 8, len(audio_data))
    f.write(audio_data)

Struct Objects for Efficiency

import struct
 
# Pre-compile format for repeated use
header_struct = struct.Struct('<IBBHI')

print(header_struct.size)  # 12 bytes (4 + 1 + 1 + 2 + 4; '<' adds no padding)

# Pack and unpack
data = header_struct.pack(100, 1, 2, 300, 4)
values = header_struct.unpack(data)  # (100, 1, 2, 300, 4)

# Unpack from buffer at offset (skips the 10 leading bytes without slicing)
buffer = b'\x00' * 10 + data
values = header_struct.unpack_from(buffer, offset=10)

# Pack into existing buffer (must be writable and large enough: 5 + 12 <= 20)
buffer = bytearray(20)
header_struct.pack_into(buffer, 5, 100, 1, 2, 300, 4)

Variable-Length Data

import struct
 
def pack_string(s: str) -> bytes:
    """Encode *s* as UTF-8, prefixed with its byte length.

    The prefix is a little-endian unsigned 32-bit integer holding the
    number of encoded bytes (not the number of characters).
    """
    payload = s.encode('utf-8')
    length_prefix = struct.pack('<I', len(payload))
    return length_prefix + payload
 
def unpack_string(data: bytes, offset: int = 0) -> tuple[str, int]:
    """Decode one length-prefixed UTF-8 string starting at *offset*.

    Expects a little-endian unsigned 32-bit byte count followed by that
    many bytes of UTF-8 text.

    Returns:
        The decoded string and the offset of the first byte after it.

    Raises:
        struct.error: If *data* is too short for the prefix or the payload.
    """
    (size,) = struct.unpack_from('<I', data, offset)
    payload_start = offset + 4

    # unpack_from (rather than a slice) so a truncated payload raises.
    (raw,) = struct.unpack_from(f'{size}s', data, payload_start)
    return raw.decode('utf-8'), payload_start + size
 
# Usage: round-trip a string through the length-prefixed encoding
packed = pack_string("Hello, World!")
text, _ = unpack_string(packed)  # second item is the offset just past the string
print(text)  # Hello, World!

Network Protocol Parsing

import struct
from dataclasses import dataclass
 
@dataclass
class IPHeader:
    """Decoded fields of the fixed 20-byte part of an IPv4 header."""

    version: int          # upper 4 bits of the first header byte
    ihl: int              # lower 4 bits of the first header byte
    tos: int              # second header byte
    total_length: int     # 16-bit field
    identification: int   # 16-bit field
    flags: int            # upper 3 bits of the flags/fragment word
    fragment_offset: int  # lower 13 bits of the flags/fragment word
    ttl: int              # 8-bit field
    protocol: int         # 8-bit field
    checksum: int         # 16-bit field, stored as read (not verified)
    src_addr: str         # dotted-decimal text, e.g. '192.168.0.1'
    dst_addr: str         # dotted-decimal text
 
def parse_ip_header(data: bytes) -> IPHeader:
    """Decode the first 20 bytes of *data* as an IPv4 header.

    Fields are read in network (big-endian) byte order. Bytes past the
    first 20 are ignored.

    Raises:
        struct.error: If *data* holds fewer than 20 bytes.
    """
    (
        version_ihl,
        tos,
        total_length,
        identification,
        flags_fragment,
        ttl,
        protocol,
        checksum,
        src_raw,
        dst_raw,
    ) = struct.unpack('!BBHHHBBH4s4s', data[:20])

    def dotted(raw):
        # Iterating bytes yields ints, one per address octet.
        return '.'.join(map(str, raw))

    # First byte: version in the high nibble, header length in the low one.
    version = version_ihl >> 4
    ihl = version_ihl & 0x0F

    # 16-bit word: 3 flag bits on top, 13-bit fragment offset below.
    flags = flags_fragment >> 13
    fragment_offset = flags_fragment & 0x1FFF

    return IPHeader(
        version=version,
        ihl=ihl,
        tos=tos,
        total_length=total_length,
        identification=identification,
        flags=flags,
        fragment_offset=fragment_offset,
        ttl=ttl,
        protocol=protocol,
        checksum=checksum,
        src_addr=dotted(src_raw),
        dst_addr=dotted(dst_raw),
    )

Padding and Alignment

import struct
 
# Native alignment may add padding
native = struct.pack('ci', b'A', 1000)
print(len(native))  # 8 (3 bytes padding)

# An explicit byte order disables automatic padding; 'x' adds pad bytes by hand
packed = struct.pack('<cxxxI', b'A', 1000)
print(len(packed))  # 8 (explicit padding)

# Calculate struct size
print(struct.calcsize('ci'))   # 8 (with padding)
print(struct.calcsize('<cI'))  # 5 (no padding)

Iterating Over Records

import struct
 
def read_records(path: str, format_str: str):
    """Yield one unpacked tuple per fixed-size record in a binary file.

    Args:
        path: File to read, opened in binary mode.
        format_str: struct format describing a single record.

    Yields:
        The tuple produced by unpacking each complete record, in file order.
        A trailing chunk shorter than one record ends the iteration and is
        dropped without an error.
    """
    layout = struct.Struct(format_str)
    record_size = layout.size

    with open(path, 'rb') as stream:
        chunk = stream.read(record_size)
        while len(chunk) >= record_size:
            yield layout.unpack(chunk)
            chunk = stream.read(record_size)
 
# Read log records (each record: uint64 timestamp, uint8 level, uint32 code = 13 bytes)
for timestamp, level, code in read_records('log.bin', '<QBI'):
    print(f"{timestamp}: Level {level}, Code {code}")

C Struct Interop

import struct
 
# Match C struct:
# struct Point {
#     double x;
#     double y;
#     int32_t id;
# };

# Native mode ('@') uses the host's byte order and alignment, as the C
# compiler does. 'i' is a C int (32 bits on mainstream ABIs, where it matches
# int32_t). The trailing '0d' pads the end to double alignment so the packed
# size equals sizeof(struct Point): 24 bytes on typical 64-bit ABIs, not the
# 20 bytes a packed little-endian '<ddl' would give.
POINT_FORMAT = '@ddi0d'

def to_c_point(x: float, y: float, id: int) -> bytes:
    """Pack a point into the in-memory layout of the C struct above.

    The parameter name 'id' shadows the builtin; it is kept so existing
    keyword callers continue to work.

    Raises:
        struct.error: If *id* does not fit a C int.
    """
    return struct.pack(POINT_FORMAT, x, y, id)

def from_c_point(data: bytes) -> tuple:
    """Unpack bytes laid out like the C struct above into (x, y, id).

    Raises:
        struct.error: If *data* is not exactly struct.calcsize(POINT_FORMAT)
            bytes long.
    """
    return struct.unpack(POINT_FORMAT, data)

# Send to C library
point_bytes = to_c_point(1.5, 2.5, 42)

# Receive from C library
x, y, point_id = from_c_point(point_bytes)

Format String Reference

Character	Type	Standard size (bytes)
`x`	pad byte	1
`c`	char	1
`b`/`B`	signed/unsigned char	1
`?`	bool	1
`h`/`H`	short/unsigned short	2
`i`/`I`	int/unsigned int	4
`l`/`L`	long/unsigned long	4
`q`/`Q`	long long/unsigned	8
`f`	float	4
`d`	double	8
`s`	char[]	n
`p`	pascal string	n

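Byte order prefixes: @ (native byte order, sizes, and alignment; the default), = (native byte order, standard sizes, no padding), < (little-endian), > (big-endian), ! (network/big-endian). The sizes in the table are the standard sizes used with =, <, > and !; in native mode they follow the platform's C types (for example, `l` is 8 bytes on 64-bit Linux).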

The struct module bridges Python and binary data. Master it to work with file formats, network protocols, and C libraries.

React to this post:

#Basic Packing and Unpacking

#Format Characters

#Byte Order

#Reading Binary Files

#Writing Binary Files

#Struct Objects for Efficiency

#Variable-Length Data

#Network Protocol Parsing

#Padding and Alignment

#Iterating Over Records

#C Struct Interop

#Format String Reference

Need help shipping fast?