Python difflib Module: Comparing Sequences

The difflib module compares sequences—strings, lists of lines, or any other sequences whose items are hashable. Use it to generate diffs, find close matches, and compute similarity ratios.

SequenceMatcher: Similarity Ratio

from difflib import SequenceMatcher
 
# Two strings that share the "hello " prefix
s1, s2 = "hello world", "hello there"

# isjunk keeps its default (None); the sequences are passed by keyword
matcher = SequenceMatcher(a=s1, b=s2)

# Exact similarity: 2 * matches / total length
print(matcher.ratio())  # 0.636... (63.6% similar)

# Cheaper estimate (faster, approximate)
print(matcher.quick_ratio())

# Cheapest estimate (fastest, less accurate)
print(matcher.real_quick_ratio())

get_close_matches: Fuzzy Matching

from difflib import get_close_matches
 
words = ["apple", "application", "apply", "banana", "bandana"]

# Find closest matches (defaults: n=3, cutoff=0.6), best score first.
# "apple" and "apply" both score 0.8; "application" scores only 0.5,
# which is below the cutoff, so it is not returned.
matches = get_close_matches("appel", words)
print(matches)  # ['apply', 'apple']

# With cutoff and max results
matches = get_close_matches(
    "appel",
    words,
    n=2,        # Max results
    cutoff=0.6  # Minimum similarity
)
print(matches)  # ['apply', 'apple']

unified_diff: Git-Style Diffs

from difflib import unified_diff
 
# Every line handed to unified_diff must end with "\n", including the last
# one; otherwise "line three" and "line three\n" compare as different lines
# and the joined output runs two diff lines together. Putting the closing
# quotes on their own line gives the final line its newline.
old = """line one
line two
line three
""".splitlines(keepends=True)

new = """line one
line 2
line three
line four
""".splitlines(keepends=True)

diff = unified_diff(
    old, new,
    fromfile='old.txt',
    tofile='new.txt'
)
print(''.join(diff))
# --- old.txt
# +++ new.txt
# @@ -1,3 +1,4 @@
#  line one
# -line two
# +line 2
#  line three
# +line four

context_diff: Classic Context Diffs

from difflib import context_diff
 
# Inputs are spelled out here so the example runs on its own. Each line ends
# with "\n": context_diff compares lines verbatim, so a final line without a
# newline would not match its newline-terminated counterpart.
old = ["line one\n", "line two\n", "line three\n"]
new = ["line one\n", "line 2\n", "line three\n", "line four\n"]

diff = context_diff(
    old, new,
    fromfile='old.txt',
    tofile='new.txt'
)
print(''.join(diff))
# *** old.txt
# --- new.txt
# ***************
# *** 1,3 ****
#   line one
# ! line two
#   line three
# --- 1,4 ----
#   line one
# ! line 2
#   line three
# + line four

ndiff: Human-Readable Diffs

from difflib import ndiff
 
# Compare word by word: each list item is one "line" of the delta
before = "hello world".split()
after = "hello there world".split()

diff = ndiff(before, after)
print(*diff, sep='\n')
#   hello
# + there
#   world

Differ: Detailed Comparison

from difflib import Differ
 
# Differ.compare() expects single-line strings ending in "\n". The "?" guide
# lines are emitted only when two lines are similar enough to count as an
# edit of one another; with the newline included, "two\n" vs "too\n" qualifies.
d = Differ()
diff = d.compare(
    ["one\n", "two\n", "three\n"],
    ["one\n", "too\n", "three\n", "four\n"]
)
# Lines already carry their own newline, so join with '' and suppress print's.
print(''.join(diff), end='')
#   one
# - two
# ?  ^
# + too
# ?  ^
#   three
# + four

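The ? lines mark exactly where characters differ. Differ emits them only for pairs of lines that are similar enough to be treated as an edit of one another.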

HtmlDiff: HTML Table Diffs

from difflib import HtmlDiff
 
old_lines = ["line one", "line two", "line three"]
new_lines = ["line one", "line 2", "line three", "line four"]

# Both renderings take the same pair of line lists
sides = (old_lines, new_lines)
differ = HtmlDiff()

# Complete, standalone HTML document containing the diff table
html = differ.make_file(*sides)

# Only the <table> markup, for embedding in an existing page
html_table = differ.make_table(*sides)

Finding Matching Blocks

from difflib import SequenceMatcher
 
s1 = "abcd"
s2 = "bcde"

matcher = SequenceMatcher(None, s1, s2)

# get_matching_blocks() always ends with a zero-length sentinel block
# (a=len(s1), b=len(s2), size=0). Filter it out so only real matches print.
real_blocks = [block for block in matcher.get_matching_blocks() if block.size]

for block in real_blocks:
    print(f"s1[{block.a}:{block.a+block.size}] == s2[{block.b}:{block.b+block.size}]")
# s1[1:4] == s2[0:3] (bcd)

Get Operations (Edit Script)

from difflib import SequenceMatcher
 
s1, s2 = "hello", "hallo"
matcher = SequenceMatcher(None, s1, s2)

# Each opcode is (tag, i1, i2, j1, j2): turn s1[i1:i2] into s2[j1:j2]
for tag, *bounds in matcher.get_opcodes():
    i1, i2, j1, j2 = bounds
    print(f"{tag}: s1[{i1}:{i2}] -> s2[{j1}:{j2}]")
# equal: s1[0:1] -> s2[0:1]
# replace: s1[1:2] -> s2[1:2]
# equal: s1[2:5] -> s2[2:5]

Tags: equal, replace, insert, delete

Practical Examples

Spell Checker Suggestions

from difflib import get_close_matches
 
dictionary = ["python", "programming", "function", "variable", "class"]


def suggest_corrections(word: str, dictionary: list[str]) -> list[str]:
    """Return up to three entries of *dictionary* that closely match *word*.

    Candidates scoring below 0.6 are dropped; the best match comes first.
    """
    closest = get_close_matches(word, possibilities=dictionary, n=3, cutoff=0.6)
    return closest


print(suggest_corrections("pythn", dictionary))
# ['python']
print(suggest_corrections("functon", dictionary))
# ['function']

Fuzzy Command Matching

from difflib import get_close_matches
 
commands = ["start", "stop", "status", "restart", "help", "quit"]
 
def parse_command(user_input: str) -> str | None:
    matches = get_close_matches(user_input.lower(), commands, n=1, cutoff=0.6)
    return matches[0] if matches else None
 
print(parse_command("strt"))    # 'start'
print(parse_command("halp"))    # 'help'
print(parse_command("xyz"))     # None

Config File Diff

from difflib import unified_diff
from pathlib import Path
 
def diff_files(path1: str, path2: str) -> str:
    """Return a unified diff of two text files as a single string.

    Args:
        path1: Path of the original file; also used as the "---" label.
        path2: Path of the modified file; also used as the "+++" label.

    Returns:
        The unified diff text, or an empty string when the files have the
        same lines. A last line that lacks a trailing newline is followed by
        the conventional "\\ No newline at end of file" marker, so diff
        lines never run together.
    """
    def _read_lines(path: str) -> list[str]:
        # readlines() splits on "\n" only, so just the final line of a file
        # can come back without a terminator.
        with Path(path).open() as handle:
            return handle.readlines()

    old = _read_lines(path1)
    new = _read_lines(path2)

    pieces = []
    for line in unified_diff(old, new, fromfile=path1, tofile=path2):
        if line.endswith("\n"):
            pieces.append(line)
        else:
            # Unterminated final line: end it and flag it the way diff/git do.
            pieces.append(f"{line}\n\\ No newline at end of file\n")
    return "".join(pieces)

Similarity Scoring

from difflib import SequenceMatcher
 
def similarity(s1: str, s2: str) -> float:
    """Score how alike two strings are, ignoring case (0 = nothing shared, 1 = identical)."""
    lowered = (s1.lower(), s2.lower())
    return SequenceMatcher(None, *lowered).ratio()


# Pick the candidate that scores highest against the query
candidates = ["New York", "New Jersey", "New Orleans", "Newark"]
query = "new yrok"


def _score(candidate: str) -> float:
    """Similarity of one candidate to the module-level query."""
    return similarity(query, candidate)


best = max(candidates, key=_score)
print(best)  # "New York"

Quick Reference

from difflib import (
    SequenceMatcher,   # Compare sequences
    get_close_matches, # Find similar strings
    unified_diff,      # Git-style diff
    context_diff,      # Context diff
    ndiff,             # Human-readable diff
    Differ,            # Detailed comparison
    HtmlDiff,          # HTML table diff
)
 
# NOTE: a, b, word, possibilities, old and new below are placeholders for
# your own data; this cheat sheet is not meant to be run as-is.

# Similarity ratio (a float between 0 and 1)
SequenceMatcher(None, a, b).ratio()

# Find similar (at most n matches, each scoring at least cutoff)
get_close_matches(word, possibilities, n=3, cutoff=0.6)

# Generate diff (yields the diff lines lazily; join or iterate to use them)
unified_diff(old, new, fromfile='a', tofile='b')

Function	Output
`SequenceMatcher.ratio()`	Float 0-1
`get_close_matches()`	List of matches
`unified_diff()`	Generator of lines
`context_diff()`	Generator of lines
`ndiff()`	Generator of lines

difflib is the stdlib's diff engine. Use it for fuzzy matching, generating patches, and comparing anything sequential.

React to this post:

#SequenceMatcher: Similarity Ratio

#get_close_matches: Fuzzy Matching

#unified_diff: Git-Style Diffs

#context_diff: Classic Context Diffs

#ndiff: Human-Readable Diffs

#Differ: Detailed Comparison

#HtmlDiff: HTML Table Diffs

#Finding Matching Blocks

#Get Operations (Edit Script)

#Practical Examples

#Spell Checker Suggestions

#Fuzzy Command Matching

#Config File Diff

#Similarity Scoring

#Quick Reference

Keep Reading

Need help shipping fast?