The difflib module compares sequences—strings, lists of lines, or any other sequence of hashable items. Use it to generate diffs, find close matches, and compute similarity ratios.
SequenceMatcher: Similarity Ratio
from difflib import SequenceMatcher
# Compare two strings
s1 = "hello world"
s2 = "hello there"
matcher = SequenceMatcher(None, s1, s2)
print(matcher.ratio()) # 0.636... (63.6% similar)
# Quick ratio (faster; an upper bound on ratio(), not the exact value)
print(matcher.quick_ratio())
# Real quick ratio (fastest; a looser upper bound on ratio())
print(matcher.real_quick_ratio())
get_close_matches: Fuzzy Matching
from difflib import get_close_matches
words = ["apple", "application", "apply", "banana", "bandana"]
# Find closest matches
matches = get_close_matches("appel", words)
print(matches) # ['apply', 'apple'] ("application" scores 0.5, below the default cutoff of 0.6)
# With cutoff and max results
matches = get_close_matches(
"appel",
words,
n=2, # Max results
cutoff=0.6 # Minimum similarity
)
unified_diff: Git-Style Diffs
from difflib import unified_diff
# Every line, including the last one, must keep its line ending: the diff
# lines are joined with '' below, so an unterminated final line would be
# fused with the next output line and would also compare as "changed".
old = """line one
line two
line three
""".splitlines(keepends=True)
new = """line one
line 2
line three
line four
""".splitlines(keepends=True)
# unified_diff yields the diff lazily, one output line at a time.
diff = unified_diff(
    old, new,
    fromfile='old.txt',
    tofile='new.txt',
)
print(''.join(diff))
# --- old.txt
# +++ new.txt
# @@ -1,3 +1,4 @@
# line one
# -line two
# +line 2
# line three
# +line four
context_diff: Classic Context Diffs
from difflib import context_diff
diff = context_diff(
old, new,
fromfile='old.txt',
tofile='new.txt'
)
print(''.join(diff))
# *** old.txt
# --- new.txt
# ***************
# *** 1,3 ****
# line one
# ! line two
# line three
# --- 1,4 ----
# line one
# ! line 2
# line three
# + line four
ndiff: Human-Readable Diffs
from difflib import ndiff
diff = ndiff(
"hello world".split(),
"hello there world".split()
)
print('\n'.join(diff))
# hello
# + there
# world
Differ: Detailed Comparison
from difflib import Differ
d = Differ()
diff = d.compare(
["one", "two", "three"],
["one", "too", "three", "four"]
)
print('\n'.join(diff))
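#   one
# - two
# + too
#   three
# + four
When a replaced line is similar enough to its counterpart (similarity ratio of at least 0.75), Differ also emits ? lines that show exactly where characters differ. Here "two" and "too" score only about 0.67, so no ? line appears.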
HtmlDiff: HTML Table Diffs
from difflib import HtmlDiff
old_lines = ["line one", "line two", "line three"]
new_lines = ["line one", "line 2", "line three", "line four"]
differ = HtmlDiff()
html = differ.make_file(old_lines, new_lines)
# Returns full HTML document with diff table
# Just the table
html_table = differ.make_table(old_lines, new_lines)
Finding Matching Blocks
from difflib import SequenceMatcher
s1 = "abcd"
s2 = "bcde"
matcher = SequenceMatcher(None, s1, s2)
# Get matching blocks
for block in matcher.get_matching_blocks():
print(f"s1[{block.a}:{block.a+block.size}] == s2[{block.b}:{block.b+block.size}]")
# s1[1:4] == s2[0:3] (bcd)
# s1[4:4] == s2[4:4] (zero-length sentinel block that always ends the list)
Get Operations (Edit Script)
from difflib import SequenceMatcher
s1 = "hello"
s2 = "hallo"
matcher = SequenceMatcher(None, s1, s2)
for tag, i1, i2, j1, j2 in matcher.get_opcodes():
print(f"{tag}: s1[{i1}:{i2}] -> s2[{j1}:{j2}]")
# equal: s1[0:1] -> s2[0:1]
# replace: s1[1:2] -> s2[1:2]
# equal: s1[2:5] -> s2[2:5]
Tags: equal, replace, insert, delete
Practical Examples
Spell Checker Suggestions
from difflib import get_close_matches
dictionary = ["python", "programming", "function", "variable", "class"]
def suggest_corrections(word: str, dictionary: list[str]) -> list[str]:
    """Return up to three entries of *dictionary* that closely match *word*.

    Args:
        word: The possibly misspelled word to look up.
        dictionary: Candidate words to compare against.

    Returns:
        The candidates accepted by ``get_close_matches`` with ``n=3`` and
        ``cutoff=0.6``, most similar first. The list is empty when no
        candidate reaches the 0.6 similarity cutoff.
    """
    return get_close_matches(word, dictionary, n=3, cutoff=0.6)
print(suggest_corrections("pythn", dictionary))
# ['python']
print(suggest_corrections("functon", dictionary))
# ['function']
Fuzzy Command Matching
from difflib import get_close_matches
commands = ["start", "stop", "status", "restart", "help", "quit"]
def parse_command(user_input: str) -> str | None:
matches = get_close_matches(user_input.lower(), commands, n=1, cutoff=0.6)
return matches[0] if matches else None
print(parse_command("strt")) # 'start'
print(parse_command("halp")) # 'help'
print(parse_command("xyz")) # None
Config File Diff
from difflib import unified_diff
from pathlib import Path
def diff_files(path1: str, path2: str) -> str:
old = Path(path1).read_text().splitlines(keepends=True)
new = Path(path2).read_text().splitlines(keepends=True)
diff = unified_diff(old, new, fromfile=path1, tofile=path2)
return ''.join(diff)Similarity Scoring
from difflib import SequenceMatcher
def similarity(s1: str, s2: str) -> float:
    """Return the case-insensitive similarity ratio of two strings.

    Both strings are lower-cased before comparison, so strings that differ
    only in case score 1.0.

    Args:
        s1: First string.
        s2: Second string.

    Returns:
        ``SequenceMatcher.ratio()`` of the lower-cased strings: a float
        between 0 (nothing in common) and 1 (identical).
    """
    return SequenceMatcher(None, s1.lower(), s2.lower()).ratio()
# Find most similar string
candidates = ["New York", "New Jersey", "New Orleans", "Newark"]
query = "new yrok"
best = max(candidates, key=lambda c: similarity(query, c))
print(best) # "New York"
Quick Reference
from difflib import (
SequenceMatcher, # Compare sequences
get_close_matches, # Find similar strings
unified_diff, # Git-style diff
context_diff, # Context diff
ndiff, # Human-readable diff
Differ, # Detailed comparison
HtmlDiff, # HTML table diff
)
# Similarity ratio
SequenceMatcher(None, a, b).ratio()
# Find similar
get_close_matches(word, possibilities, n=3, cutoff=0.6)
# Generate diff
unified_diff(old, new, fromfile='a', tofile='b')

| Function | Output |
|---|---|
| SequenceMatcher.ratio() | Float 0-1 |
| get_close_matches() | List of matches |
| unified_diff() | Generator of lines |
| context_diff() | Generator of lines |
| ndiff() | Generator of lines |
difflib is the stdlib's diff engine. Use it for fuzzy matching, generating patches, and comparing anything sequential.
React to this post: