The glob module finds files matching shell-style wildcards. Simpler than walking directories manually.

Basic Patterns

import glob
 
# All .txt files in current directory
glob.glob('*.txt')
# ['file1.txt', 'file2.txt', 'notes.txt']
 
# All Python files
glob.glob('*.py')
 
# Files starting with "test"
glob.glob('test*')
 
# Single character wildcard
glob.glob('file?.txt')
# ['file1.txt', 'file2.txt'] but not 'file10.txt'

Recursive Patterns

import glob
 
# All .py files in the current directory and every subdirectory (Python 3.5+).
# With recursive=True, '**' matches zero or more directory levels.
glob.glob('**/*.py', recursive=True)
 
# Everything recursively (hidden entries are still skipped by default)
glob.glob('**/*', recursive=True)
 
# Specific depth
glob.glob('*/*.py')      # Exactly one level deep
glob.glob('*/*/*.py')    # Exactly two levels deep

iglob: Iterator Version

import glob
 
# Returns iterator (memory efficient for many files)
for filepath in glob.iglob('**/*.log', recursive=True):
    print(filepath)
 
# glob() returns list (loads all into memory)
files = glob.glob('**/*.log', recursive=True)

Character Classes

import glob
 
# Match specific characters
glob.glob('[abc].txt')
# ['a.txt', 'b.txt', 'c.txt']
 
# Match range
glob.glob('[0-9].txt')
# ['1.txt', '2.txt', ...]
 
# Exclude characters
glob.glob('[!0-9]*.txt')
# Files not starting with digits

Hidden Files

import glob
 
# By default, doesn't match hidden files (starting with .)
glob.glob('*')  # Skips .gitignore, .env, etc.
 
# Include hidden files (Python 3.11+)
glob.glob('*', include_hidden=True)
 
# Or match explicitly
glob.glob('.*')  # Only hidden files
glob.glob('*') + glob.glob('.*')  # All files

Absolute Paths

import glob
from pathlib import Path
 
# Relative paths (default)
glob.glob('*.txt')
# ['file.txt', 'notes.txt']
 
# Absolute paths
glob.glob('/home/user/docs/*.txt')
 
# Using Path for cross-platform
base = Path.home() / 'Documents'
glob.glob(str(base / '*.pdf'))

pathlib Integration

from pathlib import Path
 
# Path.glob() method
p = Path('.')
for txt in p.glob('*.txt'):
    print(txt)
 
# Recursive
for py in p.glob('**/*.py'):
    print(py)
 
# rglob = recursive glob
for py in p.rglob('*.py'):
    print(py)

Practical Examples

Find and Process Files

import glob
 
for filepath in glob.iglob('data/**/*.csv', recursive=True):
    print(f"Processing {filepath}")
    # process_csv(filepath)

Cleanup Old Logs

import glob
import os
from datetime import datetime, timedelta
 
def cleanup_old_logs(days=30, log_dir='logs'):
    """Delete ``*.log`` files under ``log_dir`` that are older than ``days``.

    The directory is searched recursively. A file is removed when its
    last-modification time is earlier than "now minus ``days`` days".
    One ``Deleted: <path>`` line is printed per removed file.

    Args:
        days: Age threshold in days (default 30).
        log_dir: Root directory to search (default ``'logs'``, relative to
            the current working directory).
    """
    cutoff = datetime.now() - timedelta(days=days)
    # Escape the root so glob metacharacters in a directory name stay literal.
    pattern = os.path.join(glob.escape(log_dir), '**', '*.log')

    for log in glob.iglob(pattern, recursive=True):
        # The pattern also matches directories named "*.log"; os.remove()
        # would raise on those, so only regular files are considered.
        if not os.path.isfile(log):
            continue
        mtime = datetime.fromtimestamp(os.path.getmtime(log))
        if mtime < cutoff:
            os.remove(log)
            print(f"Deleted: {log}")

Collect File Stats

import glob
import os
from pathlib import Path
 
def get_project_stats(pattern='**/*.py'):
    """Count files, lines, and bytes for everything matching ``pattern``.

    Args:
        pattern: Glob pattern, expanded with ``recursive=True``.

    Returns:
        A dict with the keys ``'files'``, ``'lines'`` and ``'bytes'``.
    """
    stats = {'files': 0, 'lines': 0, 'bytes': 0}

    for filepath in glob.iglob(pattern, recursive=True):
        # A directory can match the pattern too (e.g. "pkg.py/"); open()
        # would fail on it, so only regular files are counted.
        if not os.path.isfile(filepath):
            continue
        stats['files'] += 1
        stats['bytes'] += os.path.getsize(filepath)
        # Explicit encoding keeps the result platform-independent, and
        # errors='replace' stops a stray undecodable byte from aborting
        # the whole scan; only line boundaries matter here.
        with open(filepath, encoding='utf-8', errors='replace') as f:
            stats['lines'] += sum(1 for _ in f)

    return stats
 
print(get_project_stats())

Multi-Pattern Matching

import glob
from itertools import chain
 
def multi_glob(*patterns):
    """Return one list with the recursive matches of every pattern, in order."""
    matches = []
    for pattern in patterns:
        # Results are appended pattern by pattern; duplicates are kept.
        matches.extend(glob.iglob(pattern, recursive=True))
    return matches
 
# Find all source files
sources = multi_glob('**/*.py', '**/*.js', '**/*.ts')

Exclude Patterns

import glob
import fnmatch
 
def glob_exclude(pattern, exclude_patterns):
    """Yield paths matching ``pattern`` that match none of ``exclude_patterns``.

    ``pattern`` is expanded with ``glob.iglob(..., recursive=True)``. Each
    exclusion is tested against the whole path with ``fnmatch``.

    ``fnmatch`` has no notion of ``**``: a leading ``**/`` always requires a
    separator, so ``'**/test_*.py'`` would not exclude a top-level
    ``test_x.py`` even though glob's ``**`` matches zero directories. To
    keep the two consistent, every exclusion starting with ``**/`` is also
    tried with that prefix removed.

    Args:
        pattern: Glob pattern selecting candidate paths.
        exclude_patterns: Iterable of fnmatch-style patterns to drop.

    Yields:
        Each matching path that no exclusion pattern matches.
    """
    import os  # local import: the surrounding snippet only imports glob/fnmatch

    prefixes = tuple('**' + sep for sep in (os.sep, os.altsep) if sep)

    # Expand the exclusions once up front; this also makes a one-shot
    # iterator safe to pass, since it is no longer re-iterated per path.
    excludes = []
    for ex in exclude_patterns:
        excludes.append(ex)
        while ex.startswith(prefixes):
            ex = ex[3:]
            excludes.append(ex)

    for path in glob.iglob(pattern, recursive=True):
        if not any(fnmatch.fnmatch(path, ex) for ex in excludes):
            yield path
 
# All Python files except tests
for f in glob_exclude('**/*.py', ['**/test_*.py', '**/*_test.py']):
    print(f)

Pattern Reference

Pattern | Matches
* | Any characters except /
** | Any characters including / — zero or more directories (recursive=True only)
? | Single character
[abc] | a, b, or c
[0-9] | Digit
[!abc] | Not a, b, or c

glob vs os.walk vs pathlib

# glob: pattern matching
glob.glob('**/*.py', recursive=True)
 
# os.walk: full directory traversal with control
for root, dirs, files in os.walk('.'):
    dirs[:] = [d for d in dirs if not d.startswith('.')]  # Skip hidden
    for f in files:
        if f.endswith('.py'):
            print(os.path.join(root, f))
 
# pathlib: object-oriented
Path('.').rglob('*.py')

Quick Reference

import glob
 
# Find files
glob.glob(pattern)              # Returns list
glob.iglob(pattern)             # Returns iterator
 
# Recursive (Python 3.5+)
glob.glob('**/*.py', recursive=True)
 
# Options
glob.glob('*', include_hidden=True)  # Python 3.11+
glob.glob('*', root_dir='/path')     # Python 3.10+; results are relative to root_dir
 
# pathlib alternative
from pathlib import Path
Path('.').glob('*.txt')
Path('.').rglob('*.txt')  # Recursive

glob is the quick way to find files by pattern. Use it when os.listdir() isn't enough but os.walk() is overkill.

React to this post: