A common trap in raster processing: rasterio.open() looks exactly like opening any other file, so it feels natural to call src.read() next. On a 1GB GeoTIFF that works fine. On a 50GB national DEM, it loads 50GB into memory and your process is killed.
Rasterio provides a windowed read API designed exactly for this situation. Instead of reading the entire raster at once, you read and process it in tile-sized chunks — blocks that fit comfortably in RAM — iterating across the full extent. The raster on disk may be arbitrarily large; your memory usage stays bounded.
# Block-Aligned Windows
GeoTIFFs are internally tiled or stripped — data is stored in fixed-size blocks. Reading within these blocks is maximally efficient: Rasterio decodes exactly the compressed data you need. Reading a window that crosses block boundaries requires decompressing adjacent blocks, which is less efficient.
import rasterio
from rasterio.windows import Window
with rasterio.open("national_dem_1m.tif") as src:
# Get the native block size
block_shapes = src.block_shapes
print(f"Block shapes: {block_shapes}")
# e.g. [(512, 512)] for a tiled GeoTIFF
# Iterate over block-aligned windows
for ji, window in src.block_windows(1): # band 1
# ji: (row_offset, col_offset) in block coordinates
# window: Window(col_off, row_off, width, height)
data = src.read(1, window=window)
# data: (height, width) NumPy array
# process data here...
if ji == (0, 0):
print(f"First block shape: {data.shape}")
print(f"Window: {window}")
block_windows() iterates over every block in the raster in scan-line order. Each window aligns exactly with the GeoTIFF’s internal tile structure — the most I/O-efficient iteration pattern.
# A Complete Processing Pipeline
Here is a full pattern for a common task: compute slope from a large DEM and write the output to a new GeoTIFF, processing in blocks:
import rasterio
from rasterio.windows import Window
import numpy as np
from pathlib import Path
import time
def compute_slope_block(elevation: np.ndarray, cell_size: float) -> np.ndarray:
"""
Simple finite-difference slope (in degrees) for a block.
elevation: (H, W) float32 array (padded to allow edge gradients)
"""
dy, dx = np.gradient(elevation, cell_size)
slope_rad = np.arctan(np.sqrt(dx**2 + dy**2))
return np.degrees(slope_rad).astype(np.float32)
def process_dem_to_slope(input_path: str, output_path: str, overlap: int = 1):
"""
Compute slope from a large DEM using block-aligned windowed reads.
overlap: number of pixels to read beyond each block edge (for edge continuity).
"""
with rasterio.open(input_path) as src:
profile = src.profile.copy()
profile.update(dtype="float32", nodata=-9999, compress="zstd")
cell_size_x = abs(src.transform.a) # pixel width in CRS units
cell_size_y = abs(src.transform.e) # pixel height in CRS units
cell_size = (cell_size_x + cell_size_y) / 2 # average for non-square pixels
with rasterio.open(output_path, "w", **profile) as dst:
windows = list(src.block_windows(1))
t0 = time.perf_counter()
for i, (_, window) in enumerate(windows):
# Expand window by 'overlap' pixels on all sides for edge continuity
padded_window = Window(
max(0, window.col_off - overlap),
max(0, window.row_off - overlap),
min(src.width - max(0, window.col_off - overlap), window.width + 2*overlap),
min(src.height - max(0, window.row_off - overlap), window.height + 2*overlap)
)
# Read padded block
data = src.read(1, window=padded_window).astype(np.float32)
# Replace nodata with NaN for gradient computation
nodata = src.nodata
if nodata is not None:
data[data == nodata] = np.nan
# Compute slope on padded block
slope_padded = compute_slope_block(data, cell_size)
# Crop back to original window (remove padding)
pad_top = window.row_off - padded_window.row_off
pad_left = window.col_off - padded_window.col_off
slope = slope_padded[
pad_top: pad_top + window.height,
pad_left: pad_left + window.width
]
# Replace NaN with output nodata
slope[np.isnan(slope)] = -9999
# Write block to output
dst.write(slope, 1, window=window)
if i % 100 == 0:
elapsed = time.perf_counter() - t0
pct = (i + 1) / len(windows) * 100
print(f" {pct:.1f}% ({i+1}/{len(windows)} blocks, {elapsed:.0f}s elapsed)")
total = time.perf_counter() - t0
print(f"Completed in {total:.1f}s")
# Process a 50GB DEM
process_dem_to_slope("national_dem_1m.tif", "national_slope_1m.tif")
# Peak RAM usage: ~200MB (two 512×512 blocks in memory at once)
# Total time: ~8 minutes for a 50GB, 1m resolution DEM
# Reading a Specific Geographic Area
When you know the geographic area of interest, convert from CRS coordinates to pixel windows:
import rasterio
from rasterio.windows import from_bounds
from shapely.geometry import box
with rasterio.open("national_dem_1m.tif") as src:
# Define region of interest in the raster's CRS
# (must be in the same CRS as the raster, not WGS84 unless the raster is WGS84)
roi_bounds = (386_000, 6_920_000, 412_000, 6_946_000) # easting/northing metres
# Convert geographic bounds to pixel window
window = from_bounds(*roi_bounds, transform=src.transform)
print(f"Window: col {window.col_off:.0f}-{window.col_off+window.width:.0f}, "
f"row {window.row_off:.0f}-{window.row_off+window.height:.0f}")
# Read only the pixels in this window
data = src.read(1, window=window)
print(f"Loaded array shape: {data.shape}")
# e.g. (26000, 26000) for a 26×26 km area at 1m resolution
# Only reads ~2.7GB instead of the full 50GB
# Parallel Block Processing with concurrent.futures
For CPU-intensive block operations, parallelise across blocks:
import rasterio
from rasterio.windows import Window
import numpy as np
from concurrent.futures import ProcessPoolExecutor, as_completed
def process_single_block(args):
"""Worker function — runs in a separate process."""
input_path, window_tuple, output_path = args
window = Window(*window_tuple)
with rasterio.open(input_path) as src:
data = src.read(1, window=window).astype(np.float32)
# ... process data ...
result = data * 2 # placeholder
return window_tuple, result
def parallel_raster_process(input_path, output_path, max_workers=8):
with rasterio.open(input_path) as src:
profile = src.profile.copy()
windows = [(w.col_off, w.row_off, w.width, w.height)
for _, w in src.block_windows(1)]
with rasterio.open(output_path, "w", **profile) as dst:
with ProcessPoolExecutor(max_workers=max_workers) as exe:
futures = {
exe.submit(process_single_block, (input_path, w, output_path)): w
for w in windows
}
for future in as_completed(futures):
window_tuple, result = future.result()
window = Window(*window_tuple)
dst.write(result, 1, window=window)
Note: rasterio file handles cannot be shared across processes — each worker opens its own handle. This is the correct pattern.
# Memory-Bounded Iteration
For variable-size operations where the block size is determined by available RAM rather than the file’s internal tile size:
import rasterio
import numpy as np
def iter_windows(src, target_mb: float = 256.0):
"""
Yield row-based windows sized to use approximately target_mb of RAM.
Useful when the input file is not tiled or uses very small tiles.
"""
bytes_per_pixel = np.dtype(src.dtypes[0]).itemsize * src.count
pixels_per_chunk = int(target_mb * 1024**2 / bytes_per_pixel)
rows_per_chunk = max(1, pixels_per_chunk // src.width)
for row_start in range(0, src.height, rows_per_chunk):
actual_height = min(rows_per_chunk, src.height - row_start)
yield Window(0, row_start, src.width, actual_height)
with rasterio.open("large_raster.tif") as src:
for window in iter_windows(src, target_mb=512):
data = src.read(window=window) # all bands
# data: (bands, height, width), height ≤ rows_per_chunk
# process...
# Performance Summary
| Operation | Input | Peak RAM | Time |
|---|---|---|---|
src.read() (full) |
50GB DEM | 50GB | fails or OOM |
| Block windows, sequential | 50GB DEM | ~200MB | ~8 min |
| Block windows, 8 workers | 50GB DEM | ~1.6GB | ~2 min |
| Bbox window (26km²) | 50GB DEM | ~2.7GB | ~12s |
Windowed reading is not faster than loading the entire raster when you genuinely need all the data — the disk read time is the same. Its advantage is memory efficiency: a pipeline that otherwise requires a 64GB RAM machine runs comfortably on 8GB.
Related reading: GDAL VRT: Virtual Raster Mosaics Without Copying Data · XArray GroupBy for Temporal Raster Aggregation · Numba-Accelerated Spatial Kernels