A common trap in raster processing: rasterio.open() looks exactly like opening any other file, so it feels natural to call src.read() next. On a 1GB GeoTIFF that works fine. On a 50GB national DEM, it loads 50GB into memory and your process is killed.

Rasterio provides a windowed read API designed exactly for this situation. Instead of reading the entire raster at once, you read and process it in tile-sized chunks — blocks that fit comfortably in RAM — iterating across the full extent. The raster on disk may be arbitrarily large; your memory usage stays bounded.

# Block-Aligned Windows

GeoTIFFs are internally tiled or stripped — data is stored in fixed-size blocks. Reading within these blocks is maximally efficient: Rasterio decodes exactly the compressed data you need. Reading a window that crosses block boundaries requires decompressing adjacent blocks, which is less efficient.

import rasterio
from rasterio.windows import Window

with rasterio.open("national_dem_1m.tif") as src:
    # Get the native block size
    block_shapes = src.block_shapes
    print(f"Block shapes: {block_shapes}")
    # e.g. [(512, 512)] for a tiled GeoTIFF

    # Iterate over block-aligned windows
    for ji, window in src.block_windows(1):   # band 1
        # ji: (row_offset, col_offset) in block coordinates
        # window: Window(col_off, row_off, width, height)
        data = src.read(1, window=window)
        # data: (height, width) NumPy array
        # process data here...

        if ji == (0, 0):
            print(f"First block shape: {data.shape}")
            print(f"Window: {window}")

block_windows() iterates over every block in the raster in scan-line order. Each window aligns exactly with the GeoTIFF’s internal tile structure — the most I/O-efficient iteration pattern.

# A Complete Processing Pipeline

Here is a full pattern for a common task: compute slope from a large DEM and write the output to a new GeoTIFF, processing in blocks:

import rasterio
from rasterio.windows import Window
import numpy as np
from pathlib import Path
import time

def compute_slope_block(elevation: np.ndarray, cell_size: float) -> np.ndarray:
    """
    Simple finite-difference slope (in degrees) for a block.
    elevation: (H, W) float32 array (padded to allow edge gradients)
    """
    dy, dx = np.gradient(elevation, cell_size)
    slope_rad = np.arctan(np.sqrt(dx**2 + dy**2))
    return np.degrees(slope_rad).astype(np.float32)

def process_dem_to_slope(input_path: str, output_path: str, overlap: int = 1):
    """
    Compute slope from a large DEM using block-aligned windowed reads.
    overlap: number of pixels to read beyond each block edge (for edge continuity).
    """
    with rasterio.open(input_path) as src:
        profile = src.profile.copy()
        profile.update(dtype="float32", nodata=-9999, compress="zstd")

        cell_size_x = abs(src.transform.a)  # pixel width in CRS units
        cell_size_y = abs(src.transform.e)  # pixel height in CRS units
        cell_size = (cell_size_x + cell_size_y) / 2  # average for non-square pixels

        with rasterio.open(output_path, "w", **profile) as dst:
            windows = list(src.block_windows(1))
            t0 = time.perf_counter()

            for i, (_, window) in enumerate(windows):
                # Expand window by 'overlap' pixels on all sides for edge continuity
                padded_window = Window(
                    max(0, window.col_off - overlap),
                    max(0, window.row_off - overlap),
                    min(src.width  - max(0, window.col_off - overlap), window.width  + 2*overlap),
                    min(src.height - max(0, window.row_off - overlap), window.height + 2*overlap)
                )

                # Read padded block
                data = src.read(1, window=padded_window).astype(np.float32)

                # Replace nodata with NaN for gradient computation
                nodata = src.nodata
                if nodata is not None:
                    data[data == nodata] = np.nan

                # Compute slope on padded block
                slope_padded = compute_slope_block(data, cell_size)

                # Crop back to original window (remove padding)
                pad_top  = window.row_off - padded_window.row_off
                pad_left = window.col_off - padded_window.col_off
                slope = slope_padded[
                    pad_top: pad_top + window.height,
                    pad_left: pad_left + window.width
                ]

                # Replace NaN with output nodata
                slope[np.isnan(slope)] = -9999

                # Write block to output
                dst.write(slope, 1, window=window)

                if i % 100 == 0:
                    elapsed = time.perf_counter() - t0
                    pct = (i + 1) / len(windows) * 100
                    print(f"  {pct:.1f}% ({i+1}/{len(windows)} blocks, {elapsed:.0f}s elapsed)")

        total = time.perf_counter() - t0
        print(f"Completed in {total:.1f}s")

# Process a 50GB DEM
process_dem_to_slope("national_dem_1m.tif", "national_slope_1m.tif")
# Peak RAM usage: ~200MB (two 512×512 blocks in memory at once)
# Total time: ~8 minutes for a 50GB, 1m resolution DEM

# Reading a Specific Geographic Area

When you know the geographic area of interest, convert from CRS coordinates to pixel windows:

import rasterio
from rasterio.windows import from_bounds
from shapely.geometry import box

with rasterio.open("national_dem_1m.tif") as src:
    # Define region of interest in the raster's CRS
    # (must be in the same CRS as the raster, not WGS84 unless the raster is WGS84)
    roi_bounds = (386_000, 6_920_000, 412_000, 6_946_000)  # easting/northing metres

    # Convert geographic bounds to pixel window
    window = from_bounds(*roi_bounds, transform=src.transform)
    print(f"Window: col {window.col_off:.0f}-{window.col_off+window.width:.0f}, "
          f"row {window.row_off:.0f}-{window.row_off+window.height:.0f}")

    # Read only the pixels in this window
    data = src.read(1, window=window)
    print(f"Loaded array shape: {data.shape}")
    # e.g. (26000, 26000) for a 26×26 km area at 1m resolution
    # Only reads ~2.7GB instead of the full 50GB

# Parallel Block Processing with concurrent.futures

For CPU-intensive block operations, parallelise across blocks:

import rasterio
from rasterio.windows import Window
import numpy as np
from concurrent.futures import ProcessPoolExecutor, as_completed

def process_single_block(args):
    """Worker function — runs in a separate process."""
    input_path, window_tuple, output_path = args
    window = Window(*window_tuple)

    with rasterio.open(input_path) as src:
        data = src.read(1, window=window).astype(np.float32)

    # ... process data ...
    result = data * 2   # placeholder

    return window_tuple, result

def parallel_raster_process(input_path, output_path, max_workers=8):
    with rasterio.open(input_path) as src:
        profile = src.profile.copy()
        windows = [(w.col_off, w.row_off, w.width, w.height)
                   for _, w in src.block_windows(1)]

    with rasterio.open(output_path, "w", **profile) as dst:
        with ProcessPoolExecutor(max_workers=max_workers) as exe:
            futures = {
                exe.submit(process_single_block, (input_path, w, output_path)): w
                for w in windows
            }
            for future in as_completed(futures):
                window_tuple, result = future.result()
                window = Window(*window_tuple)
                dst.write(result, 1, window=window)

Note: rasterio file handles cannot be shared across processes — each worker opens its own handle. This is the correct pattern.

# Memory-Bounded Iteration

For variable-size operations where the block size is determined by available RAM rather than the file’s internal tile size:

import rasterio
import numpy as np

def iter_windows(src, target_mb: float = 256.0):
    """
    Yield row-based windows sized to use approximately target_mb of RAM.
    Useful when the input file is not tiled or uses very small tiles.
    """
    bytes_per_pixel = np.dtype(src.dtypes[0]).itemsize * src.count
    pixels_per_chunk = int(target_mb * 1024**2 / bytes_per_pixel)
    rows_per_chunk = max(1, pixels_per_chunk // src.width)

    for row_start in range(0, src.height, rows_per_chunk):
        actual_height = min(rows_per_chunk, src.height - row_start)
        yield Window(0, row_start, src.width, actual_height)

with rasterio.open("large_raster.tif") as src:
    for window in iter_windows(src, target_mb=512):
        data = src.read(window=window)  # all bands
        # data: (bands, height, width), height ≤ rows_per_chunk
        # process...

# Performance Summary

Operation Input Peak RAM Time
src.read() (full) 50GB DEM 50GB fails or OOM
Block windows, sequential 50GB DEM ~200MB ~8 min
Block windows, 8 workers 50GB DEM ~1.6GB ~2 min
Bbox window (26km²) 50GB DEM ~2.7GB ~12s

Windowed reading is not faster than loading the entire raster when you genuinely need all the data — the disk read time is the same. Its advantage is memory efficiency: a pipeline that otherwise requires a 64GB RAM machine runs comfortably on 8GB.


Related reading: GDAL VRT: Virtual Raster Mosaics Without Copying Data · XArray GroupBy for Temporal Raster Aggregation · Numba-Accelerated Spatial Kernels