H3 Hierarchical Smoothing: Continuous Spatial Surfaces from Sparse Data

A hexagonal binning map of crime reports in a city looks authoritative — until you look closely and notice that cells with zero reports sit next to cells with 15 reports simply because the data is sparse. A block with no reported incidents is not necessarily safe; it may just be a block where incidents go unreported, or where the reporting threshold is not met.

The naive heatmap of sparse data encodes noise as signal. H3’s neighbourhood structure offers a clean, computationally simple smoothing approach: blur the signal by averaging across K-ring neighbours, and fill gaps using parent-resolution fallback values. The result is a continuous surface that respects spatial autocorrelation without requiring interpolation infrastructure.

# The Problem: Sparse H3 Data

import h3
import pandas as pd
import numpy as np
import shapely

rng = np.random.default_rng(7)

# ── Simulate sparse incident data: 10k reports in a city ─────────────────────
# Sparse: only ~3% of resolution-9 cells have any reports
n_incidents = 10_000
lats = rng.uniform(-34.0, -33.7, n_incidents)
lons = rng.uniform(150.8, 151.3, n_incidents)
severity = rng.integers(1, 5, n_incidents)   # 1-4

incidents = pd.DataFrame({'lat': lats, 'lon': lons, 'severity': severity})

# Index to resolution 9
incidents['h3_9'] = [h3.latlng_to_cell(lat, lon, 9) for lat, lon in zip(lats, lons)]

# Aggregate
raw_agg = incidents.groupby('h3_9').agg(
    count=('severity', 'count'),
    mean_severity=('severity', 'mean')
).reset_index()

# How many cells have data?
total_cells = len(h3.get_res0_cells())   # coarse estimate only
unique_cells = raw_agg['h3_9'].nunique()
print(f"Cells with data: {unique_cells:,}")
print(f"Max count in any cell: {raw_agg['count'].max()}")
print(f"Cells with count=1: {(raw_agg['count'] == 1).sum():,} ({(raw_agg['count'] == 1).mean():.0%})")
# → Many singletons — sparse, noisy

# Method 1: K-Ring Smoothing

For each cell with data, spread its value across its K-ring neighbourhood. Each neighbour receives a contribution weighted by distance (number of grid steps):

import h3
import pandas as pd
import numpy as np
from collections import defaultdict

def h3_kring_smooth(
    cell_values: pd.Series,   # index: h3 cell ID, values: metric to smooth
    k: int = 2,
    decay: float = 0.5        # weight multiplier per ring step (geometric decay)
) -> pd.Series:
    """
    Apply K-ring smoothing to H3 cell values.
    Each cell's smoothed value is a weighted average including K-ring neighbours.
    Cells with no data contribute 0 and are excluded from the denominator.
    """
    value_dict = cell_values.to_dict()

    smoothed_value = defaultdict(float)
    smoothed_weight = defaultdict(float)

    for cell, value in value_dict.items():
        # Get all cells within k steps
        for ring_step in range(0, k + 1):
            ring = h3.grid_ring(cell, ring_step)
            weight = decay ** ring_step
            for neighbour in ring:
                smoothed_value[neighbour]  += value * weight
                smoothed_weight[neighbour] += weight

    # Compute weighted average; cells with no contributors stay NaN
    result = {}
    for cell in smoothed_value:
        result[cell] = smoothed_value[cell] / smoothed_weight[cell]

    return pd.Series(result)

# Apply K-ring smoothing
cell_counts = raw_agg.set_index('h3_9')['count']
smoothed = h3_kring_smooth(cell_counts, k=2, decay=0.6)

# Smoothed surface includes many more cells than the raw data
print(f"Cells in raw data: {len(cell_counts):,}")
print(f"Cells in smoothed surface: {len(smoothed):,}")
# Raw: 4,821 cells
# Smoothed: 27,340 cells (K=2 spreads to neighbours up to 2 steps away)

The smoothed surface fills in neighbouring cells with estimated values, creating a continuous gradient rather than isolated spikes.

# Method 2: Hierarchical Fallback

When a resolution-9 cell has no data, use the aggregated value from its parent resolution-8 cell (or grandparent at resolution-7) as a fallback estimate:

import h3
import pandas as pd

def add_hierarchical_fallback(
    fine_agg: pd.DataFrame,   # columns: h3_id, count, mean_severity
    h3_col: str,
    coarse_resolutions: list = [8, 7, 6]
) -> pd.DataFrame:
    """
    Add fallback values from coarser resolutions for cells with no fine-res data.
    The resulting DataFrame includes both observed cells and imputed cells.
    """
    result = fine_agg.copy()
    result['source_resolution'] = 9   # original resolution

    for coarse_res in coarse_resolutions:
        # Compute parent cells for all cells with data
        result['parent'] = result[h3_col].apply(
            lambda c: h3.cell_to_parent(c, coarse_res)
        )

        # Aggregate to parent resolution
        parent_agg = result.groupby('parent').agg(
            count=('count', 'sum'),
            mean_severity=('mean_severity', 'mean')
        ).reset_index().rename(columns={'parent': h3_col})

        # Find cells covered by parent cells
        # (expanding: for each parent, list all child cells)
        covered_cells = set(result[h3_col])
        imputed_rows = []

        for _, row in parent_agg.iterrows():
            children = h3.cell_to_children(row[h3_col], 9)
            for child in children:
                if child not in covered_cells:
                    imputed_rows.append({
                        h3_col: child,
                        'count': row['count'] / len(children),   # distributed count
                        'mean_severity': row['mean_severity'],
                        'source_resolution': coarse_res
                    })
                    covered_cells.add(child)

        if imputed_rows:
            result = pd.concat(
                [result.drop(columns=['parent']),
                 pd.DataFrame(imputed_rows)],
                ignore_index=True
            )
            print(f"Added {len(imputed_rows):,} cells via resolution-{coarse_res} fallback")

    return result

# Apply hierarchical fallback
complete_surface = add_hierarchical_fallback(raw_agg, 'h3_9')
print(f"Total cells in complete surface: {len(complete_surface):,}")
print(complete_surface['source_resolution'].value_counts())
# source_resolution
# 9    4,821    ← directly observed
# 8    12,340   ← imputed from resolution-8 parent
# 7    8,210    ← imputed from resolution-7 grandparent

# Method 3: Kernel Density Estimation via H3

A more statistically rigorous approach: treat each incident as contributing a spatial kernel, and sum kernels within each cell:

import h3
import pandas as pd
import numpy as np
from collections import defaultdict

def h3_kde(
    lats: np.ndarray,
    lons: np.ndarray,
    values: np.ndarray,
    resolution: int = 9,
    bandwidth_km: float = 1.0
) -> pd.Series:
    """
    H3-based kernel density estimation.
    Each point contributes to all cells within bandwidth_km.
    """
    # Approximate km to H3 grid steps at this resolution
    # Average cell diameter at res 9 ≈ 174m, so 1km ≈ 6 steps
    cell_diameters_km = {
        6: 3.23, 7: 1.22, 8: 0.46, 9: 0.174, 10: 0.066
    }
    cell_diam = cell_diameters_km.get(resolution, 0.5)
    k_steps = max(1, int(bandwidth_km / cell_diam))

    density = defaultdict(float)

    for lat, lon, val in zip(lats, lons, values):
        origin_cell = h3.latlng_to_cell(lat, lon, resolution)

        # Gaussian-like kernel: weight decays with ring distance
        for k in range(0, k_steps + 1):
            ring = h3.grid_ring(origin_cell, k)
            # Gaussian weight: exp(-0.5 * (k/k_steps)^2)
            weight = np.exp(-0.5 * (k / max(1, k_steps))**2) * val
            for cell in ring:
                density[cell] += weight

    return pd.Series(density)

# Apply KDE with 1km bandwidth
kde_surface = h3_kde(
    incidents['lat'].values,
    incidents['lon'].values,
    incidents['severity'].values,
    resolution=9,
    bandwidth_km=1.0
)

print(f"KDE surface cells: {len(kde_surface):,}")

# Visualising the Smoothed Surface

import geopandas as gpd
import shapely

def smoothed_to_geodataframe(smoothed_series: pd.Series, crs: str = "EPSG:4326") -> gpd.GeoDataFrame:
    """Convert a smoothed H3 series to a GeoDataFrame for visualisation."""
    cells = smoothed_series.index.tolist()
    values = smoothed_series.values

    geometries = []
    for cell in cells:
        boundary = h3.cell_to_boundary(cell)
        coords = [[lon, lat] for lat, lon in boundary]
        coords.append(coords[0])   # close ring
        geometries.append(shapely.polygons(coords))

    return gpd.GeoDataFrame(
        {'h3_id': cells, 'value': values},
        geometry=geometries,
        crs=crs
    )

# Create GeoDataFrame for Folium/Leaflet/Kepler.gl
gdf = smoothed_to_geodataframe(smoothed)

# Export to GeoJSON for web mapping
gdf.to_file("smoothed_incidents.geojson", driver="GeoJSON")

# Or to GeoParquet for downstream analytics
gdf.to_parquet("smoothed_incidents.parquet")

# Choosing Smoothing Parameters

The right parameters depend on your use case:

Use case	Resolution	K-ring	Bandwidth
City-level crime heatmap	8–9	1–2	500m–1km
Disease incidence mapping	7–8	2–3	2–5km
Retail catchment density	9–10	1–2	200–500m
National weather station interpolation	4–5	3–5	50–100km

The general rule: K-ring ≈ 1–2 for preserving local structure, K-ring ≈ 3–5 for filling large gaps. Higher decay values (0.7–0.9) preserve more of the original signal; lower values (0.3–0.5) create smoother, more diffuse surfaces.

H3-based smoothing is not a substitute for proper spatial statistics (kriging, GAMs) where rigorous uncertainty quantification is needed. But for exploratory analysis, dashboard visualisation, and operational heatmaps, it offers a practical, fast, and interpretable alternative that runs in pure Python without additional statistical dependencies.