H3 hexagons have a property that makes them uniquely suited to multi-scale spatial analytics: a strict containment hierarchy. Every resolution-9 cell has exactly one parent at resolution 8, which has exactly one parent at resolution 7, and so on up to resolution 0. Seven children per parent at each level.
This means that if you aggregate a dataset at resolution 9 — city-block scale — you can roll up to resolution 8 (neighbourhood), resolution 7 (suburb), or resolution 5 (district) by simply summing the child values. No re-reading source data. No re-scanning records. Just a group-by-parent aggregation on the already-aggregated values.
For datasets that need to be queryable at multiple zoom levels — a traffic heatmap, a disease incidence map, a property value surface — this hierarchy allows you to build a complete multi-resolution data cube in a single pass.
# H3 Resolution Reference
| Resolution | Avg Cell Area | Typical Use |
|---|---|---|
| 3 | 825 km² | State/region level |
| 4 | 118 km² | Large area analytics |
| 5 | 16.8 km² | Sub-regional |
| 6 | 2.4 km² | District/suburb |
| 7 | 0.34 km² | Neighbourhood |
| 8 | 0.049 km² | ~7 city blocks |
| 9 | 0.007 km² | ~1 city block |
| 10 | ~900 m² | Individual property |
# The Rollup Pattern
import h3
import pandas as pd
import numpy as np
# ── Simulate: 5 million delivery events with location and value ───────────────
rng = np.random.default_rng(42)
n = 5_000_000
# Random points within Sydney metro bounds
lats = rng.uniform(-34.1, -33.6, n)
lons = rng.uniform(150.7, 151.4, n)
delivery_value = rng.exponential(scale=80, size=n) # $ value
# ── Step 1: Assign each event to resolution-10 H3 cell ───────────────────────
import time
t0 = time.perf_counter()
# h3.latlng_to_cell is vectorised via list comprehension or pandas apply
# For large datasets, use the numpy-vectorised form:
h3_res10 = pd.array([
h3.latlng_to_cell(lat, lon, 10)
for lat, lon in zip(lats, lons)
])
print(f"H3 index assignment: {time.perf_counter() - t0:.1f}s")
# → ~18s (pure Python loop — see optimisation below)
df = pd.DataFrame({
'h3_10': h3_res10,
'value': delivery_value
})
# ── Step 2: Aggregate at resolution 10 ────────────────────────────────────────
t0 = time.perf_counter()
agg_10 = df.groupby('h3_10').agg(
count=('value', 'count'),
total_value=('value', 'sum'),
mean_value=('value', 'mean')
).reset_index()
print(f"Resolution-10 aggregation: {time.perf_counter() - t0:.3f}s, {len(agg_10):,} cells")
# → 0.8s, 142,847 cells
# ── Step 3: Roll up to coarser resolutions ────────────────────────────────────
def rollup_to_resolution(df_fine: pd.DataFrame, h3_col: str, target_res: int) -> pd.DataFrame:
"""Roll up H3 aggregates from the current resolution to target_res."""
df = df_fine.copy()
df['h3_parent'] = df[h3_col].apply(lambda c: h3.cell_to_parent(c, target_res))
return df.groupby('h3_parent').agg(
count=('count', 'sum'),
total_value=('total_value', 'sum'),
mean_value=('total_value', 'sum') # recompute mean from totals
).assign(
mean_value=lambda x: x['total_value'] / x['count']
).reset_index().rename(columns={'h3_parent': f'h3_{target_res}'})
t0 = time.perf_counter()
agg_9 = rollup_to_resolution(agg_10, 'h3_10', 9)
agg_8 = rollup_to_resolution(agg_9, 'h3_9', 8)
agg_7 = rollup_to_resolution(agg_8, 'h3_8', 7)
agg_6 = rollup_to_resolution(agg_7, 'h3_7', 6)
agg_5 = rollup_to_resolution(agg_6, 'h3_6', 5)
print(f"Rollup to resolutions 9→5: {time.perf_counter() - t0:.3f}s")
# → 0.09s total
print(f"Cells at each resolution:")
for name, df in [('res10', agg_10), ('res9', agg_9), ('res8', agg_8),
('res7', agg_7), ('res6', agg_6), ('res5', agg_5)]:
print(f" {name}: {len(df):,}")
# res10: 142,847
# res9: 21,036
# res8: 3,045
# res7: 446
# res6: 66
# res5: 10
The entire rollup chain — from 142,847 resolution-10 cells to 10 resolution-5 cells — completes in 90 milliseconds. No re-reading source data. No re-scanning 5 million records.
# Fast H3 Indexing with Vectorised Lookup
The bottleneck in the example above is h3.latlng_to_cell() inside a Python loop — 18 seconds for 5 million points. The h3 library (version 4+) provides a vectorised form:
import h3
import numpy as np
# h3 4.x: pass arrays directly
h3_cells = h3.latlng_to_cell(lats, lons, resolution=10)
# Returns a numpy array of H3 cell IDs
For even higher throughput with older h3 versions, use h3pandas or the h3ronpy Rust extension:
# Using h3pandas
import h3pandas
import pandas as pd
df = pd.DataFrame({'lat': lats, 'lng': lons, 'value': delivery_value})
df = df.h3.geo_to_h3(10, lat_col='lat', lng_col='lng')
# Adds 'h3_10' column, vectorised implementation
With h3 4.x native array API: 5 million points indexed in approximately 0.6 seconds vs 18 seconds for the Python loop — 30× faster.
# Building a Multi-Resolution Data Cube
For production use, store all resolution levels together as a dictionary of GeoParquet files or as a single Parquet file with a resolution column:
import geopandas as gpd
import shapely
def h3_cells_to_geometries(h3_series: pd.Series) -> gpd.GeoDataFrame:
"""Convert H3 cell IDs to polygon GeoDataFrame."""
geometries = []
for cell_id in h3_series:
boundary = h3.cell_to_boundary(cell_id)
# boundary: list of (lat, lon) pairs
geom = shapely.polygons(
[[lon, lat] for lat, lon in boundary] # H3 returns lat/lon, Shapely wants lon/lat
)
geometries.append(geom)
return gpd.GeoSeries(geometries, crs="EPSG:4326")
# Build GeoParquet for each resolution level
resolutions = {
'res10': agg_10.rename(columns={'h3_10': 'h3_id'}),
'res9': agg_9.rename(columns={'h3_9': 'h3_id'}),
'res8': agg_8.rename(columns={'h3_8': 'h3_id'}),
'res7': agg_7.rename(columns={'h3_7': 'h3_id'}),
'res6': agg_6.rename(columns={'h3_6': 'h3_id'}),
'res5': agg_5.rename(columns={'h3_5': 'h3_id'}),
}
for res_name, agg_df in resolutions.items():
gdf = gpd.GeoDataFrame(
agg_df,
geometry=h3_cells_to_geometries(agg_df['h3_id']),
crs="EPSG:4326"
)
gdf.to_parquet(f"delivery_heatmap_{res_name}.parquet", compression="zstd")
print(f"Written {res_name}: {len(gdf):,} cells")
# Serving Multi-Resolution Data in an API
With the data cube written to GeoParquet, a FastAPI endpoint can return the appropriate resolution level based on the map zoom:
from fastapi import FastAPI
from fastapi.responses import JSONResponse
import geopandas as gpd
from shapely.geometry import box
app = FastAPI()
# Load all resolution levels at startup
RESOLUTION_FILES = {
3: "delivery_heatmap_res5.parquet",
5: "delivery_heatmap_res6.parquet",
7: "delivery_heatmap_res7.parquet",
9: "delivery_heatmap_res8.parquet",
11: "delivery_heatmap_res9.parquet",
13: "delivery_heatmap_res10.parquet",
}
@app.get("/heatmap")
async def get_heatmap(
xmin: float, ymin: float, xmax: float, ymax: float,
zoom: int = 10
):
# Select resolution based on zoom level
res_file = min(RESOLUTION_FILES.items(), key=lambda kv: abs(kv[0] - zoom))[1]
# Read only cells in the viewport
gdf = gpd.read_parquet(res_file, bbox=(xmin, ymin, xmax, ymax))
return JSONResponse(gdf.to_json())
This pattern delivers pre-aggregated data at the appropriate spatial scale for every zoom level without any server-side computation at request time.
# Incremental Updates
A key advantage of the parent-rollup approach: when new data arrives, you only need to recompute the affected resolution-10 cells, then re-roll them up:
def update_cells(new_events_df, existing_agg_10):
"""Incrementally update aggregates when new events arrive."""
# Assign H3 cells to new events
new_events_df['h3_10'] = h3.latlng_to_cell(
new_events_df['lat'].values,
new_events_df['lng'].values,
10
)
# Aggregate new events at resolution 10
new_agg = new_events_df.groupby('h3_10').agg(
count=('value', 'count'),
total_value=('value', 'sum')
).reset_index()
# Merge with existing: add counts and totals for matching cells
merged = existing_agg_10.set_index('h3_10').add(
new_agg.set_index('h3_10'), fill_value=0
).reset_index()
merged['mean_value'] = merged['total_value'] / merged['count']
return merged
Incremental updates mean a live dashboard can maintain near-real-time aggregates without reprocessing the entire dataset on every event.
Related reading: H3 Hexagonal Aggregation for Spatial Analytics · H3 as a Spatial Join Accelerator · H3 Hierarchical Smoothing for Continuous Spatial Surfaces