diff --git a/Dockerfile b/Dockerfile index d5a229a..e9dfa4b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,33 +1,30 @@ -# ─── Build base (Alpine — small, used for npm install + tsc) ────────────────── +# ─── Web app Dockerfile ─────────────────────────────────────────────────────── +# Alpine — small final image; no native addons needed for web. + FROM node:22-alpine AS base RUN apk add --no-cache libc6-compat WORKDIR /app -# ─── Dependencies (include devDeps — needed for tsc, next build, etc.) ──────── +# ─── Dependencies ───────────────────────────────────────────────────────────── FROM base AS deps COPY package.json package-lock.json* tsconfig.base.json ./ COPY apps/web/package.json ./apps/web/ COPY worker/package.json ./worker/ COPY shared/package.json ./shared/ -# NODE_ENV must NOT be production here — devDependencies (tsc, tsx, etc.) are required to build +# NODE_ENV must NOT be production here — devDependencies (tsc, next, etc.) are required RUN npm install --workspace=apps/web --workspace=worker --workspace=shared -# ─── Shared build ──────────────────────────────────────────────────────────── +# ─── Shared build ───────────────────────────────────────────────────────────── FROM deps AS shared-build COPY shared/ ./shared/ RUN npm run build --workspace=shared -# ─── Next.js build ────────────────────────────────────────────────────────── +# ─── Next.js build ──────────────────────────────────────────────────────────── FROM shared-build AS web-build COPY apps/web/ ./apps/web/ RUN npm run build --workspace=apps/web -# ─── Worker build ────────────────────────────────────────────────────────── -FROM shared-build AS worker-build -COPY worker/ ./worker/ -RUN npm run build --workspace=worker - -# ─── Web runtime (Alpine) ───────────────────────────────────────────────────── +# ─── Web runtime ────────────────────────────────────────────────────────────── FROM node:22-alpine AS web RUN apk add --no-cache libc6-compat RUN addgroup --system --gid 1001 nodejs && 
adduser --system --uid 1001 nextjs @@ -42,59 +39,4 @@ COPY apps/web/package.json ./apps/web/ USER nextjs WORKDIR /app/apps/web EXPOSE 3000 -# Use absolute path — WORKDIR is /app/apps/web but node_modules are at /app/node_modules CMD ["/app/node_modules/.bin/next", "start"] - -# ─── Valhalla worker (gis-ops Valhalla image + Node.js 22) ─────────────────── -# This container runs both a BullMQ worker (build-valhalla jobs) AND the -# valhalla_service HTTP server. It has valhalla_build_tiles and friends -# pre-installed from the base image. Node.js is added for the BullMQ consumer. -FROM ghcr.io/gis-ops/docker-valhalla/valhalla:latest AS valhalla-worker -USER root -RUN apt-get update \ - && apt-get install -y --no-install-recommends ca-certificates curl gnupg osmium-tool \ - && mkdir -p /etc/apt/keyrings \ - && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \ - | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ - && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" \ - > /etc/apt/sources.list.d/nodesource.list \ - && apt-get update \ - && apt-get install -y --no-install-recommends nodejs \ - && rm -rf /var/lib/apt/lists/* -WORKDIR /app -ENV NODE_ENV=production -# BullMQ and postgres are pure JS — no native add-ons — so Alpine-built -# node_modules from the deps stage work on this Debian/glibc base. -COPY --from=deps /app/node_modules ./node_modules -COPY --from=worker-build /app/worker/dist ./worker/dist -COPY --from=shared-build /app/shared/dist ./shared/dist -COPY shared/package.json ./shared/ -COPY worker/package.json ./worker/ -# /data/osm is shared with the pipeline worker (osm_data volume). -# Make it world-writable so the worker (UID 1001) can write PBF files here -# regardless of which container initialises the Docker volume first. -# valhalla mounts this volume :ro so it can never write here accidentally. 
-RUN mkdir -p /data/osm /data/valhalla && chmod 1777 /data/osm -ENTRYPOINT ["/bin/node"] -CMD ["worker/dist/valhalla-main.js"] - -# ─── Worker runtime (Debian slim — osmium-tool + osm2pgsql are in apt) ──────── -FROM node:22-slim AS worker -RUN apt-get update && apt-get install -y --no-install-recommends \ - osmium-tool \ - osm2pgsql \ - && rm -rf /var/lib/apt/lists/* -RUN groupadd --system --gid 1001 nodejs && useradd --system --uid 1001 --gid nodejs workeruser -WORKDIR /app -ENV NODE_ENV=production -COPY --from=deps /app/node_modules ./node_modules -COPY --from=worker-build /app/worker/dist ./worker/dist -COPY --from=shared-build /app/shared/dist ./shared/dist -COPY shared/package.json ./shared/ -COPY infra/ ./infra/ -COPY worker/package.json ./worker/ -# Create data directories owned by workeruser so Docker named volumes -# are initialized with the correct permissions on first run. -RUN mkdir -p /data/osm /data/valhalla && chown -R workeruser:nodejs /data -USER workeruser -CMD ["node", "worker/dist/index.js"] diff --git a/Dockerfile.valhalla-worker b/Dockerfile.valhalla-worker new file mode 100644 index 0000000..ce4cc7c --- /dev/null +++ b/Dockerfile.valhalla-worker @@ -0,0 +1,47 @@ +# ─── Valhalla tile-builder Dockerfile ───────────────────────────────────────── +# Builds per-city Valhalla road/transit tile directories. +# The gis-ops Valhalla image provides valhalla_build_tiles and friends. +# Node.js is added for the BullMQ job consumer. +# @valhallajs/valhallajs is NOT used here — tile building uses the CLI tools. 
+ +FROM node:22-slim AS build +RUN apt-get update && apt-get install -y --no-install-recommends python3 make g++ \ + && rm -rf /var/lib/apt/lists/* +WORKDIR /app + +COPY package.json package-lock.json* tsconfig.base.json ./ +COPY apps/web/package.json ./apps/web/ +COPY worker/package.json ./worker/ +COPY shared/package.json ./shared/ +# NODE_ENV must NOT be production here — devDependencies needed to build +RUN npm install --workspace=apps/web --workspace=worker --workspace=shared + +COPY shared/ ./shared/ +RUN npm run build --workspace=shared +COPY worker/ ./worker/ +RUN npm run build --workspace=worker + +# ─── Runtime (Valhalla + Node.js) ───────────────────────────────────────────── +FROM ghcr.io/gis-ops/docker-valhalla/valhalla:latest AS valhalla-worker +USER root +RUN apt-get update \ + && apt-get install -y --no-install-recommends ca-certificates curl gnupg osmium-tool \ + && mkdir -p /etc/apt/keyrings \ + && curl -fsSL https://deb.nodesource.com/gpgkey/nodesource-repo.gpg.key \ + | gpg --dearmor -o /etc/apt/keyrings/nodesource.gpg \ + && echo "deb [signed-by=/etc/apt/keyrings/nodesource.gpg] https://deb.nodesource.com/node_22.x nodistro main" \ + > /etc/apt/sources.list.d/nodesource.list \ + && apt-get update \ + && apt-get install -y --no-install-recommends nodejs \ + && rm -rf /var/lib/apt/lists/* +WORKDIR /app +ENV NODE_ENV=production +# node_modules built on Debian glibc (same ABI as this container) +COPY --from=build /app/node_modules ./node_modules +COPY --from=build /app/worker/dist ./worker/dist +COPY --from=build /app/shared/dist ./shared/dist +COPY shared/package.json ./shared/ +COPY worker/package.json ./worker/ +RUN mkdir -p /data/osm /data/valhalla && chmod 1777 /data/osm +ENTRYPOINT ["/bin/node"] +CMD ["worker/dist/valhalla-main.js"] diff --git a/Dockerfile.worker b/Dockerfile.worker new file mode 100644 index 0000000..4af8dab --- /dev/null +++ b/Dockerfile.worker @@ -0,0 +1,45 @@ +# ─── Pipeline worker Dockerfile 
─────────────────────────────────────────────── +# Debian slim — needed for osmium-tool and osm2pgsql (not in Alpine). +# Also serves as the routing queue consumer using @valhallajs/valhallajs +# (glibc prebuilt binary; no separate valhalla HTTP server needed). + +FROM node:22-slim AS build +RUN apt-get update && apt-get install -y --no-install-recommends python3 make g++ \ + && rm -rf /var/lib/apt/lists/* +WORKDIR /app + +# ─── Dependencies (glibc — required for @valhallajs/valhallajs native addon) ── +COPY package.json package-lock.json* tsconfig.base.json ./ +COPY apps/web/package.json ./apps/web/ +COPY worker/package.json ./worker/ +COPY shared/package.json ./shared/ +# NODE_ENV must NOT be production here — devDependencies (tsc, tsx, etc.) needed +RUN npm install --workspace=apps/web --workspace=worker --workspace=shared + +# ─── Shared + worker build ──────────────────────────────────────────────────── +COPY shared/ ./shared/ +RUN npm run build --workspace=shared +COPY worker/ ./worker/ +RUN npm run build --workspace=worker + +# ─── Runtime ────────────────────────────────────────────────────────────────── +FROM node:22-slim AS worker +RUN apt-get update && apt-get install -y --no-install-recommends \ + osmium-tool \ + osm2pgsql \ + && rm -rf /var/lib/apt/lists/* +RUN groupadd --system --gid 1001 nodejs && useradd --system --uid 1001 --gid nodejs workeruser +WORKDIR /app +ENV NODE_ENV=production +COPY --from=build /app/node_modules ./node_modules +COPY --from=build /app/worker/dist ./worker/dist +COPY --from=build /app/shared/dist ./shared/dist +COPY shared/package.json ./shared/ +COPY infra/ ./infra/ +COPY worker/package.json ./worker/ +# Create data directories owned by workeruser so Docker named volumes +# are initialized with the correct permissions on first run. 
+RUN mkdir -p /data/osm /data/valhalla /data/valhalla_road /data/valhalla_transit \ + && chown -R workeruser:nodejs /data +USER workeruser +CMD ["node", "worker/dist/index.js"] diff --git a/README.md b/README.md index b1a0aa3..0b1f49c 100644 --- a/README.md +++ b/README.md @@ -16,39 +16,41 @@ Next.js App Server └── Valkey (API response cache, BullMQ queues) BullMQ Worker (download queue, concurrency 1) - └── download-pbf → streams OSM PBF from Geofabrik (serialised to avoid - redundant parallel downloads; idempotent if file exists) + └── download-pbf → streams OSM PBF from Geofabrik; cached by URL basename + (multiple cities sharing the same regional PBF download it once), + 24 h TTL, serialised to avoid redundant parallel downloads BullMQ Worker (pipeline queue, concurrency 8) ├── refresh-city → orchestrates full ingest via FlowProducer ├── extract-pois → osmium filter + osm2pgsql flex → raw_pois ├── generate-grid → PostGIS 200 m rectangular grid → grid_points ├── compute-scores → two-phase orchestrator (see Scoring below) - ├── compute-routing → Valhalla matrix → grid_poi_details + ├── compute-routing → dispatches matrix job to 'routing' queue → grid_poi_details │ (15 parallel jobs: 3 modes × 5 categories) - └── compute-transit → Valhalla isochrones → grid_poi_details (travel_mode='transit') + └── compute-transit → dispatches isochrone jobs to 'routing-transit' queue → + grid_poi_details (travel_mode='transit') (1 job per city, covers all categories via PostGIS spatial join) -BullMQ Worker (valhalla queue, concurrency 1) — road-only instance - └── build-valhalla → osmium clip + valhalla_build_tiles (road graph only, no transit - connections) → manages valhalla_service on :8002 - Clean tiles ensure cycling/walking/driving routing is never - affected by ghost edges from failed transit connections. 
+BullMQ Worker (valhalla queue, concurrency 1) — road-only tile builder + router + ├── build-valhalla → osmium clip + valhalla_build_tiles (road graph only, no transit + │ connections) per city tile dir VALHALLA_TILES_BASE/{citySlug}/ + │ Clean tiles ensure cycling/walking/driving routing is never + │ affected by ghost edges from failed transit connections. + └── routing queue → @valhallajs/valhallajs Actor pool (per-city, lazy-loaded) + ├── matrix jobs (walking/cycling/driving) ← compute-routing + └── isochrone jobs (non-transit) ← /api/isochrones -BullMQ Worker (valhalla-transit queue, concurrency 1) — transit instance - ├── download-gtfs-de → downloads & filters GTFS feed for German ÖPNV (bbox-clipped to - │ known cities, single-stop trips removed) - └── build-valhalla → osmium clip + valhalla_ingest_transit + valhalla_convert_transit - + valhalla_build_tiles (road graph with transit connections) - → manages valhalla_service on :8002 (separate container/port) - -Valhalla road instance (child process of valhalla worker, port 8002) - ├── sources_to_targets matrix → compute-routing jobs (walking/cycling/driving) - └── isochrone endpoint → user click → /api/isochrones (non-transit modes) - -Valhalla transit instance (child process of valhalla-transit worker, port 8002) - ├── isochrone (multimodal) → compute-transit jobs - └── isochrone endpoint → user click → /api/isochrones (transit mode) +BullMQ Worker (valhalla-transit queue, concurrency 1) — transit tile builder + router + ├── download-gtfs-de → raw feed cached globally in gtfs/raw/ (keyed by source URL); + │ per-city filtered feed in gtfs/{citySlug}/feed/ (bbox-clipped, + │ trips with <2 bbox stops removed); 24 h source TTL + ├── build-valhalla → osmium clip (expanded to transit stops bbox) + + │ valhalla_ingest_transit + valhalla_convert_transit + + │ valhalla_build_tiles (road graph with transit connections) + │ per city tile dir VALHALLA_TILES_BASE/{citySlug}/ + └── routing-transit queue → 
@valhallajs/valhallajs Actor pool (per-city, lazy-loaded) + ├── isochrone jobs (multimodal) ← compute-transit + └── isochrone jobs (transit) ← /api/isochrones Protomaps → self-hosted map tiles (PMTiles) ``` @@ -127,7 +129,7 @@ Each city is covered by a regular rectangular grid at 200 m spacing, generated i Travel times are obtained from [Valhalla](https://github.com/valhalla/valhalla), a real-network routing engine built on OSM data: -- **Walking, cycling, driving** — Valhalla's `sources_to_targets` matrix endpoint. For each grid point the 6 spatially nearest POIs in the category are sent as targets; the resulting travel-time matrix gives the exact routed time to each. The nearest POI *per subcategory* is retained. +- **Walking, cycling, driving** — Valhalla's `sources_to_targets` matrix endpoint. For each grid point the 6 spatially nearest POIs (by KNN) in the category are sent as targets; the resulting travel-time matrix gives the exact routed time to each. The fastest-routed POI *per subcategory* is retained. - **Transit** — Valhalla's matrix endpoint does not support transit. Instead, a multimodal isochrone is computed per grid point at contour intervals of 5, 10, 15, 20, and 30 minutes (fixed departure: next Tuesday 08:00 for reproducible GTFS results). PostGIS `ST_Within` then classifies every POI in the city into the smallest contour it falls within, giving estimated times of 300 / 600 / 900 / 1200 / 1800 seconds. Grid points outside the transit network are silently skipped — they receive no transit score. - **Cyclist (`cyclist`)** — synthetic persona: `MIN(walking, cycling, transit)` per POI. Represents someone who cycles and also uses transit when faster. No extra routing calls needed. - **Cyclist, no transit (`cycling_walk`)** — synthetic persona: `MIN(walking, cycling)`. Represents someone who cycles but avoids public transit. No extra routing calls needed. 
@@ -141,10 +143,10 @@ All scores are precomputed at ingest time for every combination of threshold (5 Each subcategory *i* contributes a proximity score based on travel time `t` and threshold `T` (both in seconds) using exponential decay: ``` -score(t, T) = exp(−3 × t / T) +score(t, T) = exp(−t / T) ``` -At t = 0 the score is 1.0. At the threshold it is exp(−3) ≈ 0.05 — a POI reachable in exactly the threshold time barely contributes. Close proximity dominates: a third of the threshold away scores ~0.37, halfway scores ~0.22. This ensures that genuinely nearby POIs are rated much more highly than merely reachable ones. +At t = 0 the score is 1.0. At the threshold it is exp(−1) ≈ 0.37 — a POI reachable in exactly the threshold time still contributes meaningfully. A third of the threshold away scores ~0.72, halfway scores ~0.61. The curve rewards proximity without harshly penalising destinations that are merely a bit further than ideal. The category score aggregates across subcategories **and** across multiple nearby POIs of the same subcategory via a **complement product** weighted by profile-specific importance weights `w_i ∈ [0, 1]`: @@ -249,8 +251,8 @@ For each city the worker pipeline runs in two phases: **Phase 1 — Routing** (parallel child jobs, dispatched by `compute-scores`) -- *Walking, cycling, driving* — 15 parallel jobs (3 modes × 5 categories). A PostGIS KNN lateral join finds the 6 spatially nearest POIs per grid point in the category; those coordinates are sent to Valhalla's `sources_to_targets` matrix API in batches. The nearest POI per subcategory is persisted to `grid_poi_details`. -- *Transit* — 1 job per city (`compute-transit`). Concurrent isochrone calls (8 at a time) to the dedicated transit Valhalla instance; PostGIS `ST_Within` classifies POIs into contour bands. Runs first so it overlaps with the routing jobs. +- *Walking, cycling, driving* — 15 parallel jobs (3 modes × 5 categories). 
A PostGIS KNN lateral join finds the 6 spatially nearest POIs per grid point in the category; those coordinates are dispatched as a matrix job to the `routing` BullMQ queue, where the valhalla worker's Actor pool runs `sources_to_targets`. The fastest-routed POI per subcategory is persisted to `grid_poi_details`. +- *Transit* — 1 job per city (`compute-transit`). Concurrent isochrone jobs (8 at a time) are dispatched to the `routing-transit` queue; the transit valhalla worker's Actor pool runs multimodal isochrones and PostGIS `ST_Within` classifies POIs into contour bands. Runs first so it overlaps with the routing jobs. **Phase 2 — Score aggregation** @@ -274,8 +276,8 @@ When a user places a pin on the map: 1. The nearest grid point is found via a PostGIS `<->` KNN query. 2. Precomputed `grid_scores` rows for that grid point, travel mode, threshold, and profile are returned — one row per category. -3. Per-subcategory detail rows from `grid_poi_details` are also fetched, showing the name, straight-line distance, and travel time to the nearest POI in each subcategory for the requested mode. -4. An isochrone overlay is fetched live from Valhalla and shown on the map. For `transit` mode the multimodal isochrone comes from the dedicated transit Valhalla instance. For `fifteen` (Best mode), cycling is used as the representative display isochrone since Valhalla's interactive isochrone only supports single-mode costing. +3. Per-subcategory detail rows from `grid_poi_details` are also fetched, showing the name, straight-line distance, and travel time to the fastest-routed POI in each subcategory for the requested mode. The category headline time shown in the panel is the minimum across all subcategories. +4. An isochrone job is dispatched via BullMQ, computed by the valhalla worker's Actor pool, and the result is shown as an overlay on the map. For `transit` mode the job goes to the `routing-transit` queue (multimodal Actor). 
For `fifteen` (Best mode), cycling is used as the representative display isochrone since Valhalla's isochrone only supports single-mode costing. The pin panel also shows estate value data (land price in €/m² from the BORIS NI cadastre) for cities in Lower Saxony, including a percentile rank among all zones in the city and a "peer percentile" rank among zones with similar accessibility scores. diff --git a/apps/web/app/admin/cities/[slug]/page.tsx b/apps/web/app/admin/cities/[slug]/page.tsx index cf9a55d..d897e98 100644 --- a/apps/web/app/admin/cities/[slug]/page.tsx +++ b/apps/web/app/admin/cities/[slug]/page.tsx @@ -106,6 +106,18 @@ export default function CityDetailPage() { +