From 25c67b2536289bb22ef3904593d0aeaf473dd8fb Mon Sep 17 00:00:00 2001 From: Jan-Henrik Bruhn Date: Fri, 6 Mar 2026 14:18:01 +0100 Subject: [PATCH] fix: actually save multiple POIs for subcategories --- infra/schema.sql | 27 +++++++++-- worker/src/jobs/compute-routing.ts | 77 ++++++++++++++---------------- worker/src/jobs/compute-scores.ts | 6 ++- worker/src/jobs/compute-transit.ts | 18 ++++--- 4 files changed, 73 insertions(+), 55 deletions(-) diff --git a/infra/schema.sql b/infra/schema.sql index cb1e84a..e7c3625 100644 --- a/infra/schema.sql +++ b/infra/schema.sql @@ -94,11 +94,14 @@ ALTER TABLE grid_scores ADD COLUMN IF NOT EXISTS profile TEXT NOT NULL DEFAULT ' CREATE INDEX IF NOT EXISTS idx_grid_scores_lookup ON grid_scores (grid_point_id, travel_mode, threshold_min, profile); --- ─── Nearest POI per subcategory per grid point ─────────────────────────────── --- Populated by compute-scores job. Stores the nearest (by routing time) POI for --- each subcategory at each grid point, for each travel mode. Threshold-independent. +-- ─── Nearest POIs per subcategory per grid point ───────────────────────────── +-- Populated by compute-routing / compute-transit jobs. Stores up to K nearest +-- POIs per (grid_point, category, subcategory, travel_mode) so that the scoring +-- phase can apply the complement-product formula across multiple POIs, giving +-- diminishing returns for POI diversity within a subcategory. CREATE TABLE IF NOT EXISTS grid_poi_details ( + id BIGSERIAL PRIMARY KEY, grid_point_id BIGINT NOT NULL REFERENCES grid_points(id) ON DELETE CASCADE, category TEXT NOT NULL, subcategory TEXT NOT NULL, @@ -107,13 +110,27 @@ CREATE TABLE IF NOT EXISTS grid_poi_details ( nearest_poi_name TEXT, distance_m FLOAT, travel_time_s FLOAT, - computed_at TIMESTAMPTZ NOT NULL DEFAULT now(), - PRIMARY KEY (grid_point_id, category, subcategory, travel_mode) + computed_at TIMESTAMPTZ NOT NULL DEFAULT now() ); CREATE INDEX IF NOT EXISTS idx_grid_poi_details_lookup ON grid_poi_details (grid_point_id, travel_mode); +-- Migration: replace old composite PK with surrogate id for diversity scoring +DO $$ +BEGIN + IF NOT EXISTS ( + SELECT 1 FROM information_schema.columns + WHERE table_name = 'grid_poi_details' AND column_name = 'id' + ) THEN + -- Old single-POI-per-subcategory data is incompatible; clear it + DELETE FROM grid_scores; + DELETE FROM grid_poi_details; + ALTER TABLE grid_poi_details DROP CONSTRAINT IF EXISTS grid_poi_details_pkey; + ALTER TABLE grid_poi_details ADD COLUMN id BIGSERIAL PRIMARY KEY; + END IF; +END $$; + -- ─── Isochrone cache ────────────────────────────────────────────────────────── CREATE TABLE IF NOT EXISTS isochrone_cache ( diff --git a/worker/src/jobs/compute-routing.ts b/worker/src/jobs/compute-routing.ts index 12d09ad..caa6be2 100644 --- a/worker/src/jobs/compute-routing.ts +++ b/worker/src/jobs/compute-routing.ts @@ -10,8 +10,9 @@ export type ComputeRoutingData = { category: string; }; -/** Number of nearest POI candidates per grid point. */ -const K = 6; +/** Number of nearest POI candidates per grid point (across all subcategories). + * Higher K means more diversity candidates for the complement-product formula. */ +const K = 15; /** Grid points per Valhalla matrix call. */ const BATCH_SIZE = 5; /** Concurrent Valhalla calls within this job. */ @@ -55,14 +56,15 @@ export async function handleComputeRouting(job: Job): Promis `); if (count === 0) return; - // Nearest POI per (gridPointId, subcategory). - const result = new Map>(); - for (const gp of gridPoints) result.set(gp.id, new Map()); + }> = []; const batches: Array<{ id: string; lat: number; lng: number }[]> = []; for (let i = 0; i < gridPoints.length; i += BATCH_SIZE) { @@ -141,17 +143,16 @@ export async function handleComputeRouting(job: Job): Promis const knn = gpKnn.get(gp.id); if (!knn || knn.length === 0) continue; - const subcatMap = result.get(gp.id)!; for (const row of knn) { - if (!subcatMap.has(row.subcategory)) { - const idx = targetIdx.get(row.poi_id); - subcatMap.set(row.subcategory, { - poiId: row.poi_id, - poiName: row.poi_name, - distM: row.dist_m, - timeS: idx !== undefined ? (matrix[bi]?.[idx] ?? null) : null, - }); - } + const idx = targetIdx.get(row.poi_id); + rows.push({ + gpId: gp.id, + subcategory: row.subcategory, + poiId: row.poi_id, + poiName: row.poi_name, + distM: row.dist_m, + timeS: idx !== undefined ? (matrix[bi]?.[idx] ?? null) : null, + }); } } @@ -162,24 +163,25 @@ export async function handleComputeRouting(job: Job): Promis } satisfies JobProgress); }); - // Bulk-insert nearest POI per subcategory into grid_poi_details. - const gpIdArr: string[] = []; - const subcatArr: string[] = []; - const poiIdArr: (string | null)[] = []; - const poiNameArr: (string | null)[] = []; - const distArr: (number | null)[] = []; - const timeArr: (number | null)[] = []; + if (rows.length === 0) return; - for (const [gpId, subcatMap] of result) { - for (const [subcategory, detail] of subcatMap) { - gpIdArr.push(gpId); - subcatArr.push(subcategory); - poiIdArr.push(detail.poiId); - poiNameArr.push(detail.poiName); - distArr.push(detail.distM); - timeArr.push(detail.timeS); - } - } + // Delete stale rows for this (city, mode, category) before re-inserting. + await Promise.resolve(sql` + DELETE FROM grid_poi_details gpd + USING grid_points gp + WHERE gpd.grid_point_id = gp.id + AND gp.city_slug = ${citySlug} + AND gpd.category = ${category} + AND gpd.travel_mode = ${mode} + `); + + // Bulk-insert all POI candidates (multiple per subcategory for diversity scoring). + const gpIdArr = rows.map((r) => r.gpId); + const subcatArr = rows.map((r) => r.subcategory); + const poiIdArr = rows.map((r) => r.poiId); + const poiNameArr = rows.map((r) => r.poiName); + const distArr = rows.map((r) => r.distM); + const timeArr = rows.map((r) => r.timeS); for (let i = 0; i < gpIdArr.length; i += INSERT_CHUNK) { const end = Math.min(i + INSERT_CHUNK, gpIdArr.length); @@ -193,7 +195,7 @@ export async function handleComputeRouting(job: Job): Promis ${category}, subcat, ${mode}, - CASE WHEN poi_id IS NULL THEN NULL ELSE poi_id::bigint END, + poi_id::bigint, poi_name, dist, time_s @@ -205,13 +207,6 @@ export async function handleComputeRouting(job: Job): Promis ${distArr.slice(i, end)}::float8[], ${timeArr.slice(i, end)}::float8[] ) AS t(gp_id, subcat, poi_id, poi_name, dist, time_s) - ON CONFLICT (grid_point_id, category, subcategory, travel_mode) - DO UPDATE SET - nearest_poi_id = EXCLUDED.nearest_poi_id, - nearest_poi_name = EXCLUDED.nearest_poi_name, - distance_m = EXCLUDED.distance_m, - travel_time_s = EXCLUDED.travel_time_s, - computed_at = now() `); } } diff --git a/worker/src/jobs/compute-scores.ts b/worker/src/jobs/compute-scores.ts index 0585a58..141e78e 100644 --- a/worker/src/jobs/compute-scores.ts +++ b/worker/src/jobs/compute-scores.ts @@ -185,14 +185,16 @@ export async function handleComputeScores( WHERE gp.city_slug = ${citySlug} ), fifteen_subcat AS ( - -- "fifteen" mode: best (lowest) travel time across walking / cycling / transit + -- "fifteen" mode: for each unique POI, take the best time across + -- walking / cycling / transit so each POI contributes independently + -- to the complement-product formula (preserving diversity). SELECT grid_point_id, category, subcategory, 'fifteen'::text AS travel_mode, MIN(travel_time_s) AS travel_time_s FROM base WHERE travel_mode IN ('walking', 'cycling', 'transit') - GROUP BY grid_point_id, category, subcategory + GROUP BY grid_point_id, category, subcategory, nearest_poi_id ), all_subcat AS ( SELECT grid_point_id, category, subcategory, travel_mode, travel_time_s FROM base diff --git a/worker/src/jobs/compute-transit.ts b/worker/src/jobs/compute-transit.ts index d08695f..56c7349 100644 --- a/worker/src/jobs/compute-transit.ts +++ b/worker/src/jobs/compute-transit.ts @@ -116,6 +116,16 @@ export async function handleComputeTransit(job: Job): Promis if (gridPoints.length === 0) return; + // Delete stale transit rows before recomputing (no ON CONFLICT since multiple + // rows per subcategory are allowed with the diversity-scoring schema). + await Promise.resolve(sql` + DELETE FROM grid_poi_details gpd + USING grid_points gp + WHERE gpd.grid_point_id = gp.id + AND gp.city_slug = ${citySlug} + AND gpd.travel_mode = 'transit' + `); + await job.updateProgress({ stage: "Transit routing", pct: 1, @@ -245,13 +255,7 @@ export async function handleComputeTransit(job: Job): Promis ${distArr.slice(i, end)}::float8[], ${timeArr.slice(i, end)}::float8[] ) AS t(gp_id, cat, subcat, poi_id, poi_name, dist, time_s) - ON CONFLICT (grid_point_id, category, subcategory, travel_mode) - DO UPDATE SET - nearest_poi_id = EXCLUDED.nearest_poi_id, - nearest_poi_name = EXCLUDED.nearest_poi_name, - distance_m = EXCLUDED.distance_m, - travel_time_s = EXCLUDED.travel_time_s, - computed_at = now() + -- No ON CONFLICT: stale rows were deleted at job start `); }