fix: actually save multiple POIs for subcategories

This commit is contained in:
Jan-Henrik 2026-03-06 14:18:01 +01:00
parent fcad3d867a
commit 25c67b2536
4 changed files with 73 additions and 55 deletions

View file

@ -94,11 +94,14 @@ ALTER TABLE grid_scores ADD COLUMN IF NOT EXISTS profile TEXT NOT NULL DEFAULT '
CREATE INDEX IF NOT EXISTS idx_grid_scores_lookup
ON grid_scores (grid_point_id, travel_mode, threshold_min, profile);
-- ─── Nearest POI per subcategory per grid point ───────────────────────────────
-- Populated by compute-scores job. Stores the nearest (by routing time) POI for
-- each subcategory at each grid point, for each travel mode. Threshold-independent.
-- ─── Nearest POIs per subcategory per grid point ─────────────────────────────
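-- Populated by compute-routing / compute-transit jobs. Stores multiple nearest
-- POIs per (grid_point, category, subcategory, travel_mode) so that the scoring
-- phase can apply the complement-product formula across several POIs, giving
-- diminishing returns for each additional POI within a subcategory.
-- Note: in compute-routing, K bounds the candidates per grid point across all
-- subcategories of a category, not the number of rows per subcategory.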
CREATE TABLE IF NOT EXISTS grid_poi_details (
id BIGSERIAL PRIMARY KEY,
grid_point_id BIGINT NOT NULL REFERENCES grid_points(id) ON DELETE CASCADE,
category TEXT NOT NULL,
subcategory TEXT NOT NULL,
@ -107,13 +110,27 @@ CREATE TABLE IF NOT EXISTS grid_poi_details (
nearest_poi_name TEXT,
distance_m FLOAT,
travel_time_s FLOAT,
computed_at TIMESTAMPTZ NOT NULL DEFAULT now(),
PRIMARY KEY (grid_point_id, category, subcategory, travel_mode)
computed_at TIMESTAMPTZ NOT NULL DEFAULT now()
);
-- Secondary index on (grid_point_id, travel_mode). grid_poi_details may hold
-- several rows per subcategory, so this index is intentionally non-unique.
CREATE INDEX IF NOT EXISTS idx_grid_poi_details_lookup
ON grid_poi_details (grid_point_id, travel_mode);
-- Migration: replace old composite PK with surrogate id for diversity scoring.
-- Runs only when grid_poi_details predates the surrogate key, i.e. has no "id"
-- column yet; on an already-migrated or freshly created table it is a no-op.
DO $$
BEGIN
  -- Resolve the table through the search_path with to_regclass(), exactly as the
  -- unqualified DELETE / ALTER TABLE statements below resolve it. A lookup in
  -- information_schema.columns by table_name alone would also match same-named
  -- tables in other schemas and could wrongly skip this migration.
  IF NOT EXISTS (
    SELECT 1 FROM pg_attribute
    WHERE attrelid = to_regclass('grid_poi_details')
      AND attname = 'id'
      AND NOT attisdropped
  ) THEN
    -- Old single-POI-per-subcategory data is incompatible; clear it
    DELETE FROM grid_scores;
    DELETE FROM grid_poi_details;
    ALTER TABLE grid_poi_details DROP CONSTRAINT IF EXISTS grid_poi_details_pkey;
    ALTER TABLE grid_poi_details ADD COLUMN id BIGSERIAL PRIMARY KEY;
  END IF;
END $$;
-- ─── Isochrone cache ──────────────────────────────────────────────────────────
CREATE TABLE IF NOT EXISTS isochrone_cache (

View file

@ -10,8 +10,9 @@ export type ComputeRoutingData = {
category: string;
};
/** Number of nearest POI candidates per grid point. */
const K = 6;
/** Number of nearest POI candidates per grid point (across all subcategories).
* Higher K means more diversity candidates for the complement-product formula. */
const K = 15;
/** Grid points per Valhalla matrix call. */
const BATCH_SIZE = 5;
/** Concurrent Valhalla calls within this job. */
@ -55,14 +56,15 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
`);
if (count === 0) return;
// Nearest POI per (gridPointId, subcategory).
const result = new Map<string, Map<string, {
// All POI candidates per grid point — multiple per subcategory for diversity scoring.
const rows: Array<{
gpId: string;
subcategory: string;
poiId: string;
poiName: string | null;
distM: number;
timeS: number | null;
}>>();
for (const gp of gridPoints) result.set(gp.id, new Map());
}> = [];
const batches: Array<{ id: string; lat: number; lng: number }[]> = [];
for (let i = 0; i < gridPoints.length; i += BATCH_SIZE) {
@ -141,17 +143,16 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
const knn = gpKnn.get(gp.id);
if (!knn || knn.length === 0) continue;
const subcatMap = result.get(gp.id)!;
for (const row of knn) {
if (!subcatMap.has(row.subcategory)) {
const idx = targetIdx.get(row.poi_id);
subcatMap.set(row.subcategory, {
poiId: row.poi_id,
poiName: row.poi_name,
distM: row.dist_m,
timeS: idx !== undefined ? (matrix[bi]?.[idx] ?? null) : null,
});
}
const idx = targetIdx.get(row.poi_id);
rows.push({
gpId: gp.id,
subcategory: row.subcategory,
poiId: row.poi_id,
poiName: row.poi_name,
distM: row.dist_m,
timeS: idx !== undefined ? (matrix[bi]?.[idx] ?? null) : null,
});
}
}
@ -162,24 +163,25 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
} satisfies JobProgress);
});
// Bulk-insert nearest POI per subcategory into grid_poi_details.
const gpIdArr: string[] = [];
const subcatArr: string[] = [];
const poiIdArr: (string | null)[] = [];
const poiNameArr: (string | null)[] = [];
const distArr: (number | null)[] = [];
const timeArr: (number | null)[] = [];
if (rows.length === 0) return;
for (const [gpId, subcatMap] of result) {
for (const [subcategory, detail] of subcatMap) {
gpIdArr.push(gpId);
subcatArr.push(subcategory);
poiIdArr.push(detail.poiId);
poiNameArr.push(detail.poiName);
distArr.push(detail.distM);
timeArr.push(detail.timeS);
}
}
// Delete stale rows for this (city, mode, category) before re-inserting.
await Promise.resolve(sql`
DELETE FROM grid_poi_details gpd
USING grid_points gp
WHERE gpd.grid_point_id = gp.id
AND gp.city_slug = ${citySlug}
AND gpd.category = ${category}
AND gpd.travel_mode = ${mode}
`);
// Bulk-insert all POI candidates (multiple per subcategory for diversity scoring).
const gpIdArr = rows.map((r) => r.gpId);
const subcatArr = rows.map((r) => r.subcategory);
const poiIdArr = rows.map((r) => r.poiId);
const poiNameArr = rows.map((r) => r.poiName);
const distArr = rows.map((r) => r.distM);
const timeArr = rows.map((r) => r.timeS);
for (let i = 0; i < gpIdArr.length; i += INSERT_CHUNK) {
const end = Math.min(i + INSERT_CHUNK, gpIdArr.length);
@ -193,7 +195,7 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
${category},
subcat,
${mode},
CASE WHEN poi_id IS NULL THEN NULL ELSE poi_id::bigint END,
poi_id::bigint,
poi_name,
dist,
time_s
@ -205,13 +207,6 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
${distArr.slice(i, end)}::float8[],
${timeArr.slice(i, end)}::float8[]
) AS t(gp_id, subcat, poi_id, poi_name, dist, time_s)
ON CONFLICT (grid_point_id, category, subcategory, travel_mode)
DO UPDATE SET
nearest_poi_id = EXCLUDED.nearest_poi_id,
nearest_poi_name = EXCLUDED.nearest_poi_name,
distance_m = EXCLUDED.distance_m,
travel_time_s = EXCLUDED.travel_time_s,
computed_at = now()
`);
}
}

View file

@ -185,14 +185,16 @@ export async function handleComputeScores(
WHERE gp.city_slug = ${citySlug}
),
fifteen_subcat AS (
-- "fifteen" mode: best (lowest) travel time across walking / cycling / transit
-- "fifteen" mode: for each unique POI, take the best time across
-- walking / cycling / transit so each POI contributes independently
-- to the complement-product formula (preserving diversity).
SELECT
grid_point_id, category, subcategory,
'fifteen'::text AS travel_mode,
MIN(travel_time_s) AS travel_time_s
FROM base
WHERE travel_mode IN ('walking', 'cycling', 'transit')
GROUP BY grid_point_id, category, subcategory
GROUP BY grid_point_id, category, subcategory, nearest_poi_id
),
all_subcat AS (
SELECT grid_point_id, category, subcategory, travel_mode, travel_time_s FROM base

View file

@ -116,6 +116,16 @@ export async function handleComputeTransit(job: Job<ComputeTransitData>): Promis
if (gridPoints.length === 0) return;
// Delete stale transit rows before recomputing (no ON CONFLICT since multiple
// rows per subcategory are allowed with the diversity-scoring schema).
await Promise.resolve(sql`
DELETE FROM grid_poi_details gpd
USING grid_points gp
WHERE gpd.grid_point_id = gp.id
AND gp.city_slug = ${citySlug}
AND gpd.travel_mode = 'transit'
`);
await job.updateProgress({
stage: "Transit routing",
pct: 1,
@ -245,13 +255,7 @@ export async function handleComputeTransit(job: Job<ComputeTransitData>): Promis
${distArr.slice(i, end)}::float8[],
${timeArr.slice(i, end)}::float8[]
) AS t(gp_id, cat, subcat, poi_id, poi_name, dist, time_s)
ON CONFLICT (grid_point_id, category, subcategory, travel_mode)
DO UPDATE SET
nearest_poi_id = EXCLUDED.nearest_poi_id,
nearest_poi_name = EXCLUDED.nearest_poi_name,
distance_m = EXCLUDED.distance_m,
travel_time_s = EXCLUDED.travel_time_s,
computed_at = now()
-- No ON CONFLICT: stale rows were deleted at job start
`);
}