fix: actually save multiple POIs for subcategories
This commit is contained in:
parent
fcad3d867a
commit
25c67b2536
4 changed files with 73 additions and 55 deletions
|
|
@ -94,11 +94,14 @@ ALTER TABLE grid_scores ADD COLUMN IF NOT EXISTS profile TEXT NOT NULL DEFAULT '
|
|||
CREATE INDEX IF NOT EXISTS idx_grid_scores_lookup
|
||||
ON grid_scores (grid_point_id, travel_mode, threshold_min, profile);
|
||||
|
||||
-- ─── Nearest POI per subcategory per grid point ───────────────────────────────
|
||||
-- Populated by compute-scores job. Stores the nearest (by routing time) POI for
|
||||
-- each subcategory at each grid point, for each travel mode. Threshold-independent.
|
||||
-- ─── Nearest POIs per subcategory per grid point ─────────────────────────────
|
||||
-- Populated by compute-routing / compute-transit jobs. Stores up to K nearest
|
||||
-- POIs per (grid_point, category, subcategory, travel_mode) so that the scoring
|
||||
-- phase can apply the complement-product formula across multiple POIs, giving
|
||||
-- diminishing returns for POI diversity within a subcategory.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS grid_poi_details (
|
||||
id BIGSERIAL PRIMARY KEY,
|
||||
grid_point_id BIGINT NOT NULL REFERENCES grid_points(id) ON DELETE CASCADE,
|
||||
category TEXT NOT NULL,
|
||||
subcategory TEXT NOT NULL,
|
||||
|
|
@ -107,13 +110,27 @@ CREATE TABLE IF NOT EXISTS grid_poi_details (
|
|||
nearest_poi_name TEXT,
|
||||
distance_m FLOAT,
|
||||
travel_time_s FLOAT,
|
||||
computed_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
||||
PRIMARY KEY (grid_point_id, category, subcategory, travel_mode)
|
||||
computed_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||
);
|
||||
|
||||
CREATE INDEX IF NOT EXISTS idx_grid_poi_details_lookup
|
||||
ON grid_poi_details (grid_point_id, travel_mode);
|
||||
|
||||
-- Migration: replace old composite PK with surrogate id for diversity scoring
|
||||
DO $$
|
||||
BEGIN
|
||||
IF NOT EXISTS (
|
||||
SELECT 1 FROM information_schema.columns
|
||||
WHERE table_name = 'grid_poi_details' AND column_name = 'id'
|
||||
) THEN
|
||||
-- Old single-POI-per-subcategory data is incompatible; clear it
|
||||
DELETE FROM grid_scores;
|
||||
DELETE FROM grid_poi_details;
|
||||
ALTER TABLE grid_poi_details DROP CONSTRAINT IF EXISTS grid_poi_details_pkey;
|
||||
ALTER TABLE grid_poi_details ADD COLUMN id BIGSERIAL PRIMARY KEY;
|
||||
END IF;
|
||||
END $$;
|
||||
|
||||
-- ─── Isochrone cache ──────────────────────────────────────────────────────────
|
||||
|
||||
CREATE TABLE IF NOT EXISTS isochrone_cache (
|
||||
|
|
|
|||
|
|
@ -10,8 +10,9 @@ export type ComputeRoutingData = {
|
|||
category: string;
|
||||
};
|
||||
|
||||
/** Number of nearest POI candidates per grid point. */
|
||||
const K = 6;
|
||||
/** Number of nearest POI candidates per grid point (across all subcategories).
|
||||
* Higher K means more diversity candidates for the complement-product formula. */
|
||||
const K = 15;
|
||||
/** Grid points per Valhalla matrix call. */
|
||||
const BATCH_SIZE = 5;
|
||||
/** Concurrent Valhalla calls within this job. */
|
||||
|
|
@ -55,14 +56,15 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
|
|||
`);
|
||||
if (count === 0) return;
|
||||
|
||||
// Nearest POI per (gridPointId, subcategory).
|
||||
const result = new Map<string, Map<string, {
|
||||
// All POI candidates per grid point — multiple per subcategory for diversity scoring.
|
||||
const rows: Array<{
|
||||
gpId: string;
|
||||
subcategory: string;
|
||||
poiId: string;
|
||||
poiName: string | null;
|
||||
distM: number;
|
||||
timeS: number | null;
|
||||
}>>();
|
||||
for (const gp of gridPoints) result.set(gp.id, new Map());
|
||||
}> = [];
|
||||
|
||||
const batches: Array<{ id: string; lat: number; lng: number }[]> = [];
|
||||
for (let i = 0; i < gridPoints.length; i += BATCH_SIZE) {
|
||||
|
|
@ -141,17 +143,16 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
|
|||
const knn = gpKnn.get(gp.id);
|
||||
if (!knn || knn.length === 0) continue;
|
||||
|
||||
const subcatMap = result.get(gp.id)!;
|
||||
for (const row of knn) {
|
||||
if (!subcatMap.has(row.subcategory)) {
|
||||
const idx = targetIdx.get(row.poi_id);
|
||||
subcatMap.set(row.subcategory, {
|
||||
poiId: row.poi_id,
|
||||
poiName: row.poi_name,
|
||||
distM: row.dist_m,
|
||||
timeS: idx !== undefined ? (matrix[bi]?.[idx] ?? null) : null,
|
||||
});
|
||||
}
|
||||
const idx = targetIdx.get(row.poi_id);
|
||||
rows.push({
|
||||
gpId: gp.id,
|
||||
subcategory: row.subcategory,
|
||||
poiId: row.poi_id,
|
||||
poiName: row.poi_name,
|
||||
distM: row.dist_m,
|
||||
timeS: idx !== undefined ? (matrix[bi]?.[idx] ?? null) : null,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -162,24 +163,25 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
|
|||
} satisfies JobProgress);
|
||||
});
|
||||
|
||||
// Bulk-insert nearest POI per subcategory into grid_poi_details.
|
||||
const gpIdArr: string[] = [];
|
||||
const subcatArr: string[] = [];
|
||||
const poiIdArr: (string | null)[] = [];
|
||||
const poiNameArr: (string | null)[] = [];
|
||||
const distArr: (number | null)[] = [];
|
||||
const timeArr: (number | null)[] = [];
|
||||
if (rows.length === 0) return;
|
||||
|
||||
for (const [gpId, subcatMap] of result) {
|
||||
for (const [subcategory, detail] of subcatMap) {
|
||||
gpIdArr.push(gpId);
|
||||
subcatArr.push(subcategory);
|
||||
poiIdArr.push(detail.poiId);
|
||||
poiNameArr.push(detail.poiName);
|
||||
distArr.push(detail.distM);
|
||||
timeArr.push(detail.timeS);
|
||||
}
|
||||
}
|
||||
// Delete stale rows for this (city, mode, category) before re-inserting.
|
||||
await Promise.resolve(sql`
|
||||
DELETE FROM grid_poi_details gpd
|
||||
USING grid_points gp
|
||||
WHERE gpd.grid_point_id = gp.id
|
||||
AND gp.city_slug = ${citySlug}
|
||||
AND gpd.category = ${category}
|
||||
AND gpd.travel_mode = ${mode}
|
||||
`);
|
||||
|
||||
// Bulk-insert all POI candidates (multiple per subcategory for diversity scoring).
|
||||
const gpIdArr = rows.map((r) => r.gpId);
|
||||
const subcatArr = rows.map((r) => r.subcategory);
|
||||
const poiIdArr = rows.map((r) => r.poiId);
|
||||
const poiNameArr = rows.map((r) => r.poiName);
|
||||
const distArr = rows.map((r) => r.distM);
|
||||
const timeArr = rows.map((r) => r.timeS);
|
||||
|
||||
for (let i = 0; i < gpIdArr.length; i += INSERT_CHUNK) {
|
||||
const end = Math.min(i + INSERT_CHUNK, gpIdArr.length);
|
||||
|
|
@ -193,7 +195,7 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
|
|||
${category},
|
||||
subcat,
|
||||
${mode},
|
||||
CASE WHEN poi_id IS NULL THEN NULL ELSE poi_id::bigint END,
|
||||
poi_id::bigint,
|
||||
poi_name,
|
||||
dist,
|
||||
time_s
|
||||
|
|
@ -205,13 +207,6 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
|
|||
${distArr.slice(i, end)}::float8[],
|
||||
${timeArr.slice(i, end)}::float8[]
|
||||
) AS t(gp_id, subcat, poi_id, poi_name, dist, time_s)
|
||||
ON CONFLICT (grid_point_id, category, subcategory, travel_mode)
|
||||
DO UPDATE SET
|
||||
nearest_poi_id = EXCLUDED.nearest_poi_id,
|
||||
nearest_poi_name = EXCLUDED.nearest_poi_name,
|
||||
distance_m = EXCLUDED.distance_m,
|
||||
travel_time_s = EXCLUDED.travel_time_s,
|
||||
computed_at = now()
|
||||
`);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -185,14 +185,16 @@ export async function handleComputeScores(
|
|||
WHERE gp.city_slug = ${citySlug}
|
||||
),
|
||||
fifteen_subcat AS (
|
||||
-- "fifteen" mode: best (lowest) travel time across walking / cycling / transit
|
||||
-- "fifteen" mode: for each unique POI, take the best time across
|
||||
-- walking / cycling / transit so each POI contributes independently
|
||||
-- to the complement-product formula (preserving diversity).
|
||||
SELECT
|
||||
grid_point_id, category, subcategory,
|
||||
'fifteen'::text AS travel_mode,
|
||||
MIN(travel_time_s) AS travel_time_s
|
||||
FROM base
|
||||
WHERE travel_mode IN ('walking', 'cycling', 'transit')
|
||||
GROUP BY grid_point_id, category, subcategory
|
||||
GROUP BY grid_point_id, category, subcategory, nearest_poi_id
|
||||
),
|
||||
all_subcat AS (
|
||||
SELECT grid_point_id, category, subcategory, travel_mode, travel_time_s FROM base
|
||||
|
|
|
|||
|
|
@ -116,6 +116,16 @@ export async function handleComputeTransit(job: Job<ComputeTransitData>): Promis
|
|||
|
||||
if (gridPoints.length === 0) return;
|
||||
|
||||
// Delete stale transit rows before recomputing (no ON CONFLICT since multiple
|
||||
// rows per subcategory are allowed with the diversity-scoring schema).
|
||||
await Promise.resolve(sql`
|
||||
DELETE FROM grid_poi_details gpd
|
||||
USING grid_points gp
|
||||
WHERE gpd.grid_point_id = gp.id
|
||||
AND gp.city_slug = ${citySlug}
|
||||
AND gpd.travel_mode = 'transit'
|
||||
`);
|
||||
|
||||
await job.updateProgress({
|
||||
stage: "Transit routing",
|
||||
pct: 1,
|
||||
|
|
@ -245,13 +255,7 @@ export async function handleComputeTransit(job: Job<ComputeTransitData>): Promis
|
|||
${distArr.slice(i, end)}::float8[],
|
||||
${timeArr.slice(i, end)}::float8[]
|
||||
) AS t(gp_id, cat, subcat, poi_id, poi_name, dist, time_s)
|
||||
ON CONFLICT (grid_point_id, category, subcategory, travel_mode)
|
||||
DO UPDATE SET
|
||||
nearest_poi_id = EXCLUDED.nearest_poi_id,
|
||||
nearest_poi_name = EXCLUDED.nearest_poi_name,
|
||||
distance_m = EXCLUDED.distance_m,
|
||||
travel_time_s = EXCLUDED.travel_time_s,
|
||||
computed_at = now()
|
||||
-- No ON CONFLICT: stale rows were deleted at job start
|
||||
`);
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue