fix: actually save multiple POIs for subcategories
This commit is contained in:
parent
fcad3d867a
commit
25c67b2536
4 changed files with 73 additions and 55 deletions
|
|
@ -94,11 +94,14 @@ ALTER TABLE grid_scores ADD COLUMN IF NOT EXISTS profile TEXT NOT NULL DEFAULT '
|
||||||
CREATE INDEX IF NOT EXISTS idx_grid_scores_lookup
|
CREATE INDEX IF NOT EXISTS idx_grid_scores_lookup
|
||||||
ON grid_scores (grid_point_id, travel_mode, threshold_min, profile);
|
ON grid_scores (grid_point_id, travel_mode, threshold_min, profile);
|
||||||
|
|
||||||
-- ─── Nearest POI per subcategory per grid point ───────────────────────────────
|
-- ─── Nearest POIs per subcategory per grid point ─────────────────────────────
|
||||||
-- Populated by compute-scores job. Stores the nearest (by routing time) POI for
|
-- Populated by compute-routing / compute-transit jobs. Stores up to K nearest
|
||||||
-- each subcategory at each grid point, for each travel mode. Threshold-independent.
|
-- POIs per (grid_point, category, subcategory, travel_mode) so that the scoring
|
||||||
|
-- phase can apply the complement-product formula across multiple POIs, giving
|
||||||
|
-- diminishing returns for POI diversity within a subcategory.
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS grid_poi_details (
|
CREATE TABLE IF NOT EXISTS grid_poi_details (
|
||||||
|
id BIGSERIAL PRIMARY KEY,
|
||||||
grid_point_id BIGINT NOT NULL REFERENCES grid_points(id) ON DELETE CASCADE,
|
grid_point_id BIGINT NOT NULL REFERENCES grid_points(id) ON DELETE CASCADE,
|
||||||
category TEXT NOT NULL,
|
category TEXT NOT NULL,
|
||||||
subcategory TEXT NOT NULL,
|
subcategory TEXT NOT NULL,
|
||||||
|
|
@ -107,13 +110,27 @@ CREATE TABLE IF NOT EXISTS grid_poi_details (
|
||||||
nearest_poi_name TEXT,
|
nearest_poi_name TEXT,
|
||||||
distance_m FLOAT,
|
distance_m FLOAT,
|
||||||
travel_time_s FLOAT,
|
travel_time_s FLOAT,
|
||||||
computed_at TIMESTAMPTZ NOT NULL DEFAULT now(),
|
computed_at TIMESTAMPTZ NOT NULL DEFAULT now()
|
||||||
PRIMARY KEY (grid_point_id, category, subcategory, travel_mode)
|
|
||||||
);
|
);
|
||||||
|
|
||||||
CREATE INDEX IF NOT EXISTS idx_grid_poi_details_lookup
|
CREATE INDEX IF NOT EXISTS idx_grid_poi_details_lookup
|
||||||
ON grid_poi_details (grid_point_id, travel_mode);
|
ON grid_poi_details (grid_point_id, travel_mode);
|
||||||
|
|
||||||
|
-- Migration: replace old composite PK with surrogate id for diversity scoring
|
||||||
|
DO $$
|
||||||
|
BEGIN
|
||||||
|
IF NOT EXISTS (
|
||||||
|
SELECT 1 FROM information_schema.columns
|
||||||
|
WHERE table_name = 'grid_poi_details' AND column_name = 'id'
|
||||||
|
) THEN
|
||||||
|
-- Old single-POI-per-subcategory data is incompatible; clear it
|
||||||
|
DELETE FROM grid_scores;
|
||||||
|
DELETE FROM grid_poi_details;
|
||||||
|
ALTER TABLE grid_poi_details DROP CONSTRAINT IF EXISTS grid_poi_details_pkey;
|
||||||
|
ALTER TABLE grid_poi_details ADD COLUMN id BIGSERIAL PRIMARY KEY;
|
||||||
|
END IF;
|
||||||
|
END $$;
|
||||||
|
|
||||||
-- ─── Isochrone cache ──────────────────────────────────────────────────────────
|
-- ─── Isochrone cache ──────────────────────────────────────────────────────────
|
||||||
|
|
||||||
CREATE TABLE IF NOT EXISTS isochrone_cache (
|
CREATE TABLE IF NOT EXISTS isochrone_cache (
|
||||||
|
|
|
||||||
|
|
@ -10,8 +10,9 @@ export type ComputeRoutingData = {
|
||||||
category: string;
|
category: string;
|
||||||
};
|
};
|
||||||
|
|
||||||
/** Number of nearest POI candidates per grid point. */
|
/** Number of nearest POI candidates per grid point (across all subcategories).
|
||||||
const K = 6;
|
* Higher K means more diversity candidates for the complement-product formula. */
|
||||||
|
const K = 15;
|
||||||
/** Grid points per Valhalla matrix call. */
|
/** Grid points per Valhalla matrix call. */
|
||||||
const BATCH_SIZE = 5;
|
const BATCH_SIZE = 5;
|
||||||
/** Concurrent Valhalla calls within this job. */
|
/** Concurrent Valhalla calls within this job. */
|
||||||
|
|
@ -55,14 +56,15 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
|
||||||
`);
|
`);
|
||||||
if (count === 0) return;
|
if (count === 0) return;
|
||||||
|
|
||||||
// Nearest POI per (gridPointId, subcategory).
|
// All POI candidates per grid point — multiple per subcategory for diversity scoring.
|
||||||
const result = new Map<string, Map<string, {
|
const rows: Array<{
|
||||||
|
gpId: string;
|
||||||
|
subcategory: string;
|
||||||
poiId: string;
|
poiId: string;
|
||||||
poiName: string | null;
|
poiName: string | null;
|
||||||
distM: number;
|
distM: number;
|
||||||
timeS: number | null;
|
timeS: number | null;
|
||||||
}>>();
|
}> = [];
|
||||||
for (const gp of gridPoints) result.set(gp.id, new Map());
|
|
||||||
|
|
||||||
const batches: Array<{ id: string; lat: number; lng: number }[]> = [];
|
const batches: Array<{ id: string; lat: number; lng: number }[]> = [];
|
||||||
for (let i = 0; i < gridPoints.length; i += BATCH_SIZE) {
|
for (let i = 0; i < gridPoints.length; i += BATCH_SIZE) {
|
||||||
|
|
@ -141,17 +143,16 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
|
||||||
const knn = gpKnn.get(gp.id);
|
const knn = gpKnn.get(gp.id);
|
||||||
if (!knn || knn.length === 0) continue;
|
if (!knn || knn.length === 0) continue;
|
||||||
|
|
||||||
const subcatMap = result.get(gp.id)!;
|
|
||||||
for (const row of knn) {
|
for (const row of knn) {
|
||||||
if (!subcatMap.has(row.subcategory)) {
|
const idx = targetIdx.get(row.poi_id);
|
||||||
const idx = targetIdx.get(row.poi_id);
|
rows.push({
|
||||||
subcatMap.set(row.subcategory, {
|
gpId: gp.id,
|
||||||
poiId: row.poi_id,
|
subcategory: row.subcategory,
|
||||||
poiName: row.poi_name,
|
poiId: row.poi_id,
|
||||||
distM: row.dist_m,
|
poiName: row.poi_name,
|
||||||
timeS: idx !== undefined ? (matrix[bi]?.[idx] ?? null) : null,
|
distM: row.dist_m,
|
||||||
});
|
timeS: idx !== undefined ? (matrix[bi]?.[idx] ?? null) : null,
|
||||||
}
|
});
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -162,24 +163,25 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
|
||||||
} satisfies JobProgress);
|
} satisfies JobProgress);
|
||||||
});
|
});
|
||||||
|
|
||||||
// Bulk-insert nearest POI per subcategory into grid_poi_details.
|
if (rows.length === 0) return;
|
||||||
const gpIdArr: string[] = [];
|
|
||||||
const subcatArr: string[] = [];
|
|
||||||
const poiIdArr: (string | null)[] = [];
|
|
||||||
const poiNameArr: (string | null)[] = [];
|
|
||||||
const distArr: (number | null)[] = [];
|
|
||||||
const timeArr: (number | null)[] = [];
|
|
||||||
|
|
||||||
for (const [gpId, subcatMap] of result) {
|
// Delete stale rows for this (city, mode, category) before re-inserting.
|
||||||
for (const [subcategory, detail] of subcatMap) {
|
await Promise.resolve(sql`
|
||||||
gpIdArr.push(gpId);
|
DELETE FROM grid_poi_details gpd
|
||||||
subcatArr.push(subcategory);
|
USING grid_points gp
|
||||||
poiIdArr.push(detail.poiId);
|
WHERE gpd.grid_point_id = gp.id
|
||||||
poiNameArr.push(detail.poiName);
|
AND gp.city_slug = ${citySlug}
|
||||||
distArr.push(detail.distM);
|
AND gpd.category = ${category}
|
||||||
timeArr.push(detail.timeS);
|
AND gpd.travel_mode = ${mode}
|
||||||
}
|
`);
|
||||||
}
|
|
||||||
|
// Bulk-insert all POI candidates (multiple per subcategory for diversity scoring).
|
||||||
|
const gpIdArr = rows.map((r) => r.gpId);
|
||||||
|
const subcatArr = rows.map((r) => r.subcategory);
|
||||||
|
const poiIdArr = rows.map((r) => r.poiId);
|
||||||
|
const poiNameArr = rows.map((r) => r.poiName);
|
||||||
|
const distArr = rows.map((r) => r.distM);
|
||||||
|
const timeArr = rows.map((r) => r.timeS);
|
||||||
|
|
||||||
for (let i = 0; i < gpIdArr.length; i += INSERT_CHUNK) {
|
for (let i = 0; i < gpIdArr.length; i += INSERT_CHUNK) {
|
||||||
const end = Math.min(i + INSERT_CHUNK, gpIdArr.length);
|
const end = Math.min(i + INSERT_CHUNK, gpIdArr.length);
|
||||||
|
|
@ -193,7 +195,7 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
|
||||||
${category},
|
${category},
|
||||||
subcat,
|
subcat,
|
||||||
${mode},
|
${mode},
|
||||||
CASE WHEN poi_id IS NULL THEN NULL ELSE poi_id::bigint END,
|
poi_id::bigint,
|
||||||
poi_name,
|
poi_name,
|
||||||
dist,
|
dist,
|
||||||
time_s
|
time_s
|
||||||
|
|
@ -205,13 +207,6 @@ export async function handleComputeRouting(job: Job<ComputeRoutingData>): Promis
|
||||||
${distArr.slice(i, end)}::float8[],
|
${distArr.slice(i, end)}::float8[],
|
||||||
${timeArr.slice(i, end)}::float8[]
|
${timeArr.slice(i, end)}::float8[]
|
||||||
) AS t(gp_id, subcat, poi_id, poi_name, dist, time_s)
|
) AS t(gp_id, subcat, poi_id, poi_name, dist, time_s)
|
||||||
ON CONFLICT (grid_point_id, category, subcategory, travel_mode)
|
|
||||||
DO UPDATE SET
|
|
||||||
nearest_poi_id = EXCLUDED.nearest_poi_id,
|
|
||||||
nearest_poi_name = EXCLUDED.nearest_poi_name,
|
|
||||||
distance_m = EXCLUDED.distance_m,
|
|
||||||
travel_time_s = EXCLUDED.travel_time_s,
|
|
||||||
computed_at = now()
|
|
||||||
`);
|
`);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -185,14 +185,16 @@ export async function handleComputeScores(
|
||||||
WHERE gp.city_slug = ${citySlug}
|
WHERE gp.city_slug = ${citySlug}
|
||||||
),
|
),
|
||||||
fifteen_subcat AS (
|
fifteen_subcat AS (
|
||||||
-- "fifteen" mode: best (lowest) travel time across walking / cycling / transit
|
-- "fifteen" mode: for each unique POI, take the best time across
|
||||||
|
-- walking / cycling / transit so each POI contributes independently
|
||||||
|
-- to the complement-product formula (preserving diversity).
|
||||||
SELECT
|
SELECT
|
||||||
grid_point_id, category, subcategory,
|
grid_point_id, category, subcategory,
|
||||||
'fifteen'::text AS travel_mode,
|
'fifteen'::text AS travel_mode,
|
||||||
MIN(travel_time_s) AS travel_time_s
|
MIN(travel_time_s) AS travel_time_s
|
||||||
FROM base
|
FROM base
|
||||||
WHERE travel_mode IN ('walking', 'cycling', 'transit')
|
WHERE travel_mode IN ('walking', 'cycling', 'transit')
|
||||||
GROUP BY grid_point_id, category, subcategory
|
GROUP BY grid_point_id, category, subcategory, nearest_poi_id
|
||||||
),
|
),
|
||||||
all_subcat AS (
|
all_subcat AS (
|
||||||
SELECT grid_point_id, category, subcategory, travel_mode, travel_time_s FROM base
|
SELECT grid_point_id, category, subcategory, travel_mode, travel_time_s FROM base
|
||||||
|
|
|
||||||
|
|
@ -116,6 +116,16 @@ export async function handleComputeTransit(job: Job<ComputeTransitData>): Promis
|
||||||
|
|
||||||
if (gridPoints.length === 0) return;
|
if (gridPoints.length === 0) return;
|
||||||
|
|
||||||
|
// Delete stale transit rows before recomputing (no ON CONFLICT since multiple
|
||||||
|
// rows per subcategory are allowed with the diversity-scoring schema).
|
||||||
|
await Promise.resolve(sql`
|
||||||
|
DELETE FROM grid_poi_details gpd
|
||||||
|
USING grid_points gp
|
||||||
|
WHERE gpd.grid_point_id = gp.id
|
||||||
|
AND gp.city_slug = ${citySlug}
|
||||||
|
AND gpd.travel_mode = 'transit'
|
||||||
|
`);
|
||||||
|
|
||||||
await job.updateProgress({
|
await job.updateProgress({
|
||||||
stage: "Transit routing",
|
stage: "Transit routing",
|
||||||
pct: 1,
|
pct: 1,
|
||||||
|
|
@ -245,13 +255,7 @@ export async function handleComputeTransit(job: Job<ComputeTransitData>): Promis
|
||||||
${distArr.slice(i, end)}::float8[],
|
${distArr.slice(i, end)}::float8[],
|
||||||
${timeArr.slice(i, end)}::float8[]
|
${timeArr.slice(i, end)}::float8[]
|
||||||
) AS t(gp_id, cat, subcat, poi_id, poi_name, dist, time_s)
|
) AS t(gp_id, cat, subcat, poi_id, poi_name, dist, time_s)
|
||||||
ON CONFLICT (grid_point_id, category, subcategory, travel_mode)
|
-- No ON CONFLICT: stale rows were deleted at job start
|
||||||
DO UPDATE SET
|
|
||||||
nearest_poi_id = EXCLUDED.nearest_poi_id,
|
|
||||||
nearest_poi_name = EXCLUDED.nearest_poi_name,
|
|
||||||
distance_m = EXCLUDED.distance_m,
|
|
||||||
travel_time_s = EXCLUDED.travel_time_s,
|
|
||||||
computed_at = now()
|
|
||||||
`);
|
`);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue