443 lines
16 KiB
TypeScript
443 lines
16 KiB
TypeScript
import type { Job } from "bullmq";
|
||
import { Queue, WaitingChildrenError } from "bullmq";
|
||
import { getSql } from "../db.js";
|
||
import { createBullMQConnection } from "../redis.js";
|
||
import type { JobProgress, ComputeScoresJobData as ComputeScoresData } from "@transportationer/shared";
|
||
import {
|
||
CATEGORY_IDS,
|
||
PROFILES,
|
||
PROFILE_IDS,
|
||
DEFAULT_SUBCATEGORY_WEIGHT,
|
||
} from "@transportationer/shared";
|
||
|
||
// Rows per bulk INSERT into grid_scores: the column arrays below are sliced
// into chunks of this size to keep each statement's parameter payload bounded.
const INSERT_CHUNK = 2000;
|
||
|
||
function subcategoryWeight(profileId: string, subcategory: string): number {
|
||
const weights = PROFILES[profileId as keyof typeof PROFILES]?.subcategoryWeights;
|
||
if (!weights) return DEFAULT_SUBCATEGORY_WEIGHT;
|
||
return weights[subcategory] ?? DEFAULT_SUBCATEGORY_WEIGHT;
|
||
}
|
||
|
||
function sigmoid(t_s: number, threshold_s: number): number {
|
||
return 1 / (1 + Math.exp(4 * (t_s - threshold_s) / threshold_s));
|
||
}
|
||
|
||
function complementProduct(
|
||
subcategoryTimes: Array<{ subcategory: string; timeS: number | null }>,
|
||
threshold_s: number,
|
||
profileId: string,
|
||
): number {
|
||
let logProd = 0;
|
||
let hasAny = false;
|
||
for (const { subcategory, timeS } of subcategoryTimes) {
|
||
const weight = subcategoryWeight(profileId, subcategory);
|
||
if (timeS === null || weight <= 0) continue;
|
||
hasAny = true;
|
||
logProd += Math.log(Math.max(1 - weight * sigmoid(timeS, threshold_s), 1e-10));
|
||
}
|
||
return hasAny ? 1 - Math.exp(logProd) : 0;
|
||
}
|
||
|
||
/**
 * Two-phase orchestrator for accessibility score computation.
 *
 * Phase 1 (first activation, after generate-grid completes):
 *   – Clears stale data.
 *   – Enqueues one `compute-routing` child job per (mode × category) pair.
 *   – Suspends itself via moveToWaitingChildren; BullMQ re-queues it when
 *     all routing children finish.
 *
 * Phase 2 (re-activation after all routing children complete):
 *   – Reads grid_poi_details (populated by the routing jobs).
 *   – Computes weighted complement-product scores for every
 *     (grid_point × mode × category × threshold × profile) combination.
 *   – Bulk-inserts into grid_scores and marks the city ready.
 *
 * @param job   BullMQ job carrying { citySlug, modes, thresholds, … }; its
 *              persisted `routingDispatched` flag distinguishes phase 1 from
 *              phase 2 across re-activations.
 * @param token Worker lock token, required by moveToWaitingChildren in phase 1.
 * @throws WaitingChildrenError at the end of phase 1 — expected control flow
 *         that tells the worker the job is parked, not failed.
 */
export async function handleComputeScores(
  job: Job<ComputeScoresData>,
  token?: string,
): Promise<void> {
  const { citySlug, modes, thresholds } = job.data;
  const sql = getSql();

  // ── Phase 1: dispatch compute-routing children ────────────────────────────
  if (!job.data.routingDispatched) {
    const totalRoutingJobs = modes.length * CATEGORY_IDS.length;
    await job.updateProgress({
      stage: "Computing scores",
      pct: 2,
      message: `Dispatching ${totalRoutingJobs} routing jobs for ${citySlug}…`,
    } satisfies JobProgress);

    // Clear any stale scores from a previous run.
    await Promise.resolve(sql`
      DELETE FROM grid_scores
      USING grid_points gp
      WHERE grid_scores.grid_point_id = gp.id
        AND gp.city_slug = ${citySlug}
    `);
    await Promise.resolve(sql`
      DELETE FROM grid_poi_details
      USING grid_points gp
      WHERE grid_poi_details.grid_point_id = gp.id
        AND gp.city_slug = ${citySlug}
    `);

    // Enqueue one routing child per (mode, category). Each child registers
    // itself to this parent job via opts.parent, so BullMQ tracks completion.
    // Transit is handled by a single compute-transit job (not per-category)
    // since it uses isochrones rather than the matrix API.
    // For NI cities, ingest-boris-ni is also enqueued here so it runs in
    // parallel with the routing jobs rather than sequentially after them.
    const queue = new Queue("pipeline", { connection: createBullMQConnection() });
    try {
      for (const mode of modes) {
        if (mode === "transit") continue; // handled below as a single job
        for (const category of CATEGORY_IDS) {
          await queue.add(
            "compute-routing",
            { type: "compute-routing", citySlug, mode, category },
            {
              attempts: 2,
              backoff: { type: "fixed", delay: 3000 },
              removeOnComplete: { age: 86400 * 7 }, // keep completed children 7 days
              removeOnFail: { age: 86400 * 30 }, // keep failures 30 days for debugging
              parent: {
                id: job.id!,
                // qualifiedName = "bull:pipeline" — the Redis key BullMQ uses
                // to track parent/child relationships.
                queue: queue.qualifiedName,
              },
            },
          );
        }
      }

      // Dispatch transit scoring as a sibling child (one job covers all categories
      // via PostGIS isochrone spatial joins, unlike per-category routing jobs).
      if (modes.includes("transit")) {
        await queue.add(
          "compute-transit",
          { type: "compute-transit", citySlug },
          {
            attempts: 1,
            removeOnComplete: { age: 86400 * 7 },
            removeOnFail: { age: 86400 * 30 },
            parent: { id: job.id!, queue: queue.qualifiedName },
          },
        );
      }

      // Dispatch BORIS NI ingest as a sibling child so it runs during routing.
      if (job.data.ingestBorisNi) {
        await queue.add(
          "ingest-boris-ni",
          { type: "ingest-boris-ni", citySlug },
          {
            attempts: 2,
            backoff: { type: "fixed", delay: 5000 },
            removeOnComplete: { age: 86400 * 7 },
            removeOnFail: { age: 86400 * 30 },
            parent: { id: job.id!, queue: queue.qualifiedName },
          },
        );
      }
    } finally {
      await queue.close();
    }

    // Persist the dispatched flag so phase 2 is triggered on re-activation.
    await job.updateData({ ...job.data, routingDispatched: true });

    // Suspend until all routing children complete.
    // Throwing WaitingChildrenError tells the worker not to mark the job
    // completed — BullMQ will re-activate it once all children finish.
    // NOTE(review): BullMQ's documented flow pattern checks the boolean
    // returned by moveToWaitingChildren and only throws when it is true;
    // throwing unconditionally assumes at least one child was enqueued above —
    // confirm `modes` can never be empty here.
    await job.moveToWaitingChildren(token!);
    throw new WaitingChildrenError();
  }

  // ── Phase 2: aggregate scores from grid_poi_details ──────────────────────
  await job.updateProgress({
    stage: "Computing scores",
    pct: 70,
    message: `All routing complete — computing profile scores…`,
  } satisfies JobProgress);

  // Load all per-subcategory routing results for this city in one query.
  // Ordered by distance so the first row per (gpId, mode, category) is nearest.
  const detailRows = await Promise.resolve(sql<{
    grid_point_id: string;
    category: string;
    subcategory: string;
    travel_mode: string;
    nearest_poi_id: string | null;
    distance_m: number | null;
    travel_time_s: number | null;
  }[]>`
    SELECT
      gpd.grid_point_id::text,
      gpd.category,
      gpd.subcategory,
      gpd.travel_mode,
      gpd.nearest_poi_id::text,
      gpd.distance_m,
      gpd.travel_time_s
    FROM grid_poi_details gpd
    JOIN grid_points gp ON gp.id = gpd.grid_point_id
    WHERE gp.city_slug = ${citySlug}
    ORDER BY gpd.grid_point_id, gpd.travel_mode, gpd.category, gpd.distance_m
  `);

  // Build in-memory structure keyed by "gpId:mode:category".
  type GroupEntry = {
    gpId: string;
    mode: string;
    category: string;
    // One entry per detail row: travel time to the nearest POI of each
    // subcategory (null = unreachable within routing limits).
    subcategoryTimes: Array<{ subcategory: string; timeS: number | null }>;
    // Overall nearest POI across all subcategories of this category.
    nearestPoiId: string | null;
    nearestDistM: number | null;
    nearestTimeS: number | null;
  };
  const groups = new Map<string, GroupEntry>();

  for (const row of detailRows) {
    const key = `${row.grid_point_id}:${row.travel_mode}:${row.category}`;
    let entry = groups.get(key);
    if (!entry) {
      entry = {
        gpId: row.grid_point_id,
        mode: row.travel_mode,
        category: row.category,
        subcategoryTimes: [],
        nearestPoiId: null,
        nearestDistM: null,
        nearestTimeS: null,
      };
      groups.set(key, entry);
    }
    entry.subcategoryTimes.push({ subcategory: row.subcategory, timeS: row.travel_time_s });
    // Track the overall nearest POI for this category (minimum distance).
    if (
      row.distance_m !== null &&
      (entry.nearestDistM === null || row.distance_m < entry.nearestDistM)
    ) {
      entry.nearestPoiId = row.nearest_poi_id;
      entry.nearestDistM = row.distance_m;
      entry.nearestTimeS = row.travel_time_s;
    }
  }

  // Synthesize "multimodal" groups: for each (gpId, category, subcategory),
  // take the minimum travel time across walking, cycling and transit so that
  // a destination reachable by any of those modes counts as accessible.
  // Driving is intentionally excluded (not a 15-min city metric).
  const MULTIMODAL_MODES = new Set(["walking", "cycling", "transit"]); // modes combined into "fifteen"
  const mmAccumulator = new Map<string, {
    gpId: string;
    category: string;
    subTimes: Map<string, number | null>; // subcategory → min time across modes
    nearestDistM: number | null;
    nearestPoiId: string | null;
    nearestTimeS: number | null;
  }>();

  for (const entry of groups.values()) {
    if (!MULTIMODAL_MODES.has(entry.mode)) continue;
    const mmKey = `${entry.gpId}:${entry.category}`;
    if (!mmAccumulator.has(mmKey)) {
      mmAccumulator.set(mmKey, {
        gpId: entry.gpId,
        category: entry.category,
        subTimes: new Map(),
        nearestDistM: null,
        nearestPoiId: null,
        nearestTimeS: null,
      });
    }
    const acc = mmAccumulator.get(mmKey)!;
    // Track nearest POI across all multimodal modes
    if (entry.nearestDistM !== null && (acc.nearestDistM === null || entry.nearestDistM < acc.nearestDistM)) {
      acc.nearestDistM = entry.nearestDistM;
      acc.nearestPoiId = entry.nearestPoiId;
      acc.nearestTimeS = entry.nearestTimeS;
    }
    // For each subcategory, keep the minimum travel time across modes
    for (const { subcategory, timeS } of entry.subcategoryTimes) {
      const existing = acc.subTimes.get(subcategory);
      if (existing === undefined) {
        // First sighting of this subcategory (may be null = unreachable).
        acc.subTimes.set(subcategory, timeS);
      } else if (existing === null && timeS !== null) {
        // A reachable time always beats "unreachable".
        acc.subTimes.set(subcategory, timeS);
      } else if (timeS !== null && existing !== null && timeS < existing) {
        acc.subTimes.set(subcategory, timeS);
      }
    }
  }

  // Materialize the synthesized groups under the pseudo-mode "fifteen" so the
  // scoring loop below treats them like any real mode.
  for (const acc of mmAccumulator.values()) {
    const key = `${acc.gpId}:fifteen:${acc.category}`;
    groups.set(key, {
      gpId: acc.gpId,
      mode: "fifteen",
      category: acc.category,
      subcategoryTimes: Array.from(acc.subTimes.entries()).map(([subcategory, timeS]) => ({ subcategory, timeS })),
      nearestPoiId: acc.nearestPoiId,
      nearestDistM: acc.nearestDistM,
      nearestTimeS: acc.nearestTimeS,
    });
  }

  // Compute and insert scores for every threshold × profile combination.
  // Each threshold writes to distinct rows (threshold_min is part of the PK),
  // so all thresholds can be processed concurrently without conflicts.
  // Node.js is single-threaded so completedThresholds++ is safe.
  let completedThresholds = 0;

  await Promise.all(thresholds.map(async (thresholdMin) => {
    const threshold_s = thresholdMin * 60;

    // Column-oriented buffers feeding the unnest() bulk insert below —
    // index i across all arrays describes one grid_scores row.
    const gpIdArr: string[] = [];
    const catArr: string[] = [];
    const modeArr: string[] = [];
    const profileArr: string[] = [];
    const poiIdArr: (string | null)[] = [];
    const distArr: (number | null)[] = [];
    const timeArr: (number | null)[] = [];
    const scoreArr: number[] = [];

    for (const entry of groups.values()) {
      for (const profileId of PROFILE_IDS) {
        gpIdArr.push(entry.gpId);
        catArr.push(entry.category);
        modeArr.push(entry.mode);
        profileArr.push(profileId);
        poiIdArr.push(entry.nearestPoiId);
        distArr.push(entry.nearestDistM);
        timeArr.push(entry.nearestTimeS);
        scoreArr.push(complementProduct(entry.subcategoryTimes, threshold_s, profileId));
      }
    }

    // Chunks within a threshold stay sequential — with all thresholds running
    // concurrently we already have up to thresholds.length parallel INSERT
    // streams, which saturates the connection pool without overwhelming it.
    for (let i = 0; i < gpIdArr.length; i += INSERT_CHUNK) {
      const end = Math.min(i + INSERT_CHUNK, gpIdArr.length);
      await Promise.resolve(sql`
        INSERT INTO grid_scores (
          grid_point_id, category, travel_mode, threshold_min, profile,
          nearest_poi_id, distance_m, travel_time_s, score
        )
        SELECT
          gp_id::bigint,
          cat,
          mode_val,
          ${thresholdMin}::int,
          prof,
          CASE WHEN poi_id IS NULL THEN NULL ELSE poi_id::bigint END,
          dist,
          time_s,
          score_val
        FROM unnest(
          ${gpIdArr.slice(i, end)}::text[],
          ${catArr.slice(i, end)}::text[],
          ${modeArr.slice(i, end)}::text[],
          ${profileArr.slice(i, end)}::text[],
          ${poiIdArr.slice(i, end)}::text[],
          ${distArr.slice(i, end)}::float8[],
          ${timeArr.slice(i, end)}::float8[],
          ${scoreArr.slice(i, end)}::float8[]
        ) AS t(gp_id, cat, mode_val, prof, poi_id, dist, time_s, score_val)
        ON CONFLICT (grid_point_id, category, travel_mode, threshold_min, profile)
        DO UPDATE SET
          nearest_poi_id = EXCLUDED.nearest_poi_id,
          distance_m = EXCLUDED.distance_m,
          travel_time_s = EXCLUDED.travel_time_s,
          score = EXCLUDED.score,
          computed_at = now()
      `);
    }

    completedThresholds++;
    await job.updateProgress({
      stage: "Computing scores",
      // Progress climbs from 70 to 98 as thresholds finish.
      pct: 70 + Math.round((completedThresholds / thresholds.length) * 28),
      message: `${completedThresholds} / ${thresholds.length} thresholds done…`,
    } satisfies JobProgress);
  }));

  // All scores written — flip the city to ready.
  await Promise.resolve(sql`
    UPDATE cities SET status = 'ready', last_ingested = now()
    WHERE slug = ${citySlug}
  `);

  // Compute hidden gem scores per grid point for cities that have estate value zones.
  // Each grid point looks up the nearest zone's price, ranks it within its accessibility
  // decile, and stores hidden_gem_score = composite_accessibility × (1 − price_rank).
  const gemThreshold = thresholds.includes(15) ? 15 : thresholds[0];
  // Count only the latest year's zones so historical rows don't skew the check.
  const [{ n }] = await Promise.resolve(sql<{ n: number }[]>`
    SELECT count(*)::int AS n
    FROM estate_value_zones ez
    WHERE ez.city_slug = ${citySlug}
      AND ez.value_eur_m2 IS NOT NULL
      AND (ez.year IS NULL OR ez.year = (
        SELECT MAX(year) FROM estate_value_zones
        WHERE city_slug = ${citySlug} AND source = 'boris-ni' AND year IS NOT NULL
      ))
  `);
  if (n > 0) {
    await job.updateProgress({
      stage: "Computing scores",
      pct: 99,
      message: "Computing hidden gem scores…",
    } satisfies JobProgress);
    await Promise.resolve(sql`
      WITH latest_year AS (
        SELECT MAX(year) AS yr
        FROM estate_value_zones
        WHERE city_slug = ${citySlug} AND source = 'boris-ni'
      ),
      grid_with_price AS (
        -- For each grid point, get composite accessibility score and nearest latest-year zone price
        SELECT
          gp.id,
          COALESCE(AVG(gs.score), 0) AS composite_score,
          ROUND(COALESCE(AVG(gs.score), 0) * 10)::int AS score_decile,
          (
            SELECT ez.value_eur_m2
            FROM estate_value_zones ez, latest_year
            WHERE ez.city_slug = ${citySlug}
              AND ez.value_eur_m2 IS NOT NULL
              AND (ez.year IS NULL OR ez.year = latest_year.yr)
            ORDER BY gp.geom <-> ez.geom
            LIMIT 1
          ) AS value_eur_m2
        FROM grid_points gp
        JOIN grid_scores gs ON gs.grid_point_id = gp.id
        WHERE gp.city_slug = ${citySlug}
          AND gs.travel_mode = 'walking'
          AND gs.threshold_min = ${gemThreshold}
          AND gs.profile = 'universal'
        GROUP BY gp.id
      ),
      ranked AS (
        SELECT
          id,
          composite_score,
          PERCENT_RANK() OVER (PARTITION BY score_decile ORDER BY value_eur_m2) AS price_rank
        FROM grid_with_price
        WHERE value_eur_m2 IS NOT NULL
      )
      UPDATE grid_points gp
      SET hidden_gem_score = (ranked.composite_score * (1.0 - ranked.price_rank))::float4
      FROM ranked WHERE gp.id = ranked.id
    `);
  }

  await job.updateProgress({
    stage: "Computing scores",
    pct: 100,
    message: `All scores computed for ${citySlug}`,
  } satisfies JobProgress);
}
|