fix: do more in SQL, fix some worker behaviour and scheduling, add boris hb
This commit is contained in:
parent
a97ced96ce
commit
43d0dac789
13 changed files with 605 additions and 277 deletions
53
apps/web/app/api/admin/cities/[slug]/rerun-scores/route.ts
Normal file
53
apps/web/app/api/admin/cities/[slug]/rerun-scores/route.ts
Normal file
|
|
@ -0,0 +1,53 @@
|
||||||
|
import { NextRequest, NextResponse } from "next/server";
|
||||||
|
import { sql } from "@/lib/db";
|
||||||
|
import { getPipelineQueue } from "@/lib/queue";
|
||||||
|
import { VALID_THRESHOLDS } from "@transportationer/shared";
|
||||||
|
|
||||||
|
export const runtime = "nodejs";
|
||||||
|
|
||||||
|
/**
|
||||||
|
* POST /api/admin/cities/[slug]/rerun-scores
|
||||||
|
*
|
||||||
|
* Re-enqueues a compute-scores job with routingDispatched=true, which skips
|
||||||
|
* Phase 1 (routing dispatch) and goes straight to Phase 2 (score aggregation).
|
||||||
|
* Use this when compute-routing jobs already completed but compute-scores stalled
|
||||||
|
* or failed before finishing.
|
||||||
|
*/
|
||||||
|
export async function POST(
|
||||||
|
_req: NextRequest,
|
||||||
|
{ params }: { params: Promise<{ slug: string }> },
|
||||||
|
) {
|
||||||
|
const { slug } = await params;
|
||||||
|
|
||||||
|
const rows = await Promise.resolve(sql<{ slug: string }[]>`
|
||||||
|
SELECT slug FROM cities WHERE slug = ${slug}
|
||||||
|
`);
|
||||||
|
|
||||||
|
if (rows.length === 0) {
|
||||||
|
return NextResponse.json({ error: "City not found" }, { status: 404 });
|
||||||
|
}
|
||||||
|
|
||||||
|
await Promise.resolve(sql`
|
||||||
|
UPDATE cities SET status = 'pending', error_message = NULL WHERE slug = ${slug}
|
||||||
|
`);
|
||||||
|
|
||||||
|
const queue = getPipelineQueue();
|
||||||
|
const job = await queue.add(
|
||||||
|
"compute-scores",
|
||||||
|
{
|
||||||
|
type: "compute-scores",
|
||||||
|
citySlug: slug,
|
||||||
|
modes: ["walking", "cycling", "driving", "transit"],
|
||||||
|
thresholds: [...VALID_THRESHOLDS],
|
||||||
|
routingDispatched: true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
attempts: 1,
|
||||||
|
removeOnComplete: { age: 86400 * 7 },
|
||||||
|
removeOnFail: { age: 86400 * 30 },
|
||||||
|
jobId: `compute-scores.${slug}`,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
return NextResponse.json({ citySlug: slug, jobId: job.id }, { status: 202 });
|
||||||
|
}
|
||||||
|
|
@ -165,7 +165,7 @@ export async function POST(req: NextRequest) {
|
||||||
geofabrikUrl,
|
geofabrikUrl,
|
||||||
resolutionM: resolutionM as number,
|
resolutionM: resolutionM as number,
|
||||||
},
|
},
|
||||||
JOB_OPTIONS["refresh-city"],
|
{ ...JOB_OPTIONS["refresh-city"], jobId: `refresh-city.${slug}` },
|
||||||
);
|
);
|
||||||
|
|
||||||
// Invalidate city list cache
|
// Invalidate city list cache
|
||||||
|
|
|
||||||
|
|
@ -29,7 +29,7 @@ export async function POST(
|
||||||
const job = await queue.add(
|
const job = await queue.add(
|
||||||
"refresh-city",
|
"refresh-city",
|
||||||
{ type: "refresh-city", citySlug: slug, geofabrikUrl },
|
{ type: "refresh-city", citySlug: slug, geofabrikUrl },
|
||||||
JOB_OPTIONS["refresh-city"],
|
{ ...JOB_OPTIONS["refresh-city"], jobId: `refresh-city.${slug}` },
|
||||||
);
|
);
|
||||||
|
|
||||||
return NextResponse.json({ citySlug: slug, jobId: job.id }, { status: 202 });
|
return NextResponse.json({ citySlug: slug, jobId: job.id }, { status: 202 });
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
import { NextRequest } from "next/server";
|
import { NextRequest } from "next/server";
|
||||||
import { Job } from "bullmq";
|
import { Job } from "bullmq";
|
||||||
import { getPipelineQueue, getValhallaQueue, getValhallaTransitQueue } from "@/lib/queue";
|
import { getPipelineQueue, getDownloadQueue, getValhallaQueue, getValhallaTransitQueue } from "@/lib/queue";
|
||||||
import type { PipelineJobData, JobProgress, ComputeScoresJobData, RefreshCityJobData } from "@/lib/queue";
|
import type { PipelineJobData, JobProgress, ComputeScoresJobData, RefreshCityJobData } from "@/lib/queue";
|
||||||
import type { SSEEvent, RoutingDetail } from "@transportationer/shared";
|
import type { SSEEvent, RoutingDetail } from "@transportationer/shared";
|
||||||
import { CATEGORY_IDS } from "@transportationer/shared";
|
import { CATEGORY_IDS } from "@transportationer/shared";
|
||||||
|
|
@ -48,6 +48,7 @@ export async function GET(
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
const downloadQueue = getDownloadQueue();
|
||||||
const valhallaQueue = getValhallaQueue();
|
const valhallaQueue = getValhallaQueue();
|
||||||
const valhallaTransitQueue = getValhallaTransitQueue();
|
const valhallaTransitQueue = getValhallaTransitQueue();
|
||||||
|
|
||||||
|
|
@ -78,13 +79,15 @@ export async function GET(
|
||||||
}
|
}
|
||||||
|
|
||||||
// 1. Fetch active jobs and waiting-children jobs in parallel.
|
// 1. Fetch active jobs and waiting-children jobs in parallel.
|
||||||
const [pipelineActive, valhallaActive, valhallaTransitActive, waitingChildren] = await Promise.all([
|
const [pipelineActive, downloadActive, valhallaActive, valhallaTransitActive, waitingChildren] = await Promise.all([
|
||||||
queue.getActive(0, 100),
|
queue.getActive(0, 100),
|
||||||
|
downloadQueue.getActive(0, 100),
|
||||||
valhallaQueue.getActive(0, 100),
|
valhallaQueue.getActive(0, 100),
|
||||||
valhallaTransitQueue.getActive(0, 100),
|
valhallaTransitQueue.getActive(0, 100),
|
||||||
queue.getWaitingChildren(0, 200),
|
queue.getWaitingChildren(0, 200),
|
||||||
]);
|
]);
|
||||||
const allValhallaActive = [...valhallaActive, ...valhallaTransitActive];
|
const allValhallaActive = [...valhallaActive, ...valhallaTransitActive];
|
||||||
|
void downloadActive; // visible in job overview; not used for SSE progress
|
||||||
|
|
||||||
// 1a. Parallel routing phase: compute-scores is waiting for its routing
|
// 1a. Parallel routing phase: compute-scores is waiting for its routing
|
||||||
// children to finish. Report aggregate progress instead of one job's pct.
|
// children to finish. Report aggregate progress instead of one job's pct.
|
||||||
|
|
|
||||||
|
|
@ -1,37 +1,45 @@
|
||||||
import { NextResponse } from "next/server";
|
import { NextResponse } from "next/server";
|
||||||
import { getPipelineQueue, getValhallaQueue, getValhallaTransitQueue } from "@/lib/queue";
|
import { getPipelineQueue, getDownloadQueue, getValhallaQueue, getValhallaTransitQueue } from "@/lib/queue";
|
||||||
import type { JobSummary } from "@transportationer/shared";
|
import type { JobSummary } from "@transportationer/shared";
|
||||||
|
|
||||||
export const runtime = "nodejs";
|
export const runtime = "nodejs";
|
||||||
|
|
||||||
export async function GET() {
|
export async function GET() {
|
||||||
const [pQueue, vQueue, vtQueue] = [getPipelineQueue(), getValhallaQueue(), getValhallaTransitQueue()];
|
const [pQueue, dQueue, vQueue, vtQueue] = [getPipelineQueue(), getDownloadQueue(), getValhallaQueue(), getValhallaTransitQueue()];
|
||||||
|
|
||||||
const [
|
const [
|
||||||
pWaiting, pWaitingChildren, pActive, pCompleted, pFailed,
|
pWaiting, pWaitingChildren, pActive, pCompleted, pFailed,
|
||||||
vWaiting, vActive, vCompleted, vFailed,
|
dWaiting, dWaitingChildren, dActive, dCompleted, dFailed,
|
||||||
vtWaiting, vtActive, vtCompleted, vtFailed,
|
vWaiting, vWaitingChildren, vActive, vCompleted, vFailed,
|
||||||
|
vtWaiting, vtWaitingChildren, vtActive, vtCompleted, vtFailed,
|
||||||
] = await Promise.all([
|
] = await Promise.all([
|
||||||
pQueue.getWaiting(0, 20),
|
pQueue.getWaiting(0, 20),
|
||||||
pQueue.getWaitingChildren(0, 20),
|
pQueue.getWaitingChildren(0, 20),
|
||||||
pQueue.getActive(0, 20),
|
pQueue.getActive(0, 20),
|
||||||
pQueue.getCompleted(0, 20),
|
pQueue.getCompleted(0, 20),
|
||||||
pQueue.getFailed(0, 20),
|
pQueue.getFailed(0, 20),
|
||||||
|
dQueue.getWaiting(0, 20),
|
||||||
|
dQueue.getWaitingChildren(0, 20),
|
||||||
|
dQueue.getActive(0, 20),
|
||||||
|
dQueue.getCompleted(0, 20),
|
||||||
|
dQueue.getFailed(0, 20),
|
||||||
vQueue.getWaiting(0, 20),
|
vQueue.getWaiting(0, 20),
|
||||||
|
vQueue.getWaitingChildren(0, 20),
|
||||||
vQueue.getActive(0, 20),
|
vQueue.getActive(0, 20),
|
||||||
vQueue.getCompleted(0, 20),
|
vQueue.getCompleted(0, 20),
|
||||||
vQueue.getFailed(0, 20),
|
vQueue.getFailed(0, 20),
|
||||||
vtQueue.getWaiting(0, 20),
|
vtQueue.getWaiting(0, 20),
|
||||||
|
vtQueue.getWaitingChildren(0, 20),
|
||||||
vtQueue.getActive(0, 20),
|
vtQueue.getActive(0, 20),
|
||||||
vtQueue.getCompleted(0, 20),
|
vtQueue.getCompleted(0, 20),
|
||||||
vtQueue.getFailed(0, 20),
|
vtQueue.getFailed(0, 20),
|
||||||
]);
|
]);
|
||||||
|
|
||||||
const waitingChildren = [...pWaitingChildren];
|
const waitingChildren = [...pWaitingChildren, ...dWaitingChildren, ...vWaitingChildren, ...vtWaitingChildren];
|
||||||
const waiting = [...pWaiting, ...vWaiting, ...vtWaiting];
|
const waiting = [...pWaiting, ...dWaiting, ...vWaiting, ...vtWaiting];
|
||||||
const active = [...pActive, ...vActive, ...vtActive];
|
const active = [...pActive, ...dActive, ...vActive, ...vtActive];
|
||||||
const completed = [...pCompleted, ...vCompleted, ...vtCompleted];
|
const completed = [...pCompleted, ...dCompleted, ...vCompleted, ...vtCompleted];
|
||||||
const failed = [...pFailed, ...vFailed, ...vtFailed];
|
const failed = [...pFailed, ...dFailed, ...vFailed, ...vtFailed];
|
||||||
|
|
||||||
const all = [...active, ...waitingChildren, ...waiting, ...completed, ...failed];
|
const all = [...active, ...waitingChildren, ...waiting, ...completed, ...failed];
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -25,6 +25,9 @@ declare global {
|
||||||
var __pipelineQueue: Queue<any> | undefined;
|
var __pipelineQueue: Queue<any> | undefined;
|
||||||
// eslint-disable-next-line no-var
|
// eslint-disable-next-line no-var
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
var __downloadQueue: Queue<any> | undefined;
|
||||||
|
// eslint-disable-next-line no-var
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
var __valhallaQueue: Queue<any> | undefined;
|
var __valhallaQueue: Queue<any> | undefined;
|
||||||
// eslint-disable-next-line no-var
|
// eslint-disable-next-line no-var
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
|
@ -46,6 +49,22 @@ export function getPipelineQueue(): Queue<any> {
|
||||||
return globalThis.__pipelineQueue;
|
return globalThis.__pipelineQueue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/** Queue for download-pbf jobs — concurrency 1 in the worker prevents parallel downloads. */
|
||||||
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
|
export function getDownloadQueue(): Queue<any> {
|
||||||
|
if (!globalThis.__downloadQueue) {
|
||||||
|
globalThis.__downloadQueue = new Queue("download", {
|
||||||
|
connection: createBullMQConnection(),
|
||||||
|
defaultJobOptions: {
|
||||||
|
attempts: 2,
|
||||||
|
removeOnComplete: { age: 86400 * 7 },
|
||||||
|
removeOnFail: { age: 86400 * 30 },
|
||||||
|
},
|
||||||
|
});
|
||||||
|
}
|
||||||
|
return globalThis.__downloadQueue;
|
||||||
|
}
|
||||||
|
|
||||||
/** Queue for build-valhalla jobs, processed by the valhalla-worker container. */
|
/** Queue for build-valhalla jobs, processed by the valhalla-worker container. */
|
||||||
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
||||||
export function getValhallaQueue(): Queue<any> {
|
export function getValhallaQueue(): Queue<any> {
|
||||||
|
|
|
||||||
|
|
@ -32,6 +32,8 @@ export interface ComputeScoresJobData {
|
||||||
routingDispatched?: boolean;
|
routingDispatched?: boolean;
|
||||||
/** When true, ingest-boris-ni is dispatched in Phase 1 to run alongside routing jobs. */
|
/** When true, ingest-boris-ni is dispatched in Phase 1 to run alongside routing jobs. */
|
||||||
ingestBorisNi?: boolean;
|
ingestBorisNi?: boolean;
|
||||||
|
/** When true, ingest-boris-hb is dispatched in Phase 1 to run alongside routing jobs. */
|
||||||
|
ingestBorisHb?: boolean;
|
||||||
}
|
}
|
||||||
|
|
||||||
export interface ComputeRoutingJobData {
|
export interface ComputeRoutingJobData {
|
||||||
|
|
@ -71,6 +73,11 @@ export interface IngestBorisNiJobData {
|
||||||
citySlug: string;
|
citySlug: string;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
export interface IngestBorisHbJobData {
|
||||||
|
type: "ingest-boris-hb";
|
||||||
|
citySlug: string;
|
||||||
|
}
|
||||||
|
|
||||||
export interface DownloadGtfsDeJobData {
|
export interface DownloadGtfsDeJobData {
|
||||||
type: "download-gtfs-de";
|
type: "download-gtfs-de";
|
||||||
url: string;
|
url: string;
|
||||||
|
|
@ -94,6 +101,7 @@ export type PipelineJobData =
|
||||||
| BuildValhallaJobData
|
| BuildValhallaJobData
|
||||||
| RefreshCityJobData
|
| RefreshCityJobData
|
||||||
| IngestBorisNiJobData
|
| IngestBorisNiJobData
|
||||||
|
| IngestBorisHbJobData
|
||||||
| DownloadGtfsDeJobData
|
| DownloadGtfsDeJobData
|
||||||
| ComputeTransitJobData;
|
| ComputeTransitJobData;
|
||||||
|
|
||||||
|
|
@ -154,4 +162,10 @@ export const JOB_OPTIONS: Record<PipelineJobData["type"], object> = {
|
||||||
removeOnComplete: { age: 86400 * 7 },
|
removeOnComplete: { age: 86400 * 7 },
|
||||||
removeOnFail: { age: 86400 * 30 },
|
removeOnFail: { age: 86400 * 30 },
|
||||||
},
|
},
|
||||||
|
"ingest-boris-hb": {
|
||||||
|
attempts: 2,
|
||||||
|
backoff: { type: "fixed", delay: 5000 },
|
||||||
|
removeOnComplete: { age: 86400 * 7 },
|
||||||
|
removeOnFail: { age: 86400 * 30 },
|
||||||
|
},
|
||||||
};
|
};
|
||||||
|
|
|
||||||
|
|
@ -9,10 +9,40 @@ import { handleComputeScores } from "./jobs/compute-scores.js";
|
||||||
import { handleComputeRouting } from "./jobs/compute-routing.js";
|
import { handleComputeRouting } from "./jobs/compute-routing.js";
|
||||||
import { handleRefreshCity } from "./jobs/refresh-city.js";
|
import { handleRefreshCity } from "./jobs/refresh-city.js";
|
||||||
import { handleIngestBorisNi } from "./jobs/ingest-boris-ni.js";
|
import { handleIngestBorisNi } from "./jobs/ingest-boris-ni.js";
|
||||||
|
import { handleIngestBorisHb } from "./jobs/ingest-boris-hb.js";
|
||||||
import { handleComputeTransit } from "./jobs/compute-transit.js";
|
import { handleComputeTransit } from "./jobs/compute-transit.js";
|
||||||
|
|
||||||
console.log("[worker] Starting Transportationer pipeline worker…");
|
console.log("[worker] Starting Transportationer pipeline worker…");
|
||||||
|
|
||||||
|
// Dedicated download worker — concurrency 1 ensures at most one PBF download
|
||||||
|
// runs at a time. The handler is idempotent (skips if file already on disk),
|
||||||
|
// so the three parallel download-pbf children from a single city refresh
|
||||||
|
// execute sequentially: the first downloads, the other two skip immediately.
|
||||||
|
const downloadWorker = new Worker<PipelineJobData>(
|
||||||
|
"download",
|
||||||
|
async (job: Job<PipelineJobData>) => {
|
||||||
|
if (job.data.type === "download-pbf") return handleDownloadPbf(job as Job<any>);
|
||||||
|
throw new Error(`Unexpected job type on download queue: ${(job.data as any).type}`);
|
||||||
|
},
|
||||||
|
{
|
||||||
|
connection: createBullMQConnection(),
|
||||||
|
concurrency: 1,
|
||||||
|
lockDuration: 300_000,
|
||||||
|
lockRenewTime: 15_000,
|
||||||
|
maxStalledCount: 3,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
|
downloadWorker.on("completed", (job) =>
|
||||||
|
console.log(`[download-worker] ✓ Job ${job.id} (${job.data.type}) completed`),
|
||||||
|
);
|
||||||
|
downloadWorker.on("failed", (job, err) =>
|
||||||
|
console.error(`[download-worker] ✗ Job ${job?.id} failed:`, err.message),
|
||||||
|
);
|
||||||
|
downloadWorker.on("active", (job) =>
|
||||||
|
console.log(`[download-worker] → Job ${job.id} (${job.data.type}) started`),
|
||||||
|
);
|
||||||
|
|
||||||
const worker = new Worker<PipelineJobData>(
|
const worker = new Worker<PipelineJobData>(
|
||||||
"pipeline",
|
"pipeline",
|
||||||
async (job: Job<PipelineJobData>, token?: string) => {
|
async (job: Job<PipelineJobData>, token?: string) => {
|
||||||
|
|
@ -33,6 +63,8 @@ const worker = new Worker<PipelineJobData>(
|
||||||
return handleRefreshCity(job as Job<any>);
|
return handleRefreshCity(job as Job<any>);
|
||||||
case "ingest-boris-ni":
|
case "ingest-boris-ni":
|
||||||
return handleIngestBorisNi(job as Job<any>);
|
return handleIngestBorisNi(job as Job<any>);
|
||||||
|
case "ingest-boris-hb":
|
||||||
|
return handleIngestBorisHb(job as Job<any>);
|
||||||
case "compute-transit":
|
case "compute-transit":
|
||||||
return handleComputeTransit(job as Job<any>);
|
return handleComputeTransit(job as Job<any>);
|
||||||
default:
|
default:
|
||||||
|
|
@ -48,6 +80,7 @@ const worker = new Worker<PipelineJobData>(
|
||||||
concurrency: 8,
|
concurrency: 8,
|
||||||
lockDuration: 300_000, // 5 minutes — download jobs can be slow
|
lockDuration: 300_000, // 5 minutes — download jobs can be slow
|
||||||
lockRenewTime: 15_000, // Renew every 15s
|
lockRenewTime: 15_000, // Renew every 15s
|
||||||
|
maxStalledCount: 3, // Allow up to 3 stalls before failing (large city jobs can be slow)
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
|
|
@ -85,7 +118,7 @@ worker.on("error", (err) => {
|
||||||
|
|
||||||
const shutdown = async () => {
|
const shutdown = async () => {
|
||||||
console.log("[worker] Shutting down gracefully…");
|
console.log("[worker] Shutting down gracefully…");
|
||||||
await worker.close();
|
await Promise.all([worker.close(), downloadWorker.close()]);
|
||||||
process.exit(0);
|
process.exit(0);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -13,7 +13,7 @@ export type ComputeRoutingData = {
|
||||||
/** Number of nearest POI candidates per grid point. */
|
/** Number of nearest POI candidates per grid point. */
|
||||||
const K = 6;
|
const K = 6;
|
||||||
/** Grid points per Valhalla matrix call. */
|
/** Grid points per Valhalla matrix call. */
|
||||||
const BATCH_SIZE = 20;
|
const BATCH_SIZE = 5;
|
||||||
/** Concurrent Valhalla calls within this job. */
|
/** Concurrent Valhalla calls within this job. */
|
||||||
const BATCH_CONCURRENCY = 4;
|
const BATCH_CONCURRENCY = 4;
|
||||||
/** Rows per INSERT. */
|
/** Rows per INSERT. */
|
||||||
|
|
|
||||||
|
|
@ -6,37 +6,9 @@ import type { JobProgress, ComputeScoresJobData as ComputeScoresData } from "@tr
|
||||||
import {
|
import {
|
||||||
CATEGORY_IDS,
|
CATEGORY_IDS,
|
||||||
PROFILES,
|
PROFILES,
|
||||||
PROFILE_IDS,
|
|
||||||
DEFAULT_SUBCATEGORY_WEIGHT,
|
DEFAULT_SUBCATEGORY_WEIGHT,
|
||||||
} from "@transportationer/shared";
|
} from "@transportationer/shared";
|
||||||
|
|
||||||
const INSERT_CHUNK = 2000;
|
|
||||||
|
|
||||||
function subcategoryWeight(profileId: string, subcategory: string): number {
|
|
||||||
const weights = PROFILES[profileId as keyof typeof PROFILES]?.subcategoryWeights;
|
|
||||||
if (!weights) return DEFAULT_SUBCATEGORY_WEIGHT;
|
|
||||||
return weights[subcategory] ?? DEFAULT_SUBCATEGORY_WEIGHT;
|
|
||||||
}
|
|
||||||
|
|
||||||
function sigmoid(t_s: number, threshold_s: number): number {
|
|
||||||
return 1 / (1 + Math.exp(4 * (t_s - threshold_s) / threshold_s));
|
|
||||||
}
|
|
||||||
|
|
||||||
function complementProduct(
|
|
||||||
subcategoryTimes: Array<{ subcategory: string; timeS: number | null }>,
|
|
||||||
threshold_s: number,
|
|
||||||
profileId: string,
|
|
||||||
): number {
|
|
||||||
let logProd = 0;
|
|
||||||
let hasAny = false;
|
|
||||||
for (const { subcategory, timeS } of subcategoryTimes) {
|
|
||||||
const weight = subcategoryWeight(profileId, subcategory);
|
|
||||||
if (timeS === null || weight <= 0) continue;
|
|
||||||
hasAny = true;
|
|
||||||
logProd += Math.log(Math.max(1 - weight * sigmoid(timeS, threshold_s), 1e-10));
|
|
||||||
}
|
|
||||||
return hasAny ? 1 - Math.exp(logProd) : 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Two-phase orchestrator for accessibility score computation.
|
* Two-phase orchestrator for accessibility score computation.
|
||||||
|
|
@ -91,8 +63,25 @@ export async function handleComputeScores(
|
||||||
// parallel with the routing jobs rather than sequentially after them.
|
// parallel with the routing jobs rather than sequentially after them.
|
||||||
const queue = new Queue("pipeline", { connection: createBullMQConnection() });
|
const queue = new Queue("pipeline", { connection: createBullMQConnection() });
|
||||||
try {
|
try {
|
||||||
|
// Dispatch transit scoring first so it starts immediately while the
|
||||||
|
// compute-routing jobs queue up behind it (transit is the slowest leg).
|
||||||
|
if (modes.includes("transit")) {
|
||||||
|
await queue.add(
|
||||||
|
"compute-transit",
|
||||||
|
{ type: "compute-transit", citySlug },
|
||||||
|
{
|
||||||
|
attempts: 1,
|
||||||
|
removeOnComplete: { age: 86400 * 7 },
|
||||||
|
removeOnFail: { age: 86400 * 30 },
|
||||||
|
jobId: `compute-transit.${citySlug}`,
|
||||||
|
parent: { id: job.id!, queue: queue.qualifiedName },
|
||||||
|
ignoreDependencyOnFailure: true,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
for (const mode of modes) {
|
for (const mode of modes) {
|
||||||
if (mode === "transit") continue; // handled below as a single job
|
if (mode === "transit") continue; // already dispatched above
|
||||||
for (const category of CATEGORY_IDS) {
|
for (const category of CATEGORY_IDS) {
|
||||||
await queue.add(
|
await queue.add(
|
||||||
"compute-routing",
|
"compute-routing",
|
||||||
|
|
@ -102,10 +91,9 @@ export async function handleComputeScores(
|
||||||
backoff: { type: "fixed", delay: 3000 },
|
backoff: { type: "fixed", delay: 3000 },
|
||||||
removeOnComplete: { age: 86400 * 7 },
|
removeOnComplete: { age: 86400 * 7 },
|
||||||
removeOnFail: { age: 86400 * 30 },
|
removeOnFail: { age: 86400 * 30 },
|
||||||
|
jobId: `compute-routing.${citySlug}.${mode}.${category}`,
|
||||||
parent: {
|
parent: {
|
||||||
id: job.id!,
|
id: job.id!,
|
||||||
// qualifiedName = "bull:pipeline" — the Redis key BullMQ uses
|
|
||||||
// to track parent/child relationships.
|
|
||||||
queue: queue.qualifiedName,
|
queue: queue.qualifiedName,
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
|
@ -113,24 +101,6 @@ export async function handleComputeScores(
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Dispatch transit scoring as a sibling child (one job covers all categories
|
|
||||||
// via PostGIS isochrone spatial joins, unlike per-category routing jobs).
|
|
||||||
if (modes.includes("transit")) {
|
|
||||||
await queue.add(
|
|
||||||
"compute-transit",
|
|
||||||
{ type: "compute-transit", citySlug },
|
|
||||||
{
|
|
||||||
attempts: 1,
|
|
||||||
removeOnComplete: { age: 86400 * 7 },
|
|
||||||
removeOnFail: { age: 86400 * 30 },
|
|
||||||
parent: { id: job.id!, queue: queue.qualifiedName },
|
|
||||||
// Transit is optional — a failure should not cascade to the parent.
|
|
||||||
// The city will be marked ready with walking/cycling scores only.
|
|
||||||
ignoreDependencyOnFailure: true,
|
|
||||||
},
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Dispatch BORIS NI ingest as a sibling child so it runs during routing.
|
// Dispatch BORIS NI ingest as a sibling child so it runs during routing.
|
||||||
if (job.data.ingestBorisNi) {
|
if (job.data.ingestBorisNi) {
|
||||||
await queue.add(
|
await queue.add(
|
||||||
|
|
@ -141,8 +111,25 @@ export async function handleComputeScores(
|
||||||
backoff: { type: "fixed", delay: 5000 },
|
backoff: { type: "fixed", delay: 5000 },
|
||||||
removeOnComplete: { age: 86400 * 7 },
|
removeOnComplete: { age: 86400 * 7 },
|
||||||
removeOnFail: { age: 86400 * 30 },
|
removeOnFail: { age: 86400 * 30 },
|
||||||
|
jobId: `ingest-boris-ni.${citySlug}`,
|
||||||
|
parent: { id: job.id!, queue: queue.qualifiedName },
|
||||||
|
ignoreDependencyOnFailure: true,
|
||||||
|
},
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Dispatch BORIS HB ingest (Stadt Bremen + Bremerhaven) similarly.
|
||||||
|
if (job.data.ingestBorisHb) {
|
||||||
|
await queue.add(
|
||||||
|
"ingest-boris-hb",
|
||||||
|
{ type: "ingest-boris-hb", citySlug },
|
||||||
|
{
|
||||||
|
attempts: 2,
|
||||||
|
backoff: { type: "fixed", delay: 5000 },
|
||||||
|
removeOnComplete: { age: 86400 * 7 },
|
||||||
|
removeOnFail: { age: 86400 * 30 },
|
||||||
|
jobId: `ingest-boris-hb.${citySlug}`,
|
||||||
parent: { id: job.id!, queue: queue.qualifiedName },
|
parent: { id: job.id!, queue: queue.qualifiedName },
|
||||||
// Boris NI data is optional — failure should not cancel the pipeline.
|
|
||||||
ignoreDependencyOnFailure: true,
|
ignoreDependencyOnFailure: true,
|
||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
@ -162,214 +149,155 @@ export async function handleComputeScores(
|
||||||
}
|
}
|
||||||
|
|
||||||
// ── Phase 2: aggregate scores from grid_poi_details ──────────────────────
|
// ── Phase 2: aggregate scores from grid_poi_details ──────────────────────
|
||||||
|
// All computation runs inside a single SQL CTE chain so the DBMS can
|
||||||
|
// optimise joins and aggregations without streaming data through Node.js.
|
||||||
await job.updateProgress({
|
await job.updateProgress({
|
||||||
stage: "Aggregating scores",
|
stage: "Aggregating scores",
|
||||||
pct: 70,
|
pct: 70,
|
||||||
message: `All routing complete — computing profile scores…`,
|
message: `All routing complete — computing profile scores…`,
|
||||||
} satisfies JobProgress);
|
} satisfies JobProgress);
|
||||||
|
|
||||||
// Load all per-subcategory routing results for this city in one query.
|
// Build parallel arrays for SQL unnest: one row per (profile, subcategory).
|
||||||
// Ordered by distance so the first row per (gpId, mode, category) is nearest.
|
const pwProfiles: string[] = [];
|
||||||
const detailRows = await Promise.resolve(sql<{
|
const pwSubcats: string[] = [];
|
||||||
grid_point_id: string;
|
const pwWeights: number[] = [];
|
||||||
category: string;
|
for (const [profileId, profile] of Object.entries(PROFILES)) {
|
||||||
subcategory: string;
|
for (const [subcategory, weight] of Object.entries(profile.subcategoryWeights)) {
|
||||||
travel_mode: string;
|
pwProfiles.push(profileId);
|
||||||
nearest_poi_id: string | null;
|
pwSubcats.push(subcategory);
|
||||||
distance_m: number | null;
|
pwWeights.push(weight);
|
||||||
travel_time_s: number | null;
|
}
|
||||||
}[]>`
|
}
|
||||||
|
|
||||||
|
await Promise.resolve(sql`
|
||||||
|
WITH
|
||||||
|
base AS (
|
||||||
|
SELECT
|
||||||
|
gpd.grid_point_id,
|
||||||
|
gpd.category,
|
||||||
|
gpd.subcategory,
|
||||||
|
gpd.travel_mode,
|
||||||
|
gpd.nearest_poi_id,
|
||||||
|
gpd.distance_m,
|
||||||
|
gpd.travel_time_s
|
||||||
|
FROM grid_poi_details gpd
|
||||||
|
JOIN grid_points gp ON gp.id = gpd.grid_point_id
|
||||||
|
WHERE gp.city_slug = ${citySlug}
|
||||||
|
),
|
||||||
|
fifteen_subcat AS (
|
||||||
|
-- "fifteen" mode: best (lowest) travel time across walking / cycling / transit
|
||||||
|
SELECT
|
||||||
|
grid_point_id, category, subcategory,
|
||||||
|
'fifteen'::text AS travel_mode,
|
||||||
|
MIN(travel_time_s) AS travel_time_s
|
||||||
|
FROM base
|
||||||
|
WHERE travel_mode IN ('walking', 'cycling', 'transit')
|
||||||
|
GROUP BY grid_point_id, category, subcategory
|
||||||
|
),
|
||||||
|
all_subcat AS (
|
||||||
|
SELECT grid_point_id, category, subcategory, travel_mode, travel_time_s FROM base
|
||||||
|
UNION ALL
|
||||||
|
SELECT grid_point_id, category, subcategory, travel_mode, travel_time_s FROM fifteen_subcat
|
||||||
|
),
|
||||||
|
road_nearest AS (
|
||||||
|
-- Nearest POI per (grid_point, category, mode) by distance
|
||||||
|
SELECT DISTINCT ON (grid_point_id, category, travel_mode)
|
||||||
|
grid_point_id, category, travel_mode, nearest_poi_id, distance_m, travel_time_s
|
||||||
|
FROM base
|
||||||
|
WHERE nearest_poi_id IS NOT NULL
|
||||||
|
ORDER BY grid_point_id, category, travel_mode, distance_m
|
||||||
|
),
|
||||||
|
fifteen_nearest AS (
|
||||||
|
-- Nearest POI for "fifteen": closest across walking / cycling / transit
|
||||||
|
SELECT DISTINCT ON (grid_point_id, category)
|
||||||
|
grid_point_id, category,
|
||||||
|
'fifteen'::text AS travel_mode,
|
||||||
|
nearest_poi_id, distance_m, travel_time_s
|
||||||
|
FROM base
|
||||||
|
WHERE travel_mode IN ('walking', 'cycling', 'transit')
|
||||||
|
AND nearest_poi_id IS NOT NULL
|
||||||
|
ORDER BY grid_point_id, category, distance_m
|
||||||
|
),
|
||||||
|
all_nearest AS (
|
||||||
|
SELECT * FROM road_nearest
|
||||||
|
UNION ALL
|
||||||
|
SELECT * FROM fifteen_nearest
|
||||||
|
),
|
||||||
|
profile_weights AS (
|
||||||
|
SELECT profile_id, subcategory, weight
|
||||||
|
FROM unnest(
|
||||||
|
${pwProfiles}::text[],
|
||||||
|
${pwSubcats}::text[],
|
||||||
|
${pwWeights}::float8[]
|
||||||
|
) AS t(profile_id, subcategory, weight)
|
||||||
|
),
|
||||||
|
thresholds_t AS (
|
||||||
|
SELECT unnest(${thresholds}::int[]) AS threshold_min
|
||||||
|
),
|
||||||
|
scores AS (
|
||||||
|
-- Complement-product score per (grid_point, category, mode, threshold, profile).
|
||||||
|
-- sigmoid(t) = 1 / (1 + exp((t − T) / (T/6))) where T = threshold in seconds.
|
||||||
|
-- score = 1 − ∏(1 − w·sigmoid) computed via EXP(SUM(LN(complement))).
|
||||||
|
-- NULL travel_time_s → sigmoid = 0 → complement = 1 → LN(1) = 0 (no penalty).
|
||||||
|
SELECT
|
||||||
|
s.grid_point_id,
|
||||||
|
s.category,
|
||||||
|
s.travel_mode,
|
||||||
|
t.threshold_min,
|
||||||
|
p.profile_id,
|
||||||
|
1.0 - EXP(SUM(
|
||||||
|
LN(GREATEST(
|
||||||
|
1.0 - COALESCE(pw.weight, ${DEFAULT_SUBCATEGORY_WEIGHT}::float8)
|
||||||
|
* CASE
|
||||||
|
WHEN s.travel_time_s IS NULL THEN 0.0
|
||||||
|
ELSE 1.0 / (1.0 + EXP(
|
||||||
|
(s.travel_time_s - t.threshold_min * 60.0)
|
||||||
|
/ (t.threshold_min * 10.0)
|
||||||
|
))
|
||||||
|
END,
|
||||||
|
1e-10
|
||||||
|
))
|
||||||
|
)) AS score
|
||||||
|
FROM all_subcat s
|
||||||
|
JOIN thresholds_t t ON true
|
||||||
|
CROSS JOIN (SELECT DISTINCT profile_id FROM profile_weights) p
|
||||||
|
LEFT JOIN profile_weights pw
|
||||||
|
ON pw.profile_id = p.profile_id AND pw.subcategory = s.subcategory
|
||||||
|
GROUP BY s.grid_point_id, s.category, s.travel_mode, t.threshold_min, p.profile_id
|
||||||
|
)
|
||||||
|
INSERT INTO grid_scores (
|
||||||
|
grid_point_id, category, travel_mode, threshold_min, profile,
|
||||||
|
nearest_poi_id, distance_m, travel_time_s, score
|
||||||
|
)
|
||||||
SELECT
|
SELECT
|
||||||
gpd.grid_point_id::text,
|
sc.grid_point_id,
|
||||||
gpd.category,
|
sc.category,
|
||||||
gpd.subcategory,
|
sc.travel_mode,
|
||||||
gpd.travel_mode,
|
sc.threshold_min,
|
||||||
gpd.nearest_poi_id::text,
|
sc.profile_id,
|
||||||
gpd.distance_m,
|
n.nearest_poi_id,
|
||||||
gpd.travel_time_s
|
n.distance_m,
|
||||||
FROM grid_poi_details gpd
|
n.travel_time_s,
|
||||||
JOIN grid_points gp ON gp.id = gpd.grid_point_id
|
sc.score
|
||||||
WHERE gp.city_slug = ${citySlug}
|
FROM scores sc
|
||||||
ORDER BY gpd.grid_point_id, gpd.travel_mode, gpd.category, gpd.distance_m
|
LEFT JOIN all_nearest n
|
||||||
|
ON n.grid_point_id = sc.grid_point_id
|
||||||
|
AND n.category = sc.category
|
||||||
|
AND n.travel_mode = sc.travel_mode
|
||||||
|
ON CONFLICT (grid_point_id, category, travel_mode, threshold_min, profile)
|
||||||
|
DO UPDATE SET
|
||||||
|
nearest_poi_id = EXCLUDED.nearest_poi_id,
|
||||||
|
distance_m = EXCLUDED.distance_m,
|
||||||
|
travel_time_s = EXCLUDED.travel_time_s,
|
||||||
|
score = EXCLUDED.score,
|
||||||
|
computed_at = now()
|
||||||
`);
|
`);
|
||||||
|
|
||||||
// Build in-memory structure keyed by "gpId:mode:category".
|
await job.updateProgress({
|
||||||
type GroupEntry = {
|
stage: "Aggregating scores",
|
||||||
gpId: string;
|
pct: 98,
|
||||||
mode: string;
|
message: `All scores computed for ${citySlug}`,
|
||||||
category: string;
|
} satisfies JobProgress);
|
||||||
subcategoryTimes: Array<{ subcategory: string; timeS: number | null }>;
|
|
||||||
nearestPoiId: string | null;
|
|
||||||
nearestDistM: number | null;
|
|
||||||
nearestTimeS: number | null;
|
|
||||||
};
|
|
||||||
const groups = new Map<string, GroupEntry>();
|
|
||||||
|
|
||||||
for (const row of detailRows) {
|
|
||||||
const key = `${row.grid_point_id}:${row.travel_mode}:${row.category}`;
|
|
||||||
let entry = groups.get(key);
|
|
||||||
if (!entry) {
|
|
||||||
entry = {
|
|
||||||
gpId: row.grid_point_id,
|
|
||||||
mode: row.travel_mode,
|
|
||||||
category: row.category,
|
|
||||||
subcategoryTimes: [],
|
|
||||||
nearestPoiId: null,
|
|
||||||
nearestDistM: null,
|
|
||||||
nearestTimeS: null,
|
|
||||||
};
|
|
||||||
groups.set(key, entry);
|
|
||||||
}
|
|
||||||
entry.subcategoryTimes.push({ subcategory: row.subcategory, timeS: row.travel_time_s });
|
|
||||||
// Track the overall nearest POI for this category (minimum distance).
|
|
||||||
if (
|
|
||||||
row.distance_m !== null &&
|
|
||||||
(entry.nearestDistM === null || row.distance_m < entry.nearestDistM)
|
|
||||||
) {
|
|
||||||
entry.nearestPoiId = row.nearest_poi_id;
|
|
||||||
entry.nearestDistM = row.distance_m;
|
|
||||||
entry.nearestTimeS = row.travel_time_s;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Synthesize "multimodal" groups: for each (gpId, category, subcategory),
|
|
||||||
// take the minimum travel time across walking and cycling so that a
|
|
||||||
// destination reachable by either mode counts as accessible.
|
|
||||||
// Driving is intentionally excluded (not a 15-min city metric).
|
|
||||||
const MULTIMODAL_MODES = new Set(["walking", "cycling", "transit"]); // modes combined into "fifteen"
|
|
||||||
const mmAccumulator = new Map<string, {
|
|
||||||
gpId: string;
|
|
||||||
category: string;
|
|
||||||
subTimes: Map<string, number | null>;
|
|
||||||
nearestDistM: number | null;
|
|
||||||
nearestPoiId: string | null;
|
|
||||||
nearestTimeS: number | null;
|
|
||||||
}>();
|
|
||||||
|
|
||||||
for (const entry of groups.values()) {
|
|
||||||
if (!MULTIMODAL_MODES.has(entry.mode)) continue;
|
|
||||||
const mmKey = `${entry.gpId}:${entry.category}`;
|
|
||||||
if (!mmAccumulator.has(mmKey)) {
|
|
||||||
mmAccumulator.set(mmKey, {
|
|
||||||
gpId: entry.gpId,
|
|
||||||
category: entry.category,
|
|
||||||
subTimes: new Map(),
|
|
||||||
nearestDistM: null,
|
|
||||||
nearestPoiId: null,
|
|
||||||
nearestTimeS: null,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
const acc = mmAccumulator.get(mmKey)!;
|
|
||||||
// Track nearest POI across all multimodal modes
|
|
||||||
if (entry.nearestDistM !== null && (acc.nearestDistM === null || entry.nearestDistM < acc.nearestDistM)) {
|
|
||||||
acc.nearestDistM = entry.nearestDistM;
|
|
||||||
acc.nearestPoiId = entry.nearestPoiId;
|
|
||||||
acc.nearestTimeS = entry.nearestTimeS;
|
|
||||||
}
|
|
||||||
// For each subcategory, keep the minimum travel time across modes
|
|
||||||
for (const { subcategory, timeS } of entry.subcategoryTimes) {
|
|
||||||
const existing = acc.subTimes.get(subcategory);
|
|
||||||
if (existing === undefined) {
|
|
||||||
acc.subTimes.set(subcategory, timeS);
|
|
||||||
} else if (existing === null && timeS !== null) {
|
|
||||||
acc.subTimes.set(subcategory, timeS);
|
|
||||||
} else if (timeS !== null && existing !== null && timeS < existing) {
|
|
||||||
acc.subTimes.set(subcategory, timeS);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (const acc of mmAccumulator.values()) {
|
|
||||||
const key = `${acc.gpId}:fifteen:${acc.category}`;
|
|
||||||
groups.set(key, {
|
|
||||||
gpId: acc.gpId,
|
|
||||||
mode: "fifteen",
|
|
||||||
category: acc.category,
|
|
||||||
subcategoryTimes: Array.from(acc.subTimes.entries()).map(([subcategory, timeS]) => ({ subcategory, timeS })),
|
|
||||||
nearestPoiId: acc.nearestPoiId,
|
|
||||||
nearestDistM: acc.nearestDistM,
|
|
||||||
nearestTimeS: acc.nearestTimeS,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute and insert scores for every threshold × profile combination.
|
|
||||||
// Each threshold writes to distinct rows (threshold_min is part of the PK),
|
|
||||||
// so all thresholds can be processed concurrently without conflicts.
|
|
||||||
// Node.js is single-threaded so completedThresholds++ is safe.
|
|
||||||
let completedThresholds = 0;
|
|
||||||
|
|
||||||
await Promise.all(thresholds.map(async (thresholdMin) => {
|
|
||||||
const threshold_s = thresholdMin * 60;
|
|
||||||
|
|
||||||
const gpIdArr: string[] = [];
|
|
||||||
const catArr: string[] = [];
|
|
||||||
const modeArr: string[] = [];
|
|
||||||
const profileArr: string[] = [];
|
|
||||||
const poiIdArr: (string | null)[] = [];
|
|
||||||
const distArr: (number | null)[] = [];
|
|
||||||
const timeArr: (number | null)[] = [];
|
|
||||||
const scoreArr: number[] = [];
|
|
||||||
|
|
||||||
for (const entry of groups.values()) {
|
|
||||||
for (const profileId of PROFILE_IDS) {
|
|
||||||
gpIdArr.push(entry.gpId);
|
|
||||||
catArr.push(entry.category);
|
|
||||||
modeArr.push(entry.mode);
|
|
||||||
profileArr.push(profileId);
|
|
||||||
poiIdArr.push(entry.nearestPoiId);
|
|
||||||
distArr.push(entry.nearestDistM);
|
|
||||||
timeArr.push(entry.nearestTimeS);
|
|
||||||
scoreArr.push(complementProduct(entry.subcategoryTimes, threshold_s, profileId));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Chunks within a threshold stay sequential — with all thresholds running
|
|
||||||
// concurrently we already have up to thresholds.length parallel INSERT
|
|
||||||
// streams, which saturates the connection pool without overwhelming it.
|
|
||||||
for (let i = 0; i < gpIdArr.length; i += INSERT_CHUNK) {
|
|
||||||
const end = Math.min(i + INSERT_CHUNK, gpIdArr.length);
|
|
||||||
await Promise.resolve(sql`
|
|
||||||
INSERT INTO grid_scores (
|
|
||||||
grid_point_id, category, travel_mode, threshold_min, profile,
|
|
||||||
nearest_poi_id, distance_m, travel_time_s, score
|
|
||||||
)
|
|
||||||
SELECT
|
|
||||||
gp_id::bigint,
|
|
||||||
cat,
|
|
||||||
mode_val,
|
|
||||||
${thresholdMin}::int,
|
|
||||||
prof,
|
|
||||||
CASE WHEN poi_id IS NULL THEN NULL ELSE poi_id::bigint END,
|
|
||||||
dist,
|
|
||||||
time_s,
|
|
||||||
score_val
|
|
||||||
FROM unnest(
|
|
||||||
${gpIdArr.slice(i, end)}::text[],
|
|
||||||
${catArr.slice(i, end)}::text[],
|
|
||||||
${modeArr.slice(i, end)}::text[],
|
|
||||||
${profileArr.slice(i, end)}::text[],
|
|
||||||
${poiIdArr.slice(i, end)}::text[],
|
|
||||||
${distArr.slice(i, end)}::float8[],
|
|
||||||
${timeArr.slice(i, end)}::float8[],
|
|
||||||
${scoreArr.slice(i, end)}::float8[]
|
|
||||||
) AS t(gp_id, cat, mode_val, prof, poi_id, dist, time_s, score_val)
|
|
||||||
ON CONFLICT (grid_point_id, category, travel_mode, threshold_min, profile)
|
|
||||||
DO UPDATE SET
|
|
||||||
nearest_poi_id = EXCLUDED.nearest_poi_id,
|
|
||||||
distance_m = EXCLUDED.distance_m,
|
|
||||||
travel_time_s = EXCLUDED.travel_time_s,
|
|
||||||
score = EXCLUDED.score,
|
|
||||||
computed_at = now()
|
|
||||||
`);
|
|
||||||
}
|
|
||||||
|
|
||||||
completedThresholds++;
|
|
||||||
await job.updateProgress({
|
|
||||||
stage: "Aggregating scores",
|
|
||||||
pct: 70 + Math.round((completedThresholds / thresholds.length) * 28),
|
|
||||||
message: `${completedThresholds} / ${thresholds.length} thresholds done…`,
|
|
||||||
} satisfies JobProgress);
|
|
||||||
}));
|
|
||||||
|
|
||||||
await Promise.resolve(sql`
|
await Promise.resolve(sql`
|
||||||
UPDATE cities SET status = 'ready', last_ingested = now()
|
UPDATE cities SET status = 'ready', last_ingested = now()
|
||||||
|
|
|
||||||
255
worker/src/jobs/ingest-boris-hb.ts
Normal file
255
worker/src/jobs/ingest-boris-hb.ts
Normal file
|
|
@ -0,0 +1,255 @@
|
||||||
|
/**
|
||||||
|
* Ingest BORIS HB (Freie Hansestadt Bremen — Stadt Bremen + Bremerhaven) estate
|
||||||
|
* value zones.
|
||||||
|
*
|
||||||
|
* The HB WFS has a single endpoint (no year-specific variants like the NI source).
|
||||||
|
* The reference year is read from the `stichtag` date in the returned features.
|
||||||
|
*
|
||||||
|
* Both Stadt Bremen and Bremerhaven are covered by the same WFS endpoint. Since
|
||||||
|
* each city ingestion passes its own bbox as the WFS BBOX filter, the two cities
|
||||||
|
* are handled independently and their zones never overlap.
|
||||||
|
*
|
||||||
|
* WFS endpoint: https://www.geobasisdaten.niedersachsen.de/doorman/noauth/WFS_borisHB
|
||||||
|
*/
|
||||||
|
import type { Job } from "bullmq";
|
||||||
|
import { getSql } from "../db.js";
|
||||||
|
import type { JobProgress } from "@transportationer/shared";
|
||||||
|
|
||||||
|
export type IngestBorisHbData = {
|
||||||
|
type: "ingest-boris-hb";
|
||||||
|
citySlug: string;
|
||||||
|
};
|
||||||
|
|
||||||
|
const WFS_URL = "https://www.geobasisdaten.niedersachsen.de/doorman/noauth/WFS_borisHB";
|
||||||
|
const CHUNK = 500;
|
||||||
|
|
||||||
|
// ─── GML Parser (WFS 2.0 / GML 3.2) — same schema as boris:BR_BodenrichtwertZonal ──
|
||||||
|
|
||||||
|
function extractFirst(xml: string, localName: string): string | null {
|
||||||
|
const re = new RegExp(`<(?:[\\w]+:)?${localName}(?:\\s[^>]*)?>([^<]*)`, "");
|
||||||
|
const m = re.exec(xml);
|
||||||
|
return m ? m[1].trim() || null : null;
|
||||||
|
}
|
||||||
|
|
||||||
|
function parsePosListCoords(posList: string): [number, number][] {
|
||||||
|
const nums = posList.trim().split(/\s+/).map(Number);
|
||||||
|
const coords: [number, number][] = [];
|
||||||
|
for (let i = 0; i + 1 < nums.length; i += 2) {
|
||||||
|
coords.push([nums[i + 1], nums[i]]); // lat,lon → lon,lat (GeoJSON)
|
||||||
|
}
|
||||||
|
return coords;
|
||||||
|
}
|
||||||
|
|
||||||
|
function parseGmlPolygon(xml: string): { type: "Polygon"; coordinates: [number, number][][] } | null {
|
||||||
|
const extMatch = /<gml:exterior[\s\S]*?<gml:posList[^>]*>([\s\S]*?)<\/gml:posList>/.exec(xml);
|
||||||
|
if (!extMatch) return null;
|
||||||
|
const exterior = parsePosListCoords(extMatch[1]);
|
||||||
|
if (exterior.length < 3) return null;
|
||||||
|
|
||||||
|
const rings: [number, number][][] = [exterior];
|
||||||
|
const intRe = /<gml:interior[\s\S]*?<gml:posList[^>]*>([\s\S]*?)<\/gml:posList>/g;
|
||||||
|
let intMatch: RegExpExecArray | null;
|
||||||
|
while ((intMatch = intRe.exec(xml)) !== null) {
|
||||||
|
const interior = parsePosListCoords(intMatch[1]);
|
||||||
|
if (interior.length >= 3) rings.push(interior);
|
||||||
|
}
|
||||||
|
return { type: "Polygon", coordinates: rings };
|
||||||
|
}
|
||||||
|
|
||||||
|
type GeoJsonGeometry =
|
||||||
|
| { type: "Polygon"; coordinates: [number, number][][] }
|
||||||
|
| { type: "MultiPolygon"; coordinates: [number, number][][][] };
|
||||||
|
|
||||||
|
type EstateValueFeature = {
|
||||||
|
geometry: GeoJsonGeometry;
|
||||||
|
value: number | null;
|
||||||
|
zoneName: string | null;
|
||||||
|
usageType: string | null;
|
||||||
|
usageDetail: string | null;
|
||||||
|
devState: string | null;
|
||||||
|
stichtag: string | null;
|
||||||
|
};
|
||||||
|
|
||||||
|
function parseMember(xml: string): EstateValueFeature | null {
|
||||||
|
const value = extractFirst(xml, "bodenrichtwert");
|
||||||
|
const zoneName = extractFirst(xml, "bodenrichtwertzoneName");
|
||||||
|
const art = extractFirst(xml, "art");
|
||||||
|
const ergaenzung = extractFirst(xml, "ergaenzung");
|
||||||
|
const devState = extractFirst(xml, "entwicklungszustand");
|
||||||
|
const stichtag = extractFirst(xml, "stichtag");
|
||||||
|
|
||||||
|
// Only keep Bauland (B) zones designated for residential or mixed use.
|
||||||
|
if (devState !== "B") return null;
|
||||||
|
if (!art || (!art.startsWith("W") && !art.startsWith("M"))) return null;
|
||||||
|
|
||||||
|
let geometry: GeoJsonGeometry | null = null;
|
||||||
|
|
||||||
|
const polygonMatch = /<gml:Polygon[^>]*>([\s\S]*?)<\/gml:Polygon>/.exec(xml);
|
||||||
|
if (polygonMatch) {
|
||||||
|
geometry = parseGmlPolygon(polygonMatch[0]);
|
||||||
|
} else {
|
||||||
|
const multiMatch = /<gml:MultiSurface[^>]*>([\s\S]*?)<\/gml:MultiSurface>/.exec(xml);
|
||||||
|
if (multiMatch) {
|
||||||
|
const polys: [number, number][][][] = [];
|
||||||
|
const surfRe = /<gml:surfaceMember[^>]*>([\s\S]*?)<\/gml:surfaceMember>/g;
|
||||||
|
let surfMatch: RegExpExecArray | null;
|
||||||
|
while ((surfMatch = surfRe.exec(multiMatch[0])) !== null) {
|
||||||
|
const polyFrag = /<gml:Polygon[^>]*>([\s\S]*?)<\/gml:Polygon>/.exec(surfMatch[0]);
|
||||||
|
if (polyFrag) {
|
||||||
|
const poly = parseGmlPolygon(polyFrag[0]);
|
||||||
|
if (poly) polys.push(poly.coordinates);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (polys.length > 0) {
|
||||||
|
geometry = { type: "MultiPolygon", coordinates: polys };
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!geometry) return null;
|
||||||
|
|
||||||
|
return {
|
||||||
|
geometry,
|
||||||
|
value: value !== null ? parseFloat(value) : null,
|
||||||
|
zoneName,
|
||||||
|
usageType: art,
|
||||||
|
usageDetail: ergaenzung,
|
||||||
|
devState,
|
||||||
|
stichtag,
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
function parseGmlFeatures(gml: string): EstateValueFeature[] {
|
||||||
|
const features: EstateValueFeature[] = [];
|
||||||
|
const memberRe = /<wfs:member>([\s\S]*?)<\/wfs:member>/g;
|
||||||
|
let match: RegExpExecArray | null;
|
||||||
|
while ((match = memberRe.exec(gml)) !== null) {
|
||||||
|
const f = parseMember(match[1]);
|
||||||
|
if (f) features.push(f);
|
||||||
|
}
|
||||||
|
return features;
|
||||||
|
}
|
||||||
|
|
||||||
|
// ─── Job handler ──────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
export async function handleIngestBorisHb(job: Job<IngestBorisHbData>): Promise<void> {
|
||||||
|
const { citySlug } = job.data;
|
||||||
|
const sql = getSql();
|
||||||
|
|
||||||
|
await job.updateProgress({
|
||||||
|
stage: "Ingesting BORIS HB",
|
||||||
|
pct: 0,
|
||||||
|
message: `Reading bbox for ${citySlug}`,
|
||||||
|
} satisfies JobProgress);
|
||||||
|
|
||||||
|
const bboxRows = await Promise.resolve(sql<{
|
||||||
|
minlng: number; minlat: number; maxlng: number; maxlat: number;
|
||||||
|
}[]>`
|
||||||
|
SELECT
|
||||||
|
ST_XMin(bbox)::float AS minlng,
|
||||||
|
ST_YMin(bbox)::float AS minlat,
|
||||||
|
ST_XMax(bbox)::float AS maxlng,
|
||||||
|
ST_YMax(bbox)::float AS maxlat
|
||||||
|
FROM cities WHERE slug = ${citySlug} AND bbox IS NOT NULL
|
||||||
|
`);
|
||||||
|
|
||||||
|
if (bboxRows.length === 0) throw new Error(`No bbox set for city ${citySlug}`);
|
||||||
|
const { minlng, minlat, maxlng, maxlat } = bboxRows[0];
|
||||||
|
|
||||||
|
await job.updateProgress({
|
||||||
|
stage: "Ingesting BORIS HB",
|
||||||
|
pct: 10,
|
||||||
|
message: "Fetching zones from WFS…",
|
||||||
|
} satisfies JobProgress);
|
||||||
|
|
||||||
|
const url = new URL(WFS_URL);
|
||||||
|
url.searchParams.set("SERVICE", "WFS");
|
||||||
|
url.searchParams.set("VERSION", "2.0.0");
|
||||||
|
url.searchParams.set("REQUEST", "GetFeature");
|
||||||
|
url.searchParams.set("TYPENAMES", "boris:BR_BodenrichtwertZonal");
|
||||||
|
url.searchParams.set("SRSNAME", "urn:ogc:def:crs:EPSG::4326");
|
||||||
|
url.searchParams.set("BBOX", `${minlat},${minlng},${maxlat},${maxlng},urn:ogc:def:crs:EPSG::4326`);
|
||||||
|
url.searchParams.set("COUNT", "10000");
|
||||||
|
|
||||||
|
let gml: string;
|
||||||
|
try {
|
||||||
|
const res = await fetch(url.toString(), {
|
||||||
|
headers: { Accept: "application/gml+xml; version=3.2" },
|
||||||
|
signal: AbortSignal.timeout(60_000),
|
||||||
|
});
|
||||||
|
if (!res.ok) throw new Error(`WFS returned ${res.status}`);
|
||||||
|
gml = await res.text();
|
||||||
|
if (gml.includes("<ows:ExceptionReport")) throw new Error("WFS returned ExceptionReport");
|
||||||
|
} catch (err) {
|
||||||
|
throw new Error(`BORIS HB WFS fetch failed: ${err instanceof Error ? err.message : err}`);
|
||||||
|
}
|
||||||
|
|
||||||
|
const features = parseGmlFeatures(gml);
|
||||||
|
console.log(`[ingest-boris-hb] ${features.length} Bauland W/M zones for ${citySlug}`);
|
||||||
|
|
||||||
|
if (features.length === 0) {
|
||||||
|
await job.updateProgress({
|
||||||
|
stage: "Ingesting BORIS HB",
|
||||||
|
pct: 100,
|
||||||
|
message: "No residential/mixed zones found in bbox",
|
||||||
|
} satisfies JobProgress);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Extract the reference year from the first feature's stichtag (e.g. "2025-01-01" → 2025).
|
||||||
|
// All features from a single WFS response share the same reference date.
|
||||||
|
const year = features[0].stichtag
|
||||||
|
? parseInt(features[0].stichtag.substring(0, 4), 10)
|
||||||
|
: new Date().getFullYear();
|
||||||
|
|
||||||
|
// Clear existing BORIS HB data for this city before re-ingesting.
|
||||||
|
await Promise.resolve(sql`
|
||||||
|
DELETE FROM estate_value_zones WHERE city_slug = ${citySlug} AND source = 'boris-hb'
|
||||||
|
`);
|
||||||
|
|
||||||
|
await job.updateProgress({
|
||||||
|
stage: "Ingesting BORIS HB",
|
||||||
|
pct: 50,
|
||||||
|
message: `Inserting ${features.length} zones (year ${year})…`,
|
||||||
|
} satisfies JobProgress);
|
||||||
|
|
||||||
|
for (let i = 0; i < features.length; i += CHUNK) {
|
||||||
|
const chunk = features.slice(i, i + CHUNK);
|
||||||
|
const geomJsons = chunk.map((f) => JSON.stringify(f.geometry));
|
||||||
|
const values = chunk.map((f) => f.value);
|
||||||
|
const zoneNames = chunk.map((f) => f.zoneName);
|
||||||
|
const usageTypes = chunk.map((f) => f.usageType);
|
||||||
|
const usageDetails = chunk.map((f) => f.usageDetail);
|
||||||
|
const devStates = chunk.map((f) => f.devState);
|
||||||
|
const stichtags = chunk.map((f) => f.stichtag);
|
||||||
|
|
||||||
|
await Promise.resolve(sql`
|
||||||
|
INSERT INTO estate_value_zones
|
||||||
|
(city_slug, geom, value_eur_m2, zone_name, usage_type, usage_detail, dev_state, stichtag, source, year)
|
||||||
|
SELECT
|
||||||
|
${citySlug},
|
||||||
|
ST_SetSRID(ST_GeomFromGeoJSON(g), 4326),
|
||||||
|
v,
|
||||||
|
zn, ut, ud, ds, st,
|
||||||
|
'boris-hb',
|
||||||
|
${year}::smallint
|
||||||
|
FROM unnest(
|
||||||
|
${geomJsons}::text[],
|
||||||
|
${values}::numeric[],
|
||||||
|
${zoneNames}::text[],
|
||||||
|
${usageTypes}::text[],
|
||||||
|
${usageDetails}::text[],
|
||||||
|
${devStates}::text[],
|
||||||
|
${stichtags}::text[]
|
||||||
|
) AS t(g, v, zn, ut, ud, ds, st)
|
||||||
|
`);
|
||||||
|
}
|
||||||
|
|
||||||
|
console.log(`[ingest-boris-hb] ✓ Stored ${features.length} BORIS HB zones (year ${year}) for ${citySlug}`);
|
||||||
|
|
||||||
|
await job.updateProgress({
|
||||||
|
stage: "Ingesting BORIS HB",
|
||||||
|
pct: 100,
|
||||||
|
message: `BORIS HB ingest complete: ${features.length} zones (year ${year})`,
|
||||||
|
} satisfies JobProgress);
|
||||||
|
}
|
||||||
|
|
@ -21,6 +21,17 @@ function isInNiedersachsen(minLng: number, minLat: number, maxLng: number, maxLa
|
||||||
return minLng < 11.779 && maxLng > 6.526 && minLat < 54.033 && maxLat > 51.197;
|
return minLng < 11.779 && maxLng > 6.526 && minLat < 54.033 && maxLat > 51.197;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* True when the given bbox intersects the Freie Hansestadt Bremen (HB), which
|
||||||
|
* consists of two non-contiguous cities:
|
||||||
|
* - Stadt Bremen: ~52.95–53.23°N, 8.48–8.99°E
|
||||||
|
* - Bremerhaven: ~53.46–53.65°N, 8.48–8.68°E
|
||||||
|
* The WFS BBOX filter ensures each city only receives its own zones — no overlap.
|
||||||
|
*/
|
||||||
|
function isInBremen(minLng: number, minLat: number, maxLng: number, maxLat: number): boolean {
|
||||||
|
return minLng < 8.99 && maxLng > 8.48 && minLat < 53.65 && maxLat > 52.95;
|
||||||
|
}
|
||||||
|
|
||||||
export async function handleRefreshCity(
|
export async function handleRefreshCity(
|
||||||
job: Job<RefreshCityData>,
|
job: Job<RefreshCityData>,
|
||||||
): Promise<void> {
|
): Promise<void> {
|
||||||
|
|
@ -81,18 +92,21 @@ export async function handleRefreshCity(
|
||||||
UPDATE cities SET status = 'processing' WHERE slug = ${citySlug}
|
UPDATE cities SET status = 'processing' WHERE slug = ${citySlug}
|
||||||
`);
|
`);
|
||||||
|
|
||||||
// Shared download node factory — produces an idempotent download-pbf node.
|
// Shared download node factory — each call produces an independent download-pbf
|
||||||
// Two independent nodes with the same geofabrikUrl are safe: the idempotency
|
// job (no fixed jobId). Three branches each get their own job so BullMQ can
|
||||||
// check in download-pbf.ts skips the download if the file already exists.
|
// track each parent→child dependency correctly; the handler skips the actual
|
||||||
|
// download if the file already exists (atomic rename + idempotency check).
|
||||||
const downloadNode = () => ({
|
const downloadNode = () => ({
|
||||||
name: "download-pbf",
|
name: "download-pbf",
|
||||||
queueName: "pipeline",
|
queueName: "download",
|
||||||
data: { type: "download-pbf" as const, citySlug, geofabrikUrl },
|
data: { type: "download-pbf" as const, citySlug, geofabrikUrl },
|
||||||
opts: JOB_OPTIONS["download-pbf"],
|
opts: { ...JOB_OPTIONS["download-pbf"] },
|
||||||
});
|
});
|
||||||
|
|
||||||
// For NI cities: ingest-boris-ni is dispatched in Phase 1 of compute-scores.
|
// For NI cities: ingest-boris-ni is dispatched in Phase 1 of compute-scores.
|
||||||
const niApplicable = !!(bbox && isInNiedersachsen(...bbox));
|
const niApplicable = !!(bbox && isInNiedersachsen(...bbox));
|
||||||
|
// For HB cities (Stadt Bremen + Bremerhaven): ingest-boris-hb runs in Phase 1.
|
||||||
|
const hbApplicable = !!(bbox && isInBremen(...bbox));
|
||||||
|
|
||||||
// Parallel pipeline DAG (bottom-up — leaves execute first):
|
// Parallel pipeline DAG (bottom-up — leaves execute first):
|
||||||
//
|
//
|
||||||
|
|
@ -119,17 +133,18 @@ export async function handleRefreshCity(
|
||||||
modes: ["walking", "cycling", "driving", "transit"] as const,
|
modes: ["walking", "cycling", "driving", "transit"] as const,
|
||||||
thresholds: [...VALID_THRESHOLDS],
|
thresholds: [...VALID_THRESHOLDS],
|
||||||
ingestBorisNi: niApplicable,
|
ingestBorisNi: niApplicable,
|
||||||
|
ingestBorisHb: hbApplicable,
|
||||||
},
|
},
|
||||||
opts: JOB_OPTIONS["compute-scores"],
|
opts: { ...JOB_OPTIONS["compute-scores"], jobId: `compute-scores.${citySlug}` },
|
||||||
children: [
|
children: [
|
||||||
{
|
{
|
||||||
name: "generate-grid",
|
name: "generate-grid",
|
||||||
queueName: "pipeline",
|
queueName: "pipeline",
|
||||||
data: { type: "generate-grid" as const, citySlug, resolutionM },
|
data: { type: "generate-grid" as const, citySlug, resolutionM },
|
||||||
opts: JOB_OPTIONS["generate-grid"],
|
opts: { ...JOB_OPTIONS["generate-grid"], jobId: `generate-grid.${citySlug}` },
|
||||||
children: [
|
children: [
|
||||||
// Three parallel branches — each gets its own download-pbf child;
|
// Three parallel branches — all share a single download-pbf job via
|
||||||
// the idempotency guard ensures only one actually downloads when they race.
|
// the deterministic jobId; BullMQ deduplicates them to one download.
|
||||||
{
|
{
|
||||||
name: "extract-pois",
|
name: "extract-pois",
|
||||||
queueName: "pipeline",
|
queueName: "pipeline",
|
||||||
|
|
@ -139,7 +154,7 @@ export async function handleRefreshCity(
|
||||||
pbfPath,
|
pbfPath,
|
||||||
...(bbox ? { bbox } : {}),
|
...(bbox ? { bbox } : {}),
|
||||||
},
|
},
|
||||||
opts: JOB_OPTIONS["extract-pois"],
|
opts: { ...JOB_OPTIONS["extract-pois"], jobId: `extract-pois.${citySlug}` },
|
||||||
children: [downloadNode()],
|
children: [downloadNode()],
|
||||||
},
|
},
|
||||||
// Road-only Valhalla build — no GTFS, produces clean tiles without
|
// Road-only Valhalla build — no GTFS, produces clean tiles without
|
||||||
|
|
@ -153,7 +168,7 @@ export async function handleRefreshCity(
|
||||||
pbfPath,
|
pbfPath,
|
||||||
...(bbox ? { bbox } : {}),
|
...(bbox ? { bbox } : {}),
|
||||||
},
|
},
|
||||||
opts: JOB_OPTIONS["build-valhalla"],
|
opts: { ...JOB_OPTIONS["build-valhalla"], jobId: `build-valhalla.${citySlug}` },
|
||||||
children: [downloadNode()],
|
children: [downloadNode()],
|
||||||
},
|
},
|
||||||
// Transit Valhalla build — depends on GTFS download. Produces tiles with
|
// Transit Valhalla build — depends on GTFS download. Produces tiles with
|
||||||
|
|
@ -167,7 +182,7 @@ export async function handleRefreshCity(
|
||||||
pbfPath,
|
pbfPath,
|
||||||
...(bbox ? { bbox } : {}),
|
...(bbox ? { bbox } : {}),
|
||||||
},
|
},
|
||||||
opts: JOB_OPTIONS["build-valhalla"],
|
opts: { ...JOB_OPTIONS["build-valhalla"], jobId: `build-valhalla-transit.${citySlug}` },
|
||||||
children: [
|
children: [
|
||||||
downloadNode(),
|
downloadNode(),
|
||||||
// Download GTFS feed before building transit tiles. Idempotent —
|
// Download GTFS feed before building transit tiles. Idempotent —
|
||||||
|
|
|
||||||
|
|
@ -109,7 +109,7 @@ const MATRIX_MAX_ATTEMPTS = 8;
|
||||||
const MATRIX_RETRY_BASE_MS = 1_000;
|
const MATRIX_RETRY_BASE_MS = 1_000;
|
||||||
const MATRIX_RETRY_MAX_MS = 15_000;
|
const MATRIX_RETRY_MAX_MS = 15_000;
|
||||||
/** Per-request timeout — prevents hanging indefinitely if the service is down. */
|
/** Per-request timeout — prevents hanging indefinitely if the service is down. */
|
||||||
const MATRIX_TIMEOUT_MS = 30_000;
|
const MATRIX_TIMEOUT_MS = 60_000;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Call Valhalla's sources_to_targets matrix endpoint.
|
* Call Valhalla's sources_to_targets matrix endpoint.
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue