// fifteen/apps/web/app/api/admin/jobs/[id]/stream/route.ts
import { NextRequest } from "next/server";
import { Job } from "bullmq";
import { getPipelineQueue, getValhallaQueue } from "@/lib/queue";
import type { PipelineJobData, JobProgress, ComputeScoresJobData, RefreshCityJobData } from "@/lib/queue";
import type { SSEEvent, RoutingDetail } from "@transportationer/shared";
import { CATEGORY_IDS } from "@transportationer/shared";
import { sql } from "@/lib/db";
// Opt out of the Edge runtime: BullMQ (Redis) and the SQL client need Node.js APIs.
export const runtime = "nodejs";
/** Serialize an SSE event as a single `data:` frame terminated by a blank line. */
function fmt(event: SSEEvent): string {
  const payload = JSON.stringify(event);
  return "data: " + payload + "\n\n";
}
/**
 * GET /api/admin/jobs/[id]/stream — Server-Sent Events stream reporting
 * pipeline progress for a refresh-city job.
 *
 * Polls the BullMQ queues and the `cities` table roughly once per second and
 * emits `progress` / `heartbeat` / `completed` / `failed` events until the
 * pipeline reaches a terminal state or the client disconnects.
 *
 * @param req    Incoming request; its abort signal tears down the stream.
 * @param params Route params promise carrying the refresh-city job id.
 * @returns      A `text/event-stream` response that stays open until done.
 */
export async function GET(
  req: NextRequest,
  { params }: { params: Promise<{ id: string }> },
): Promise<Response> {
  const { id } = await params;
  const queue = getPipelineQueue();
  const encoder = new TextEncoder();
  // Self-rescheduling timeout rather than setInterval: one poll iteration
  // performs several Redis round-trips plus a DB query, so a fixed interval
  // could start a new iteration while the previous one is still in flight,
  // producing duplicate/out-of-order SSE events and redundant queue load.
  // The next poll is scheduled only after the current one finishes; `closed`
  // gates rescheduling once the stream has been torn down.
  let timer: ReturnType<typeof setTimeout> | null = null;
  let closed = false;
  // Resolve citySlug and creation timestamp from the refresh-city job.
  // We track progress by citySlug across all pipeline stages because
  // refresh-city itself completes almost immediately after enqueueing children.
  // jobCreatedAt gates failed lookups so we never match results from a
  // previous ingest of the same city.
  // computeScoresJobId is captured after flow.add() by the worker; once
  // available it allows exact-ID matching for the completion check,
  // eliminating false positives from previous runs.
  let citySlug: string;
  let jobCreatedAt: number;
  let computeScoresJobId: string | undefined;
  try {
    const job = await Job.fromId<PipelineJobData>(queue, id);
    if (!job) {
      return new Response(fmt({ type: "failed", jobId: id, error: "Job not found" }), {
        headers: { "Content-Type": "text/event-stream" },
      });
    }
    citySlug = "citySlug" in job.data ? (job.data.citySlug ?? "") : "";
    jobCreatedAt = job.timestamp;
    computeScoresJobId = (job.data as RefreshCityJobData).computeScoresJobId;
  } catch {
    return new Response(fmt({ type: "failed", jobId: id, error: "Queue unavailable" }), {
      headers: { "Content-Type": "text/event-stream" },
    });
  }
  const valhallaQueue = getValhallaQueue();
  const stream = new ReadableStream({
    start(ctrl) {
      // Push one SSE frame; swallows the error thrown by a closed controller.
      const enqueue = (e: SSEEvent) => {
        try {
          ctrl.enqueue(encoder.encode(fmt(e)));
        } catch {
          /* controller already closed */
        }
      };
      // Terminal teardown: stop future polls, cancel the pending timer,
      // close the stream. Idempotent — safe from both abort and poll paths.
      const cleanup = () => {
        closed = true;
        if (timer) { clearTimeout(timer); timer = null; }
        try { ctrl.close(); } catch { /* already closed */ }
      };
      const poll = async () => {
        try {
          // If computeScoresJobId wasn't set when the stream opened (race with
          // the worker updating job data), re-read the job once to pick it up.
          if (!computeScoresJobId) {
            const refreshJob = await Job.fromId<PipelineJobData>(queue, id);
            computeScoresJobId = refreshJob
              ? (refreshJob.data as RefreshCityJobData).computeScoresJobId
              : undefined;
          }
          // 1. Fetch active jobs and waiting-children jobs in parallel.
          const [pipelineActive, valhallaActive, waitingChildren] = await Promise.all([
            queue.getActive(0, 100),
            valhallaQueue.getActive(0, 100),
            queue.getWaitingChildren(0, 200),
          ]);
          // 1a. Parallel routing phase: compute-scores is waiting for its routing
          // children to finish. Report aggregate progress instead of one job's pct.
          // Only enter this branch when routingDispatched=true (Phase 1 has run).
          // Before that, compute-scores is in waiting-children while generate-grid
          // is running — fall through to the sequential active-job check instead.
          const csWaiting = waitingChildren.find(
            (j) =>
              j.data.type === "compute-scores" &&
              (j.data as ComputeScoresJobData).routingDispatched === true &&
              (computeScoresJobId ? j.id === computeScoresJobId : j.data.citySlug === citySlug),
          );
          if (csWaiting) {
            const csData = csWaiting.data as ComputeScoresJobData;
            const routingModes = csData.modes.filter((m) => m !== "transit");
            const totalRoutingJobs = routingModes.length * CATEGORY_IDS.length;
            const hasTransit = csData.modes.includes("transit");
            const pipelineWaiting = await queue.getWaiting(0, 200);
            // Build per-mode routing detail for the UI
            const routingDetail: RoutingDetail = {};
            for (const mode of routingModes) {
              const total = CATEGORY_IDS.length;
              const stillActive = pipelineActive.filter(
                (j) => j.data.citySlug === citySlug && j.data.type === "compute-routing" && (j.data as any).mode === mode,
              ).length;
              const stillWaiting = pipelineWaiting.filter(
                (j) => j.data.citySlug === citySlug && j.data.type === "compute-routing" && (j.data as any).mode === mode,
              ).length;
              routingDetail[mode] = { done: Math.max(0, total - stillActive - stillWaiting), total };
            }
            const completedRouting = Object.values(routingDetail).reduce((s, v) => s + v.done, 0);
            // Check if compute-transit is still running
            const transitActive = hasTransit && pipelineActive.some(
              (j) => j.data.citySlug === citySlug && j.data.type === "compute-transit",
            );
            const transitWaiting = hasTransit && pipelineWaiting.some(
              (j) => j.data.citySlug === citySlug && j.data.type === "compute-transit",
            );
            const transitRunning = transitActive || transitWaiting;
            if (hasTransit) {
              routingDetail["transit"] = { done: transitRunning ? 0 : 1, total: 1 };
            }
            // Prefer reporting transit job's own progress when it's active
            const transitActiveJob = pipelineActive.find(
              (j) => j.data.citySlug === citySlug && j.data.type === "compute-transit",
            );
            if (transitActiveJob) {
              const p = transitActiveJob.progress as JobProgress | undefined;
              if (p?.stage) {
                enqueue({ type: "progress", stage: p.stage, pct: p.pct, message: p.message, routingDetail });
                return;
              }
            }
            const pct = totalRoutingJobs > 0
              ? Math.round((completedRouting / totalRoutingJobs) * 100)
              : transitRunning ? 50 : 100;
            const message = transitRunning && completedRouting >= totalRoutingJobs
              ? "Routing done — computing transit isochrones…"
              : `${completedRouting} / ${totalRoutingJobs} routing jobs`;
            enqueue({ type: "progress", stage: "Computing scores", pct, message, routingDetail });
            return;
          }
          // 1b. OSM parallel phase: extract-pois and build-valhalla run concurrently.
          // Emit a separate progress event for each so the UI can track them independently.
          const extractPoisJob = pipelineActive.find(
            (j) => j.data.citySlug === citySlug && j.data.type === "extract-pois",
          );
          const buildValhallaJob = valhallaActive.find(
            (j) => j.data.citySlug === citySlug && j.data.type === "build-valhalla",
          );
          if (extractPoisJob || buildValhallaJob) {
            const pe = extractPoisJob?.progress as JobProgress | undefined;
            const pv = buildValhallaJob?.progress as JobProgress | undefined;
            if (pe?.stage) enqueue({ type: "progress", stage: pe.stage, pct: pe.pct, message: pe.message });
            if (pv?.stage) enqueue({ type: "progress", stage: pv.stage, pct: pv.pct, message: pv.message });
            if (!pe?.stage && !pv?.stage) enqueue({ type: "heartbeat" });
            return;
          }
          // 1c. Sequential phase: report whichever single job is currently active.
          // Two download-pbf jobs are enqueued per city (one child of extract-pois,
          // one child of build-valhalla). The idempotency guard makes one skip
          // immediately at pct=100 while the other does the real download.
          // Prefer the job that has actual byte progress so the UI doesn't
          // regress from 100% → 5% when the skip job is seen first.
          const allCityActive = [...pipelineActive, ...valhallaActive].filter(
            (j) => j.data.citySlug === citySlug && j.data.type !== "refresh-city",
          );
          const activeJob =
            allCityActive.find(
              (j) =>
                j.data.type === "download-pbf" &&
                ((j.progress as JobProgress | undefined)?.bytesDownloaded ?? 0) > 0,
            ) ?? allCityActive[0];
          if (activeJob) {
            const p = activeJob.progress as JobProgress | undefined;
            if (p?.stage) {
              enqueue({
                type: "progress",
                stage: p.stage,
                pct: p.pct,
                message: p.message,
                bytesDownloaded: p.bytesDownloaded,
                totalBytes: p.totalBytes,
              });
            } else {
              enqueue({ type: "heartbeat" });
            }
            return;
          }
          // 2. Check city status in DB — authoritative ground truth for completion/error.
          // This catches completion reliably regardless of BullMQ job retention,
          // and catches errors set by the worker's failed-job handler.
          const cityRows = await sql<{ status: string; error_message: string | null }[]>`
            SELECT status, error_message FROM cities WHERE slug = ${citySlug}
          `;
          const cityStatus = cityRows[0]?.status;
          if (cityStatus === "ready") {
            enqueue({ type: "completed", jobId: computeScoresJobId ?? id });
            cleanup();
            return;
          }
          if (cityStatus === "error") {
            enqueue({
              type: "failed",
              jobId: id,
              error: cityRows[0]?.error_message ?? "Pipeline failed",
            });
            cleanup();
            return;
          }
          // 3. Check BullMQ failed queue as a secondary signal (catches failures
          // before the worker's DB update propagates, e.g. DB connection issues).
          const [pipelineFailed, valhallaFailed] = await Promise.all([
            queue.getFailed(0, 50),
            valhallaQueue.getFailed(0, 50),
          ]);
          const recentFail = [...pipelineFailed, ...valhallaFailed].find(
            (j) =>
              j.data.citySlug === citySlug &&
              j.data.type !== "refresh-city" &&
              (j.finishedOn ?? 0) > jobCreatedAt,
          );
          if (recentFail) {
            enqueue({
              type: "failed",
              jobId: recentFail.id ?? "",
              error: recentFail.failedReason ?? "Pipeline stage failed",
            });
            cleanup();
            return;
          }
          // 4. Still pending — heartbeat.
          enqueue({ type: "heartbeat" });
        } catch {
          // Transient queue/DB hiccup: keep the connection alive and retry
          // on the next tick rather than failing the stream.
          enqueue({ type: "heartbeat" });
        }
      };
      // Run one poll, then schedule the next only after it completes, so
      // iterations never overlap even when a tick takes longer than 1s.
      const run = async () => {
        await poll();
        if (!closed) timer = setTimeout(run, 1000);
      };
      void run();
      req.signal.addEventListener("abort", cleanup);
    },
    cancel() {
      // Consumer cancelled the stream (controller is already closed by the
      // runtime): stop polling and drop the pending timer.
      closed = true;
      if (timer) { clearTimeout(timer); timer = null; }
    },
  });
  return new Response(stream, {
    headers: {
      "Content-Type": "text/event-stream",
      "Cache-Control": "no-cache, no-transform",
      Connection: "keep-alive",
      "X-Accel-Buffering": "no",
    },
  });
}