import type { Job } from "bullmq";
import { createWriteStream, mkdirSync, renameSync, rmSync, statSync } from "fs";
import { Writable } from "stream";
import type { JobProgress } from "@transportationer/shared";

/** Payload of a `download-pbf` job. */
export type DownloadPbfData = {
  type: "download-pbf";
  citySlug: string;
  geofabrikUrl: string;
  /** Optional size hint; when set it takes precedence over the Content-Length header for progress. */
  expectedBytes?: number;
};

const ALLOWED_PATTERN =
  /^https:\/\/download\.geofabrik\.de\/[\w][\w/-]+-latest\.osm\.pbf$/;

const OSM_DATA_DIR = process.env.OSM_DATA_DIR ?? "/data/osm";

/** Characters that would let `citySlug` escape OSM_DATA_DIR when used in a file name. */
const UNSAFE_SLUG_PATTERN = /[\\\/\0]/;

/** Formats a byte count as mebibytes with one decimal, as used in progress messages. */
function toMb(bytes: number): string {
  return (bytes / 1_048_576).toFixed(1);
}

/** Removes `path` if it exists; cleanup failures are ignored so they never mask the original error. */
function removeQuietly(path: string): void {
  try {
    rmSync(path, { force: true });
  } catch {
    // Best-effort cleanup only.
  }
}

/**
 * Writes one chunk to `sink`, waiting for "drain" when the sink's buffer is full.
 * Rejects if `failure` rejects while waiting.
 */
async function writeChunk(
  sink: Writable,
  chunk: Uint8Array,
  failure: Promise<never>,
): Promise<void> {
  if (sink.write(chunk)) return;
  await Promise.race([
    new Promise<void>((resolve) => sink.once("drain", resolve)),
    failure,
  ]);
}

/**
 * Streams `body` into a new file at `filePath`, honouring backpressure.
 *
 * @param onBytes called with the running byte total after each chunk is received
 * @returns the number of bytes written
 * @throws if reading the body or writing the file fails; in that case the file
 *         stream has been destroyed and closed, and the body reader cancelled.
 */
async function streamBodyToFile(
  body: ReadableStream<Uint8Array>,
  filePath: string,
  onBytes: (downloaded: number) => void,
): Promise<number> {
  const reader = body.getReader();
  const fileStream = createWriteStream(filePath);

  // Registered up front so a file error (e.g. EACCES, ENOSPC) at any point is
  // captured as a rejection instead of becoming an unhandled "error" event.
  const failure = new Promise<never>((_resolve, reject) => {
    fileStream.once("error", reject);
  });
  // The rejection is observed through Promise.race below; this no-op handler
  // only prevents an unhandled-rejection report in between races.
  failure.catch(() => {});
  const closed = new Promise<void>((resolve) => {
    fileStream.once("close", () => resolve());
  });

  let downloaded = 0;
  try {
    for (;;) {
      const { done, value } = await Promise.race([reader.read(), failure]);
      if (done) break;
      if (!value) continue;
      downloaded += value.byteLength;
      onBytes(downloaded);
      await writeChunk(fileStream, value, failure);
    }
    // "finish" is only emitted once all data has been flushed successfully.
    await Promise.race([
      new Promise<void>((resolve) => {
        fileStream.once("finish", () => resolve());
        fileStream.end();
      }),
      failure,
    ]);
    return downloaded;
  } catch (err) {
    fileStream.destroy();
    await reader.cancel().catch(() => {});
    // Wait for the descriptor to be released so the caller can safely unlink.
    await closed;
    throw err;
  }
}

/**
 * BullMQ handler for `download-pbf` jobs: downloads a Geofabrik PBF extract
 * to `${OSM_DATA_DIR}/${citySlug}-latest.osm.pbf`.
 *
 * The download is written to a per-job temp file and renamed onto the final
 * path on success, so other readers see either a complete file or none. If a
 * non-empty file already exists at the final path the download is skipped.
 * The temp file is removed when the download fails.
 *
 * @param job job whose `data` is a {@link DownloadPbfData} payload
 * @throws Error if the URL does not match the Geofabrik pattern, if `citySlug`
 *         is empty or contains a path separator, if the HTTP response is not
 *         OK or has no body, if the body is empty, or if writing fails.
 */
export async function handleDownloadPbf(job: Job): Promise<void> {
  const data: DownloadPbfData = job.data;
  const { citySlug, geofabrikUrl, expectedBytes } = data;

  if (!ALLOWED_PATTERN.test(geofabrikUrl)) {
    throw new Error(`Rejected URL (must be a Geofabrik PBF): ${geofabrikUrl}`);
  }
  // citySlug becomes part of a file name; refuse anything that could leave OSM_DATA_DIR.
  if (
    typeof citySlug !== "string" ||
    citySlug.length === 0 ||
    UNSAFE_SLUG_PATTERN.test(citySlug)
  ) {
    throw new Error(
      `Invalid citySlug (must be non-empty without path separators): ${JSON.stringify(citySlug)}`,
    );
  }

  mkdirSync(OSM_DATA_DIR, { recursive: true });
  const outputPath = `${OSM_DATA_DIR}/${citySlug}-latest.osm.pbf`;
  // Use job.id in the tmp path so two concurrent download-pbf jobs for the
  // same city (one under extract-pois, one under build-valhalla) don't write
  // to the same file and corrupt each other.
  const tmpPath = `${outputPath}.${job.id}.tmp`;

  // Idempotency: skip if a complete file is already on disk (supports
  // parallel download-pbf instances for the same city PBF).
  try {
    const stat = statSync(outputPath);
    if (stat.size > 0) {
      await job.updateProgress({
        stage: "Downloading PBF",
        pct: 100,
        message: `Already on disk: ${outputPath} (${toMb(stat.size)} MB)`,
      } satisfies JobProgress);
      return;
    }
  } catch {
    // File doesn't exist — fall through to download.
  }

  await job.updateProgress({
    stage: "Downloading PBF",
    pct: 0,
    message: `Starting download from Geofabrik…`,
  } satisfies JobProgress);

  const response = await fetch(geofabrikUrl, {
    headers: { "User-Agent": "Transportationer/1.0" },
  });
  if (!response.ok || !response.body) {
    // Release the connection before bailing out.
    await response.body?.cancel().catch(() => {});
    throw new Error(`HTTP ${response.status} from ${geofabrikUrl}`);
  }

  const headerBytes = Number.parseInt(
    response.headers.get("content-length") ?? "0",
    10,
  );
  // A malformed Content-Length parses to NaN; treat that as "unknown" (0).
  const totalBytes =
    expectedBytes ?? (Number.isFinite(headerBytes) ? headerBytes : 0);

  let lastPct = -1;
  const reportProgress = (downloaded: number): void => {
    // Clamp: expectedBytes is only a hint and may be smaller than the real size.
    const pct =
      totalBytes > 0
        ? Math.min(100, Math.floor((downloaded / totalBytes) * 100))
        : 0;
    if (pct === lastPct) return;
    lastPct = pct;
    // Progress reporting is deliberately fire-and-forget: a failed update
    // must not abort the download.
    void job
      .updateProgress({
        stage: "Downloading PBF",
        pct,
        message: `${toMb(downloaded)} MB${totalBytes ? ` / ${toMb(totalBytes)} MB` : ""}`,
        bytesDownloaded: downloaded,
        totalBytes,
      } satisfies JobProgress)
      .catch(() => {});
  };

  // Write to a temp path; rename to final path on completion so concurrent
  // instances see a complete file or nothing (never a partial file).
  try {
    const written = await streamBodyToFile(
      response.body,
      tmpPath,
      reportProgress,
    );
    if (written === 0) {
      throw new Error(`Empty response body from ${geofabrikUrl}`);
    }
    // Atomically promote the temp file to the final path.
    renameSync(tmpPath, outputPath);
  } catch (err) {
    removeQuietly(tmpPath);
    throw err;
  }

  await job.updateProgress({
    stage: "Downloading PBF",
    pct: 100,
    message: `Download complete: ${outputPath}`,
  } satisfies JobProgress);
}