126 lines
3.9 KiB
TypeScript
126 lines
3.9 KiB
TypeScript
import type { Job } from "bullmq";
import {
  createWriteStream,
  mkdirSync,
  renameSync,
  rmSync,
  statSync,
} from "fs";
import { Writable } from "stream";
import type { JobProgress } from "@transportationer/shared";
|
|
|
|
/**
 * Payload of a `download-pbf` job, as destructured from `job.data` by
 * `handleDownloadPbf`.
 */
export type DownloadPbfData = {
  /** Literal tag identifying this payload as a PBF download request. */
  type: "download-pbf";
  /** City identifier; used to build the on-disk file name of the PBF. */
  citySlug: string;
  /** Source URL of the extract to download. */
  geofabrikUrl: string;
  /** Optional size in bytes, used as the total when reporting progress. */
  expectedBytes?: number;
};
|
|
|
|
/**
 * Allow-list for download URLs: HTTPS only, host `download.geofabrik.de`,
 * a path made of word characters, `/` and `-`, ending in `-latest.osm.pbf`.
 * Dots are not permitted inside the path, so `..` segments cannot match.
 */
const ALLOWED_PATTERN =
  /^https:\/\/download\.geofabrik\.de\/[\w][\w/-]+-latest\.osm\.pbf$/;
|
|
|
|
/**
 * Directory where downloaded PBF files are stored. Taken from the
 * `OSM_DATA_DIR` environment variable, defaulting to `/data/osm` when unset.
 */
const { OSM_DATA_DIR = "/data/osm" } = process.env;
|
|
|
|
/**
 * Downloads a Geofabrik PBF extract to `${OSM_DATA_DIR}/${citySlug}-latest.osm.pbf`.
 *
 * The body is streamed into a job-specific temp file and renamed onto the
 * final path only after every byte has been flushed, so other readers see
 * either a complete file or no file. If a non-empty file already exists at
 * the final path the download is skipped. On any failure the response body is
 * cancelled and the temp file is removed before the error is rethrown.
 *
 * @param job - BullMQ job carrying the city slug, source URL and optional
 *   expected size; also used to publish progress updates.
 * @returns Resolves once the file is in place (or was already present).
 * @throws Error if the URL does not match `ALLOWED_PATTERN`, if `citySlug`
 *   contains a path separator, if the HTTP response is not OK or has no body,
 *   or if reading the response / writing the file fails.
 */
export async function handleDownloadPbf(
  job: Job<DownloadPbfData>,
): Promise<void> {
  const { citySlug, geofabrikUrl, expectedBytes } = job.data;

  if (!ALLOWED_PATTERN.test(geofabrikUrl)) {
    throw new Error(`Rejected URL (must be a Geofabrik PBF): ${geofabrikUrl}`);
  }

  // citySlug is interpolated into a filesystem path below; a path separator
  // would let the job write outside OSM_DATA_DIR.
  if (/[\\/]/.test(citySlug)) {
    throw new Error(
      `Rejected citySlug (must not contain path separators): ${citySlug}`,
    );
  }

  mkdirSync(OSM_DATA_DIR, { recursive: true });
  const outputPath = `${OSM_DATA_DIR}/${citySlug}-latest.osm.pbf`;
  // Use job.id in the tmp path so two concurrent download-pbf jobs for the
  // same city (one under extract-pois, one under build-valhalla) don't write
  // to the same file and corrupt each other.
  const tmpPath = `${outputPath}.${job.id}.tmp`;

  // Idempotency: skip if a complete file is already on disk (supports
  // parallel download-pbf instances for the same city PBF).
  try {
    const stat = statSync(outputPath);
    if (stat.size > 0) {
      await job.updateProgress({
        stage: "Downloading PBF",
        pct: 100,
        message: `Already on disk: ${outputPath} (${(stat.size / 1_048_576).toFixed(1)} MB)`,
      } satisfies JobProgress);
      return;
    }
  } catch {
    // File doesn't exist — fall through to download.
  }

  await job.updateProgress({
    stage: "Downloading PBF",
    pct: 0,
    message: `Starting download from Geofabrik…`,
  } satisfies JobProgress);

  const response = await fetch(geofabrikUrl, {
    headers: { "User-Agent": "Transportationer/1.0" },
  });

  if (!response.ok || !response.body) {
    // Discard the unread body so the underlying connection is released.
    void response.body?.cancel().catch(() => {});
    throw new Error(`HTTP ${response.status} from ${geofabrikUrl}`);
  }

  const totalBytes = resolveTotalBytes(
    expectedBytes,
    response.headers.get("content-length"),
  );

  // Write to a temp path; rename to final path on completion so concurrent
  // instances see a complete file or nothing (never a partial file).
  const fileStream = createWriteStream(tmpPath);

  // Subscribe to the stream's outcome immediately. A listener attached only
  // after the read loop would miss an early 'error' (e.g. EACCES, ENOSPC),
  // which Node then raises as an uncaught exception.
  const fileFinished = new Promise<void>((resolve, reject) => {
    fileStream.once("finish", resolve);
    fileStream.once("error", reject);
  });
  // The rejection is consumed wherever fileFinished is awaited or raced; this
  // no-op handler only prevents an unhandled-rejection report if the download
  // aborts for another reason first.
  fileFinished.catch(() => {});
  const fileClosed = new Promise<void>((resolve) => {
    fileStream.once("close", () => resolve());
  });

  const reader = response.body.getReader();
  let downloaded = 0;
  let lastPct = -1;

  try {
    while (true) {
      const { done, value } = await reader.read();
      if (done) break;
      if (!value) continue;

      downloaded += value.byteLength;
      // Clamp so an underestimated expectedBytes never reports more than 100%.
      const pct =
        totalBytes > 0
          ? Math.min(100, Math.floor((downloaded / totalBytes) * 100))
          : 0;
      if (pct !== lastPct) {
        lastPct = pct;
        // Progress reporting is best-effort and must not stall or fail the
        // download, so it is neither awaited nor allowed to reject.
        void job
          .updateProgress({
            stage: "Downloading PBF",
            pct,
            message: `${(downloaded / 1_048_576).toFixed(1)} MB${totalBytes ? ` / ${(totalBytes / 1_048_576).toFixed(1)} MB` : ""}`,
            bytesDownloaded: downloaded,
            totalBytes,
          } satisfies JobProgress)
          .catch(() => {});
      }

      // Respect backpressure: when write() returns false, pause reading until
      // the buffer drains so a slow disk cannot make the whole download pile
      // up in memory. Racing against fileFinished surfaces a write error
      // instead of waiting for a 'drain' that will never come.
      if (!fileStream.write(value)) {
        await Promise.race([waitForDrain(fileStream), fileFinished]);
      }
    }

    fileStream.end();
    await fileFinished;

    // Atomically promote the temp file to the final path.
    renameSync(tmpPath, outputPath);
  } catch (err) {
    // Stop the transfer, release the file handle, and remove the partial temp
    // file so failed or retried jobs don't accumulate junk on disk.
    await reader.cancel().catch(() => {});
    fileStream.destroy();
    await fileClosed;
    try {
      rmSync(tmpPath, { force: true });
    } catch {
      // Best-effort cleanup; the original failure is what the caller needs.
    }
    throw err;
  }

  await job.updateProgress({
    stage: "Downloading PBF",
    pct: 100,
    message: `Download complete: ${outputPath}`,
  } satisfies JobProgress);
}

/**
 * Chooses the byte total used for progress reporting: a positive, finite
 * `expectedBytes` wins; otherwise a positive `content-length` header value is
 * used; otherwise 0, meaning "unknown".
 */
function resolveTotalBytes(
  expectedBytes: number | undefined,
  contentLength: string | null,
): number {
  if (
    typeof expectedBytes === "number" &&
    Number.isFinite(expectedBytes) &&
    expectedBytes > 0
  ) {
    return expectedBytes;
  }
  const parsed = Number.parseInt(contentLength ?? "", 10);
  return Number.isFinite(parsed) && parsed > 0 ? parsed : 0;
}

/** Resolves the next time `stream` emits `'drain'`. */
function waitForDrain(stream: Writable): Promise<void> {
  return new Promise((resolve) => {
    stream.once("drain", () => resolve());
  });
}
|