fifteen/worker/src/jobs/ingest-boris-ni.ts

262 lines
9 KiB
TypeScript
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/**
* Ingest BORIS NI (Niedersachsen Bodenrichtwerte) estate value zones.
*
* Fetches from the LGLN WFS and stores into estate_value_zones.
* Only enqueued by refresh-city when the city's bbox intersects Niedersachsen.
*/
import type { Job } from "bullmq";
import { getSql } from "../db.js";
import type { JobProgress } from "@transportationer/shared";
/** Payload of the queue job that ingests BORIS NI estate value zones for one city. */
export type IngestBorisNiData = {
// Literal tag naming this job kind.
type: "ingest-boris-ni";
// Slug of the target city; looked up in `cities.slug` and written to `estate_value_zones.city_slug`.
citySlug: string;
};
// Base URL of the BORIS NI WFS endpoint; the job handler appends the GetFeature query parameters.
const WFS_BASE = "https://opendata.lgln.niedersachsen.de/doorman/noauth/boris_wfs";
// Maximum number of zones written by a single INSERT statement.
const CHUNK = 500;
// ─── GML Parser (WFS 2.0 / GML 3.2) ──────────────────────────────────────────
/** The five entities predefined by XML, keyed by entity name. */
const XML_ENTITIES: Record<string, string> = {
  amp: "&",
  lt: "<",
  gt: ">",
  quot: '"',
  apos: "'",
};

/**
 * Decodes predefined XML entities and numeric character references in one
 * pass, so already-decoded output is never decoded a second time
 * (`&amp;lt;` becomes `&lt;`, not `<`). Unknown or out-of-range references
 * are left as written.
 */
function decodeXmlText(text: string): string {
  return text.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole, ref: string) => {
    if (!ref.startsWith("#")) return XML_ENTITIES[ref] ?? whole;
    const code = ref.startsWith("#x") ? parseInt(ref.slice(2), 16) : parseInt(ref.slice(1), 10);
    return code <= 0x10ffff ? String.fromCodePoint(code) : whole;
  });
}

/**
 * Returns the text of the first element called `localName` in `xml`, with or
 * without a namespace prefix.
 *
 * Only the character data up to the first `<` after the opening tag is
 * taken; it is trimmed and entity-decoded.
 *
 * @param xml       XML fragment to search.
 * @param localName Element name without prefix; matched literally.
 * @returns The decoded text, or `null` when the element is missing or empty.
 */
function extractFirst(xml: string, localName: string): string | null {
  // Escape regex metacharacters so e.g. "a.b" cannot match "aXb".
  const literalName = localName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  const re = new RegExp(`<(?:[\\w]+:)?${literalName}(?:\\s[^>]*)?>([^<]*)`, "");
  const m = re.exec(xml);
  if (!m) return null;
  return decodeXmlText(m[1].trim()) || null;
}
/**
 * Turns the whitespace-separated numbers of a `gml:posList` into GeoJSON
 * positions.
 *
 * The input is read as consecutive (lat, lon) pairs and each pair is emitted
 * swapped as [lon, lat]. A trailing unpaired number is ignored.
 */
function parsePosListCoords(posList: string): [number, number][] {
  const tokens = posList.trim().split(/\s+/);
  const pairCount = Math.floor(tokens.length / 2);
  return Array.from({ length: pairCount }, (_, k): [number, number] => [
    Number(tokens[2 * k + 1]), // second value of the pair → lon
    Number(tokens[2 * k]), // first value of the pair → lat
  ]);
}
/**
 * Builds a GeoJSON Polygon from a `gml:Polygon` fragment.
 *
 * The first `gml:exterior` posList becomes the outer ring; every
 * `gml:interior` posList with at least three positions is appended as a hole.
 *
 * @returns The polygon, or `null` when there is no exterior ring or it has
 *          fewer than three positions.
 */
function parseGmlPolygon(xml: string): { type: "Polygon"; coordinates: [number, number][][] } | null {
  const outer = xml.match(/<gml:exterior[\s\S]*?<gml:posList[^>]*>([\s\S]*?)<\/gml:posList>/);
  if (outer === null) return null;

  const shell = parsePosListCoords(outer[1]);
  if (shell.length < 3) return null;

  const holes = Array.from(
    xml.matchAll(/<gml:interior[\s\S]*?<gml:posList[^>]*>([\s\S]*?)<\/gml:posList>/g),
    (hit) => parsePosListCoords(hit[1]),
  ).filter((ring) => ring.length >= 3);

  return { type: "Polygon", coordinates: [shell, ...holes] };
}
/** GeoJSON geometry shapes produced by the GML parser; positions are [lon, lat] pairs. */
type GeoJsonGeometry =
| { type: "Polygon"; coordinates: [number, number][][] }
| { type: "MultiPolygon"; coordinates: [number, number][][][] };
/** One zone parsed from a WFS member, in the shape later written to `estate_value_zones`. */
type EstateValueFeature = {
// Zone outline; stored in the `geom` column.
geometry: GeoJsonGeometry;
// Numeric value of the `bodenrichtwert` element, or null when absent; stored as `value_eur_m2`.
value: number | null;
// Text of `bodenrichtwertzoneName`.
zoneName: string | null;
// Text of `art` (usage code).
usageType: string | null;
// Text of `ergaenzung`.
usageDetail: string | null;
// Text of `entwicklungszustand`.
devState: string | null;
// Text of `stichtag`, kept as the raw string from the response.
stichtag: string | null;
};
/**
 * Converts the XML of one `<wfs:member>` into an estate value feature.
 *
 * Geometry handling: a `gml:MultiSurface` is inspected before a bare
 * `gml:Polygon`. A multi-surface itself contains `gml:Polygon` elements, so
 * testing for a polygon first would pick up only its first part and drop the
 * others. A multi-surface with exactly one valid part yields a Polygon; two
 * or more parts yield a MultiPolygon.
 *
 * @param xml Inner XML of a single WFS member.
 * @returns The parsed feature, or `null` when the zone is filtered out by
 *          the usage rules below or no valid polygon could be parsed.
 */
function parseMember(xml: string): EstateValueFeature | null {
  const value = extractFirst(xml, "bodenrichtwert");
  const zoneName = extractFirst(xml, "bodenrichtwertzoneName");
  const art = extractFirst(xml, "art");
  const ergaenzung = extractFirst(xml, "ergaenzung");
  const devState = extractFirst(xml, "entwicklungszustand");
  const stichtag = extractFirst(xml, "stichtag");

  // Only keep Bauland (B) zones designated for residential or mixed use.
  // entwicklungszustand=LF covers Landwirtschaft, Forst (F), Grünland (GR),
  // and Ackerland (A) — all priced roughly €0.65–€4/m², irrelevant for housing.
  // Within Bauland, art W* = Wohnbaufläche (residential) and M* = Mischgebiet
  // (mixed use, housing permitted); GE/GI (commercial/industrial) and SO
  // (special use) are excluded as houses cannot be built there.
  if (devState !== "B") return null;
  if (!art || (!art.startsWith("W") && !art.startsWith("M"))) return null;

  let geometry: GeoJsonGeometry | null = null;
  const multiMatch = /<gml:MultiSurface[^>]*>[\s\S]*?<\/gml:MultiSurface>/.exec(xml);
  if (multiMatch) {
    // Collect every polygon nested in the multi-surface, skipping invalid ones.
    const parts: [number, number][][][] = [];
    const polygonRe = /<gml:Polygon[^>]*>[\s\S]*?<\/gml:Polygon>/g;
    let polyMatch: RegExpExecArray | null;
    while ((polyMatch = polygonRe.exec(multiMatch[0])) !== null) {
      const poly = parseGmlPolygon(polyMatch[0]);
      if (poly) parts.push(poly.coordinates);
    }
    const [first, ...rest] = parts;
    if (first !== undefined) {
      geometry =
        rest.length === 0
          ? { type: "Polygon", coordinates: first }
          : { type: "MultiPolygon", coordinates: parts };
    }
  } else {
    const polygonMatch = /<gml:Polygon[^>]*>[\s\S]*?<\/gml:Polygon>/.exec(xml);
    if (polygonMatch) geometry = parseGmlPolygon(polygonMatch[0]);
  }
  if (!geometry) return null;

  // A non-numeric value would parse to NaN; store it as "unknown" instead.
  const amount = value !== null ? parseFloat(value) : null;

  return {
    geometry,
    value: amount !== null && Number.isFinite(amount) ? amount : null,
    zoneName,
    usageType: art,
    usageDetail: ergaenzung,
    devState,
    stichtag,
  };
}
/**
 * Extracts every `<wfs:member>` from a WFS response and returns the members
 * that `parseMember` accepts, in document order.
 */
function parseGmlFeatures(gml: string): EstateValueFeature[] {
  const accepted: EstateValueFeature[] = [];
  for (const [, memberXml] of gml.matchAll(/<wfs:member>([\s\S]*?)<\/wfs:member>/g)) {
    const feature = parseMember(memberXml);
    if (feature !== null) accepted.push(feature);
  }
  return accepted;
}
// ─── Job handler ──────────────────────────────────────────────────────────────
/**
 * Job handler: refreshes the BORIS NI estate value zones of one city.
 *
 * Steps, in order:
 *  1. read the city's bbox from `cities`;
 *  2. request all zonal features inside that bbox from the WFS;
 *  3. parse the GML response;
 *  4. delete the city's existing `boris-ni` rows and insert the parsed zones
 *     in batches of `CHUNK`.
 * Progress is reported on the job throughout.
 *
 * Returns early (after a warning, without touching the table) when the
 * response body contains an OWS exception report.
 *
 * NOTE(review): no transaction is opened in this function, so a failing
 * INSERT leaves the earlier DELETE in effect. The WFS is queried once with
 * COUNT=10000 and no paging.
 *
 * @throws Error when the city has no bbox or the WFS answers with a non-OK
 *         HTTP status.
 */
export async function handleIngestBorisNi(job: Job<IngestBorisNiData>): Promise<void> {
  const { citySlug } = job.data;
  const sql = getSql();

  // All progress reports of this job carry the same stage label.
  const report = (pct: number, message: string) =>
    job.updateProgress({
      stage: "Ingesting BORIS NI",
      pct,
      message,
    } satisfies JobProgress);

  await report(0, `Reading bbox for ${citySlug}`);

  const bboxRows = await Promise.resolve(sql<{
    minlng: number; minlat: number; maxlng: number; maxlat: number;
  }[]>`
SELECT
ST_XMin(bbox)::float AS minlng,
ST_YMin(bbox)::float AS minlat,
ST_XMax(bbox)::float AS maxlng,
ST_YMax(bbox)::float AS maxlat
FROM cities WHERE slug = ${citySlug} AND bbox IS NOT NULL
`);
  if (bboxRows.length === 0) {
    throw new Error(`No bbox set for city ${citySlug}`);
  }
  const { minlng, minlat, maxlng, maxlat } = bboxRows[0];

  await report(10, `Fetching BORIS NI WFS for ${citySlug}`);

  // Fetch from BORIS NI WFS (lat/lon axis order for EPSG:4326)
  const queryParams: [string, string][] = [
    ["SERVICE", "WFS"],
    ["VERSION", "2.0.0"],
    ["REQUEST", "GetFeature"],
    ["TYPENAMES", "boris:BR_BodenrichtwertZonal"],
    ["SRSNAME", "urn:ogc:def:crs:EPSG::4326"],
    ["BBOX", `${minlat},${minlng},${maxlat},${maxlng},urn:ogc:def:crs:EPSG::4326`],
    ["COUNT", "10000"],
  ];
  const requestUrl = new URL(WFS_BASE);
  for (const [key, val] of queryParams) {
    requestUrl.searchParams.set(key, val);
  }

  const wfsResponse = await fetch(requestUrl.toString(), {
    headers: { Accept: "application/gml+xml; version=3.2" },
    signal: AbortSignal.timeout(60_000),
  });
  if (!wfsResponse.ok) {
    throw new Error(`BORIS NI WFS returned HTTP ${wfsResponse.status}`);
  }

  const gml = await wfsResponse.text();
  if (gml.includes("<ows:ExceptionReport")) {
    console.warn(`[ingest-boris-ni] WFS returned exception report for ${citySlug}`);
    return;
  }

  await report(50, "Parsing GML response");

  const zones = parseGmlFeatures(gml);
  const total = zones.length;
  console.log(`[ingest-boris-ni] Parsed ${total} zones for ${citySlug}`);

  // Replace all existing BORIS NI data for this city
  await Promise.resolve(sql`
DELETE FROM estate_value_zones WHERE city_slug = ${citySlug} AND source = 'boris-ni'
`);

  if (total === 0) {
    await report(100, `No zones found for ${citySlug}`);
    return;
  }

  // `stored` doubles as the offset of the next batch.
  let stored = 0;
  while (stored < total) {
    const batch = zones.slice(stored, stored + CHUNK);

    // One parallel array per column, expanded row-wise by unnest() in SQL.
    const geomJsons = batch.map((zone) => JSON.stringify(zone.geometry));
    const values = batch.map((zone) => zone.value);
    const zoneNames = batch.map((zone) => zone.zoneName);
    const usageTypes = batch.map((zone) => zone.usageType);
    const usageDetails = batch.map((zone) => zone.usageDetail);
    const devStates = batch.map((zone) => zone.devState);
    const stichtags = batch.map((zone) => zone.stichtag);

    await Promise.resolve(sql`
INSERT INTO estate_value_zones
(city_slug, geom, value_eur_m2, zone_name, usage_type, usage_detail, dev_state, stichtag, source)
SELECT
${citySlug},
ST_SetSRID(ST_GeomFromGeoJSON(g), 4326),
v,
zn, ut, ud, ds, st,
'boris-ni'
FROM unnest(
${geomJsons}::text[],
${values}::numeric[],
${zoneNames}::text[],
${usageTypes}::text[],
${usageDetails}::text[],
${devStates}::text[],
${stichtags}::text[]
) AS t(g, v, zn, ut, ud, ds, st)
`);

    stored += batch.length;
    // Storing covers the second half (50–100 %) of the progress bar.
    await report(
      50 + Math.round((stored / total) * 50),
      `Stored ${stored}/${total} zones`,
    );
  }

  console.log(`[ingest-boris-ni] ✓ Stored ${stored} BORIS NI zones for ${citySlug}`);
  await report(100, `BORIS NI ingest complete: ${stored} zones`);
}