262 lines
9 KiB
TypeScript
262 lines
9 KiB
TypeScript
/**
 * Ingest BORIS NI (Niedersachsen Bodenrichtwerte) estate value zones.
 *
 * Fetches from the LGLN WFS and stores into estate_value_zones.
 * Only enqueued by refresh-city when the city's bbox intersects Niedersachsen.
 */
|
||
import type { Job } from "bullmq";
|
||
import { getSql } from "../db.js";
|
||
import type { JobProgress } from "@transportationer/shared";
|
||
|
||
/** Payload carried by the job that `handleIngestBorisNi` processes. */
export type IngestBorisNiData = {
  /** Literal tag naming this job kind. */
  type: "ingest-boris-ni";
  /** Slug used to look up the city's row (and bbox) in the `cities` table. */
  citySlug: string;
};
|
||
|
||
/** Base URL of the BORIS NI WFS endpoint; query parameters are appended per request. */
const WFS_BASE = "https://opendata.lgln.niedersachsen.de/doorman/noauth/boris_wfs";

/** Maximum number of features written by a single INSERT statement. */
const CHUNK = 500;
|
||
|
||
// ─── GML Parser (WFS 2.0 / GML 3.2) ──────────────────────────────────────────
|
||
|
||
/**
 * Replaces the five predefined XML entities and numeric character references
 * in `text` with the characters they stand for. References that are unknown
 * or outside the Unicode range are left untouched.
 */
function decodeXmlEntities(text: string): string {
  if (!text.includes("&")) return text;
  const named: Record<string, string> = { amp: "&", lt: "<", gt: ">", quot: '"', apos: "'" };
  return text.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (whole: string, ref: string) => {
    if (!ref.startsWith("#")) return named[ref] ?? whole;
    const isHex = ref[1] === "x";
    const code = Number.parseInt(ref.slice(isHex ? 2 : 1), isHex ? 16 : 10);
    return Number.isInteger(code) && code >= 0 && code <= 0x10ffff
      ? String.fromCodePoint(code)
      : whole;
  });
}

/**
 * Returns the text content of the first element in `xml` whose local name is
 * exactly `localName`, regardless of namespace prefix.
 *
 * @param xml        XML fragment to search.
 * @param localName  Element name without prefix; treated literally, not as a regex.
 * @returns The decoded, trimmed text up to the next `<`, or `null` when the
 *          element is missing, self-closing, or contains only whitespace.
 */
function extractFirst(xml: string, localName: string): string | null {
  // Escape regex metacharacters so the name can only match itself.
  const escapedName = localName.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
  // The lookahead stops "bodenrichtwert" from matching "bodenrichtwertzoneName";
  // group 1 holds the attribute text, group 2 the element's leading text node.
  const re = new RegExp(`<(?:[\\w.-]+:)?${escapedName}(?=[\\s/>])([^>]*)>([^<]*)`);
  const m = re.exec(xml);
  if (!m) return null;
  // A self-closing tag has no content; whatever follows belongs to a sibling.
  if (m[1].endsWith("/")) return null;
  const text = decodeXmlEntities(m[2]).trim();
  return text || null;
}
|
||
|
||
/**
 * Converts the text of a `gml:posList` into GeoJSON positions.
 *
 * The list is read as whitespace-separated pairs in "lat lon" order and each
 * pair is flipped to GeoJSON's [lon, lat]. A trailing unpaired number is
 * ignored.
 *
 * @param posList  Raw text content of a posList element.
 * @returns The positions, or an empty array when the list is empty or contains
 *          any token that is not a finite number. Such a ring cannot be
 *          serialised to valid GeoJSON, because `JSON.stringify` writes NaN as
 *          `null`.
 */
function parsePosListCoords(posList: string): [number, number][] {
  const trimmed = posList.trim();
  if (trimmed === "") return [];

  const nums = trimmed.split(/\s+/).map(Number);
  if (!nums.every((n) => Number.isFinite(n))) return [];

  const coords: [number, number][] = [];
  for (let i = 0; i + 1 < nums.length; i += 2) {
    coords.push([nums[i + 1], nums[i]]); // lat,lon → lon,lat (GeoJSON)
  }
  return coords;
}
|
||
|
||
/**
 * Builds a GeoJSON Polygon from a `<gml:Polygon>` fragment.
 *
 * The first posList under `<gml:exterior>` becomes the outer ring; every
 * posList under a `<gml:interior>` becomes a hole.
 *
 * @param xml  Fragment containing one GML polygon (elements must use the `gml:` prefix).
 * @returns The polygon, or `null` when there is no exterior ring or it has
 *          fewer than four positions. Interior rings with fewer than four
 *          positions are dropped.
 */
function parseGmlPolygon(xml: string): { type: "Polygon"; coordinates: [number, number][][] } | null {
  // A GeoJSON linear ring is closed, so it needs at least four positions
  // (RFC 7946 §3.1.6). Shorter rings are rejected here rather than stored.
  const minRingPositions = 4;

  const extMatch = /<gml:exterior[\s\S]*?<gml:posList[^>]*>([\s\S]*?)<\/gml:posList>/.exec(xml);
  if (!extMatch) return null;

  const exterior = parsePosListCoords(extMatch[1]);
  if (exterior.length < minRingPositions) return null;

  const rings: [number, number][][] = [exterior];
  const intRe = /<gml:interior[\s\S]*?<gml:posList[^>]*>([\s\S]*?)<\/gml:posList>/g;
  let intMatch: RegExpExecArray | null;
  while ((intMatch = intRe.exec(xml)) !== null) {
    const interior = parsePosListCoords(intMatch[1]);
    if (interior.length >= minRingPositions) rings.push(interior);
  }

  return { type: "Polygon", coordinates: rings };
}
|
||
|
||
/**
 * GeoJSON geometry shapes produced by the GML parser: a single polygon
 * (array of rings) or a multi-polygon (array of polygons). Positions are
 * two-number tuples.
 */
type GeoJsonGeometry =
  | { type: "Polygon"; coordinates: Array<Array<[number, number]>> }
  | { type: "MultiPolygon"; coordinates: Array<Array<Array<[number, number]>>> };
|
||
|
||
/** One parsed WFS member, in the shape written to `estate_value_zones`. */
type EstateValueFeature = {
  /** Zone outline as GeoJSON. */
  geometry: GeoJsonGeometry;
  /** Numeric content of the `bodenrichtwert` element, if present. */
  value: number | null;
  /** Content of the `bodenrichtwertzoneName` element. */
  zoneName: string | null;
  /** Content of the `art` element. */
  usageType: string | null;
  /** Content of the `ergaenzung` element. */
  usageDetail: string | null;
  /** Content of the `entwicklungszustand` element. */
  devState: string | null;
  /** Content of the `stichtag` element, kept as the raw string. */
  stichtag: string | null;
};
|
||
|
||
/**
 * Extracts the geometry of one WFS member as GeoJSON.
 *
 * `<gml:MultiSurface>` is checked before a bare `<gml:Polygon>`: a
 * multi-surface contains polygons itself, so testing for a polygon first would
 * always match its first part and drop the rest.
 */
function parseMemberGeometry(xml: string): GeoJsonGeometry | null {
  const multiMatch = /<gml:MultiSurface[^>]*>([\s\S]*?)<\/gml:MultiSurface>/.exec(xml);
  if (multiMatch) {
    const polys: [number, number][][][] = [];
    const surfRe = /<gml:surfaceMember[^>]*>([\s\S]*?)<\/gml:surfaceMember>/g;
    let surfMatch: RegExpExecArray | null;
    while ((surfMatch = surfRe.exec(multiMatch[0])) !== null) {
      const polyFrag = /<gml:Polygon[^>]*>([\s\S]*?)<\/gml:Polygon>/.exec(surfMatch[0]);
      if (!polyFrag) continue;
      const poly = parseGmlPolygon(polyFrag[0]);
      if (poly) polys.push(poly.coordinates);
    }
    if (polys.length === 0) return null;
    // A one-part multi-surface is emitted as a plain Polygon, which is what
    // this case produced before the ordering fix.
    if (polys.length === 1) return { type: "Polygon", coordinates: polys[0] };
    return { type: "MultiPolygon", coordinates: polys };
  }

  const polygonMatch = /<gml:Polygon[^>]*>([\s\S]*?)<\/gml:Polygon>/.exec(xml);
  return polygonMatch ? parseGmlPolygon(polygonMatch[0]) : null;
}

/**
 * Parses the XML of a single `wfs:member` into an estate value feature.
 *
 * @param xml  Inner XML of one WFS member.
 * @returns The feature, or `null` when the zone is filtered out (see below) or
 *          has no parseable polygon geometry.
 */
function parseMember(xml: string): EstateValueFeature | null {
  const devState = extractFirst(xml, "entwicklungszustand");
  const art = extractFirst(xml, "art");

  // Only keep Bauland (B) zones designated for residential or mixed use.
  // entwicklungszustand=LF covers Landwirtschaft, Forst (F), Grünland (GR),
  // and Ackerland (A) — all priced €0.65–€4/m², irrelevant for housing.
  // Within Bauland, art W* = Wohnbaufläche (residential) and M* = Mischgebiet
  // (mixed use, housing permitted); GE/GI (commercial/industrial) and SO
  // (special use) are excluded as houses cannot be built there.
  if (devState !== "B") return null;
  if (!art || (!art.startsWith("W") && !art.startsWith("M"))) return null;

  const geometry = parseMemberGeometry(xml);
  if (!geometry) return null;

  // Non-numeric text must become null, not NaN, before it reaches the database.
  const rawValue = extractFirst(xml, "bodenrichtwert");
  const parsedValue = rawValue !== null ? Number.parseFloat(rawValue) : Number.NaN;

  return {
    geometry,
    value: Number.isFinite(parsedValue) ? parsedValue : null,
    zoneName: extractFirst(xml, "bodenrichtwertzoneName"),
    usageType: art,
    usageDetail: extractFirst(xml, "ergaenzung"),
    devState,
    stichtag: extractFirst(xml, "stichtag"),
  };
}
|
||
|
||
/**
 * Splits a WFS response into its `<wfs:member>` elements and parses each one.
 *
 * @param gml  Full response body.
 * @returns Every member for which `parseMember` produced a feature, in
 *          document order; members yielding `null` are skipped.
 */
function parseGmlFeatures(gml: string): EstateValueFeature[] {
  const parsed: EstateValueFeature[] = [];
  for (const hit of gml.matchAll(/<wfs:member>([\s\S]*?)<\/wfs:member>/g)) {
    const feature = parseMember(hit[1]);
    if (feature) {
      parsed.push(feature);
    }
  }
  return parsed;
}
|
||
|
||
// ─── Job handler ──────────────────────────────────────────────────────────────
|
||
|
||
/**
 * Job handler: replaces the BORIS NI estate value zones stored for one city.
 *
 * Steps: read the city's bbox from `cities`, request the zones inside it from
 * the WFS, parse the GML, then delete the city's existing `boris-ni` rows and
 * insert the new ones in chunks of `CHUNK`. Delete and inserts share one
 * transaction, so a failed insert leaves the previous data in place.
 *
 * If the WFS answers with an exception report, a warning is logged and the
 * handler returns without touching stored data.
 *
 * @param job  Job whose data names the city to refresh.
 * @throws Error when the city has no bbox or the WFS responds with a non-2xx
 *         status. Fetch, timeout and database errors propagate unchanged.
 */
export async function handleIngestBorisNi(job: Job<IngestBorisNiData>): Promise<void> {
  const { citySlug } = job.data;
  const sql = getSql();

  // Upper bound sent as COUNT; also used to detect a truncated response.
  const maxFeatures = 10_000;

  const stage = "Ingesting BORIS NI";
  const report = (pct: number, message: string) =>
    job.updateProgress({ stage, pct, message } satisfies JobProgress);

  await report(0, `Reading bbox for ${citySlug}`);

  const bboxRows = await Promise.resolve(sql<{
    minlng: number; minlat: number; maxlng: number; maxlat: number;
  }[]>`
    SELECT
      ST_XMin(bbox)::float AS minlng,
      ST_YMin(bbox)::float AS minlat,
      ST_XMax(bbox)::float AS maxlng,
      ST_YMax(bbox)::float AS maxlat
    FROM cities WHERE slug = ${citySlug} AND bbox IS NOT NULL
  `);

  if (bboxRows.length === 0) {
    throw new Error(`No bbox set for city ${citySlug}`);
  }

  const { minlng, minlat, maxlng, maxlat } = bboxRows[0];

  await report(10, `Fetching BORIS NI WFS for ${citySlug}`);

  // Fetch from BORIS NI WFS (lat/lon axis order for EPSG:4326)
  const wfsUrl = new URL(WFS_BASE);
  wfsUrl.searchParams.set("SERVICE", "WFS");
  wfsUrl.searchParams.set("VERSION", "2.0.0");
  wfsUrl.searchParams.set("REQUEST", "GetFeature");
  wfsUrl.searchParams.set("TYPENAMES", "boris:BR_BodenrichtwertZonal");
  wfsUrl.searchParams.set("SRSNAME", "urn:ogc:def:crs:EPSG::4326");
  wfsUrl.searchParams.set("BBOX", `${minlat},${minlng},${maxlat},${maxlng},urn:ogc:def:crs:EPSG::4326`);
  wfsUrl.searchParams.set("COUNT", String(maxFeatures));

  const response = await fetch(wfsUrl.toString(), {
    headers: { Accept: "application/gml+xml; version=3.2" },
    signal: AbortSignal.timeout(60_000),
  });

  if (!response.ok) {
    throw new Error(`BORIS NI WFS returned HTTP ${response.status}`);
  }

  const gml = await response.text();

  // Best-effort: a service-side error leaves the existing rows untouched.
  if (gml.includes("<ows:ExceptionReport")) {
    console.warn(`[ingest-boris-ni] WFS returned exception report for ${citySlug}`);
    return;
  }

  // The request is not paged, so a full page means zones may have been cut off.
  const returnedMembers = gml.split("<wfs:member>").length - 1;
  if (returnedMembers >= maxFeatures) {
    console.warn(
      `[ingest-boris-ni] WFS response for ${citySlug} reached the COUNT limit of ${maxFeatures}; some zones may be missing`,
    );
  }

  await report(50, "Parsing GML response");

  const features = parseGmlFeatures(gml);
  console.log(`[ingest-boris-ni] Parsed ${features.length} zones for ${citySlug}`);

  // Replace all existing BORIS NI data for this city. One transaction covers
  // the delete and every insert, so the old rows survive a mid-way failure.
  const inserted: number = await sql.begin(async (tx) => {
    await tx`
      DELETE FROM estate_value_zones WHERE city_slug = ${citySlug} AND source = 'boris-ni'
    `;

    let stored = 0;
    for (let i = 0; i < features.length; i += CHUNK) {
      const chunk = features.slice(i, i + CHUNK);

      // One parallel array per column; unnest() zips them back into rows.
      const geomJsons = chunk.map((f) => JSON.stringify(f.geometry));
      const values = chunk.map((f) => f.value);
      const zoneNames = chunk.map((f) => f.zoneName);
      const usageTypes = chunk.map((f) => f.usageType);
      const usageDetails = chunk.map((f) => f.usageDetail);
      const devStates = chunk.map((f) => f.devState);
      const stichtags = chunk.map((f) => f.stichtag);

      await tx`
        INSERT INTO estate_value_zones
          (city_slug, geom, value_eur_m2, zone_name, usage_type, usage_detail, dev_state, stichtag, source)
        SELECT
          ${citySlug},
          ST_SetSRID(ST_GeomFromGeoJSON(g), 4326),
          v,
          zn, ut, ud, ds, st,
          'boris-ni'
        FROM unnest(
          ${geomJsons}::text[],
          ${values}::numeric[],
          ${zoneNames}::text[],
          ${usageTypes}::text[],
          ${usageDetails}::text[],
          ${devStates}::text[],
          ${stichtags}::text[]
        ) AS t(g, v, zn, ut, ud, ds, st)
      `;

      stored += chunk.length;
      // Reported before commit: the rows become visible once the transaction ends.
      await report(
        50 + Math.round((stored / features.length) * 50),
        `Stored ${stored}/${features.length} zones`,
      );
    }
    return stored;
  });

  if (features.length === 0) {
    await report(100, `No zones found for ${citySlug}`);
    return;
  }

  console.log(`[ingest-boris-ni] ✓ Stored ${inserted} BORIS NI zones for ${citySlug}`);

  await report(100, `BORIS NI ingest complete: ${inserted} zones`);
}
|