fifteen/infra/schema.sql

197 lines
8.7 KiB
SQL

-- Enable PostGIS
-- Provides the geometry(...) column types and GIST indexing used by the
-- spatial columns in this schema.
CREATE EXTENSION IF NOT EXISTS postgis;
-- NOTE(review): no topology objects are referenced in the visible part of this
-- schema; confirm postgis_topology is still required.
CREATE EXTENSION IF NOT EXISTS postgis_topology;
-- Provides gin_trgm_ops, used by the trigram index on raw_pois.name.
CREATE EXTENSION IF NOT EXISTS pg_trgm;
-- ─── Cities ──────────────────────────────────────────────────────────────────
-- One row per city, keyed by its slug. grid_points and estate_value_zones
-- reference cities(slug) with ON DELETE CASCADE.
CREATE TABLE IF NOT EXISTS cities (
    slug           TEXT         PRIMARY KEY,
    name           TEXT         NOT NULL,
    country_code   CHAR(2)      NOT NULL DEFAULT '',
    geofabrik_url  TEXT         NOT NULL,
    bbox           geometry(Polygon, 4326),
    resolution_m   INTEGER      NOT NULL DEFAULT 200,
    -- Ingest lifecycle state; restricted to the values listed in the CHECK.
    status         TEXT         NOT NULL DEFAULT 'empty'
        CHECK (status IN ('empty', 'pending', 'processing', 'ready', 'error')),
    error_message  TEXT,
    last_ingested  TIMESTAMPTZ,
    created_at     TIMESTAMPTZ  NOT NULL DEFAULT now()
);
-- Migration for existing databases.
-- Each ADD COLUMN IF NOT EXISTS is a no-op once the column is present.
ALTER TABLE cities ADD COLUMN IF NOT EXISTS resolution_m INTEGER NOT NULL DEFAULT 200;
ALTER TABLE cities ADD COLUMN IF NOT EXISTS boundary geometry(MultiPolygon, 4326);
ALTER TABLE cities ADD COLUMN IF NOT EXISTS refresh_iter SMALLINT NOT NULL DEFAULT 0;

-- Migration: rename 'fifteen' mode to 'cyclist' and update CHECK constraint.
--
-- grid_scores is only created further down in this file, so on a fresh
-- database it does not exist yet at this point; the guard skips the migration
-- in that case (the CREATE TABLE below already carries the current CHECK).
--
-- The constraint is dropped BEFORE the UPDATE so that a legacy CHECK that does
-- not yet list 'cyclist' cannot reject the rewritten rows.
DO $$
BEGIN
    IF to_regclass('grid_scores') IS NOT NULL THEN
        ALTER TABLE grid_scores
            DROP CONSTRAINT IF EXISTS grid_scores_travel_mode_check;

        UPDATE grid_scores
        SET travel_mode = 'cyclist'
        WHERE travel_mode = 'fifteen';

        ALTER TABLE grid_scores
            ADD CONSTRAINT grid_scores_travel_mode_check
            CHECK (travel_mode IN ('walking','cycling','driving','transit','cyclist','cycling_walk','transit_walk'));
    END IF;
END $$;

-- Spatial indexes on the city extent columns.
CREATE INDEX IF NOT EXISTS idx_cities_bbox ON cities USING GIST (bbox);
CREATE INDEX IF NOT EXISTS idx_cities_boundary ON cities USING GIST (boundary);
-- ─── Raw POIs (created and managed by osm2pgsql flex output) ─────────────────
-- osm2pgsql --drop recreates this table on each ingest using the Lua script,
-- so the definition here only matters before the first osm2pgsql run; after
-- that, CREATE TABLE IF NOT EXISTS is a no-op.
-- Columns: osm_id (bigint), osm_type (char), city_slug, category, subcategory,
-- name, tags, geom. There is deliberately no auto-generated id column.
CREATE TABLE IF NOT EXISTS raw_pois (
    osm_id       BIGINT                 NOT NULL,
    osm_type     CHAR(1)                NOT NULL,
    city_slug    TEXT                   NOT NULL,
    category     TEXT                   NOT NULL,
    subcategory  TEXT                   NOT NULL,
    name         TEXT,
    tags         JSONB,
    geom         geometry(Point, 4326)  NOT NULL
);

-- Filter by city and category.
CREATE INDEX IF NOT EXISTS idx_raw_pois_city_cat
    ON raw_pois (city_slug, category);

-- Spatial lookups on the POI location.
CREATE INDEX IF NOT EXISTS idx_raw_pois_geom
    ON raw_pois USING GIST (geom);

-- Trigram index on the name (requires pg_trgm); unnamed POIs are excluded.
CREATE INDEX IF NOT EXISTS idx_raw_pois_name
    ON raw_pois USING GIN (name gin_trgm_ops)
    WHERE name IS NOT NULL;
-- ─── Grid points ─────────────────────────────────────────────────────────────
-- Sample points per city. Each point is identified within its city by the
-- integer pair (grid_x, grid_y); rows are removed when the owning city is deleted.
CREATE TABLE IF NOT EXISTS grid_points (
    id                BIGSERIAL              PRIMARY KEY,
    city_slug         TEXT                   NOT NULL
        REFERENCES cities(slug) ON DELETE CASCADE,
    geom              geometry(Point, 4326)  NOT NULL,
    grid_x            INTEGER                NOT NULL,
    grid_y            INTEGER                NOT NULL,
    hidden_gem_score  FLOAT4,
    UNIQUE (city_slug, grid_x, grid_y)
);

-- Migration for existing databases (no-op once the column exists).
ALTER TABLE grid_points ADD COLUMN IF NOT EXISTS hidden_gem_score FLOAT4;

CREATE INDEX IF NOT EXISTS idx_grid_city
    ON grid_points (city_slug);

CREATE INDEX IF NOT EXISTS idx_grid_geom
    ON grid_points USING GIST (geom);

-- Partial index: only points that actually have a hidden-gem score.
CREATE INDEX IF NOT EXISTS idx_grid_hidden_gem
    ON grid_points (city_slug, hidden_gem_score)
    WHERE hidden_gem_score IS NOT NULL;
-- ─── Pre-computed accessibility scores ───────────────────────────────────────
-- One score per (grid point, category, travel mode, threshold, profile).
-- Rows are removed together with their grid point.
CREATE TABLE IF NOT EXISTS grid_scores (
    grid_point_id   BIGINT       NOT NULL
        REFERENCES grid_points(id) ON DELETE CASCADE,
    category        TEXT         NOT NULL,
    -- Column-level CHECK: PostgreSQL names it grid_scores_travel_mode_check,
    -- which is the name the travel-mode migration drops and re-adds.
    travel_mode     TEXT         NOT NULL
        CHECK (travel_mode IN ('walking','cycling','driving','transit','cyclist','cycling_walk','transit_walk')),
    threshold_min   INTEGER      NOT NULL,
    profile         TEXT         NOT NULL DEFAULT 'universal',
    nearest_poi_id  BIGINT,
    distance_m      FLOAT,
    travel_time_s   FLOAT,
    -- Score is constrained to the closed interval [0, 1].
    score           FLOAT        NOT NULL
        CHECK (score >= 0 AND score <= 1),
    computed_at     TIMESTAMPTZ  NOT NULL DEFAULT now(),
    PRIMARY KEY (grid_point_id, category, travel_mode, threshold_min, profile)
);
-- Migration for existing databases (adds profile column and rebuilds PK).
ALTER TABLE grid_scores ADD COLUMN IF NOT EXISTS profile TEXT NOT NULL DEFAULT 'universal';

-- Rebuild the primary key so that it includes profile. Databases created
-- before the profile column existed still carry the old key without it, and
-- ADD COLUMN alone does not change that. The block is idempotent: it does
-- nothing once the primary key already contains profile.
DO $$
DECLARE
    pk_name         text;
    pk_has_profile  boolean;
BEGIN
    -- Look up the current primary key (if any) and whether it covers profile.
    SELECT
        c.conname,
        EXISTS (
            SELECT 1
            FROM pg_catalog.pg_attribute AS a
            WHERE a.attrelid = c.conrelid
              AND a.attnum = ANY (c.conkey)
              AND a.attname = 'profile'
        )
    INTO pk_name, pk_has_profile
    FROM pg_catalog.pg_constraint AS c
    WHERE c.conrelid = 'grid_scores'::regclass
      AND c.contype = 'p';

    -- Both variables are NULL when the table has no primary key at all.
    IF pk_has_profile IS DISTINCT FROM TRUE THEN
        IF pk_name IS NOT NULL THEN
            EXECUTE format('ALTER TABLE grid_scores DROP CONSTRAINT %I', pk_name);
        END IF;

        ALTER TABLE grid_scores
            ADD PRIMARY KEY (grid_point_id, category, travel_mode, threshold_min, profile);
    END IF;
END $$;

CREATE INDEX IF NOT EXISTS idx_grid_scores_lookup
    ON grid_scores (grid_point_id, travel_mode, threshold_min, profile);
-- ─── Nearest POIs per subcategory per grid point ─────────────────────────────
-- Populated by compute-routing / compute-transit jobs. Stores up to K nearest
-- POIs per (grid_point, category, subcategory, travel_mode) so that the scoring
-- phase can apply the complement-product formula across multiple POIs, giving
-- diminishing returns for POI diversity within a subcategory.
-- Because several rows may share the same (grid_point, category, subcategory,
-- travel_mode), the table uses a surrogate id as its primary key.
CREATE TABLE IF NOT EXISTS grid_poi_details (
    id                BIGSERIAL    PRIMARY KEY,
    grid_point_id     BIGINT       NOT NULL
        REFERENCES grid_points(id) ON DELETE CASCADE,
    category          TEXT         NOT NULL,
    subcategory       TEXT         NOT NULL,
    travel_mode       TEXT         NOT NULL,
    nearest_poi_id    BIGINT,
    nearest_poi_name  TEXT,
    distance_m        FLOAT,
    travel_time_s     FLOAT,
    computed_at       TIMESTAMPTZ  NOT NULL DEFAULT now()
);

CREATE INDEX IF NOT EXISTS idx_grid_poi_details_lookup
    ON grid_poi_details (grid_point_id, travel_mode);
-- Migration: replace old composite PK with surrogate id for diversity scoring.
-- Runs only when grid_poi_details has no id column yet, i.e. the table was
-- created by an older version of this schema.
--
-- The column lookup goes through pg_attribute on the regclass of
-- grid_poi_details, so it inspects exactly the table that the ALTER TABLE
-- statements below resolve to. A same-named table in an unrelated schema
-- can neither trigger nor suppress the migration.
DO $$
BEGIN
    IF NOT EXISTS (
        SELECT 1
        FROM pg_catalog.pg_attribute AS a
        WHERE a.attrelid = 'grid_poi_details'::regclass
          AND a.attname = 'id'
          AND a.attnum > 0
          AND NOT a.attisdropped
    ) THEN
        -- Old single-POI-per-subcategory data is incompatible; clear it.
        -- Both deletes intentionally affect every row.
        DELETE FROM grid_scores;
        DELETE FROM grid_poi_details;

        ALTER TABLE grid_poi_details DROP CONSTRAINT IF EXISTS grid_poi_details_pkey;
        ALTER TABLE grid_poi_details ADD COLUMN id BIGSERIAL PRIMARY KEY;
    END IF;
END $$;
-- ─── Isochrone cache ──────────────────────────────────────────────────────────
-- Cached isochrone results, stored as JSONB alongside the request parameters
-- (origin point, travel mode, contour minutes, optional departure time).
CREATE TABLE IF NOT EXISTS isochrone_cache (
    id              BIGSERIAL              PRIMARY KEY,
    origin_geom     geometry(Point, 4326)  NOT NULL,
    travel_mode     TEXT                   NOT NULL,
    contours_min    INTEGER[]              NOT NULL,
    departure_date  TEXT,                  -- NULL for non-transit; 'YYYY-MM-DDTHH:mm' for transit
    result          JSONB                  NOT NULL,
    created_at      TIMESTAMPTZ            NOT NULL DEFAULT now()
);

-- Spatial lookup by origin point.
CREATE INDEX IF NOT EXISTS idx_isochrone_origin
    ON isochrone_cache USING GIST (origin_geom);

-- Supports age-based cleanup of old entries.
CREATE INDEX IF NOT EXISTS idx_isochrone_created
    ON isochrone_cache (created_at);

-- Partial index covering only entries that carry a departure time.
CREATE INDEX IF NOT EXISTS idx_isochrone_mode_departure
    ON isochrone_cache (travel_mode, departure_date)
    WHERE departure_date IS NOT NULL;

-- Entries older than 30 days are meant to expire; nothing in this schema
-- enforces that (handled by periodic cleanup or TTL logic in the app).
-- ─── Estate value zones ───────────────────────────────────────────────────────
-- Populated by datasource-specific worker jobs (currently: ingest-boris-ni).
-- Only present for cities whose bbox intersects a supported region.
--
-- Migration: rename the legacy boris_zones table when upgrading from the
-- previous schema version. The rename happens only if boris_zones exists and
-- estate_value_zones does not, so re-running this file is a no-op.
--
-- to_regclass resolves names through the search_path exactly like the
-- ALTER TABLE below, so a same-named table in an unrelated schema can neither
-- trigger nor block the rename.
DO $$
BEGIN
    IF to_regclass('boris_zones') IS NOT NULL
       AND to_regclass('estate_value_zones') IS NULL
    THEN
        ALTER TABLE boris_zones RENAME TO estate_value_zones;
    END IF;
END $$;
-- Value zones per city; rows are removed when the owning city is deleted.
CREATE TABLE IF NOT EXISTS estate_value_zones (
    id            BIGSERIAL                 PRIMARY KEY,
    city_slug     TEXT                      NOT NULL
        REFERENCES cities(slug) ON DELETE CASCADE,
    geom          geometry(GEOMETRY, 4326)  NOT NULL,  -- any geometry type is accepted
    value_eur_m2  NUMERIC,
    zone_name     TEXT,
    usage_type    TEXT,
    usage_detail  TEXT,
    dev_state     TEXT,
    stichtag      TEXT,  -- NOTE(review): presumably the valuation reference date, kept as text; confirm format
    source        TEXT                      NOT NULL DEFAULT 'boris-ni',
    year          SMALLINT,
    ingested_at   TIMESTAMPTZ               NOT NULL DEFAULT NOW()
);

-- Migration for existing databases (no-op once the column exists).
ALTER TABLE estate_value_zones ADD COLUMN IF NOT EXISTS year SMALLINT;

CREATE INDEX IF NOT EXISTS idx_estate_value_zones_city
    ON estate_value_zones (city_slug);

CREATE INDEX IF NOT EXISTS idx_estate_value_zones_geom
    ON estate_value_zones USING GIST (geom);

-- Partial index: only zones that have a year assigned.
CREATE INDEX IF NOT EXISTS idx_estate_value_zones_year
    ON estate_value_zones (city_slug, year)
    WHERE year IS NOT NULL;