From faa9e48234346b94149d0ad101406c893ad1288a Mon Sep 17 00:00:00 2001 From: Jan-Henrik Bruhn Date: Fri, 6 Mar 2026 16:32:10 +0100 Subject: [PATCH] fix: change some job ids, make distance score function exponential instead of sigmoid --- README.md | 12 ++++++------ .../api/admin/cities/[slug]/rerun-scores/route.ts | 9 ++++++--- apps/web/app/api/admin/cities/route.ts | 8 +++++++- apps/web/app/api/admin/ingest/[slug]/route.ts | 12 +++++++----- infra/schema.sql | 1 + shared/src/queue.ts | 3 +++ worker/src/jobs/compute-routing.ts | 5 ++--- worker/src/jobs/compute-scores.ts | 9 +-------- worker/src/jobs/refresh-city.ts | 14 ++++++-------- 9 files changed, 39 insertions(+), 34 deletions(-) diff --git a/README.md b/README.md index 72db7e9..4f2d65b 100644 --- a/README.md +++ b/README.md @@ -135,21 +135,21 @@ Travel times are obtained from [Valhalla](https://github.com/valhalla/valhalla), All scores are precomputed at ingest time for every combination of threshold (5 / 10 / 15 / 20 / 30 min), travel mode, and profile, so interactive queries hit only the database. -Each subcategory *i* contributes a sigmoid score based on travel time `t` and threshold `T` (both in seconds): +Each subcategory *i* contributes a proximity score based on travel time `t` and threshold `T` (both in seconds) using exponential decay: ``` -sigmoid(t, T) = 1 / (1 + exp(4 × (t − T) / T)) +score(t, T) = exp(−3 × t / T) ``` -The sigmoid equals 0.5 exactly at the threshold, approaches 1 for very short times, and approaches 0 for very long times. It is continuous — a 14-minute trip still contributes almost as much as a 10-minute trip under a 15-minute threshold. +At t = 0 the score is 1.0. At the threshold it is exp(−3) ≈ 0.05 — a POI reachable in exactly the threshold time barely contributes. Close proximity dominates: a third of the threshold away scores ~0.37, halfway scores ~0.22. This ensures that genuinely nearby POIs are rated much more highly than merely reachable ones. -The category score aggregates all subcategories via a **complement product** weighted by profile-specific importance weights `w_i ∈ [0, 1]`: +The category score aggregates across subcategories **and** across multiple nearby POIs of the same subcategory via a **complement product** weighted by profile-specific importance weights `w_i ∈ [0, 1]`: ``` -category_score = 1 − ∏ (1 − w_i × sigmoid(t_i, T)) +category_score = 1 − ∏ (1 − w_i × score(t_i, T)) ``` -This captures coverage diversity: one nearby supermarket already yields a high score, but also having a pharmacy and a bakery pushes it higher. Subcategories with no POI found are omitted from the product and do not penalise the score. +This captures both subcategory coverage (a pharmacy and a supermarket together score higher than either alone) and within-subcategory diversity (a second nearby park still improves the score, with strongly diminishing returns). Subcategories with no POI found contribute nothing and do not penalise the score. The **composite score** shown on the heatmap is a weighted average of all five category scores. Category weights come from the selected profile but can be adjusted freely with the UI sliders. Changing the profile, threshold, or travel mode re-queries the database; adjusting the sliders re-blends client-side with no server round-trip. diff --git a/apps/web/app/api/admin/cities/[slug]/rerun-scores/route.ts b/apps/web/app/api/admin/cities/[slug]/rerun-scores/route.ts index 91f1b6c..85240ac 100644 --- a/apps/web/app/api/admin/cities/[slug]/rerun-scores/route.ts +++ b/apps/web/app/api/admin/cities/[slug]/rerun-scores/route.ts @@ -27,8 +27,11 @@ export async function POST( return NextResponse.json({ error: "City not found" }, { status: 404 }); } - await Promise.resolve(sql` - UPDATE cities SET status = 'pending', error_message = NULL WHERE slug = ${slug} + const [{ iter }] = await Promise.resolve(sql<{ iter: number }[]>` + UPDATE cities SET status = 'pending', error_message = NULL, + refresh_iter = refresh_iter + 1 + WHERE slug = ${slug} + RETURNING refresh_iter AS iter `); const queue = getPipelineQueue(); @@ -45,7 +48,7 @@ export async function POST( attempts: 1, removeOnComplete: { age: 86400 * 7 }, removeOnFail: { age: 86400 * 30 }, - jobId: `compute-scores.${slug}`, + jobId: `compute-scores.${slug}.${iter}`, }, ); diff --git a/apps/web/app/api/admin/cities/route.ts b/apps/web/app/api/admin/cities/route.ts index fdec431..89b18d7 100644 --- a/apps/web/app/api/admin/cities/route.ts +++ b/apps/web/app/api/admin/cities/route.ts @@ -156,6 +156,11 @@ export async function POST(req: NextRequest) { `); } + const [{ iter }] = await Promise.resolve(sql<{ iter: number }[]>` + UPDATE cities SET refresh_iter = refresh_iter + 1 WHERE slug = ${slug as string} + RETURNING refresh_iter AS iter + `); + const queue = getPipelineQueue(); const job = await queue.add( "refresh-city", @@ -164,8 +169,9 @@ export async function POST(req: NextRequest) { citySlug: slug as string, geofabrikUrl, resolutionM: resolutionM as number, + iter, }, - { ...JOB_OPTIONS["refresh-city"], jobId: `refresh-city.${slug}` }, + { ...JOB_OPTIONS["refresh-city"], jobId: `refresh-city.${slug}.${iter}` }, ); // Invalidate city list cache diff --git a/apps/web/app/api/admin/ingest/[slug]/route.ts b/apps/web/app/api/admin/ingest/[slug]/route.ts index 68fbf84..660853e 100644 --- a/apps/web/app/api/admin/ingest/[slug]/route.ts +++ b/apps/web/app/api/admin/ingest/[slug]/route.ts @@ -20,16 +20,18 @@ export async function POST( const { geofabrik_url: geofabrikUrl } = rows[0]; - // Reset status - await Promise.resolve(sql` - UPDATE cities SET status = 'pending', error_message = NULL WHERE slug = ${slug} + const [{ iter }] = await Promise.resolve(sql<{ iter: number }[]>` + UPDATE cities SET status = 'pending', error_message = NULL, + refresh_iter = refresh_iter + 1 + WHERE slug = ${slug} + RETURNING refresh_iter AS iter `); const queue = getPipelineQueue(); const job = await queue.add( "refresh-city", - { type: "refresh-city", citySlug: slug, geofabrikUrl }, - { ...JOB_OPTIONS["refresh-city"], jobId: `refresh-city.${slug}` }, + { type: "refresh-city", citySlug: slug, geofabrikUrl, iter }, + { ...JOB_OPTIONS["refresh-city"], jobId: `refresh-city.${slug}.${iter}` }, ); return NextResponse.json({ citySlug: slug, jobId: job.id }, { status: 202 }); diff --git a/infra/schema.sql b/infra/schema.sql index e7c3625..4c3dbf6 100644 --- a/infra/schema.sql +++ b/infra/schema.sql @@ -22,6 +22,7 @@ CREATE TABLE IF NOT EXISTS cities ( -- Migration for existing databases ALTER TABLE cities ADD COLUMN IF NOT EXISTS resolution_m INTEGER NOT NULL DEFAULT 200; ALTER TABLE cities ADD COLUMN IF NOT EXISTS boundary geometry(MultiPolygon, 4326); +ALTER TABLE cities ADD COLUMN IF NOT EXISTS refresh_iter SMALLINT NOT NULL DEFAULT 0; CREATE INDEX IF NOT EXISTS idx_cities_bbox ON cities USING GIST (bbox); CREATE INDEX IF NOT EXISTS idx_cities_boundary ON cities USING GIST (boundary); diff --git a/shared/src/queue.ts b/shared/src/queue.ts index 9741903..6c14bc4 100644 --- a/shared/src/queue.ts +++ b/shared/src/queue.ts @@ -64,6 +64,9 @@ export interface RefreshCityJobData { citySlug: string; geofabrikUrl: string; resolutionM?: number; + /** Monotonically increasing counter incremented at each trigger; used in jobIds + * to prevent completed-job deduplication on re-runs. */ + iter: number; /** ID of the compute-scores job enqueued for this refresh; set after flow.add(). */ computeScoresJobId?: string; } diff --git a/worker/src/jobs/compute-routing.ts b/worker/src/jobs/compute-routing.ts index caa6be2..5ddc903 100644 --- a/worker/src/jobs/compute-routing.ts +++ b/worker/src/jobs/compute-routing.ts @@ -10,9 +10,8 @@ export type ComputeRoutingData = { category: string; }; -/** Number of nearest POI candidates per grid point (across all subcategories). - * Higher K means more diversity candidates for the complement-product formula. */ -const K = 15; +/** Number of nearest POI candidates per grid point (across all subcategories). */ +const K = 6; /** Grid points per Valhalla matrix call. */ const BATCH_SIZE = 5; /** Concurrent Valhalla calls within this job. */ diff --git a/worker/src/jobs/compute-scores.ts b/worker/src/jobs/compute-scores.ts index 141e78e..355084e 100644 --- a/worker/src/jobs/compute-scores.ts +++ b/worker/src/jobs/compute-scores.ts @@ -73,7 +73,6 @@ export async function handleComputeScores( attempts: 1, removeOnComplete: { age: 86400 * 7 }, removeOnFail: { age: 86400 * 30 }, - jobId: `compute-transit.${citySlug}`, parent: { id: job.id!, queue: queue.qualifiedName }, ignoreDependencyOnFailure: true, }, @@ -91,7 +90,6 @@ export async function handleComputeScores( backoff: { type: "fixed", delay: 3000 }, removeOnComplete: { age: 86400 * 7 }, removeOnFail: { age: 86400 * 30 }, - jobId: `compute-routing.${citySlug}.${mode}.${category}`, parent: { id: job.id!, queue: queue.qualifiedName, @@ -111,7 +109,6 @@ export async function handleComputeScores( backoff: { type: "fixed", delay: 5000 }, removeOnComplete: { age: 86400 * 7 }, removeOnFail: { age: 86400 * 30 }, - jobId: `ingest-boris-ni.${citySlug}`, parent: { id: job.id!, queue: queue.qualifiedName }, ignoreDependencyOnFailure: true, }, @@ -128,7 +125,6 @@ export async function handleComputeScores( backoff: { type: "fixed", delay: 5000 }, removeOnComplete: { age: 86400 * 7 }, removeOnFail: { age: 86400 * 30 }, - jobId: `ingest-boris-hb.${citySlug}`, parent: { id: job.id!, queue: queue.qualifiedName }, ignoreDependencyOnFailure: true, }, @@ -252,10 +248,7 @@ export async function handleComputeScores( 1.0 - COALESCE(pw.weight, ${DEFAULT_SUBCATEGORY_WEIGHT}::float8) * CASE WHEN s.travel_time_s IS NULL THEN 0.0 - ELSE 1.0 / (1.0 + EXP( - (s.travel_time_s - t.threshold_min * 60.0) - / (t.threshold_min * 10.0) - )) + ELSE EXP(-3.0 * s.travel_time_s / (t.threshold_min * 60.0)) END, 1e-10 )) diff --git a/worker/src/jobs/refresh-city.ts b/worker/src/jobs/refresh-city.ts index 75622b0..1ea10da 100644 --- a/worker/src/jobs/refresh-city.ts +++ b/worker/src/jobs/refresh-city.ts @@ -35,7 +35,7 @@ function isInBremen(minLng: number, minLat: number, maxLng: number, maxLat: numb export async function handleRefreshCity( job: Job, ): Promise { - const { citySlug, geofabrikUrl, resolutionM = 200 } = job.data; + const { citySlug, geofabrikUrl, resolutionM = 200, iter = 0 } = job.data; const sql = getSql(); const pbfPath = `${OSM_DATA_DIR}/${citySlug}-latest.osm.pbf`; @@ -135,16 +135,14 @@ export async function handleRefreshCity( ingestBorisNi: niApplicable, ingestBorisHb: hbApplicable, }, - opts: { ...JOB_OPTIONS["compute-scores"], jobId: `compute-scores.${citySlug}` }, + opts: { ...JOB_OPTIONS["compute-scores"], jobId: `compute-scores.${citySlug}.${iter}` }, children: [ { name: "generate-grid", queueName: "pipeline", data: { type: "generate-grid" as const, citySlug, resolutionM }, - opts: { ...JOB_OPTIONS["generate-grid"], jobId: `generate-grid.${citySlug}` }, + opts: { ...JOB_OPTIONS["generate-grid"], jobId: `generate-grid.${citySlug}.${iter}` }, children: [ - // Three parallel branches — all share a single download-pbf job via - // the deterministic jobId; BullMQ deduplicates them to one download. { name: "extract-pois", queueName: "pipeline", @@ -154,7 +152,7 @@ export async function handleRefreshCity( pbfPath, ...(bbox ? { bbox } : {}), }, - opts: { ...JOB_OPTIONS["extract-pois"], jobId: `extract-pois.${citySlug}` }, + opts: { ...JOB_OPTIONS["extract-pois"], jobId: `extract-pois.${citySlug}.${iter}` }, children: [downloadNode()], }, // Road-only Valhalla build — no GTFS, produces clean tiles without @@ -168,7 +166,7 @@ export async function handleRefreshCity( pbfPath, ...(bbox ? { bbox } : {}), }, - opts: { ...JOB_OPTIONS["build-valhalla"], jobId: `build-valhalla.${citySlug}` }, + opts: { ...JOB_OPTIONS["build-valhalla"], jobId: `build-valhalla.${citySlug}.${iter}` }, children: [downloadNode()], }, // Transit Valhalla build — depends on GTFS download. Produces tiles with @@ -182,7 +180,7 @@ export async function handleRefreshCity( pbfPath, ...(bbox ? { bbox } : {}), }, - opts: { ...JOB_OPTIONS["build-valhalla"], jobId: `build-valhalla-transit.${citySlug}` }, + opts: { ...JOB_OPTIONS["build-valhalla"], jobId: `build-valhalla-transit.${citySlug}.${iter}` }, children: [ downloadNode(), // Download GTFS feed before building transit tiles. Idempotent —