fix: change some job ids, make distance score function exponential instead of sigmoid

2026-03-06 16:32:10 +01:00 · 2026-03-06 16:32:10 +01:00 · faa9e48234
commit faa9e48234
parent 25c67b2536
9 changed files with 39 additions and 34 deletions
--- a/README.md
+++ b/README.md
@ -135,21 +135,21 @@ Travel times are obtained from [Valhalla](https://github.com/valhalla/valhalla),
 All scores are precomputed at ingest time for every combination of threshold (5 / 10 / 15 / 20 / 30 min), travel mode, and profile, so interactive queries hit only the database.
-Each subcategory *i* contributes a sigmoid score based on travel time `t` and threshold `T` (both in seconds):
+Each subcategory *i* contributes a proximity score based on travel time `t` and threshold `T` (both in seconds) using exponential decay:
 ```
-sigmoid(t, T) = 1 / (1 + exp(4 × (t − T) / T))
+score(t, T) = exp(−3 × t / T)
 ```
-The sigmoid equals 0.5 exactly at the threshold, approaches 1 for very short times, and approaches 0 for very long times. It is continuous — a 14-minute trip still contributes almost as much as a 10-minute trip under a 15-minute threshold.
+At t = 0 the score is 1.0. At the threshold it is exp(−3) ≈ 0.05 — a POI reachable in exactly the threshold time barely contributes. Close proximity dominates: a third of the threshold away scores ~0.37, halfway scores ~0.22. This ensures that genuinely nearby POIs are rated much more highly than merely reachable ones.
-The category score aggregates all subcategories via a **complement product** weighted by profile-specific importance weights `w_i ∈ [0, 1]`:
+The category score aggregates across subcategories **and** across multiple nearby POIs of the same subcategory via a **complement product** weighted by profile-specific importance weights `w_i ∈ [0, 1]`:
 ```
-category_score = 1 − ∏ (1 − w_i × sigmoid(t_i, T))
+category_score = 1 − ∏ (1 − w_i × score(t_i, T))
 ```
-This captures coverage diversity: one nearby supermarket already yields a high score, but also having a pharmacy and a bakery pushes it higher. Subcategories with no POI found are omitted from the product and do not penalise the score.
+This captures both subcategory coverage (a pharmacy and a supermarket together score higher than either alone) and within-subcategory diversity (a second nearby park still improves the score, with strongly diminishing returns). Subcategories with no POI found contribute nothing and do not penalise the score.
 The **composite score** shown on the heatmap is a weighted average of all five category scores. Category weights come from the selected profile but can be adjusted freely with the UI sliders. Changing the profile, threshold, or travel mode re-queries the database; adjusting the sliders re-blends client-side with no server round-trip.
--- a/apps/web/app/api/admin/cities/[slug]/rerun-scores/route.ts
+++ b/apps/web/app/api/admin/cities/[slug]/rerun-scores/route.ts
@ -27,8 +27,11 @@ export async function POST(
    return NextResponse.json({ error: "City not found" }, { status: 404 });
  }
-  await Promise.resolve(sql`
+  const [{ iter }] = await Promise.resolve(sql<{ iter: number }[]>`
-    UPDATE cities SET status = 'pending', error_message = NULL WHERE slug = ${slug}
+    UPDATE cities SET status = 'pending', error_message = NULL,
      refresh_iter = refresh_iter + 1
    WHERE slug = ${slug}
    RETURNING refresh_iter AS iter
  `);
  const queue = getPipelineQueue();
@ -45,7 +48,7 @@ export async function POST(
      attempts: 1,
      removeOnComplete: { age: 86400 * 7 },
      removeOnFail: { age: 86400 * 30 },
-      jobId: `compute-scores.${slug}`,
+      jobId: `compute-scores.${slug}.${iter}`,
    },
  );
--- a/apps/web/app/api/admin/cities/route.ts
+++ b/apps/web/app/api/admin/cities/route.ts
@ -156,6 +156,11 @@ export async function POST(req: NextRequest) {
    `);
  }
  const [{ iter }] = await Promise.resolve(sql<{ iter: number }[]>`
    UPDATE cities SET refresh_iter = refresh_iter + 1 WHERE slug = ${slug as string}
    RETURNING refresh_iter AS iter
  `);
  const queue = getPipelineQueue();
  const job = await queue.add(
    "refresh-city",
@ -164,8 +169,9 @@ export async function POST(req: NextRequest) {
      citySlug: slug as string,
      geofabrikUrl,
      resolutionM: resolutionM as number,
      iter,
    },
-    { ...JOB_OPTIONS["refresh-city"], jobId: `refresh-city.${slug}` },
+    { ...JOB_OPTIONS["refresh-city"], jobId: `refresh-city.${slug}.${iter}` },
  );
  // Invalidate city list cache
--- a/apps/web/app/api/admin/ingest/[slug]/route.ts
+++ b/apps/web/app/api/admin/ingest/[slug]/route.ts
@ -20,16 +20,18 @@ export async function POST(
  const { geofabrik_url: geofabrikUrl } = rows[0];
-  // Reset status
+  const [{ iter }] = await Promise.resolve(sql<{ iter: number }[]>`
-  await Promise.resolve(sql`
+    UPDATE cities SET status = 'pending', error_message = NULL,
-    UPDATE cities SET status = 'pending', error_message = NULL WHERE slug = ${slug}
+      refresh_iter = refresh_iter + 1
    WHERE slug = ${slug}
    RETURNING refresh_iter AS iter
  `);
  const queue = getPipelineQueue();
  const job = await queue.add(
    "refresh-city",
-    { type: "refresh-city", citySlug: slug, geofabrikUrl },
+    { type: "refresh-city", citySlug: slug, geofabrikUrl, iter },
-    { ...JOB_OPTIONS["refresh-city"], jobId: `refresh-city.${slug}` },
+    { ...JOB_OPTIONS["refresh-city"], jobId: `refresh-city.${slug}.${iter}` },
  );
  return NextResponse.json({ citySlug: slug, jobId: job.id }, { status: 202 });
--- a/infra/schema.sql
+++ b/infra/schema.sql
@ -22,6 +22,7 @@ CREATE TABLE IF NOT EXISTS cities (
 -- Migration for existing databases
 ALTER TABLE cities ADD COLUMN IF NOT EXISTS resolution_m INTEGER NOT NULL DEFAULT 200;
 ALTER TABLE cities ADD COLUMN IF NOT EXISTS boundary geometry(MultiPolygon, 4326);
 ALTER TABLE cities ADD COLUMN IF NOT EXISTS refresh_iter SMALLINT NOT NULL DEFAULT 0;
 CREATE INDEX IF NOT EXISTS idx_cities_bbox ON cities USING GIST (bbox);
 CREATE INDEX IF NOT EXISTS idx_cities_boundary ON cities USING GIST (boundary);
--- a/shared/src/queue.ts
+++ b/shared/src/queue.ts
@ -64,6 +64,9 @@ export interface RefreshCityJobData {
  citySlug: string;
  geofabrikUrl: string;
  resolutionM?: number;
  /** Monotonically increasing counter incremented at each trigger; used in jobIds
   *  to prevent completed-job deduplication on re-runs. */
  iter: number;
  /** ID of the compute-scores job enqueued for this refresh; set after flow.add(). */
  computeScoresJobId?: string;
 }
--- a/worker/src/jobs/compute-routing.ts
+++ b/worker/src/jobs/compute-routing.ts
@ -10,9 +10,8 @@ export type ComputeRoutingData = {
  category: string;
 };
-/** Number of nearest POI candidates per grid point (across all subcategories).
+/** Number of nearest POI candidates per grid point (across all subcategories). */
- *  Higher K means more diversity candidates for the complement-product formula. */
+const K = 6;
 const K = 15;
 /** Grid points per Valhalla matrix call. */
 const BATCH_SIZE = 5;
 /** Concurrent Valhalla calls within this job. */
--- a/worker/src/jobs/compute-scores.ts
+++ b/worker/src/jobs/compute-scores.ts
@ -73,7 +73,6 @@ export async function handleComputeScores(
            attempts: 1,
            removeOnComplete: { age: 86400 * 7 },
            removeOnFail: { age: 86400 * 30 },
            jobId: `compute-transit.${citySlug}`,
            parent: { id: job.id!, queue: queue.qualifiedName },
            ignoreDependencyOnFailure: true,
          },
@ -91,7 +90,6 @@ export async function handleComputeScores(
              backoff: { type: "fixed", delay: 3000 },
              removeOnComplete: { age: 86400 * 7 },
              removeOnFail: { age: 86400 * 30 },
              jobId: `compute-routing.${citySlug}.${mode}.${category}`,
              parent: {
                id: job.id!,
                queue: queue.qualifiedName,
@ -111,7 +109,6 @@ export async function handleComputeScores(
            backoff: { type: "fixed", delay: 5000 },
            removeOnComplete: { age: 86400 * 7 },
            removeOnFail: { age: 86400 * 30 },
            jobId: `ingest-boris-ni.${citySlug}`,
            parent: { id: job.id!, queue: queue.qualifiedName },
            ignoreDependencyOnFailure: true,
          },
@ -128,7 +125,6 @@ export async function handleComputeScores(
            backoff: { type: "fixed", delay: 5000 },
            removeOnComplete: { age: 86400 * 7 },
            removeOnFail: { age: 86400 * 30 },
            jobId: `ingest-boris-hb.${citySlug}`,
            parent: { id: job.id!, queue: queue.qualifiedName },
            ignoreDependencyOnFailure: true,
          },
@ -252,10 +248,7 @@ export async function handleComputeScores(
            1.0 - COALESCE(pw.weight, ${DEFAULT_SUBCATEGORY_WEIGHT}::float8)
              * CASE
                  WHEN s.travel_time_s IS NULL THEN 0.0
-                  ELSE 1.0 / (1.0 + EXP(
+                  ELSE EXP(-3.0 * s.travel_time_s / (t.threshold_min * 60.0))
                    (s.travel_time_s - t.threshold_min * 60.0)
                    / (t.threshold_min * 10.0)
                  ))
                END,
            1e-10
          ))
--- a/worker/src/jobs/refresh-city.ts
+++ b/worker/src/jobs/refresh-city.ts
@ -35,7 +35,7 @@ function isInBremen(minLng: number, minLat: number, maxLng: number, maxLat: numb
 export async function handleRefreshCity(
  job: Job<RefreshCityData>,
 ): Promise<void> {
-  const { citySlug, geofabrikUrl, resolutionM = 200 } = job.data;
+  const { citySlug, geofabrikUrl, resolutionM = 200, iter = 0 } = job.data;
  const sql = getSql();
  const pbfPath = `${OSM_DATA_DIR}/${citySlug}-latest.osm.pbf`;
@ -135,16 +135,14 @@ export async function handleRefreshCity(
      ingestBorisNi: niApplicable,
      ingestBorisHb: hbApplicable,
    },
-    opts: { ...JOB_OPTIONS["compute-scores"], jobId: `compute-scores.${citySlug}` },
+    opts: { ...JOB_OPTIONS["compute-scores"], jobId: `compute-scores.${citySlug}.${iter}` },
    children: [
      {
        name: "generate-grid",
        queueName: "pipeline",
        data: { type: "generate-grid" as const, citySlug, resolutionM },
-        opts: { ...JOB_OPTIONS["generate-grid"], jobId: `generate-grid.${citySlug}` },
+        opts: { ...JOB_OPTIONS["generate-grid"], jobId: `generate-grid.${citySlug}.${iter}` },
        children: [
          // Three parallel branches — all share a single download-pbf job via
          // the deterministic jobId; BullMQ deduplicates them to one download.
          {
            name: "extract-pois",
            queueName: "pipeline",
@ -154,7 +152,7 @@ export async function handleRefreshCity(
              pbfPath,
              ...(bbox ? { bbox } : {}),
            },
-            opts: { ...JOB_OPTIONS["extract-pois"], jobId: `extract-pois.${citySlug}` },
+            opts: { ...JOB_OPTIONS["extract-pois"], jobId: `extract-pois.${citySlug}.${iter}` },
            children: [downloadNode()],
          },
          // Road-only Valhalla build — no GTFS, produces clean tiles without
@ -168,7 +166,7 @@ export async function handleRefreshCity(
              pbfPath,
              ...(bbox ? { bbox } : {}),
            },
-            opts: { ...JOB_OPTIONS["build-valhalla"], jobId: `build-valhalla.${citySlug}` },
+            opts: { ...JOB_OPTIONS["build-valhalla"], jobId: `build-valhalla.${citySlug}.${iter}` },
            children: [downloadNode()],
          },
          // Transit Valhalla build — depends on GTFS download. Produces tiles with
@ -182,7 +180,7 @@ export async function handleRefreshCity(
              pbfPath,
              ...(bbox ? { bbox } : {}),
            },
-            opts: { ...JOB_OPTIONS["build-valhalla"], jobId: `build-valhalla-transit.${citySlug}` },
+            opts: { ...JOB_OPTIONS["build-valhalla"], jobId: `build-valhalla-transit.${citySlug}.${iter}` },
            children: [
              downloadNode(),
              // Download GTFS feed before building transit tiles. Idempotent —