From f7bea8fb75febcc66e8637d59139feef75c1999f Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Wed, 1 Jul 2026 16:20:01 +0100 Subject: [PATCH 01/15] =?UTF-8?q?feat(run-ops):=20webapp=20write=20path=20?= =?UTF-8?q?=E2=80=94=20trigger/batch=20minting,=20idempotency=20routing,?= =?UTF-8?q?=20run=20lifecycle?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-Authored-By: Claude Opus 4.8 (1M context) --- .../concerns/idempotencyKeys.server.ts | 91 +- .../idempotencyResidency.server.test.ts | 100 +++ .../concerns/idempotencyResidency.server.ts | 56 ++ ...solveWaitpointThroughReadThrough.server.ts | 35 + .../runEngine/services/batchTrigger.server.ts | 104 +-- .../runEngine/services/createBatch.server.ts | 51 +- .../services/streamBatchItems.server.ts | 42 +- .../services/triggerFailedTask.server.ts | 75 +- .../services/triggerTask.server.test.ts | 832 ++++++++++++++++++ .../runEngine/services/triggerTask.server.ts | 60 +- .../app/services/archiveBranch.server.ts | 3 + .../app/services/dashboardAgent.server.ts | 9 +- .../app/services/deleteProject.server.ts | 5 + .../app/services/realtime/runReader.server.ts | 2 +- .../app/services/realtime/sessions.server.ts | 50 +- .../clickhouseRunsRepository.server.ts | 188 ++-- .../runsRepository/runsRepository.server.ts | 42 +- .../v3/services/alerts/deliverAlert.server.ts | 119 ++- .../alerts/performTaskRunAlerts.server.ts | 46 +- .../app/v3/services/batchTriggerV3.server.ts | 344 +++++--- ...lkActionV2.batchReadThrough.server.test.ts | 125 +++ .../BulkActionV2.batchReadThrough.server.ts | 119 +++ .../v3/services/bulk/BulkActionV2.server.ts | 87 +- .../services/cancelDevSessionRuns.server.ts | 74 +- .../v3/services/createCheckpoint.server.ts | 29 +- .../services/createTaskRunAttempt.server.ts | 39 +- .../v3/services/enqueueDelayedRun.server.ts | 16 +- .../services/executeTasksWaitingForDeploy.ts | 27 +- .../v3/services/expireEnqueuedRun.server.ts | 35 +- 
.../app/v3/services/finalizeTaskRun.server.ts | 37 +- .../app/v3/services/resumeBatchRun.server.ts | 114 ++- ...batchTriggerV3ResidencyInheritance.test.ts | 148 ++++ .../test/batchTriggerV3StoreRouting.test.ts | 252 ++++++ .../test/bulkActionV2ReadRouting.test.ts | 242 +++++ .../cancelDevSessionRunsStoreRouting.test.ts | 245 ++++++ .../test/engine/streamBatchItems.test.ts | 84 +- .../test/engine/triggerFailedTask.test.ts | 309 +++++++ apps/webapp/test/engine/triggerTask.test.ts | 317 ++++++- .../test/idempotencyDedupResidency.test.ts | 179 ++++ ...empotencyKeyConcernLegacyAuthority.test.ts | 301 +++++++ .../performTaskRunAlertsStoreRouting.test.ts | 349 ++++++++ .../realtime/runReaderReadThrough.test.ts | 493 +++++++++++ .../streamRegistrationRouting.test.ts | 240 +++++ ...resetIdempotencyKeyLegacyAuthority.test.ts | 220 +++++ ...ointThroughReadThrough.readthrough.test.ts | 216 +++++ .../runEngineBatchTriggerStoreRouting.test.ts | 172 ++++ .../test/runsRepository.readthrough.test.ts | 435 +++++++++ apps/webapp/test/runsRepositoryCpres.test.ts | 260 ++++++ apps/webapp/test/sessions.readthrough.test.ts | 347 ++++++++ .../test/streamLoader.controlPlane.test.ts | 114 +++ 50 files changed, 7230 insertions(+), 649 deletions(-) create mode 100644 apps/webapp/app/runEngine/concerns/idempotencyResidency.server.test.ts create mode 100644 apps/webapp/app/runEngine/concerns/idempotencyResidency.server.ts create mode 100644 apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts create mode 100644 apps/webapp/app/runEngine/services/triggerTask.server.test.ts create mode 100644 apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts create mode 100644 apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts create mode 100644 apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts create mode 100644 apps/webapp/test/batchTriggerV3StoreRouting.test.ts create mode 100644 
apps/webapp/test/bulkActionV2ReadRouting.test.ts create mode 100644 apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts create mode 100644 apps/webapp/test/engine/triggerFailedTask.test.ts create mode 100644 apps/webapp/test/idempotencyDedupResidency.test.ts create mode 100644 apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts create mode 100644 apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts create mode 100644 apps/webapp/test/realtime/runReaderReadThrough.test.ts create mode 100644 apps/webapp/test/realtime/streamRegistrationRouting.test.ts create mode 100644 apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts create mode 100644 apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts create mode 100644 apps/webapp/test/runEngineBatchTriggerStoreRouting.test.ts create mode 100644 apps/webapp/test/runsRepository.readthrough.test.ts create mode 100644 apps/webapp/test/runsRepositoryCpres.test.ts create mode 100644 apps/webapp/test/sessions.readthrough.test.ts create mode 100644 apps/webapp/test/streamLoader.controlPlane.test.ts diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 3e049ceb37e..1a50942b4e5 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -11,6 +11,10 @@ import { findRunByIdWithMollifierFallback } from "~/v3/mollifier/readFallback.se import { claimOrAwait } from "~/v3/mollifier/idempotencyClaim.server"; import { makeResolveMollifierFlag } from "~/v3/mollifier/mollifierGate.server"; import { runStore } from "~/v3/runStore.server"; +import { runOpsLegacyPrisma, runOpsNewPrisma } from "~/db.server"; +import { isSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; +import { resolveRunIdMintKind } from "~/v3/engineVersion.server"; +import { resolveIdempotencyDedupClient } from "./idempotencyResidency.server"; import 
type { TraceEventConcern, TriggerTaskRequest } from "../types"; // In-memory per-org mollifier-enabled check, shared with `evaluateGate` @@ -147,6 +151,28 @@ export class IdempotencyKeyConcern { return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; } + // Probe and clears must hit the DB where the would-be run will physically live. + const dedupClient = await resolveIdempotencyDedupClient( + { + environmentForMint: { + organizationId: request.environment.organizationId, + id: request.environment.id, + orgFeatureFlags: request.environment.organization?.featureFlags, + }, + parentRunFriendlyId: request.body.options?.parentRunId, + }, + { + isSplitEnabled, + fallbackClient: this.prisma, + newClient: runOpsNewPrisma, + legacyClient: runOpsLegacyPrisma, + resolveMintKind: resolveRunIdMintKind, + // `isMigrated` is intentionally omitted: until a child of a swept + // legacy-id parent can be born on the new DB, the swept-marker override + // would never change the answer, so a child routes by parent id-shape. + } + ); + const existingRun = idempotencyKey ? await runStore.findRun( { @@ -159,7 +185,7 @@ export class IdempotencyKeyConcern { associatedWaitpoint: true, }, }, - this.prisma + dedupClient ) : undefined; @@ -193,7 +219,7 @@ export class IdempotencyKeyConcern { // Update the existing run to remove the idempotency key await runStore.clearIdempotencyKey( { byId: { runId: existingRun.id, idempotencyKey } }, - this.prisma + dedupClient ); return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; @@ -210,7 +236,7 @@ export class IdempotencyKeyConcern { // Update the existing run to remove the idempotency key await runStore.clearIdempotencyKey( { byId: { runId: existingRun.id, idempotencyKey } }, - this.prisma + dedupClient ); return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; @@ -249,7 +275,6 @@ export class IdempotencyKeyConcern { ? 
`${event.traceparent.spanId}:${event.spanId}` : event.spanId; - //block run with waitpoint await this.engine.blockRunWithWaitpoint({ runId: RunId.fromFriendlyId(parentRunId), waitpoints: associatedWaitpoint!.id, @@ -277,24 +302,13 @@ export class IdempotencyKeyConcern { // (resumeParentOnCompletion) — that path bypasses the gate entirely // and its existing PG-side dedup is sufficient. // - // Also gated on the same per-org mollifier flag the gate uses: when - // `TRIGGER_MOLLIFIER_ENABLED=1` globally for staged rollout, the buffer - // singleton is constructed and `claimOrAwait` would otherwise issue a - // Redis SETNX for EVERY idempotency-keyed trigger — including orgs - // that haven't opted in. Those orgs never enter the mollify branch - // (the gate always returns pass_through for them), so there's no - // buffer activity to serialise against; PG's unique constraint - // already deduplicates concurrent same-key races. Resolving the org - // flag is a pure in-memory read of `Organization.featureFlags` — no - // DB query, same predicate the gate uses — keeping the claim's Redis - // RTT off the hot path for non-opted-in orgs during incremental - // rollout. - // Match the gate's bypass list (`mollifierGate.server.ts:158-175`). - // debounce + oneTimeUseToken triggers always return pass_through from - // the gate, so claiming a Redis SETNX here is wasted RTT on the - // trigger hot path. Excluding them keeps the claim aligned with the - // gate — if the gate would never mollify the request, there's no - // buffer to serialise against. + // Gated on the same per-org mollifier flag the gate uses, and the same + // bypass list (debounce + oneTimeUseToken): if the gate would never mollify + // the request, there's no buffer to serialise against and PG's unique + // constraint already deduplicates concurrent same-key races. Skipping the + // claim's Redis SETNX keeps its RTT off the hot path for those requests + // during staged rollout. 
The org-flag check is a pure in-memory read of + // `Organization.featureFlags`, no DB query. const claimEligible = !request.body.options?.resumeParentOnCompletion && !request.body.options?.debounce && @@ -336,7 +350,7 @@ export class IdempotencyKeyConcern { taskIdentifier: request.taskId, }, { include: { associatedWaitpoint: true } }, - this.prisma + dedupClient ); if (writerRun) { return { isCached: true, run: writerRun }; @@ -350,27 +364,18 @@ export class IdempotencyKeyConcern { if (buffered) { return { isCached: true, run: buffered }; } - // Claim resolved to a runId nothing can find — the run was - // genuinely lost (claimant errored after publish, drain failed, - // or both the PG row and buffer entry TTL'd out). This is - // terminal, not transient: `lookupIdempotency` self-heals a - // dangling pointer, and `ack` keeps the entry hash as a - // read-fallback past the PG write, so re-polling cannot conjure - // a run that is gone. Falling through to a fresh trigger is the - // correct recovery. + // Claim resolved to a runId nothing can find — the run was genuinely + // lost (claimant errored after publish, or both the PG row and buffer + // entry TTL'd out). Terminal, not transient, so falling through to a + // fresh trigger is the correct recovery. // - // Why falling through claimless is safe (no duplicate runs): - // concurrent triggers that also fall through here converge on a - // single run via the same dedup backstops the claim layer relies - // on — the PG unique constraint on the idempotency key - // (RunDuplicateIdempotencyKeyError → retry resolves to the - // winner) for the pass-through path, and `accept`'s idempotency - // SETNX (`duplicate_idempotency`) for the mollify path. Once the - // first fall-through commits a run, later callers find it via the - // writer-PG / buffer lookups above despite the stale `resolved:` - // slot, which the slot's TTL clears within ~30s. 
The residual - // cost is a few redundant (deduped) trigger attempts in that - // window, not duplicate runs. + // Falling through claimless doesn't duplicate runs: concurrent + // fall-throughs converge on one run via the same dedup backstops the + // claim layer relies on — PG's unique constraint on the idempotency key + // (pass-through path) and `accept`'s SETNX (mollify path). Once the + // first commits, later callers find it via the writer-PG / buffer + // lookups above despite the stale `resolved:` slot (cleared by its ~30s + // TTL). Residual cost is a few deduped trigger attempts, not dup runs. logger.warn("idempotency claim resolved but runId not findable", { envId: request.environment.id, taskIdentifier: request.taskId, diff --git a/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.test.ts b/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.test.ts new file mode 100644 index 00000000000..ee128224d10 --- /dev/null +++ b/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.test.ts @@ -0,0 +1,100 @@ +import { describe, expect, it } from "vitest"; +import { RunId } from "@trigger.dev/core/v3/isomorphic"; +import { + resolveIdempotencyDedupClient, + type ResolveIdempotencyClientDeps, +} from "./idempotencyResidency.server"; + +// Distinct sentinel objects so we can assert WHICH client was selected by reference. 
+const FALLBACK = { __tag: "fallback" } as never; +const NEW_CLIENT = { __tag: "new" } as never; +const LEGACY_CLIENT = { __tag: "legacy" } as never; + +function makeDeps(over: Partial<ResolveIdempotencyClientDeps>): ResolveIdempotencyClientDeps { + return { + isSplitEnabled: async () => true, + fallbackClient: FALLBACK, + newClient: NEW_CLIENT, + legacyClient: LEGACY_CLIENT, + resolveMintKind: async () => "ksuid", + classify: (id) => { + if (id.length === 27) return "NEW"; + if (id.length === 25) return "LEGACY"; + throw new Error(`unclassifiable: ${id.length}`); + }, + isMigrated: undefined, + ...over, + }; +} + +const env = { organizationId: "org_1", id: "env_1", orgFeatureFlags: {} }; + +describe("resolveIdempotencyDedupClient", () => { + it("returns the fallback client unchanged when split is disabled", async () => { + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: undefined }, + makeDeps({ isSplitEnabled: async () => false }) + ); + expect(client).toBe(FALLBACK); + }); + + it("routes a root run to the NEW client when the env mints ksuid", async () => { + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: undefined }, + makeDeps({ resolveMintKind: async () => "ksuid" }) + ); + expect(client).toBe(NEW_CLIENT); + }); + + it("routes a root run to the LEGACY client when the env mints cuid", async () => { + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: undefined }, + makeDeps({ resolveMintKind: async () => "cuid" }) + ); + expect(client).toBe(LEGACY_CLIENT); + }); + + it("routes a child to the NEW client when the ksuid parent is NEW-resident", async () => { + const ksuidParent = RunId.toFriendlyId("a".repeat(27)); + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: ksuidParent }, + makeDeps({ resolveMintKind: async () => "cuid" }) // mint flag must NOT win for a child + ); +
expect(client).toBe(NEW_CLIENT); + }); + + it("routes a child to the LEGACY client when the cuid parent is LEGACY-resident", async () => { + const cuidParent = RunId.toFriendlyId("b".repeat(25)); + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: cuidParent }, + makeDeps({ resolveMintKind: async () => "ksuid" }) // mint flag must NOT win for a child + ); + expect(client).toBe(LEGACY_CLIENT); + }); + + it("routes a swept (migrated) cuid-parent child to the NEW client", async () => { + const cuidParent = RunId.toFriendlyId("c".repeat(25)); + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: cuidParent }, + makeDeps({ isMigrated: async () => true }) + ); + expect(client).toBe(NEW_CLIENT); + }); + + it("routes a non-migrated cuid-parent child to the LEGACY client even when isMigrated is provided", async () => { + const cuidParent = RunId.toFriendlyId("d".repeat(25)); + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: cuidParent }, + makeDeps({ isMigrated: async () => false }) + ); + expect(client).toBe(LEGACY_CLIENT); + }); + + it("falls back to the fallback client when a present parent id is unclassifiable", async () => { + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: "run_not-a-valid-length" }, + makeDeps({}) + ); + expect(client).toBe(FALLBACK); + }); +}); diff --git a/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.ts new file mode 100644 index 00000000000..38ef4755844 --- /dev/null +++ b/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.ts @@ -0,0 +1,56 @@ +import { ownerEngine, RunId, type Residency } from "@trigger.dev/core/v3/isomorphic"; +import type { PrismaClientOrTransaction } from "@trigger.dev/database"; + +type MintKind = "cuid" | "ksuid"; + +export 
type ResolveIdempotencyClientDeps = { + isSplitEnabled: () => Promise<boolean>; + fallbackClient: PrismaClientOrTransaction; + newClient: PrismaClientOrTransaction; + legacyClient: PrismaClientOrTransaction; + resolveMintKind: (environment: { + organizationId: string; + id: string; + orgFeatureFlags?: unknown; + }) => Promise<MintKind>; + classify?: (id: string) => Residency; + isMigrated?: (id: string) => Promise<boolean>; +}; + +export async function resolveIdempotencyDedupClient( + args: { + environmentForMint: { organizationId: string; id: string; orgFeatureFlags?: unknown }; + parentRunFriendlyId: string | undefined; + }, + deps: ResolveIdempotencyClientDeps +): Promise<PrismaClientOrTransaction> { + if (!(await deps.isSplitEnabled())) { + return deps.fallbackClient; + } + + const classify = deps.classify ?? ownerEngine; + const clientFor = (residency: Residency): PrismaClientOrTransaction => + residency === "NEW" ? deps.newClient : deps.legacyClient; + + if (args.parentRunFriendlyId) { + let parentInternalId: string; + try { + parentInternalId = RunId.fromFriendlyId(args.parentRunFriendlyId); + } catch { + return deps.fallbackClient; + } + let residency: Residency; + try { + residency = classify(parentInternalId); + } catch { + return deps.fallbackClient; + } + if (residency === "LEGACY" && deps.isMigrated && (await deps.isMigrated(parentInternalId))) { + return deps.newClient; + } + return clientFor(residency); + } + + const kind = await deps.resolveMintKind(args.environmentForMint); + return clientFor(kind === "ksuid" ? 
"NEW" : "LEGACY"); +} diff --git a/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts b/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts new file mode 100644 index 00000000000..d1bcaea9b6e --- /dev/null +++ b/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts @@ -0,0 +1,35 @@ +import type { PrismaReplicaClient } from "~/db.server"; +import { $replica } from "~/db.server"; +import { readThroughRun } from "~/v3/runOpsMigration/readThrough.server"; +import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; + +type ResolveWaitpointDeps = { + newClient?: PrismaReplicaClient; + legacyReplica?: PrismaReplicaClient; + splitEnabled?: boolean; + isKnownMigrated?: (id: string) => Promise; + isPastRetention?: (id: string) => boolean; +}; + +export async function resolveWaitpointThroughReadThrough(opts: { + waitpointId: string; + environmentId: string; + read: (client: PrismaReplicaClient) => Promise; + deps?: ResolveWaitpointDeps; +}): Promise { + const result = await readThroughRun({ + runId: opts.waitpointId, + environmentId: opts.environmentId, + readNew: (client) => opts.read(client), + readLegacy: (replica) => opts.read(replica), + deps: { + splitEnabled: opts.deps?.splitEnabled, + newClient: opts.deps?.newClient ?? $replica, + legacyReplica: opts.deps?.legacyReplica ?? $replica, + isKnownMigrated: opts.deps?.isKnownMigrated ?? defaultIsKnownMigrated, + isPastRetention: opts.deps?.isPastRetention, + }, + }); + + return result.source === "new" || result.source === "legacy-replica" ? 
result.value : null; +} diff --git a/apps/webapp/app/runEngine/services/batchTrigger.server.ts b/apps/webapp/app/runEngine/services/batchTrigger.server.ts index 54a819770ef..1a54815a26b 100644 --- a/apps/webapp/app/runEngine/services/batchTrigger.server.ts +++ b/apps/webapp/app/runEngine/services/batchTrigger.server.ts @@ -13,13 +13,17 @@ import { Evt } from "evt"; import { z } from "zod"; import { prisma, type PrismaClientOrTransaction } from "~/db.server"; import { env } from "~/env.server"; +import { findEnvironmentById } from "~/models/runtimeEnvironment.server"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; import { batchTriggerWorker } from "~/v3/batchTriggerWorker.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { mintBatchFriendlyId } from "~/v3/runOpsMigration/mintBatchFriendlyId.server"; import { downloadPacketFromObjectStore, uploadPacketToObjectStore, } from "../../v3/objectStore.server"; +import type { RunEngine } from "../../v3/runEngine.server"; import { ServiceValidationError, WithRunEngine } from "../../v3/services/baseService.server"; import { TriggerTaskService } from "../../v3/services/triggerTask.server"; import { startActiveSpan } from "../../v3/tracer.server"; @@ -64,9 +68,10 @@ export class RunEngineBatchTriggerService extends WithRunEngine { constructor( batchProcessingStrategy?: BatchProcessingStrategy, - protected readonly _prisma: PrismaClientOrTransaction = prisma + protected readonly _prisma: PrismaClientOrTransaction = prisma, + engine?: RunEngine ) { - super({ prisma }); + super({ prisma, engine }); // Eric note: We need to force sequential processing because when doing parallel, we end up with high-contention on the parent run lock // becuase we are triggering a lot of runs at once, and each one is trying to lock the parent run. 
@@ -84,11 +89,17 @@ export class RunEngineBatchTriggerService extends WithRunEngine { "call()", environment, async (span) => { - const { id: _id, friendlyId } = BatchId.generate(); + const { friendlyId } = await mintBatchFriendlyId({ + environment: { + organizationId: environment.organizationId, + id: environment.id, + orgFeatureFlags: environment.organization.featureFlags, + }, + parentRunFriendlyId: body.parentRunId, + }); span.setAttribute("batchId", friendlyId); - // Upload to object store const payloadPacket = await this.#handlePayloadPacket( body.items, `batch/${friendlyId}`, @@ -155,20 +166,22 @@ export class RunEngineBatchTriggerService extends WithRunEngine { body: BatchTriggerTaskV2RequestBody, options: BatchTriggerTaskServiceOptions = {} ) { + // BatchTaskRun.runtimeEnvironmentId no longer has an FK into RuntimeEnvironment; + // validate env existence app-side (covers both create arms below). + await controlPlaneResolver.assertEnvExists(environment.id); + if (body.items.length <= ASYNC_BATCH_PROCESS_SIZE_THRESHOLD) { - const batch = await this._prisma.batchTaskRun.create({ - data: { - id: BatchId.fromFriendlyId(batchId), - friendlyId: batchId, - runtimeEnvironmentId: environment.id, - runCount: body.items.length, - runIds: [], - payload: payloadPacket.data, - payloadType: payloadPacket.dataType, - options, - batchVersion: "runengine:v1", - oneTimeUseToken: options.oneTimeUseToken, - }, + const batch = await this._engine.runStore.createBatchTaskRun({ + id: BatchId.fromFriendlyId(batchId), + friendlyId: batchId, + runtimeEnvironmentId: environment.id, + runCount: body.items.length, + runIds: [], + payload: payloadPacket.data, + payloadType: payloadPacket.dataType, + options, + batchVersion: "runengine:v1", + oneTimeUseToken: options.oneTimeUseToken, }); this.onBatchTaskRunCreated.post(batch); @@ -249,19 +262,17 @@ export class RunEngineBatchTriggerService extends WithRunEngine { } } } else { - const batch = await this._prisma.batchTaskRun.create({ - data: 
{ - id: BatchId.fromFriendlyId(batchId), - friendlyId: batchId, - runtimeEnvironmentId: environment.id, - runCount: body.items.length, - runIds: [], - payload: payloadPacket.data, - payloadType: payloadPacket.dataType, - options, - batchVersion: "runengine:v1", - oneTimeUseToken: options.oneTimeUseToken, - }, + const batch = await this._engine.runStore.createBatchTaskRun({ + id: BatchId.fromFriendlyId(batchId), + friendlyId: batchId, + runtimeEnvironmentId: environment.id, + runCount: body.items.length, + runIds: [], + payload: payloadPacket.data, + payloadType: payloadPacket.dataType, + options, + batchVersion: "runengine:v1", + oneTimeUseToken: options.oneTimeUseToken, }); this.onBatchTaskRunCreated.post(batch); @@ -336,7 +347,6 @@ export class RunEngineBatchTriggerService extends WithRunEngine { const $attemptCount = options.attemptCount + 1; - // Add early return if max attempts reached if ($attemptCount > MAX_ATTEMPTS) { logger.error("[RunEngineBatchTrigger][processBatchTaskRun] Max attempts reached", { options, @@ -346,23 +356,22 @@ export class RunEngineBatchTriggerService extends WithRunEngine { return; } - const batch = await this._prisma.batchTaskRun.findFirst({ - where: { id: options.batchId }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - }, - }, - }, - }); + const batch = await this._engine.runStore.findBatchTaskRunById(options.batchId); if (!batch) { return; } - // Check to make sure the currentIndex is not greater than the runCount + // BatchTaskRun -> RuntimeEnvironment FK is dropped; resolve the env from the scalar id. 
+ const environment = await findEnvironmentById(batch.runtimeEnvironmentId); + if (!environment) { + logger.error("[RunEngineBatchTrigger][processBatchTaskRun] Environment not found", { + batchId: batch.id, + runtimeEnvironmentId: batch.runtimeEnvironmentId, + }); + return; + } + if (options.range.start >= batch.runCount) { logger.debug( "[RunEngineBatchTrigger][processBatchTaskRun] currentIndex is greater than runCount", @@ -377,13 +386,12 @@ export class RunEngineBatchTriggerService extends WithRunEngine { return; } - // Resolve the payload const payloadPacket = await downloadPacketFromObjectStore( { data: batch.payload ?? undefined, dataType: batch.payloadType, }, - batch.runtimeEnvironment + environment ); const payload = await parsePacket(payloadPacket); @@ -404,7 +412,7 @@ export class RunEngineBatchTriggerService extends WithRunEngine { const result = await this.#processBatchTaskRunItems({ batch, - environment: batch.runtimeEnvironment, + environment, currentIndex: options.range.start, batchSize: options.range.count, items: $payload, @@ -609,8 +617,7 @@ export class RunEngineBatchTriggerService extends WithRunEngine { workingIndex++; } - //add the run ids to the batch - const updatedBatch = await this._prisma.batchTaskRun.update({ + const updatedBatch = await this._engine.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { runIds: { @@ -626,7 +633,6 @@ export class RunEngineBatchTriggerService extends WithRunEngine { }, }); - //triggered all the runs if (updatedBatch.processingJobsCount >= updatedBatch.runCount) { logger.debug("[RunEngineBatchTrigger][processBatchTaskRun] All runs created", { batchId: batch.friendlyId, diff --git a/apps/webapp/app/runEngine/services/createBatch.server.ts b/apps/webapp/app/runEngine/services/createBatch.server.ts index 0653e1ef1c2..f738b07997e 100644 --- a/apps/webapp/app/runEngine/services/createBatch.server.ts +++ b/apps/webapp/app/runEngine/services/createBatch.server.ts @@ -6,6 +6,8 @@ import { Evt } from 
"evt"; import { prisma, type PrismaClientOrTransaction } from "~/db.server"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { mintBatchFriendlyId } from "~/v3/runOpsMigration/mintBatchFriendlyId.server"; import { ServiceValidationError, WithRunEngine } from "../../v3/services/baseService.server"; import { BatchRateLimitExceededError, getBatchLimits } from "../concerns/batchLimits.server"; import { DefaultQueueManager } from "../concerns/queues.server"; @@ -58,12 +60,18 @@ export class CreateBatchService extends WithRunEngine { "createBatch()", environment, async (span) => { - const { id, friendlyId } = BatchId.generate(); + const { id, friendlyId } = await mintBatchFriendlyId({ + environment: { + organizationId: environment.organizationId, + id: environment.id, + orgFeatureFlags: environment.organization.featureFlags, + }, + parentRunFriendlyId: body.parentRunId, + }); span.setAttribute("batchId", friendlyId); span.setAttribute("runCount", body.runCount); - // Validate entitlement const entitlementValidation = await this.validator.validateEntitlement({ environment, }); @@ -72,14 +80,11 @@ export class CreateBatchService extends WithRunEngine { throw entitlementValidation.error; } - // Extract plan type from entitlement validation for billing tracking const planType = entitlementValidation.plan?.type; - // Get batch limits for this organization const { config, rateLimiter } = await getBatchLimits(environment.organization); - // Check rate limit BEFORE creating the batch - // This prevents burst creation of batches that exceed the rate limit + // Rate-limit before creating the batch, to stop bursts exceeding the limit. 
const rateResult = await rateLimiter.limit(environment.id, body.runCount); if (!rateResult.success) { @@ -94,23 +99,23 @@ export class CreateBatchService extends WithRunEngine { // Note: Queue size limits are validated per-queue when batch items are processed, // since we don't know which queues items will go to until they're streamed. - // Create BatchTaskRun in Postgres with PENDING status - // The batch will be sealed (status -> PROCESSING) when items are streamed - const batch = await this._prisma.batchTaskRun.create({ - data: { - id, - friendlyId, - runtimeEnvironmentId: environment.id, - status: "PENDING", - runCount: body.runCount, - expectedCount: body.runCount, - runIds: [], - batchVersion: "runengine:v2", // 2-phase streaming batch API - oneTimeUseToken: options.oneTimeUseToken, - idempotencyKey: body.idempotencyKey, - // Not sealed yet - will be sealed when items stream completes - sealed: false, - }, + // BatchTaskRun.runtimeEnvironmentId no longer has an FK into RuntimeEnvironment; + // validate env existence app-side (passthrough when split is off). + await controlPlaneResolver.assertEnvExists(environment.id); + + // Created PENDING; sealed (status -> PROCESSING) once items are streamed. 
+ const batch = await this._engine.runStore.createBatchTaskRun({ + id, + friendlyId, + runtimeEnvironmentId: environment.id, + status: "PENDING", + runCount: body.runCount, + expectedCount: body.runCount, + runIds: [], + batchVersion: "runengine:v2", // 2-phase streaming batch API + oneTimeUseToken: options.oneTimeUseToken, + idempotencyKey: body.idempotencyKey, + sealed: false, }); this.onBatchTaskRunCreated.post(batch); diff --git a/apps/webapp/app/runEngine/services/streamBatchItems.server.ts b/apps/webapp/app/runEngine/services/streamBatchItems.server.ts index fd229777c10..0011975d6d0 100644 --- a/apps/webapp/app/runEngine/services/streamBatchItems.server.ts +++ b/apps/webapp/app/runEngine/services/streamBatchItems.server.ts @@ -13,7 +13,7 @@ import { ServiceValidationError, WithRunEngine } from "../../v3/services/baseSer import { BatchPayloadProcessor } from "../concerns/batchPayloads.server"; /** - * Phase 2 retry idempotency check (TRI-9944). + * Phase 2 retry idempotency check. * * Returns true when the batch is in a state that means the Phase 2 stream's * job has already been done — every item has a TaskRun record (real or @@ -128,24 +128,12 @@ export class StreamBatchItemsService extends WithRunEngine { // Convert friendly ID to internal ID const batchId = this.parseBatchFriendlyId(batchFriendlyId); - // Validate batch exists and belongs to this environment - const batch = await this._prisma.batchTaskRun.findFirst({ - where: { - id: batchId, - runtimeEnvironmentId: environment.id, - }, - select: { - id: true, - friendlyId: true, - status: true, - runCount: true, - sealed: true, - batchVersion: true, - processingCompletedAt: true, - }, - }); + // Validate batch exists and belongs to this environment. Routed by batch id so a + // ksuid (NEW-resident) batch is found on the owning DB; the env-ownership check that + // was in the where clause is enforced app-side below. 
+ const batch = await this._engine.runStore.findBatchTaskRunById(batchId); - if (!batch) { + if (!batch || batch.runtimeEnvironmentId !== environment.id) { throw new ServiceValidationError(`Batch ${batchFriendlyId} not found`); } @@ -215,10 +203,7 @@ export class StreamBatchItemsService extends WithRunEngine { // milliseconds between the loop ending and getBatchEnqueuedCount() being called. // Check both sealed (sealed by this endpoint on a concurrent request) and // COMPLETED (sealed by the BatchQueue completion path before we got here). - const currentBatch = await this._prisma.batchTaskRun.findFirst({ - where: { id: batchId }, - select: { sealed: true, status: true, processingCompletedAt: true }, - }); + const currentBatch = await this._engine.runStore.findBatchTaskRunById(batchId); if ( isIdempotentRetrySuccess( @@ -279,7 +264,7 @@ export class StreamBatchItemsService extends WithRunEngine { // Seal the batch - use conditional update to prevent TOCTOU race // Another concurrent request may have already sealed this batch const now = new Date(); - const sealResult = await this._prisma.batchTaskRun.updateMany({ + const sealResult = await this._engine.runStore.updateManyBatchTaskRun({ where: { id: batchId, sealed: false, @@ -306,16 +291,7 @@ export class StreamBatchItemsService extends WithRunEngine { // batch-queue/index.ts. // Either way the goal — a durable batch that the SDK stops retrying — // has been achieved, so we return sealed: true. 
- const currentBatch = await this._prisma.batchTaskRun.findFirst({ - where: { id: batchId }, - select: { - id: true, - friendlyId: true, - status: true, - sealed: true, - processingCompletedAt: true, - }, - }); + const currentBatch = await this._engine.runStore.findBatchTaskRunById(batchId); if ( isIdempotentRetrySuccess( diff --git a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts index 2fc0fb750b9..30dd587aeeb 100644 --- a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts @@ -1,6 +1,6 @@ import type { RunEngine } from "@internal/run-engine"; import { TaskRunErrorCodes, type TaskRunError } from "@trigger.dev/core/v3"; -import { RunId } from "@trigger.dev/core/v3/isomorphic"; +import { RunId, generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; import type { PrismaClientOrTransaction, RuntimeEnvironmentType, @@ -8,6 +8,10 @@ import type { } from "@trigger.dev/database"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; +import { resolveRunIdMintKind } from "~/v3/engineVersion.server"; +import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; +import { isSplitEnabled as defaultIsSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; +import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedMintKind.server"; import { getEventRepository } from "~/v3/eventRepository/index.server"; import { runStore } from "~/v3/runStore.server"; import { PerformTaskRunAlertsService } from "~/v3/services/alerts/performTaskRunAlerts.server"; @@ -58,26 +62,74 @@ export class TriggerFailedTaskService { private readonly prisma: PrismaClientOrTransaction; private readonly replicaPrisma: PrismaClientOrTransaction; private readonly engine: RunEngine; + // Reports whether a run 
that is legacy by id-shape has already been moved to + // the new store. Injected for tests; defaults to the live resolver. + private readonly isKnownMigrated: (runId: string) => Promise; + // Injected so the migrated-marker read stays off the hot path when split is off + // (same guard as RunEngineTriggerTaskService); defaults to the live resolver. + private readonly isSplitEnabled: () => Promise; constructor(opts: { prisma: PrismaClientOrTransaction; engine: RunEngine; replicaPrisma?: PrismaClientOrTransaction; + isKnownMigrated?: (runId: string) => Promise; + isSplitEnabled?: () => Promise; }) { this.prisma = opts.prisma; this.replicaPrisma = opts.replicaPrisma ?? opts.prisma; this.engine = opts.engine; + this.isKnownMigrated = opts.isKnownMigrated ?? defaultIsKnownMigrated; + this.isSplitEnabled = opts.isSplitEnabled ?? defaultIsSplitEnabled; + } + + // Mint a failed run's friendlyId. The id-kind decides which store the run is + // born in (cuid → legacy store, ksuid → new store); the whole subgraph of a + // run must agree. Root failed runs mint by the environment's setting; child + // failed runs inherit the parent's current store so they never split. + private async mintFailedRunFriendlyId(args: { + organizationId: string; + environmentId: string; + orgFeatureFlags?: unknown; + parentRunFriendlyId?: string; + }): Promise { + const mintKind = args.parentRunFriendlyId + ? await resolveInheritedMintKind(args.parentRunFriendlyId, { + isSplitEnabled: this.isSplitEnabled, + isKnownMigrated: this.isKnownMigrated, + }) + : await resolveRunIdMintKind({ + organizationId: args.organizationId, + id: args.environmentId, + orgFeatureFlags: args.orgFeatureFlags, + }); + + return mintKind === "ksuid" + ? 
RunId.toFriendlyId(generateKsuidId()) + : RunId.generate().friendlyId; } async call(request: TriggerFailedTaskRequest): Promise { - const failedRunFriendlyId = RunId.generate().friendlyId; const taskRunError: TaskRunError = { type: "INTERNAL_ERROR" as const, code: request.errorCode ?? TaskRunErrorCodes.UNSPECIFIED_ERROR, message: request.errorMessage, }; + // Held for the catch's log line; the in-try `const` is what consumers use. + let mintedFriendlyId: string | undefined; + try { + // Mint inside the try: classifying a user-supplied parentRunId throws on + // an unclassifiable id, so keep it within the catch's null-return contract. + const failedRunFriendlyId = await this.mintFailedRunFriendlyId({ + organizationId: request.environment.organizationId, + environmentId: request.environment.id, + orgFeatureFlags: request.environment.organization.featureFlags, + parentRunFriendlyId: request.parentRunId, + }); + mintedFriendlyId = failedRunFriendlyId; + const { repository, store } = await getEventRepository( request.environment.organization.id, request.environment.organization.featureFlags as Record, @@ -243,7 +295,7 @@ export class TriggerFailedTaskService { createError instanceof Error ? createError.message : String(createError); logger.error("TriggerFailedTaskService: failed to create pre-failed TaskRun", { taskId: request.taskId, - friendlyId: failedRunFriendlyId, + friendlyId: mintedFriendlyId, originalError: request.errorMessage, createError: createErrorMsg, }); @@ -270,9 +322,22 @@ export class TriggerFailedTaskService { batch?: { id: string; index: number }; errorCode?: TaskRunErrorCodes; }): Promise { - const failedRunFriendlyId = RunId.generate().friendlyId; + // Held for the catch's log line; the in-try `const` is what consumers use. + let mintedFriendlyId: string | undefined; try { + // Mint inside the try: classifying a user-supplied parentRunId throws on + // an unclassifiable id, so keep it within the catch's null-return contract. 
+ const failedRunFriendlyId = await this.mintFailedRunFriendlyId({ + organizationId: opts.organizationId, + environmentId: opts.environmentId, + // No loaded org flags in this path; resolveRunIdMintKind falls back to a + // single replica lookup by organizationId only when there is no parent. + orgFeatureFlags: undefined, + parentRunFriendlyId: opts.parentRunId, + }); + mintedFriendlyId = failedRunFriendlyId; + // Best-effort parent run lookup for rootTaskRunId/depth let parentTaskRunId: string | undefined; let rootTaskRunId: string | undefined; @@ -347,7 +412,7 @@ export class TriggerFailedTaskService { } catch (createError) { logger.error("TriggerFailedTaskService: failed to create pre-failed TaskRun (no trace)", { taskId: opts.taskId, - friendlyId: failedRunFriendlyId, + friendlyId: mintedFriendlyId, originalError: opts.errorMessage, createError: createError instanceof Error ? createError.message : String(createError), }); diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.test.ts b/apps/webapp/app/runEngine/services/triggerTask.server.test.ts new file mode 100644 index 00000000000..931d012acee --- /dev/null +++ b/apps/webapp/app/runEngine/services/triggerTask.server.test.ts @@ -0,0 +1,832 @@ +import { describe, expect, vi } from "vitest"; + +// Mock the db prisma client. The service is constructed against a real +// testcontainer prisma instead — these empty singletons only satisfy the +// module-level imports of the production wiring (infrastructure boundary). +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, + runOpsNewReplica: {}, + runOpsLegacyReplica: {}, +})); +// Inherited harness boilerplate. The parent read under test takes the +// findRun(where, client) overload with this.prisma, so it does not consult this +// flag; the mock only satisfies other wiring imported transitively. 
+vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ isSplitEnabled: async () => false })); + +vi.mock("~/services/platform.v3.server", async (importOriginal) => { + const actual = (await importOriginal()) as Record; + return { + ...actual, + getEntitlement: vi.fn(), + }; +}); + +import { RunEngine } from "@internal/run-engine"; +import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "@internal/run-engine/tests"; +import { assertNonNullable, containerTest } from "@internal/testcontainers"; +import { trace } from "@opentelemetry/api"; +import { IOPacket } from "@trigger.dev/core/v3"; +import { RunId } from "@trigger.dev/core/v3/isomorphic"; +import { TaskRun } from "@trigger.dev/database"; +import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; +import { DefaultQueueManager } from "~/runEngine/concerns/queues.server"; +import { + EntitlementValidationParams, + MaxAttemptsValidationParams, + ParentRunValidationParams, + PayloadProcessor, + TagValidationParams, + TracedEventSpan, + TraceEventConcern, + TriggerTaskRequest, + TriggerTaskValidator, + ValidationResult, +} from "~/runEngine/types"; +import { RunEngineTriggerTaskService } from "./triggerTask.server"; + +vi.setConfig({ testTimeout: 60_000 }); // 60 seconds timeout + +class MockPayloadProcessor implements PayloadProcessor { + async process(request: TriggerTaskRequest): Promise { + return { + data: JSON.stringify(request.body.payload), + dataType: "application/json", + }; + } +} + +// Captures the `parentRun` the service resolved (via runStore.findRun) and +// passed into validation, so a test can assert on the resolved parent without +// mocking the read itself. Returns ok so the child triggers regardless. 
+class CapturingParentRunValidator implements TriggerTaskValidator { + public capturedParentRun: ParentRunValidationParams["parentRun"] | "unset" = "unset"; + + validateTags(_params: TagValidationParams): ValidationResult { + return { ok: true }; + } + validateEntitlement(_params: EntitlementValidationParams): Promise { + return Promise.resolve({ ok: true }); + } + validateMaxAttempts(_params: MaxAttemptsValidationParams): ValidationResult { + return { ok: true }; + } + validateParentRun(params: ParentRunValidationParams): ValidationResult { + this.capturedParentRun = params.parentRun; + return { ok: true }; + } +} + +class MockTraceEventConcern implements TraceEventConcern { + async traceRun( + _request: TriggerTaskRequest, + _parentStore: string | undefined, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } + + async traceIdempotentRun( + _request: TriggerTaskRequest, + _parentStore: string | undefined, + _options: { + existingRun: TaskRun; + idempotencyKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } + + async traceDebouncedRun( + _request: TriggerTaskRequest, + _parentStore: string | undefined, + _options: { + existingRun: TaskRun; + debounceKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise + ): Promise { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + 
stop: () => {}, + }, + "test" + ); + } +} + +function buildEngine(prisma: any, redisOptions: any) { + return new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); +} + +describe("RunEngineTriggerTaskService parent + locked-worker reads", () => { + containerTest( + "resolves the parent run through the run-ops store by minted run id", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const validator = new CapturingParentRunValidator(); + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator, + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + // Keep the migrated-parent resolver from reaching the empty ~/db.server + // mock; mint-kind inheritance is not under test here. + isKnownMigrated: async () => false, + }); + + // Trigger a ROOT run first to create a real parent TaskRun. + const parentResult = await triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { payload: { kind: "parent" } }, + }); + assertNonNullable(parentResult); + + // Trigger a CHILD pointing at the parent's friendlyId. 
The service must + // resolve the parent via runStore.findRun (minted RunId, env-scoped). + const childResult = await triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { + payload: { kind: "child" }, + options: { parentRunId: parentResult.run.friendlyId }, + }, + }); + assertNonNullable(childResult); + + // The capturing validator observed the resolved parent — proving the + // read ran (against the container DB) and returned the right row. + expect(validator.capturedParentRun).not.toBe("unset"); + const capturedParent = validator.capturedParentRun; + assertNonNullable(capturedParent); + expect(capturedParent.id).toBe(parentResult.run.id); + expect(capturedParent.friendlyId).toBe(parentResult.run.friendlyId); + + // depth and root carry through — proving parentRun.depth and the parent + // id were read off the resolved row and threaded into the child. + const parentRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: parentResult.run.id }, + }); + const childRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: childResult.run.id }, + }); + + expect(childRow.depth).toBe(parentRow.depth + 1); + expect(childRow.parentTaskRunId).toBe(parentRow.id); + expect(childRow.rootTaskRunId).toBe(parentRow.id); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "scopes the parent lookup to the run's environment (cross-env parent is not resolved)", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + // Two independent authenticated environments. The setup helper hardcodes + // several globally-unique fields (org/project slug, env apiKey/pkApiKey, + // worker-group token hash), so rename envA's before the second call to + // avoid unique-constraint collisions. 
+ const envA = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + await prisma.organization.update({ + where: { id: envA.organizationId }, + data: { slug: `${envA.organization.slug}-a` }, + }); + await prisma.project.update({ + where: { id: envA.projectId }, + data: { slug: `${envA.project.slug}-a`, externalRef: `${envA.project.externalRef}-a` }, + }); + await prisma.runtimeEnvironment.update({ + where: { id: envA.id }, + data: { apiKey: `${envA.apiKey}-a`, pkApiKey: `${envA.pkApiKey}-a` }, + }); + await prisma.workerGroupToken.updateMany({ + where: { tokenHash: "token_hash" }, + data: { tokenHash: "token_hash_a" }, + }); + await prisma.workerInstanceGroup.updateMany({ + where: { masterQueue: "default" }, + data: { masterQueue: "default_a" }, + }); + const envB = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + expect(envA.id).not.toBe(envB.id); + expect(envA.organizationId).not.toBe(envB.organizationId); + + const taskIdentifier = "test-task"; + await setupBackgroundWorker(engine, envA, taskIdentifier); + await setupBackgroundWorker(engine, envB, taskIdentifier); + + const validator = new CapturingParentRunValidator(); + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator, + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + isKnownMigrated: async () => false, + }); + + // A real parent run in envA. + const parentResult = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: envA, + body: { payload: { kind: "parent" } }, + }); + assertNonNullable(parentResult); + + // Trigger a child in envB pointing at the envA parent's friendlyId. 
The + // env guard in runStore.findRun's `where` rejects the cross-env parent + // in a single query, so the resolved parentRun is null. + const childResult = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: envB, + body: { + payload: { kind: "child" }, + options: { parentRunId: parentResult.run.friendlyId }, + }, + }); + assertNonNullable(childResult); + + // validateParentRun was called with no resolved parent. + expect(validator.capturedParentRun).not.toBe("unset"); + expect(validator.capturedParentRun ?? null).toBeNull(); + + // The child still triggered, at the root depth with no parent linkage — + // confirming the cross-env parent was dropped, not silently joined. + const childRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: childResult.run.id }, + }); + expect(childRow.depth).toBe(0); + expect(childRow.parentTaskRunId).toBeNull(); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "resolves the locked background worker on the control-plane client with no cross-DB join", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + const { worker } = await setupBackgroundWorker(engine, environment, taskIdentifier); + + // Read the seeded worker row to get its real version/id. + const workerRow = await prisma.backgroundWorker.findUniqueOrThrow({ + where: { id: worker.id }, + }); + + // Counting proxy over the control-plane client. `this.prisma` is ALWAYS + // the control-plane client; the locked-worker lookup is a DIRECT + // backgroundWorker.findFirst on it. The parent read uses a DIFFERENT + // call (runStore.findRun → taskRun), so a single call() issues two + // separate single-table reads — never one cross-seam join. Here we count + // the findFirst calls and capture their args to assert no include/join. 
+ let backgroundWorkerFindFirstCalls = 0; + const findFirstArgs: any[] = []; + const countingPrisma = new Proxy(prisma, { + get(target, prop, receiver) { + if (prop === "backgroundWorker") { + const delegate = Reflect.get(target, prop, receiver); + return new Proxy(delegate, { + get(bwTarget, bwProp, bwReceiver) { + if (bwProp === "findFirst") { + return async (args: any) => { + backgroundWorkerFindFirstCalls += 1; + findFirstArgs.push(args); + return (delegate as any).findFirst(args); + }; + } + const value = Reflect.get(bwTarget, bwProp, bwReceiver); + return typeof value === "function" ? value.bind(bwTarget) : value; + }, + }); + } + const value = Reflect.get(target, prop, receiver); + return typeof value === "function" ? value.bind(target) : value; + }, + }) as typeof prisma; + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma: countingPrisma, + payloadProcessor: new MockPayloadProcessor(), + // The queue manager gets the real (unproxied) prisma so the counting + // proxy only observes reads issued by the service itself. + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator: new CapturingParentRunValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + isKnownMigrated: async () => false, + }); + + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { + payload: { kind: "locked" }, + options: { lockToVersion: workerRow.version }, + }, + }); + assertNonNullable(result); + + // Observable proof the locked worker was resolved on the control-plane + // client: the created run records the worker id in lockedToVersionId. 
+ const runRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: result.run.id }, + }); + expect(runRow.lockedToVersionId).toBe(workerRow.id); + expect(runRow.taskVersion).toBe(workerRow.version); + + // Exactly one backgroundWorker.findFirst fired for the locked-worker read. + expect(backgroundWorkerFindFirstCalls).toBe(1); + + // NO-JOIN assertion: the read referenced ONLY the backgroundWorker table. + // No `include` (which would join into another table); the `select` lists + // only backgroundWorker scalar columns. + const args = findFirstArgs[0]; + assertNonNullable(args); + expect(args.include).toBeUndefined(); + expect(Object.keys(args.select ?? {}).sort()).toEqual([ + "cliVersion", + "id", + "sdkVersion", + "version", + ]); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "issues two independent single-table reads when one call supplies both parentRunId and lockToVersion", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + const { worker } = await setupBackgroundWorker(engine, environment, taskIdentifier); + + const workerRow = await prisma.backgroundWorker.findUniqueOrThrow({ + where: { id: worker.id }, + }); + + // Count BOTH reads issued by the service on the control-plane client: + // the parent read (runStore.findRun → taskRun.findFirst) and the + // locked-worker read (backgroundWorker.findFirst). Capture every + // findFirst arg so we can assert no read carries a cross-seam include. 
+ let taskRunFindFirstCalls = 0; + let backgroundWorkerFindFirstCalls = 0; + const findFirstArgs: any[] = []; + const countingPrisma = new Proxy(prisma, { + get(target, prop, receiver) { + if (prop === "backgroundWorker") { + const delegate = Reflect.get(target, prop, receiver); + return new Proxy(delegate, { + get(bwTarget, bwProp, bwReceiver) { + if (bwProp === "findFirst") { + return async (args: any) => { + backgroundWorkerFindFirstCalls += 1; + findFirstArgs.push(args); + return (delegate as any).findFirst(args); + }; + } + const value = Reflect.get(bwTarget, bwProp, bwReceiver); + return typeof value === "function" ? value.bind(bwTarget) : value; + }, + }); + } + if (prop === "taskRun") { + const delegate = Reflect.get(target, prop, receiver); + return new Proxy(delegate, { + get(trTarget, trProp, trReceiver) { + if (trProp === "findFirst") { + return async (args: any) => { + taskRunFindFirstCalls += 1; + findFirstArgs.push(args); + return (delegate as any).findFirst(args); + }; + } + const value = Reflect.get(trTarget, trProp, trReceiver); + return typeof value === "function" ? value.bind(trTarget) : value; + }, + }); + } + const value = Reflect.get(target, prop, receiver); + return typeof value === "function" ? value.bind(target) : value; + }, + }) as typeof prisma; + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma: countingPrisma, + payloadProcessor: new MockPayloadProcessor(), + // queueConcern/idempotency get the real unproxied prisma so the + // counting proxy only observes reads issued by the service itself. 
+ queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator: new CapturingParentRunValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + isKnownMigrated: async () => false, + }); + + // ROOT parent first (uses the unproxied prisma via a separate service so + // its internal reads don't pollute the child's counts). + const parentService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator: new CapturingParentRunValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + isKnownMigrated: async () => false, + }); + const parentResult = await parentService.call({ + taskId: taskIdentifier, + environment, + body: { payload: { kind: "parent" } }, + }); + assertNonNullable(parentResult); + + // CHILD supplying BOTH parentRunId AND lockToVersion in one call. + const childResult = await triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { + payload: { kind: "child" }, + options: { + parentRunId: parentResult.run.friendlyId, + lockToVersion: workerRow.version, + }, + }, + }); + assertNonNullable(childResult); + + const parentRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: parentResult.run.id }, + }); + const childRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: childResult.run.id }, + }); + + // Child resolved the parent (single-table parent read). 
+ expect(childRow.parentTaskRunId).toBe(parentRow.id); + expect(childRow.depth).toBe(parentRow.depth + 1); + + // Child locked to the worker (single-table worker read). + expect(childRow.lockedToVersionId).toBe(workerRow.id); + expect(childRow.taskVersion).toBe(workerRow.version); + + // Exactly one backgroundWorker.findFirst fired for the locked-worker read, + // and at least one taskRun.findFirst fired for the parent read. + expect(backgroundWorkerFindFirstCalls).toBe(1); + expect(taskRunFindFirstCalls).toBeGreaterThanOrEqual(1); + + // NO-JOIN proof: no captured read carried an `include` joining + // taskRun <-> backgroundWorker. Every findFirst arg has include undefined. + for (const args of findFirstArgs) { + expect(args?.include).toBeUndefined(); + } + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "lockToVersion matching no worker rejects the trigger after a single scalar-only worker read", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + let backgroundWorkerFindFirstCalls = 0; + const findFirstArgs: any[] = []; + const countingPrisma = new Proxy(prisma, { + get(target, prop, receiver) { + if (prop === "backgroundWorker") { + const delegate = Reflect.get(target, prop, receiver); + return new Proxy(delegate, { + get(bwTarget, bwProp, bwReceiver) { + if (bwProp === "findFirst") { + return async (args: any) => { + backgroundWorkerFindFirstCalls += 1; + findFirstArgs.push(args); + return (delegate as any).findFirst(args); + }; + } + const value = Reflect.get(bwTarget, bwProp, bwReceiver); + return typeof value === "function" ? value.bind(bwTarget) : value; + }, + }); + } + const value = Reflect.get(target, prop, receiver); + return typeof value === "function" ? 
value.bind(target) : value; + }, + }) as typeof prisma; + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma: countingPrisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator: new CapturingParentRunValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + isKnownMigrated: async () => false, + }); + + const bogusVersion = "v-does-not-exist-0000"; + // The no-match worker read returns null; the queue concern then rejects + // the trigger rather than silently locking the run to a phantom version. + await expect( + triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { + payload: { kind: "locked" }, + options: { lockToVersion: bogusVersion }, + }, + }) + ).rejects.toThrow(/no worker found with that version/); + + // No run was locked to the bogus version (none was created). + const lockedRuns = await prisma.taskRun.findMany({ + where: { runtimeEnvironmentId: environment.id, taskVersion: bogusVersion }, + }); + expect(lockedRuns).toEqual([]); + + // The lone worker read fired exactly once with the scalar-only select and + // no cross-seam include. + expect(backgroundWorkerFindFirstCalls).toBe(1); + const args = findFirstArgs[0]; + assertNonNullable(args); + expect(args.include).toBeUndefined(); + expect(Object.keys(args.select ?? {}).sort()).toEqual([ + "cliVersion", + "id", + "sdkVersion", + "version", + ]); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "does not resolve a locked worker from a different environment", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + // Two independent authenticated environments. 
Rename envA's globally-unique + // fields before the second setup call to avoid unique-constraint collisions. + const envA = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + await prisma.organization.update({ + where: { id: envA.organizationId }, + data: { slug: `${envA.organization.slug}-a` }, + }); + await prisma.project.update({ + where: { id: envA.projectId }, + data: { slug: `${envA.project.slug}-a`, externalRef: `${envA.project.externalRef}-a` }, + }); + await prisma.runtimeEnvironment.update({ + where: { id: envA.id }, + data: { apiKey: `${envA.apiKey}-a`, pkApiKey: `${envA.pkApiKey}-a` }, + }); + await prisma.workerGroupToken.updateMany({ + where: { tokenHash: "token_hash" }, + data: { tokenHash: "token_hash_a" }, + }); + await prisma.workerInstanceGroup.updateMany({ + where: { masterQueue: "default" }, + data: { masterQueue: "default_a" }, + }); + const envB = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + expect(envA.id).not.toBe(envB.id); + expect(envA.organizationId).not.toBe(envB.organizationId); + + const taskIdentifier = "test-task"; + const { worker: workerA } = await setupBackgroundWorker(engine, envA, taskIdentifier); + const { worker: workerB } = await setupBackgroundWorker(engine, envB, taskIdentifier); + + const workerARow = await prisma.backgroundWorker.findUniqueOrThrow({ + where: { id: workerA.id }, + }); + const workerBRow = await prisma.backgroundWorker.findUniqueOrThrow({ + where: { id: workerB.id }, + }); + // Both seeded workers share the same version string. 
+ expect(workerARow.version).toBe(workerBRow.version); + expect(workerARow.id).not.toBe(workerBRow.id); + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator: new CapturingParentRunValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + isKnownMigrated: async () => false, + }); + + // Trigger in envB locking to the shared version string. + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: envB, + body: { + payload: { kind: "locked" }, + options: { lockToVersion: workerBRow.version }, + }, + }); + assertNonNullable(result); + + const runRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: result.run.id }, + }); + // The projectId + runtimeEnvironmentId guard in the single-table worker + // read resolves envB's worker, never envA's same-version worker. 
+ expect(runRow.lockedToVersionId).toBe(workerBRow.id); + expect(runRow.lockedToVersionId).not.toBe(workerARow.id); + expect(runRow.taskVersion).toBe(workerBRow.version); + } finally { + await engine.quit(); + } + } + ); + + containerTest("a root trigger issues no parent lookup", async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const validator = new CapturingParentRunValidator(); + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern(prisma, engine, new MockTraceEventConcern()), + validator, + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + isKnownMigrated: async () => false, + }); + + // Trigger with NO parentRunId. + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { payload: { kind: "root" } }, + }); + assertNonNullable(result); + + // The validator ran but received no resolved parent: the parent read was + // skipped because no parentRunId was supplied. 
+ expect(validator.capturedParentRun).not.toBe("unset"); + expect(validator.capturedParentRun).toBeUndefined(); + + const runRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: result.run.id }, + }); + expect(runRow.depth).toBe(0); + expect(runRow.parentTaskRunId).toBeNull(); + } finally { + await engine.quit(); + } + }); +}); diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index e8eb9945f38..4d7d58f4ecf 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -14,6 +14,7 @@ import { TriggerTraceContext, } from "@trigger.dev/core/v3"; import { + generateKsuidId, parseTraceparent, RunId, serializeTraceparent, @@ -25,6 +26,10 @@ import { logger } from "~/services/logger.server"; import { parseDelay } from "~/utils/delays"; import { handleMetadataPacket } from "~/utils/packets"; import { startSpan } from "~/v3/tracing.server"; +import { resolveRunIdMintKind } from "~/v3/engineVersion.server"; +import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; +import { isSplitEnabled as defaultIsSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; +import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedMintKind.server"; import type { TriggerTaskServiceOptions, TriggerTaskServiceResult, @@ -93,6 +98,14 @@ export class RunEngineTriggerTaskService { private readonly evaluateGate: MollifierEvaluateGate; private readonly getMollifierBuffer: MollifierGetBuffer; private readonly isMollifierGloballyEnabled: () => boolean; + // Resolves whether a run that classifies as legacy-by-id-shape has already + // been moved to the new store. Injected so tests can drive the migrated-parent + // case without the split-store infrastructure; defaults to the live resolver. 
+ private readonly isKnownMigrated: (runId: string) => Promise; + // Gates whether the marker-aware inheritance branch runs. With split OFF the + // child residency is a pure id-shape check — zero I/O on the hot path, + // byte-identical to today. Injected so tests can drive split on/off. + private readonly isSplitEnabled: () => Promise; constructor(opts: { prisma: PrismaClientOrTransaction; @@ -108,6 +121,8 @@ export class RunEngineTriggerTaskService { evaluateGate?: MollifierEvaluateGate; getMollifierBuffer?: MollifierGetBuffer; isMollifierGloballyEnabled?: () => boolean; + isKnownMigrated?: (runId: string) => Promise; + isSplitEnabled?: () => Promise; }) { this.prisma = opts.prisma; this.engine = opts.engine; @@ -123,6 +138,37 @@ export class RunEngineTriggerTaskService { this.getMollifierBuffer = opts.getMollifierBuffer ?? defaultGetMollifierBuffer; this.isMollifierGloballyEnabled = opts.isMollifierGloballyEnabled ?? (() => env.TRIGGER_MOLLIFIER_ENABLED === "1"); + this.isKnownMigrated = opts.isKnownMigrated ?? defaultIsKnownMigrated; + this.isSplitEnabled = opts.isSplitEnabled ?? defaultIsSplitEnabled; + } + + // Mint a new run's friendlyId. The id-kind decides which store the run is born + // in (cuid → legacy store, ksuid → new store), so the whole subgraph of a run + // must agree. Two cases: + // + // - ROOT run (no parent): mint by the environment's cutover setting. + // - CHILD run (has a parent): inherit the parent's CURRENT residency, so a + // parent and child never split across stores. A parent that is legacy by + // id-shape but has already been moved to the new store (reported by the + // migrated check) yields a new-store (ksuid) child. + private async mintRunFriendlyId( + environment: AuthenticatedEnvironment, + parentRunFriendlyId?: string + ): Promise { + const mintKind = parentRunFriendlyId + ? 
await resolveInheritedMintKind(parentRunFriendlyId, { + isSplitEnabled: this.isSplitEnabled, + isKnownMigrated: this.isKnownMigrated, + }) + : await resolveRunIdMintKind({ + organizationId: environment.organizationId, + id: environment.id, + orgFeatureFlags: environment.organization.featureFlags, + }); + + return mintKind === "ksuid" + ? RunId.toFriendlyId(generateKsuidId()) + : RunId.generate().friendlyId; } public async call({ @@ -150,7 +196,12 @@ export class RunEngineTriggerTaskService { span.setAttribute("taskId", taskId); span.setAttribute("attempt", attempt); - const runFriendlyId = options?.runFriendlyId ?? RunId.generate().friendlyId; + // Mint the run id. A caller-supplied id (idempotent retry) wins; + // otherwise mint by residency — inheriting the parent's store when a + // parent is present, else the environment's setting. + const runFriendlyId = + options?.runFriendlyId ?? + (await this.mintRunFriendlyId(environment, body.options?.parentRunId)); const triggerRequest = { taskId, friendlyId: runFriendlyId, @@ -159,7 +210,6 @@ export class RunEngineTriggerTaskService { options, } satisfies TriggerTaskRequest; - // Validate max attempts const maxAttemptsValidation = this.validator.validateMaxAttempts({ taskId, attempt, @@ -169,7 +219,6 @@ export class RunEngineTriggerTaskService { throw maxAttemptsValidation.error; } - // Validate tags const tagValidation = this.validator.validateTags({ tags: body.options?.tags, }); @@ -178,7 +227,6 @@ export class RunEngineTriggerTaskService { throw tagValidation.error; } - // Validate entitlement (unless skipChecks is enabled) let planType: string | undefined; if (!options.skipChecks) { @@ -190,7 +238,6 @@ export class RunEngineTriggerTaskService { throw entitlementValidation.error; } - // Extract plan type from entitlement response planType = entitlementValidation.plan?.type; } else { // When skipChecks is enabled, planType should be passed via options @@ -239,7 +286,6 @@ export class RunEngineTriggerTaskService { } } 
- // Get parent run if specified const parentRun = body.options?.parentRunId ? await runStore.findRun( { @@ -250,7 +296,6 @@ export class RunEngineTriggerTaskService { ) : undefined; - // Validate parent run const parentRunValidation = this.validator.validateParentRun({ taskId, parentRun: parentRun ?? undefined, @@ -390,7 +435,6 @@ export class RunEngineTriggerTaskService { envType: environment.type, }); - // Build annotations for this run const triggerSource = options.triggerSource ?? "api"; const triggerAction = options.triggerAction ?? "trigger"; const parentAnnotations = RunAnnotations.safeParse(parentRun?.annotations).data; diff --git a/apps/webapp/app/services/archiveBranch.server.ts b/apps/webapp/app/services/archiveBranch.server.ts index 3372ac87229..c7c8af5860b 100644 --- a/apps/webapp/app/services/archiveBranch.server.ts +++ b/apps/webapp/app/services/archiveBranch.server.ts @@ -81,6 +81,9 @@ export class ArchiveBranchService { }; } + // Branch archive is a SOFT update — do NOT hard-delete run-ops rows here (it would destroy a + // retained branch's history). RunOpsCascadeCleanupService.cleanupEnvironment belongs on the + // env hard-delete/purge path (owned by the cloud env-purge runbook), which has no site today. 
const slug = `${environment.slug}-${nanoid(6)}`; const shortcode = slug; diff --git a/apps/webapp/app/services/dashboardAgent.server.ts b/apps/webapp/app/services/dashboardAgent.server.ts index a66882b72cc..14eb51fcbef 100644 --- a/apps/webapp/app/services/dashboardAgent.server.ts +++ b/apps/webapp/app/services/dashboardAgent.server.ts @@ -3,6 +3,7 @@ import { TriggerClient } from "@trigger.dev/sdk"; import { chat } from "@trigger.dev/sdk/ai"; import { prisma } from "~/db.server"; import { env } from "~/env.server"; +import { runStore } from "~/v3/runStore.server"; import { githubApp } from "./gitHub.server"; import { logger } from "./logger.server"; @@ -211,10 +212,10 @@ export async function resolveRunCommit( environmentId: string, runFriendlyId: string ): Promise<{ sha: string; version: string; dirty: boolean } | null> { - const run = await prisma.taskRun.findFirst({ - where: { friendlyId: runFriendlyId, runtimeEnvironmentId: environmentId }, - select: { lockedToVersionId: true }, - }); + const run = await runStore.findRun( + { friendlyId: runFriendlyId, runtimeEnvironmentId: environmentId }, + { select: { lockedToVersionId: true } } + ); if (!run?.lockedToVersionId) return null; const deployment = await prisma.workerDeployment.findFirst({ diff --git a/apps/webapp/app/services/deleteProject.server.ts b/apps/webapp/app/services/deleteProject.server.ts index bbce896a57f..8db0a6ede04 100644 --- a/apps/webapp/app/services/deleteProject.server.ts +++ b/apps/webapp/app/services/deleteProject.server.ts @@ -3,6 +3,7 @@ import { prisma } from "~/db.server"; import { marqs } from "~/v3/marqs/index.server"; import { engine } from "~/v3/runEngine.server"; import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { RunOpsCascadeCleanupService } from "~/v3/runOpsMigration/runOpsCascadeCleanup.server"; type Options = ({ projectId: string } | { projectSlug: string }) & { userId: string; @@ -50,6 +51,10 @@ export class DeleteProjectService 
{ }); } + // Hard-delete the project's run-ops rows across both run-ops DBs (replaces the cloud-only + // dropped cross-seam FK cascades). Idempotent; uses the run-ops writers, not #prismaClient. + await new RunOpsCascadeCleanupService().cleanupProject(project.id); + // Mark the project as deleted (do this last because it makes it impossible to try again) // - This disables all API keys // - This disables all schedules from being scheduled diff --git a/apps/webapp/app/services/realtime/runReader.server.ts b/apps/webapp/app/services/realtime/runReader.server.ts index 952280e7749..c6a34d7de7b 100644 --- a/apps/webapp/app/services/realtime/runReader.server.ts +++ b/apps/webapp/app/services/realtime/runReader.server.ts @@ -95,7 +95,7 @@ export type RunHydratorOptions = { const DEFAULT_CACHE_TTL_MS = 250; const DEFAULT_MAX_CACHE_ENTRIES = 5_000; -/** Hydrates runs by id from the read replica, projected to the realtime columns; concurrent same-run refetches are single-flighted + short-TTL cached. */ +/** Hydrates runs by id through the runStore seam (split routing lives in the store, below this file), projected to the realtime columns; concurrent same-run refetches are single-flighted + short-TTL cached. 
*/ export class RunHydrator { readonly #inflight = new Map>(); readonly #cache: BoundedTtlCache; diff --git a/apps/webapp/app/services/realtime/sessions.server.ts b/apps/webapp/app/services/realtime/sessions.server.ts index a7129830e71..71170c322f2 100644 --- a/apps/webapp/app/services/realtime/sessions.server.ts +++ b/apps/webapp/app/services/realtime/sessions.server.ts @@ -1,7 +1,8 @@ import type { PrismaClient, Session } from "@trigger.dev/database"; import type { SessionItem } from "@trigger.dev/core/v3"; +import type { RunStore } from "@internal/run-store"; import { $replica, prisma } from "~/db.server"; -import { runStore } from "~/v3/runStore.server"; +import { runStore as defaultRunStore } from "~/v3/runStore.server"; /** * Prefix that {@link SessionId.generate} attaches to every Session friendlyId. @@ -18,6 +19,9 @@ const SESSION_FRIENDLY_ID_PREFIX = "session_"; * friendlyIds, anything else is looked up against `externalId` scoped to * the caller's environment. */ +// CONTROL-PLANE: `Session` lives on the control-plane DB; these reads are NOT +// routed to run-ops read-through — only the `TaskRun` currentRunId resolves in +// this file are run-ops read-through routed. export async function resolveSessionByIdOrExternalId( prisma: Pick, runtimeEnvironmentId: string, @@ -119,18 +123,27 @@ export function serializeSession(session: Session): SessionItem { * this so the wire-side `currentRunId` is consistent with the rest of * the public API (which only accepts friendlyIds for run lookups). * - * Skips the lookup when `currentRunId` is null. The read goes through - * `$replica` — a TaskRun's `friendlyId` is immutable so replica lag is - * harmless, and serializing on the writer would just add hot-path load. + * Skips the lookup when `currentRunId` is null. + * + * Resolves `currentRunId` -> `friendlyId` through `runStore.findRun` so a + * ksuid (NEW-DB) session run resolves from its owning store rather than the + * control-plane replica. 
Mirrors `sessionRunManager.server.ts`. + * Tenant-scoped because `Session.currentRunId` is a no-FK pointer. */ -export async function serializeSessionWithFriendlyRunId(session: Session): Promise { +export async function serializeSessionWithFriendlyRunId( + session: Session, + runStore: RunStore = defaultRunStore +): Promise { const base = serializeSession(session); if (!session.currentRunId) return base; const run = await runStore.findRun( - { id: session.currentRunId }, - { select: { friendlyId: true } }, - $replica + { + id: session.currentRunId, + projectId: session.projectId, + runtimeEnvironmentId: session.runtimeEnvironmentId, + }, + { select: { friendlyId: true } } ); return { @@ -148,27 +161,28 @@ export async function serializeSessionWithFriendlyRunId(session: Session): Promi */ export async function serializeSessionsWithFriendlyRunIds( sessions: Session[], - scope: { projectId: string; runtimeEnvironmentId: string } + scope: { projectId: string; runtimeEnvironmentId: string }, + runStore: RunStore = defaultRunStore ): Promise { const runIds = [ ...new Set(sessions.map((s) => s.currentRunId).filter((id): id is string => !!id)), ]; - // `currentRunId` is a plain string pointer (no FK), so scope the lookup to - // the caller's tenant — a stale value must not resolve a run in another env. - const runs = runIds.length - ? await runStore.findRuns( - { + // `runStore.findRuns` fans out across both stores under split (NEW + LEGACY + // replica merge) and is a plain `$replica` find when split is off. Tenant- + // scoped: `Session.currentRunId` is a no-FK pointer, so a stale id must never + // resolve a run in another env. + const runs = + runIds.length > 0 + ? 
await runStore.findRuns({ where: { id: { in: runIds }, projectId: scope.projectId, runtimeEnvironmentId: scope.runtimeEnvironmentId, }, select: { id: true, friendlyId: true }, - }, - $replica - ) - : []; + }) + : []; const friendlyIdByRunId = new Map(runs.map((run) => [run.id, run.friendlyId])); return sessions.map((session) => ({ diff --git a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts index 9bf67314779..2cecb790361 100644 --- a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts @@ -13,9 +13,22 @@ import { import parseDuration from "parse-duration"; import { decodeRunsCursor, encodeRunsCursor } from "./runsCursor.server"; import { runStore } from "~/v3/runStore.server"; +import { type PrismaClientOrTransaction } from "~/db.server"; +import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; type RunCursorRow = { runId: string; createdAt: number }; +/** + * Hydrates a set of rows for a ClickHouse-derived run-id set against the given + * read client. The closure MUST select `id` so `#hydrateRunsByIds` can key + * set-membership and re-impose ordering; the call site projects `id` away if its + * result type excludes it. 
+ */ +type HydrateFn = ( + client: PrismaClientOrTransaction, + ids: string[] +) => Promise; + export class ClickHouseRunsRepository implements IRunsRepository { constructor(private readonly options: RunsRepositoryOptions) {} @@ -38,7 +51,11 @@ export class ClickHouseRunsRepository implements IRunsRepository { const queryBuilder = this.options.clickhouse.taskRuns.queryBuilder(); applyRunFiltersToQueryBuilder( queryBuilder, - await convertRunListInputOptionsToFilterRunsOptions(options, this.options.prisma) + await convertRunListInputOptionsToFilterRunsOptions( + options, + this.options.prisma, + this.options.runStore ?? runStore + ) ); const forward = options.page.direction === "forward" || !options.page.direction; @@ -140,6 +157,56 @@ export class ClickHouseRunsRepository implements IRunsRepository { return { runIds, pagination: { nextCursor, previousCursor } }; } + /** + * Hydrates a ClickHouse-derived run-id set from the run-ops store. + * Split ON: new run-ops client first, then the LEGACY RUN-OPS READ REPLICA ONLY + * for ids not known-migrated — never the legacy primary. The mixed-residency + * fan-out lives here because `RoutingRunStore.findRuns` punts it. + * Split OFF (single-DB / self-host): one plain `store.findRuns(args, prisma)` + * (passthrough) — no legacy read, no known-migrated probe, no second connection. + */ + async #hydrateRunsByIds( + runIds: string[], + hydrate: HydrateFn + ): Promise { + if (runIds.length === 0) { + return []; + } + + const splitEnabled = this.options.readThrough?.splitEnabled ?? false; + + let rows: T[]; + if (!splitEnabled) { + rows = await hydrate(this.options.prisma, runIds); + } else { + const newClient = this.options.readThrough?.newClient ?? this.options.prisma; + const legacyReplica = this.options.readThrough?.legacyReplica ?? this.options.prisma; + const isKnownMigrated = this.options.readThrough?.isKnownMigrated ?? 
defaultIsKnownMigrated; + + const newRows = await hydrate(newClient, runIds); + const foundIds = new Set(newRows.map((r) => r.id)); + const missing = runIds.filter((id) => !foundIds.has(id)); + + const toProbeLegacy: string[] = []; + for (const id of missing) { + if (!(await isKnownMigrated(id))) { + toProbeLegacy.push(id); + } + } + + const legacyRows = toProbeLegacy.length ? await hydrate(legacyReplica, toProbeLegacy) : []; + rows = [...newRows, ...legacyRows]; + } + + // Preserve the ClickHouse keyset order (created_at desc, run_id desc) by re-ordering the + // hydrated rows to match the input `runIds`. Sorting by raw `id` was only ~chronological + // when every id was a time-prefixed cuid; a mixed cuid/ksuid page sorts the two id-spaces + // into separate blocks, burying recent runs. Rows whose PG row is gone (e.g. past + // retention) drop out, exactly as before. + const byId = new Map(rows.map((r) => [r.id, r] as const)); + return runIds.map((id) => byId.get(id)).filter((r): r is T => r !== undefined); + } + async listFriendlyRunIds(options: ListRunsOptions) { // First get internal IDs from ClickHouse const { runIds } = await this.listRunIds(options); @@ -148,19 +215,18 @@ export class ClickHouseRunsRepository implements IRunsRepository { return []; } - // Then get friendly IDs from Prisma - const runs = await runStore.findRuns( - { - where: { - id: { - in: runIds, - }, - }, - select: { - friendlyId: true, + const store = this.options.runStore ?? runStore; + + // Then get friendly IDs from the run-ops store (id added for set-membership; + // projected away below so the returned shape stays `string[]`). 
+ const runs = await this.#hydrateRunsByIds(runIds, (client, ids) => + store.findRuns( + { + where: { id: { in: ids } }, + select: { id: true, friendlyId: true }, }, - }, - this.options.prisma + client + ) ); return runs.map((run) => run.friendlyId); @@ -169,51 +235,55 @@ export class ClickHouseRunsRepository implements IRunsRepository { async listRuns(options: ListRunsOptions) { const { runIds, pagination } = await this.listRunIds(options); - let runs = await runStore.findRuns( - { - where: { - id: { - in: runIds, + const store = this.options.runStore ?? runStore; + + let runs = await this.#hydrateRunsByIds(runIds, (client, ids) => + store.findRuns( + { + where: { + id: { + in: ids, + }, + }, + orderBy: { + id: "desc", + }, + select: { + id: true, + friendlyId: true, + taskIdentifier: true, + taskVersion: true, + runtimeEnvironmentId: true, + status: true, + createdAt: true, + startedAt: true, + lockedAt: true, + delayUntil: true, + updatedAt: true, + completedAt: true, + isTest: true, + spanId: true, + idempotencyKey: true, + ttl: true, + expiredAt: true, + costInCents: true, + baseCostInCents: true, + usageDurationMs: true, + runTags: true, + depth: true, + rootTaskRunId: true, + batchId: true, + metadata: true, + metadataType: true, + machinePreset: true, + queue: true, + workerQueue: true, + region: true, + annotations: true, }, }, - orderBy: { - id: "desc", - }, - select: { - id: true, - friendlyId: true, - taskIdentifier: true, - taskVersion: true, - runtimeEnvironmentId: true, - status: true, - createdAt: true, - startedAt: true, - lockedAt: true, - delayUntil: true, - updatedAt: true, - completedAt: true, - isTest: true, - spanId: true, - idempotencyKey: true, - ttl: true, - expiredAt: true, - costInCents: true, - baseCostInCents: true, - usageDurationMs: true, - runTags: true, - depth: true, - rootTaskRunId: true, - batchId: true, - metadata: true, - metadataType: true, - machinePreset: true, - queue: true, - workerQueue: true, - region: true, - 
annotations: true, - }, - }, - this.options.prisma + client + ) ); // ClickHouse is slightly delayed, so we're going to do in-memory status filtering too @@ -231,7 +301,11 @@ export class ClickHouseRunsRepository implements IRunsRepository { const queryBuilder = this.options.clickhouse.taskRuns.countQueryBuilder(); applyRunFiltersToQueryBuilder( queryBuilder, - await convertRunListInputOptionsToFilterRunsOptions(options, this.options.prisma) + await convertRunListInputOptionsToFilterRunsOptions( + options, + this.options.prisma, + this.options.runStore ?? runStore + ) ); const [queryError, result] = await queryBuilder.execute(); diff --git a/apps/webapp/app/services/runsRepository/runsRepository.server.ts b/apps/webapp/app/services/runsRepository/runsRepository.server.ts index b256738c465..b477ae492de 100644 --- a/apps/webapp/app/services/runsRepository/runsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/runsRepository.server.ts @@ -1,4 +1,5 @@ import { type ClickHouse } from "@internal/clickhouse"; +import { type RunStore } from "@internal/run-store"; import { type Tracer } from "@internal/tracing"; import { type Logger, type LogLevel } from "@trigger.dev/core/logger"; import { MachinePresetName } from "@trigger.dev/core/v3"; @@ -7,7 +8,8 @@ import { type Prisma, TaskRunStatus } from "@trigger.dev/database"; import parseDuration from "parse-duration"; import { z } from "zod"; import { timeFilters } from "~/components/runs/v3/SharedFilters"; -import { type PrismaClientOrTransaction } from "~/db.server"; +import { type PrismaClient, type PrismaClientOrTransaction } from "~/db.server"; +import { runStore as defaultRunStore } from "~/v3/runStore.server"; import { startActiveSpan } from "~/v3/tracer.server"; import { ClickHouseRunsRepository } from "./clickhouseRunsRepository.server"; @@ -17,6 +19,21 @@ export type RunsRepositoryOptions = { logger?: Logger; logLevel?: LogLevel; tracer?: Tracer; + + // Injectable run-ops store; defaults to the 
`~/v3/runStore.server` singleton + // (passthrough). The list-hydrate fan-out below does not depend on the store + // routing mixed-residency id sets — it applies the read-through fan-out itself. + runStore?: RunStore; + + // Run-ops read-through wiring for the list hydrate. Omitted => passthrough. + readThrough?: { + // `legacyReplica` is a READ REPLICA handle only — there is no legacy-primary field. + newClient?: PrismaClientOrTransaction; + legacyReplica?: PrismaClientOrTransaction; + // Resolved boot constant; when false the split branch is never entered. + splitEnabled?: boolean; + isKnownMigrated?: (runId: string) => Promise; + }; }; const RunStatus = z.enum(Object.values(TaskRunStatus) as [TaskRunStatus, ...TaskRunStatus[]]); @@ -195,6 +212,7 @@ export class RunsRepository implements IRunsRepository { { attributes: { "repository.name": "clickhouse", + "readThrough.split": Boolean(this.options.readThrough?.splitEnabled), organizationId: options.organizationId, projectId: options.projectId, environmentId: options.environmentId, @@ -216,6 +234,7 @@ export class RunsRepository implements IRunsRepository { { attributes: { "repository.name": "clickhouse", + "readThrough.split": Boolean(this.options.readThrough?.splitEnabled), organizationId: options.organizationId, projectId: options.projectId, environmentId: options.environmentId, @@ -261,7 +280,8 @@ export function parseRunListInputOptions(data: any): RunListInputOptions { export async function convertRunListInputOptionsToFilterRunsOptions( options: RunListInputOptions, - prisma: RunsRepositoryOptions["prisma"] + prisma: RunsRepositoryOptions["prisma"], + store: RunStore = defaultRunStore ): Promise { const convertedOptions: FilterRunsOptions = { ...options, @@ -276,24 +296,20 @@ export async function convertRunListInputOptionsToFilterRunsOptions( }); convertedOptions.period = time.period ? (parseDuration(time.period) ?? 
undefined) : undefined; - // Batch friendlyId to id + // Cross-DB resolution: BatchTaskRun is a RUN-OPS table. A ksuid batch resident on the + // dedicated run-ops DB must resolve via the store's NEW->LEGACY probe — a single control-plane + // client would miss it and leave the friendlyId in the ClickHouse `batch_id` filter, matching + // nothing. Split off / self-host: the store is a passthrough over the one client. if (options.batchId && options.batchId.startsWith("batch_")) { - const batch = await prisma.batchTaskRun.findFirst({ - select: { - id: true, - }, - where: { - friendlyId: options.batchId, - runtimeEnvironmentId: options.environmentId, - }, - }); + const batch = await store.findBatchTaskRunByFriendlyId(options.batchId, options.environmentId); if (batch) { convertedOptions.batchId = batch.id; } } - // ScheduleId can be a friendlyId + // ScheduleId can be a friendlyId. TaskSchedule is a CONTROL-PLANE table, so this stays on + // the passed `prisma` (the control-plane client) in both single-DB and split modes. 
if (options.scheduleId && options.scheduleId.startsWith("sched_")) { const schedule = await prisma.taskSchedule.findFirst({ select: { diff --git a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts index 5982fd4460c..94e9ab8fc31 100644 --- a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts +++ b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts @@ -15,7 +15,11 @@ import { type RunFailedWebhook, TaskRunError, } from "@trigger.dev/core/v3"; -import { type ProjectAlertChannelType, type ProjectAlertType } from "@trigger.dev/database"; +import { + type ProjectAlertChannelType, + type ProjectAlertType, + type RuntimeEnvironmentType, +} from "@trigger.dev/database"; import assertNever from "assert-never"; import { subtle } from "crypto"; import { environmentTitle } from "~/components/environments/EnvironmentLabel"; @@ -46,6 +50,45 @@ import { generateFriendlyId } from "~/v3/friendlyIdentifiers"; import { fromPromise } from "neverthrow"; import { BaseService } from "../baseService.server"; import { CURRENT_API_VERSION } from "~/api/versions"; +import { + ControlPlaneResolver, + controlPlaneResolver as defaultControlPlaneResolver, +} from "~/v3/runOpsMigration/controlPlaneResolver.server"; + +// Run-ops scalars read off `alert.taskRun` downstream. The control-plane fields (env type/branch, +// lockedBy file/export, lockedToVersion version) are resolved via the resolver and stitched on +// below, so the run-ops findRun selects scalars only. 
+const taskRunAlertSelect = { + id: true, + friendlyId: true, + taskIdentifier: true, + taskVersion: true, + sdkVersion: true, + cliVersion: true, + status: true, + number: true, + isTest: true, + createdAt: true, + startedAt: true, + completedAt: true, + idempotencyKey: true, + runTags: true, + machinePreset: true, + error: true, + runtimeEnvironmentId: true, + lockedById: true, + lockedToVersionId: true, +} satisfies Prisma.TaskRunSelect; + +type ResolvedAlertTaskRun = Prisma.Result< + typeof prisma.taskRun, + { select: typeof taskRunAlertSelect }, + "findUniqueOrThrow" +> & { + runtimeEnvironment: { type: RuntimeEnvironmentType; branchName: string | null }; + lockedBy: { filePath: string; exportName: string | null } | null; + lockedToVersion: { version: string } | null; +}; type FoundAlert = Prisma.Result< typeof prisma.projectAlert, @@ -58,18 +101,6 @@ type FoundAlert = Prisma.Result< }; }; environment: true; - taskRun: { - include: { - lockedBy: true; - lockedToVersion: true; - runtimeEnvironment: { - select: { - type: true; - branchName: true; - }; - }; - }; - }; workerDeployment: { include: { worker: { @@ -88,7 +119,9 @@ type FoundAlert = Prisma.Result< }; }, "findUniqueOrThrow" ->; +> & { + taskRun: ResolvedAlertTaskRun | null; +}; class SkipRetryError extends Error {} @@ -98,6 +131,20 @@ type DeploymentIntegrationMetadata = { }; export class DeliverAlertService extends BaseService { + #controlPlaneResolver: ControlPlaneResolver; + + constructor( + opts: { + prisma?: PrismaClientOrTransaction; + replica?: PrismaClientOrTransaction; + runStore?: import("@internal/run-store").RunStore; + controlPlaneResolver?: ControlPlaneResolver; + } = {} + ) { + super(opts.prisma, opts.replica, opts.runStore); + this.#controlPlaneResolver = opts.controlPlaneResolver ?? 
defaultControlPlaneResolver; + } + public async call(alertId: string) { const alertWithoutRun = await this._prisma.projectAlert.findFirst({ where: { id: alertId }, @@ -133,22 +180,37 @@ export class DeliverAlertService extends BaseService { let taskRun: FoundAlert["taskRun"] = null; if (alertWithoutRun.taskRunId) { - taskRun = await this.runStore.findRun( + const resolvedTaskRun = await this.runStore.findRun( { id: alertWithoutRun.taskRunId }, - { - include: { - lockedBy: true, - lockedToVersion: true, - runtimeEnvironment: { - select: { - type: true, - branchName: true, - }, - }, - }, - }, + { select: taskRunAlertSelect }, this._prisma ); + + if (resolvedTaskRun) { + const env = await this.#controlPlaneResolver.resolveAuthenticatedEnv( + resolvedTaskRun.runtimeEnvironmentId + ); + const lockedWorker = await this.#controlPlaneResolver.resolveRunLockedWorker({ + lockedById: resolvedTaskRun.lockedById, + lockedToVersionId: resolvedTaskRun.lockedToVersionId, + }); + + if (env) { + taskRun = { + ...resolvedTaskRun, + runtimeEnvironment: { type: env.type, branchName: env.branchName }, + lockedBy: lockedWorker?.lockedBy + ? { + filePath: lockedWorker.lockedBy.filePath, + exportName: lockedWorker.lockedBy.exportName, + } + : null, + lockedToVersion: lockedWorker?.lockedToVersion + ? { version: lockedWorker.lockedToVersion.version } + : null, + }; + } + } } const alert: FoundAlert = { ...alertWithoutRun, taskRun }; @@ -686,7 +748,6 @@ export class DeliverAlertService extends BaseService { return; } - // Get the org integration const integration = slackProperties.data.integrationId ? 
await this._prisma.organizationIntegration.findFirst({ where: { @@ -793,7 +854,6 @@ export class DeliverAlertService extends BaseService { ], }); - // Upsert the storage if (message.ts) { if (storage) { await this._prisma.projectAlertStorage.update({ @@ -969,7 +1029,6 @@ export class DeliverAlertService extends BaseService { const signature = await subtle.sign("HMAC", key, hashPayload); const signatureHex = Buffer.from(signature).toString("hex"); - // Send the webhook to the URL specified in webhook.url const response = await fetch(webhook.url, { method: "POST", headers: { diff --git a/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts b/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts index 31912c39fd0..8fea2910f02 100644 --- a/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts +++ b/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts @@ -1,27 +1,46 @@ +import { type RunStore } from "@internal/run-store"; import { type Prisma, type ProjectAlertChannel } from "@trigger.dev/database"; -import { type prisma } from "~/db.server"; +import { type PrismaClientOrTransaction, type prisma } from "~/db.server"; import { alertsWorker } from "~/v3/alertsWorker.server"; +import { + ControlPlaneResolver, + controlPlaneResolver as defaultControlPlaneResolver, +} from "~/v3/runOpsMigration/controlPlaneResolver.server"; import { BaseService } from "../baseService.server"; import { DeliverAlertService } from "./deliverAlert.server"; +// The alert hydration reads only run-ops scalars (id/projectId/runtimeEnvironmentId); the env's +// type (and its parent's) is resolved via the control-plane resolver so the run-ops DB can split +// without a cross-provider join. The prior `lockedBy` + `runtimeEnvironment` includes were unused. 
type FoundRun = Prisma.Result< typeof prisma.taskRun, - { include: { lockedBy: true; runtimeEnvironment: true } }, + { select: { id: true; projectId: true; runtimeEnvironmentId: true } }, "findUniqueOrThrow" >; export class PerformTaskRunAlertsService extends BaseService { + #controlPlaneResolver: ControlPlaneResolver; + + constructor( + opts: { + prisma?: PrismaClientOrTransaction; + replica?: PrismaClientOrTransaction; + runStore?: RunStore; + controlPlaneResolver?: ControlPlaneResolver; + } = {} + ) { + super(opts.prisma, opts.replica, opts.runStore); + this.#controlPlaneResolver = opts.controlPlaneResolver ?? defaultControlPlaneResolver; + } + public async call(runId: string) { const run = await this.runStore.findRun( { id: runId }, { - include: { - lockedBy: true, - runtimeEnvironment: { - include: { - parentEnvironment: true, - }, - }, + select: { + id: true, + projectId: true, + runtimeEnvironmentId: true, }, }, this._prisma @@ -31,7 +50,12 @@ export class PerformTaskRunAlertsService extends BaseService { return; } - // Find all the alert channels + const env = await this.#controlPlaneResolver.resolveEnv(run.runtimeEnvironmentId); + + if (!env) { + return; + } + const alertChannels = await this._prisma.projectAlertChannel.findMany({ where: { projectId: run.projectId, @@ -39,7 +63,7 @@ export class PerformTaskRunAlertsService extends BaseService { has: "TASK_RUN", }, environmentTypes: { - has: run.runtimeEnvironment.parentEnvironment?.type ?? run.runtimeEnvironment.type, + has: env.parentEnvironmentType ?? 
env.type, }, enabled: true, }, diff --git a/apps/webapp/app/v3/services/batchTriggerV3.server.ts b/apps/webapp/app/v3/services/batchTriggerV3.server.ts index ae7fcd83d44..1e7f563adf5 100644 --- a/apps/webapp/app/v3/services/batchTriggerV3.server.ts +++ b/apps/webapp/app/v3/services/batchTriggerV3.server.ts @@ -11,16 +11,25 @@ import { isUniqueConstraintError, Prisma, } from "@trigger.dev/database"; +import type { RunStore } from "@internal/run-store"; +import { generateKsuidId, RunId } from "@trigger.dev/core/v3/isomorphic"; import { z } from "zod"; import type { PrismaClientOrTransaction } from "~/db.server"; import { prisma } from "~/db.server"; +import { runStore as defaultRunStore } from "~/v3/runStore.server"; import { env } from "~/env.server"; +import { findEnvironmentById } from "~/models/runtimeEnvironment.server"; import { batchTaskRunItemStatusForRunStatus } from "~/models/taskRun.server"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; import { getEntitlement } from "~/services/platform.v3.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { resolveRunIdMintKind, type RunIdMintKind } from "~/v3/engineVersion.server"; +import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; +import { isSplitEnabled as defaultIsSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; +import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedMintKind.server"; +import { mintBatchFriendlyId } from "~/v3/runOpsMigration/mintBatchFriendlyId.server"; import { batchTriggerWorker } from "../batchTriggerWorker.server"; -import { generateFriendlyId } from "../friendlyIdentifiers"; import { legacyRunEngineWorker } from "../legacyRunEngineWorker.server"; import { marqs } from "../marqs/index.server"; import { guardQueueSizeLimitsForEnv } from "../queueSizeLimits.server"; @@ 
-101,7 +110,22 @@ export class BatchTriggerV3Service extends BaseService { constructor( batchProcessingStrategy?: BatchProcessingStrategy, asyncBatchProcessSizeThreshold: number = ASYNC_BATCH_PROCESS_SIZE_THRESHOLD, - protected readonly _prisma: PrismaClientOrTransaction = prisma + protected readonly _prisma: PrismaClientOrTransaction = prisma, + protected readonly runStore: RunStore = defaultRunStore, + // Injected so tests drive the migrated-parent branch without the split-store + // infrastructure; defaults to the live resolver (same pattern as + // RunEngineTriggerTaskService). + private readonly isKnownMigrated: (runId: string) => Promise = defaultIsKnownMigrated, + // Injected so tests force the env-default branch deterministically; defaults + // to the live per-env mint resolver. + private readonly resolveMintKind: (environment: { + organizationId: string; + id: string; + orgFeatureFlags?: unknown; + }) => Promise = resolveRunIdMintKind, + // Injected so the migrated-marker read stays off the hot path when split is off + // (same guard as RunEngineTriggerTaskService); defaults to the live resolver. + private readonly isSplitEnabled: () => Promise = defaultIsSplitEnabled ) { super(_prisma); @@ -123,13 +147,15 @@ export class BatchTriggerV3Service extends BaseService { throw new ServiceValidationError("A batch trigger must have at least one item"); } + // BatchTaskRun.runtimeEnvironmentId no longer has an FK into RuntimeEnvironment; + // validate env existence app-side before any create arm (passthrough when split is off). + await controlPlaneResolver.assertEnvExists(environment.id); + const existingBatch = options.idempotencyKey - ? await this._prisma.batchTaskRun.findFirst({ - where: { - runtimeEnvironmentId: environment.id, - idempotencyKey: options.idempotencyKey, - }, - }) + ? 
await this.runStore.findBatchTaskRunByIdempotencyKey( + environment.id, + options.idempotencyKey + ) : undefined; if (existingBatch) { @@ -149,9 +175,10 @@ export class BatchTriggerV3Service extends BaseService { }); // Update the existing batch to remove the idempotency key - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: existingBatch.id }, data: { idempotencyKey: null }, + select: { id: true }, }); // Don't return, just continue with the batch trigger @@ -162,7 +189,14 @@ export class BatchTriggerV3Service extends BaseService { } } - const batchId = generateFriendlyId("batch"); + const { id: batchInternalId, friendlyId: batchId } = await mintBatchFriendlyId({ + environment: { + organizationId: environment.organizationId, + id: environment.id, + orgFeatureFlags: environment.organization.featureFlags, + }, + parentRunFriendlyId: body.parentRunId, + }); span.setAttribute("batchId", batchId); @@ -202,9 +236,8 @@ export class BatchTriggerV3Service extends BaseService { } } - const runs = await this.#prepareRunData(environment, body); + const runs = await this.#prepareRunData(environment, body, batchId); - // Calculate how many new runs we need to create const newRunCount = runs.filter((r) => !r.isCached).length; if (newRunCount === 0) { @@ -212,19 +245,18 @@ export class BatchTriggerV3Service extends BaseService { batchId, }); - await this._prisma.batchTaskRun.create({ - data: { - friendlyId: batchId, - runtimeEnvironmentId: environment.id, - idempotencyKey: options.idempotencyKey, - idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, - dependentTaskAttemptId: dependentAttempt?.id, - runCount: body.items.length, - runIds: runs.map((r) => r.id), - status: "COMPLETED", - batchVersion: "v3", - oneTimeUseToken: options.oneTimeUseToken, - }, + await this.runStore.createBatchTaskRun({ + id: batchInternalId, + friendlyId: batchId, + runtimeEnvironmentId: environment.id, + idempotencyKey: options.idempotencyKey, + 
idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, + dependentTaskAttemptId: dependentAttempt?.id, + runCount: body.items.length, + runIds: runs.map((r) => r.id), + status: "COMPLETED", + batchVersion: "v3", + oneTimeUseToken: options.oneTimeUseToken, }); return { @@ -265,6 +297,7 @@ export class BatchTriggerV3Service extends BaseService { const batch = await this.#createAndProcessBatchTaskRun( batchId, + batchInternalId, runs, payloadPacket, newRunCount, @@ -319,18 +352,52 @@ export class BatchTriggerV3Service extends BaseService { } } + // Mint a child run's friendlyId so it lands in the SAME physical store as its + // residency anchor. The caller passes the batch's friendlyId, so a ksuid + // anchor (or a cuid-shaped one already migrated to the new store) yields a ksuid + // (NEW) child and a cuid anchor yields a cuid (LEGACY) child. With no anchor it + // falls back to the env's cutover setting. Mirrors + // RunEngineTriggerTaskService.mintRunFriendlyId. + private async mintChildFriendlyId( + environment: AuthenticatedEnvironment, + anchorFriendlyId?: string + ): Promise { + const mintKind = anchorFriendlyId + ? await resolveInheritedMintKind(anchorFriendlyId, { + isSplitEnabled: this.isSplitEnabled, + isKnownMigrated: this.isKnownMigrated, + }) + : await this.resolveMintKind({ + organizationId: environment.organizationId, + id: environment.id, + orgFeatureFlags: environment.organization.featureFlags, + }); + + return mintKind === "ksuid" + ? RunId.toFriendlyId(generateKsuidId()) + : RunId.generate().friendlyId; + } + async #prepareRunData( environment: AuthenticatedEnvironment, - body: BatchTriggerTaskV2RequestBody + body: BatchTriggerTaskV2RequestBody, + batchFriendlyId: string ): Promise> { + // Anchor every child to the batch's residency: the batch friendlyId is + // minted once, so deriving each child's id-kind from it — rather than re-resolving + // the per-org flag, which can flip mid-batch — keeps batch + children co-resident. 
+ const childAnchor = batchFriendlyId; + // batchTriggerAndWait cannot have cached runs because that does not work in run engine v1 and is not available in the client if (body?.dependentAttempt) { - return body.items.map((item) => ({ - id: generateFriendlyId("run"), - isCached: false, - idempotencyKey: undefined, - taskIdentifier: item.task, - })); + return Promise.all( + body.items.map(async (item) => ({ + id: await this.mintChildFriendlyId(environment, childAnchor), + isCached: false, + idempotencyKey: undefined, + taskIdentifier: item.task, + })) + ); } // Group items by taskIdentifier @@ -374,42 +441,42 @@ export class BatchTriggerV3Service extends BaseService { ) ).then((results) => results.flat()); - // Now we need to create an array of all the run IDs, in order - // If we have a cached run, that isn't expired, we should use that run ID - // If we have a cached run, that is expired, we should generate a new run ID and save that cached run ID to a set of expired run IDs - // If we don't have a cached run, we should generate a new run ID + // Build the run IDs in order: reuse an unexpired cached id, else mint a new id (and record any + // expired cached id so its idempotency key can be cleared below). 
const expiredRunIds = new Set(); - const runs = body.items.map((item) => { - const cachedRun = cachedRuns.find((r) => r.idempotencyKey === item.options?.idempotencyKey); + const runs = await Promise.all( + body.items.map(async (item) => { + const cachedRun = cachedRuns.find((r) => r.idempotencyKey === item.options?.idempotencyKey); + + if (cachedRun) { + if (cachedRun.idempotencyKeyExpiresAt && cachedRun.idempotencyKeyExpiresAt < new Date()) { + expiredRunIds.add(cachedRun.friendlyId); - if (cachedRun) { - if (cachedRun.idempotencyKeyExpiresAt && cachedRun.idempotencyKeyExpiresAt < new Date()) { - expiredRunIds.add(cachedRun.friendlyId); + return { + id: await this.mintChildFriendlyId(environment, childAnchor), + isCached: false, + idempotencyKey: item.options?.idempotencyKey ?? undefined, + taskIdentifier: item.task, + }; + } return { - id: generateFriendlyId("run"), - isCached: false, + id: cachedRun.friendlyId, + isCached: true, idempotencyKey: item.options?.idempotencyKey ?? undefined, taskIdentifier: item.task, }; } return { - id: cachedRun.friendlyId, - isCached: true, + id: await this.mintChildFriendlyId(environment, childAnchor), + isCached: false, idempotencyKey: item.options?.idempotencyKey ?? undefined, taskIdentifier: item.task, }; - } - - return { - id: generateFriendlyId("run"), - isCached: false, - idempotencyKey: item.options?.idempotencyKey ?? 
undefined, - taskIdentifier: item.task, - }; - }); + }) + ); // Expire the cached runs that are no longer valid if (expiredRunIds.size) { @@ -424,6 +491,7 @@ export class BatchTriggerV3Service extends BaseService { async #createAndProcessBatchTaskRun( batchId: string, + batchInternalId: string, runs: Array, payloadPacket: IOPacket, newRunCount: number, @@ -433,21 +501,20 @@ export class BatchTriggerV3Service extends BaseService { dependentAttempt?: TaskRunAttempt ) { if (runs.length <= this._asyncBatchProcessSizeThreshold) { - const batch = await this._prisma.batchTaskRun.create({ - data: { - friendlyId: batchId, - runtimeEnvironmentId: environment.id, - idempotencyKey: options.idempotencyKey, - idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, - dependentTaskAttemptId: dependentAttempt?.id, - runCount: runs.length, - runIds: runs.map((r) => r.id), - payload: payloadPacket.data, - payloadType: payloadPacket.dataType, - options, - batchVersion: "v3", - oneTimeUseToken: options.oneTimeUseToken, - }, + const batch = await this.runStore.createBatchTaskRun({ + id: batchInternalId, + friendlyId: batchId, + runtimeEnvironmentId: environment.id, + idempotencyKey: options.idempotencyKey, + idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, + dependentTaskAttemptId: dependentAttempt?.id, + runCount: runs.length, + runIds: runs.map((r) => r.id), + payload: payloadPacket.data, + payloadType: payloadPacket.dataType, + options, + batchVersion: "v3", + oneTimeUseToken: options.oneTimeUseToken, }); const result = await this.#processBatchTaskRunItems( @@ -466,42 +533,40 @@ export class BatchTriggerV3Service extends BaseService { error: result.error, }); - await this._prisma.batchTaskRun.update({ - where: { - id: batch.id, - }, + await this.runStore.updateBatchTaskRun({ + where: { id: batch.id }, data: { status: "ABORTED", completedAt: new Date(), }, + select: { id: true }, }); throw result.error; } - // Update the batch to be sealed - await 
this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { sealed: true, sealedAt: new Date() }, + select: { id: true }, }); return batch; } else { - const batch = await this._prisma.batchTaskRun.create({ - data: { - friendlyId: batchId, - runtimeEnvironmentId: environment.id, - idempotencyKey: options.idempotencyKey, - idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, - dependentTaskAttemptId: dependentAttempt?.id, - runCount: body.items.length, - runIds: runs.map((r) => r.id), - payload: payloadPacket.data, - payloadType: payloadPacket.dataType, - options, - batchVersion: "v3", - oneTimeUseToken: options.oneTimeUseToken, - }, + const batch = await this.runStore.createBatchTaskRun({ + id: batchInternalId, + friendlyId: batchId, + runtimeEnvironmentId: environment.id, + idempotencyKey: options.idempotencyKey, + idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, + dependentTaskAttemptId: dependentAttempt?.id, + runCount: body.items.length, + runIds: runs.map((r) => r.id), + payload: payloadPacket.data, + payloadType: payloadPacket.dataType, + options, + batchVersion: "v3", + oneTimeUseToken: options.oneTimeUseToken, }); switch (this._batchProcessingStrategy) { @@ -524,11 +589,12 @@ export class BatchTriggerV3Service extends BaseService { count: PROCESSING_BATCH_SIZE, })); - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { processingJobsExpectedCount: ranges.length, }, + select: { id: true }, }); await Promise.all( @@ -594,33 +660,30 @@ export class BatchTriggerV3Service extends BaseService { const $attemptCount = options.attemptCount + 1; - // Add early return if max attempts reached if ($attemptCount > MAX_ATTEMPTS) { logger.error("[BatchTriggerV2][processBatchTaskRun] Max attempts reached", { options, attemptCount: $attemptCount, }); - // You might want to update the batch status to failed here return; } - const batch = await 
this._prisma.batchTaskRun.findFirst({ - where: { id: options.batchId }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - }, - }, - }, - }); + const batch = await this.runStore.findBatchTaskRunById(options.batchId); if (!batch) { return; } - // Check to make sure the currentIndex is not greater than the runCount + // BatchTaskRun -> RuntimeEnvironment FK is dropped; resolve the env from the scalar id. + const environment = await findEnvironmentById(batch.runtimeEnvironmentId); + if (!environment) { + logger.error("[BatchTriggerV2][processBatchTaskRun] Environment not found", { + batchId: batch.id, + runtimeEnvironmentId: batch.runtimeEnvironmentId, + }); + return; + } + if (options.range.start >= batch.runCount) { logger.debug("[BatchTriggerV2][processBatchTaskRun] currentIndex is greater than runCount", { options, @@ -638,7 +701,7 @@ export class BatchTriggerV3Service extends BaseService { data: batch.payload ?? undefined, dataType: batch.payloadType, }, - batch.runtimeEnvironment + environment ); const payload = await parsePacket(payloadPacket); @@ -659,7 +722,7 @@ export class BatchTriggerV3Service extends BaseService { const result = await this.#processBatchTaskRunItems( batch, - batch.runtimeEnvironment, + environment, options.range.start, options.range.count, $payload, @@ -695,12 +758,12 @@ export class BatchTriggerV3Service extends BaseService { switch (options.strategy) { case "sequential": { - // We can tell if we are done by checking if the result.workingIndex is equal or greater than the runCount + // Done once we've walked past the last item in the batch if (result.workingIndex >= batch.runCount) { - // Update the batch to be sealed - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { sealed: true, sealedAt: new Date() }, + select: { id: true }, }); logger.debug("[BatchTriggerV2][processBatchTaskRun] Batch processing complete", { @@ -710,7 +773,6 
@@ export class BatchTriggerV3Service extends BaseService { attemptCount: $attemptCount, }); } else { - // Requeue the next batch of processing await this.#enqueueBatchTaskRun({ batchId: batch.id, processingId: options.processingId, @@ -726,9 +788,9 @@ export class BatchTriggerV3Service extends BaseService { break; } case "parallel": { - // We need to increment the processingJobsCount and check if we are done + // Each processing job increments the count; the last one to arrive seals the batch const { processingJobsCount, processingJobsExpectedCount } = - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { processingJobsCount: { @@ -742,10 +804,10 @@ export class BatchTriggerV3Service extends BaseService { }); if (processingJobsCount >= processingJobsExpectedCount) { - // Update the batch to be sealed - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { sealed: true, sealedAt: new Date() }, + select: { id: true }, }); logger.debug("[BatchTriggerV2][processBatchTaskRun] Batch processing complete", { @@ -766,7 +828,6 @@ export class BatchTriggerV3Service extends BaseService { items: BatchTriggerTaskV2RequestBody["items"], options?: BatchTriggerTaskServiceOptions ): Promise<{ workingIndex: number; error?: Error }> { - // Grab the next PROCESSING_BATCH_SIZE runIds const runIds = batch.runIds.slice(currentIndex, currentIndex + batchSize); logger.debug("[BatchTriggerV2][processBatchTaskRun] Processing batch items", { @@ -776,7 +837,7 @@ export class BatchTriggerV3Service extends BaseService { runCount: batch.runCount, }); - // Combine the "window" between currentIndex and currentIndex + PROCESSING_BATCH_SIZE with the runId and the item in the payload which is an array + // Pair each runId in this window with its item from the payload array const itemsToProcess = runIds.map((runId, index) => ({ runId, item: items[index + currentIndex], @@ 
-815,13 +876,14 @@ export class BatchTriggerV3Service extends BaseService { } if (expectedCount > 0) { - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { expectedCount: { increment: expectedCount, }, }, + select: { id: true }, }); } @@ -873,12 +935,10 @@ export class BatchTriggerV3Service extends BaseService { if (!result.isCached) { try { - await this._prisma.batchTaskRunItem.create({ - data: { - batchTaskRunId: batch.id, - taskRunId: result.run.id, - status: batchTaskRunItemStatusForRunStatus(result.run.status), - }, + await this.runStore.createBatchTaskRunItem({ + batchTaskRunId: batch.id, + taskRunId: result.run.id, + status: batchTaskRunItemStatusForRunStatus(result.run.status), }); return true; @@ -953,18 +1013,12 @@ export class BatchTriggerV3Service extends BaseService { export async function tryCompleteBatchV3( batchId: string, tx: PrismaClientOrTransaction, - scheduleResumeOnComplete: boolean + scheduleResumeOnComplete: boolean, + // Threaded in so a ksuid (NEW-resident) batch + its items are read/written on the owning + // store, not the control-plane `tx`. Defaults to the singleton (single-DB = passthrough). 
+ runStore: RunStore = defaultRunStore ) { - const batch = await tx.batchTaskRun.findFirst({ - where: { id: batchId }, - select: { - id: true, - sealed: true, - status: true, - expectedCount: true, - dependentTaskAttemptId: true, - }, - }); + const batch = await runStore.findBatchTaskRunById(batchId); if (!batch) { logger.debug("tryCompleteBatchV3: Batch not found", { batchId }); @@ -981,9 +1035,9 @@ export async function tryCompleteBatchV3( return; } - // Count completed items (read-only, no contention) - const completedCount = await tx.batchTaskRunItem.count({ - where: { batchTaskRunId: batchId, status: "COMPLETED" }, + const completedCount = await runStore.countBatchTaskRunItems({ + batchTaskRunId: batchId, + status: "COMPLETED", }); if (completedCount < batch.expectedCount) { @@ -996,7 +1050,7 @@ export async function tryCompleteBatchV3( } // Mark batch COMPLETED (idempotent via status check) - const updated = await tx.batchTaskRun.updateMany({ + const updated = await runStore.updateManyBatchTaskRun({ where: { id: batchId, status: "PENDING" }, data: { status: "COMPLETED", completedAt: new Date(), completedCount }, }); @@ -1019,7 +1073,10 @@ export async function completeBatchTaskRunItemV3( tx: PrismaClientOrTransaction, scheduleResumeOnComplete = false, taskRunAttemptId?: string, - retryAttempt?: number + retryAttempt?: number, + // Threaded in so a ksuid (NEW-resident) batch's item lands on the owning store; route by + // batchTaskRunId (items co-reside with their batch). Defaults to the singleton. + runStore: RunStore = defaultRunStore ) { const isRetry = retryAttempt !== undefined; @@ -1033,9 +1090,10 @@ export async function completeBatchTaskRunItemV3( }); try { - // Update item to COMPLETED (no transaction needed, no contention) - const updated = await tx.batchTaskRunItem.updateMany({ - where: { id: itemId, status: "PENDING" }, + // Update item to COMPLETED (no transaction needed, no contention). 
Routed by + // batchTaskRunId so the item write lands on the batch's owning DB. + const updated = await runStore.updateManyBatchTaskRunItems({ + where: { id: itemId, batchTaskRunId, status: "PENDING" }, data: { status: "COMPLETED", taskRunAttemptId }, }); diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts new file mode 100644 index 00000000000..0bb3291f633 --- /dev/null +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts @@ -0,0 +1,125 @@ +// Real PG14 (legacy replica) + PG17 (new) proof for the bulk batch read-through adapter. +// We NEVER mock the DB: each closure runs a real `$queryRaw` against the passed container +// (crossing the actual PG14↔PG17 boundary) then filters an in-memory seeded set by id — +// mirroring readThrough.server.test.ts's `realRead`. The only injected fakes are the pure +// boundaries the plan allows (`isKnownMigrated`) plus throwing spies asserting a store was +// NEVER touched. +import { heteroPostgresTest } from "@internal/testcontainers"; +import { describe, expect, vi } from "vitest"; +import type { PrismaReplicaClient } from "~/db.server"; +import { hydrateRunsAcrossSeam } from "./BulkActionV2.batchReadThrough.server"; + +vi.setConfig({ testTimeout: 60_000 }); + +// 25-char cuid body → LEGACY residency. 27-char body → NEW residency. +const LEGACY_RUN_ID = "run_" + "a".repeat(25); +const NEW_RUN_ID = "run_" + "b".repeat(27); + +type Row = { id: string }; + +// Real read against the given container, then return rows for the ids present in `present`. 
+async function realReadFiltered( + client: PrismaReplicaClient, + ids: string[], + present: Set +): Promise { + await client.$queryRaw<{ marker: number }[]>`SELECT 1 AS marker`; + return ids.filter((id) => present.has(id)).map((id) => ({ id })); +} + +describe("hydrateRunsAcrossSeam (PG14 legacy replica + PG17 new)", () => { + heteroPostgresTest( + "(a) mixed page: NEW id from new, LEGACY id from legacy replica; new id never hits legacy", + async ({ prisma14, prisma17 }) => { + const onNew = new Set([NEW_RUN_ID]); + const onLegacy = new Set([LEGACY_RUN_ID]); + + const readLegacyReplica = vi.fn( + async (replica: PrismaReplicaClient, ids: string[]): Promise => { + if (ids.includes(NEW_RUN_ID)) { + throw new Error("legacy replica must never be probed for a NEW-residency id"); + } + return realReadFiltered(replica, ids, onLegacy); + } + ); + + const rows = await hydrateRunsAcrossSeam({ + runIds: [NEW_RUN_ID, LEGACY_RUN_ID], + readNew: (client, ids) => realReadFiltered(client, ids, onNew), + readLegacyReplica, + deps: { + splitEnabled: true, + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + isKnownMigrated: async () => false, + }, + }); + + const ids = rows.map((r) => r.id).sort(); + expect(ids).toEqual([LEGACY_RUN_ID, NEW_RUN_ID].sort()); + expect(readLegacyReplica).toHaveBeenCalledTimes(1); + // legacy was only probed for the legacy id + expect(readLegacyReplica.mock.calls[0][1]).toEqual([LEGACY_RUN_ID]); + } + ); + + heteroPostgresTest( + "(b) known-migrated short-circuit: legacy-classified id missed by new is not probed and is omitted", + async ({ prisma14, prisma17 }) => { + const onNew = new Set(); // new misses it + const throwingLegacy = vi.fn(async (): Promise => { + throw new Error("readLegacyReplica must never run for a known-migrated id"); + }); + + const rows = await hydrateRunsAcrossSeam({ + runIds: [LEGACY_RUN_ID], + readNew: (client, ids) => realReadFiltered(client, ids, onNew), + 
readLegacyReplica: throwingLegacy, + deps: { + splitEnabled: true, + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + isKnownMigrated: async () => true, + }, + }); + + expect(rows).toEqual([]); + expect(throwingLegacy).not.toHaveBeenCalled(); + } + ); + + heteroPostgresTest( + "(c) passthrough: splitEnabled false reads only the single client; legacy + filter never touched", + async ({ prisma14, prisma17 }) => { + const onNew = new Set([NEW_RUN_ID, LEGACY_RUN_ID]); + const throwingLegacy = vi.fn(async (): Promise => { + throw new Error("readLegacyReplica must never run in single-DB mode"); + }); + const throwingFilter = vi.fn(async (): Promise => { + throw new Error("isKnownMigrated must never run in single-DB mode"); + }); + const readNew = vi.fn((client: PrismaReplicaClient, ids: string[]) => + realReadFiltered(client, ids, onNew) + ); + + const rows = await hydrateRunsAcrossSeam({ + runIds: [NEW_RUN_ID, LEGACY_RUN_ID], + readNew, + readLegacyReplica: throwingLegacy, + deps: { + splitEnabled: false, + // single collapsed store (use prisma17 here as the "new"/primary analog) + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + isKnownMigrated: throwingFilter, + }, + }); + + const ids = rows.map((r) => r.id).sort(); + expect(ids).toEqual([LEGACY_RUN_ID, NEW_RUN_ID].sort()); + expect(readNew).toHaveBeenCalledTimes(1); + expect(throwingLegacy).not.toHaveBeenCalled(); + expect(throwingFilter).not.toHaveBeenCalled(); + } + ); +}); diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts new file mode 100644 index 00000000000..53ff0e1304d --- /dev/null +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts @@ -0,0 +1,119 @@ +/** + * Batch adapter over the per-id `readThroughRun` (see + * 
`~/v3/runOpsMigration/readThrough.server.ts`). A bulk action processes a PAGE of + * member run ids at once, so instead of N per-id round trips this reproduces the + * per-id read-through ordering as SET reads: + * + * 1. single-DB passthrough (splitEnabled === false): ONE read against the collapsed + * store, no residency classification, no legacy probe, no known-migrated filter. + * 2. split on: classify each id's residency via `ownerEngine`, read NEW for every id + * that could be on new (residency NEW *and* legacy-candidates — read-through is + * new-FIRST for legacy too), apply the known-migrated short-circuit only to + * legacy-candidates the new read missed, then probe the LEGACY READ REPLICA ONLY + * for whatever remains. + * + * Like the per-id layer this NEVER touches a legacy primary/writer — there is no such + * handle. An id is read from new OR legacy, never both: legacy is only probed for ids + * new missed AND that are not known-migrated, so the returned set needs no dedupe. + */ +import type { PrismaReplicaClient } from "~/db.server"; +import { + runOpsLegacyReplica as defaultLegacyReplica, + runOpsNewReplica as defaultNewClient, +} from "~/db.server"; +import { ownerEngine, UnclassifiableRunId } from "@trigger.dev/core/v3/isomorphic"; +import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; + +export type SeamReadDeps = { + /** + * Resolved boot constant. REQUIRED here — the caller resolves it once per + * request via `isSplitEnabled()`; this adapter never awaits it itself. 
+ */ + splitEnabled: boolean; + isKnownMigrated?: (runId: string) => Promise<boolean>; + newClient?: PrismaReplicaClient; + legacyReplica?: PrismaReplicaClient; + logger?: { warn: (m: string, meta?: unknown) => void }; +}; + +type HydrateRunsAcrossSeamInput<T> = { + runIds: string[]; + readNew: (client: PrismaReplicaClient, ids: string[]) => Promise<T[]>; + readLegacyReplica: (replica: PrismaReplicaClient, ids: string[]) => Promise<T[]>; + deps: SeamReadDeps; +}; + +/** Every row shape we hydrate carries an `id` (CANCEL select includes it; REPLAY is a full row). */ +function getId(row: unknown): string { + return (row as { id: string }).id; +} + +export async function hydrateRunsAcrossSeam<T>( + input: HydrateRunsAcrossSeamInput<T> +): Promise<T[]> { + const { runIds, deps } = input; + + if (runIds.length === 0) { + return []; + } + + const newClient = deps.newClient ?? defaultNewClient; + + // Passthrough: one plain read against the single collapsed store. No residency + // classification, no legacy probe, no known-migrated filter, no second connection. + // When the caller passes its own `_replica` as `newClient`, this is byte-identical to + // the pre-migration single-DB read. + if (deps.splitEnabled === false) { + return input.readNew(newClient, runIds); + } + + // Split is on. Classify residency; unclassifiable → LEGACY (probe rather than drop). 
+ const newIds: string[] = []; + const legacyCandidateIds: string[] = []; + for (const runId of runIds) { + let residency: "LEGACY" | "NEW"; + try { + residency = ownerEngine(runId); + } catch (e) { + if (e instanceof UnclassifiableRunId) { + deps.logger?.warn("hydrateRunsAcrossSeam: UnclassifiableRunId, treating as LEGACY", { + runId, + valueLength: e.valueLength, + }); + residency = "LEGACY"; + } else { + throw e; + } + } + if (residency === "NEW") { + newIds.push(runId); + } else { + legacyCandidateIds.push(runId); + } + } + + // Read NEW for everything that could be on new — NEW-residency ids AND legacy-candidates + // (read-through is new-FIRST for legacy too) — in one read. + const legacyReplica = deps.legacyReplica ?? defaultLegacyReplica; + const newRows = await input.readNew(newClient, [...newIds, ...legacyCandidateIds]); + const foundOnNew = new Set(newRows.map(getId)); + + // Legacy-candidates the new read missed: apply the known-migrated short-circuit. A + // known-migrated id lives on new but the read missed it (lag) — drop it, do NOT probe + // legacy. Run the checks concurrently, and only for these missed candidates. + const isMigrated = deps.isKnownMigrated ?? defaultIsKnownMigrated; + const missedCandidates = legacyCandidateIds.filter((id) => !foundOnNew.has(id)); + const migratedFlags = await Promise.all(missedCandidates.map((id) => isMigrated(id))); + const legacyToProbe = missedCandidates.filter((_, i) => !migratedFlags[i]); + + // Legacy READ REPLICA only — never a legacy writer/primary (no such handle exists). + // A member absent from both DBs is simply not hydrated (matching today's `findMany`, + // where a missing id yields no row). + let legacyRows: T[] = []; + if (legacyToProbe.length > 0) { + legacyRows = await input.readLegacyReplica(legacyReplica, legacyToProbe); + } + + // Order within the page is irrelevant (downstream pMap does not depend on it). 
+ return [...newRows, ...legacyRows]; +} diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts index d03ab71796f..cb5c7cf0b98 100644 --- a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts @@ -25,6 +25,12 @@ import parseDuration from "parse-duration"; import { v3BulkActionPath } from "~/utils/pathBuilder"; import { formatDateTime } from "~/components/primitives/DateTime"; import pMap from "p-map"; +import { type PrismaReplicaClient } from "~/db.server"; +import { isSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; +import { + hydrateRunsAcrossSeam, + type SeamReadDeps, +} from "./BulkActionV2.batchReadThrough.server"; export type ProcessToCompletionOptions = { /** Absolute timestamp (ms) after which processing stops and returns incomplete. */ @@ -36,6 +42,20 @@ export type ProcessToCompletionResult = { }; export class BulkActionService extends BaseService { + #splitEnabledPromise?: Promise<boolean>; + + // Resolves split mode once per service instance and returns the read-through deps for + // bulk member hydration. Single-DB: read through the service replica (byte-identical to + // the pre-migration read). Split: adapter defaults to run-ops new + legacy read replica. + async #seamReadDeps(): Promise<SeamReadDeps> { + this.#splitEnabledPromise ??= isSplitEnabled(); + const splitEnabled = await this.#splitEnabledPromise; + return { + splitEnabled, + newClient: splitEnabled ? 
undefined : (this._replica as unknown as PrismaReplicaClient), + }; + } + public async create( organizationId: string, projectId: string, @@ -218,7 +238,6 @@ export class BulkActionService extends BaseService { prisma: this._replica as PrismaClient, }); - // In the future we can support multiple query names, when we make changes if (group.queryName !== "bulk_action_v1") { throw new Error(`Bulk action group has invalid query name: ${group.queryName}`); } @@ -246,25 +265,37 @@ export class BulkActionService extends BaseService { case BulkActionType.CANCEL: { const cancelService = new CancelTaskRunService(this._prisma); - const runs = await this.runStore.findRuns( - { - where: { - id: { - in: runIdsToProcess, + const seamDeps = await this.#seamReadDeps(); + const runs = await hydrateRunsAcrossSeam({ + runIds: runIdsToProcess, + readNew: (client, ids) => + client.taskRun.findMany({ + where: { id: { in: ids } }, + select: { + id: true, + engine: true, + friendlyId: true, + status: true, + createdAt: true, + completedAt: true, + taskEventStore: true, }, - }, - select: { - id: true, - engine: true, - friendlyId: true, - status: true, - createdAt: true, - completedAt: true, - taskEventStore: true, - }, - }, - this._replica - ); + }), + readLegacyReplica: (replica, ids) => + replica.taskRun.findMany({ + where: { id: { in: ids } }, + select: { + id: true, + engine: true, + friendlyId: true, + status: true, + createdAt: true, + completedAt: true, + taskEventStore: true, + }, + }), + deps: seamDeps, + }); await pMap( runs, @@ -300,16 +331,14 @@ export class BulkActionService extends BaseService { case BulkActionType.REPLAY: { const replayService = new ReplayTaskRunService(this._prisma); - const runs = await this.runStore.findRuns( - { - where: { - id: { - in: runIdsToProcess, - }, - }, - }, - this._replica - ); + const seamDeps = await this.#seamReadDeps(); + const runs = await hydrateRunsAcrossSeam({ + runIds: runIdsToProcess, + readNew: (client, ids) => 
client.taskRun.findMany({ where: { id: { in: ids } } }), + readLegacyReplica: (replica, ids) => + replica.taskRun.findMany({ where: { id: { in: ids } } }), + deps: seamDeps, + }); await pMap( runs, diff --git a/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts b/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts index c1562275e58..39271889b56 100644 --- a/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts +++ b/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts @@ -1,9 +1,12 @@ +import { type RunStore } from "@internal/run-store"; import { z } from "zod"; +import { type PrismaClientOrTransaction } from "~/db.server"; import { findLatestSession } from "~/models/runtimeEnvironment.server"; import { logger } from "~/services/logger.server"; import { commonWorker } from "../commonWorker.server"; +import { type ReadThroughDeps, readThroughRun } from "../runOpsMigration/readThrough.server"; import { BaseService } from "./baseService.server"; -import { CancelTaskRunService } from "./cancelTaskRun.server"; +import { type CancelableTaskRun, CancelTaskRunService } from "./cancelTaskRun.server"; export const CancelDevSessionRunsServiceOptions = z.object({ runIds: z.array(z.string()), @@ -15,6 +18,23 @@ export const CancelDevSessionRunsServiceOptions = z.object({ export type CancelDevSessionRunsServiceOptions = z.infer<typeof CancelDevSessionRunsServiceOptions>; export class CancelDevSessionRunsService extends BaseService { + // Injectable read-through deps for the run-ops TaskRun read. Undefined in production: + // readThroughRun then uses its ~/db.server singleton handles and the boot split flag, + // so single-DB is unchanged. Tests inject the hetero new/legacy handles + splitEnabled. 
+ readonly #readThroughDeps?: ReadThroughDeps; + + constructor( + opts: { + prisma?: PrismaClientOrTransaction; + replica?: PrismaClientOrTransaction; + runStore?: RunStore; + readThroughDeps?: ReadThroughDeps; + } = {} + ) { + super(opts.prisma, opts.replica, opts.runStore); + this.#readThroughDeps = opts.readThroughDeps; + } + public async call(options: CancelDevSessionRunsServiceOptions) { const cancelledSession = options.cancelledSessionId ? await this._prisma.runtimeEnvironmentSession.findFirst({ @@ -49,12 +69,17 @@ export class CancelDevSessionRunsService extends BaseService { const cancelTaskRunService = new CancelTaskRunService(); + // readThroughRun resolves residency from the run id alone; an env scope is only + // available when a cancelled session was resolved. + const environmentId = cancelledSession?.environmentId ?? ""; + for (const runId of options.runIds) { await this.#cancelInProgressRun( runId, cancelTaskRunService, options.cancelledAt, - options.reason + options.reason, + environmentId ); } } @@ -63,18 +88,53 @@ export class CancelDevSessionRunsService extends BaseService { runId: string, service: CancelTaskRunService, cancelledAt: Date, - reason: string + reason: string, + environmentId: string ) { logger.debug("Cancelling in progress run", { runId }); - const taskRun = runId.startsWith("run_") - ? await this.runStore.findRun({ friendlyId: runId }, this._prisma) - : await this.runStore.findRun({ id: runId }, this._prisma); + // Read-through: new store first, legacy read replica for an old + // in-retention run; single plain read in single-DB passthrough. + const where = runId.startsWith("run_") ? 
{ friendlyId: runId } : { id: runId }; + + const result = await readThroughRun({ + runId, + environmentId, + readNew: (client) => + client.taskRun.findFirst({ + where, + select: { + id: true, + engine: true, + status: true, + friendlyId: true, + taskEventStore: true, + createdAt: true, + completedAt: true, + }, + }), + readLegacy: (replica) => + replica.taskRun.findFirst({ + where, + select: { + id: true, + engine: true, + status: true, + friendlyId: true, + taskEventStore: true, + createdAt: true, + completedAt: true, + }, + }), + deps: this.#readThroughDeps, + }); - if (!taskRun) { + if (result.source === "not-found" || result.source === "past-retention") { return; } + const taskRun = result.value; + try { await service.call(taskRun, { reason, cancelAttempts: true, cancelledAt }); } catch (e) { diff --git a/apps/webapp/app/v3/services/createCheckpoint.server.ts b/apps/webapp/app/v3/services/createCheckpoint.server.ts index e9e4e3d5560..43ff25f2053 100644 --- a/apps/webapp/app/v3/services/createCheckpoint.server.ts +++ b/apps/webapp/app/v3/services/createCheckpoint.server.ts @@ -146,14 +146,12 @@ export class CreateCheckpointService extends BaseService { break; } case "WAIT_FOR_BATCH": { - const batchRun = await this._prisma.batchTaskRun.findFirst({ - where: { - friendlyId: reason.batchFriendlyId, - }, - select: { - resumedAt: true, - }, - }); + // Routed by friendlyId so a ksuid (NEW-resident) batch is found on the owning DB; + // env-scoped to the dependent attempt's run (a batch shares its dependent's env). 
+ const batchRun = await this.runStore.findBatchTaskRunByFriendlyId( + reason.batchFriendlyId, + attempt.taskRun.runtimeEnvironmentId + ); if (!batchRun) { logger.error("CreateCheckpointService: Pre-check - Batch not found", { @@ -363,15 +361,12 @@ export class CreateCheckpointService extends BaseService { }); await marqs?.cancelHeartbeat(attempt.taskRunId); - const batchRun = await this._prisma.batchTaskRun.findFirst({ - select: { - id: true, - batchVersion: true, - }, - where: { - friendlyId: reason.batchFriendlyId, - }, - }); + // Routed by friendlyId so a ksuid (NEW-resident) batch is found on the owning DB; + // env-scoped to the dependent attempt's run (a batch shares its dependent's env). + const batchRun = await this.runStore.findBatchTaskRunByFriendlyId( + reason.batchFriendlyId, + attempt.taskRun.runtimeEnvironmentId + ); if (!batchRun) { logger.error("CreateCheckpointService: Batch not found", { diff --git a/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts b/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts index cb66166d104..094e75c9a11 100644 --- a/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts +++ b/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts @@ -15,6 +15,7 @@ import { BaseService, ServiceValidationError } from "./baseService.server"; import { CrashTaskRunService } from "./crashTaskRun.server"; import { ExpireEnqueuedRunService } from "./expireEnqueuedRun.server"; import { runStore } from "../runStore.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; export class CreateTaskRunAttemptService extends BaseService { public async call({ @@ -62,19 +63,6 @@ export class CreateTaskRunAttemptService extends BaseService { number: "desc", }, }, - lockedBy: { - include: { - worker: { - select: { - id: true, - version: true, - sdkVersion: true, - cliVersion: true, - supportsLazyAttempts: true, - }, - }, - }, - }, batchItems: { include: { batchTaskRun: { @@ -108,18 +96,16 
@@ export class CreateTaskRunAttemptService extends BaseService { throw new ServiceValidationError("Task run is already finished", 400); } - const lockedBy = taskRun.lockedBy; + const lockedWorker = await controlPlaneResolver.resolveRunLockedWorker({ + lockedById: taskRun.lockedById, + }); + const lockedBy = lockedWorker?.lockedBy; if (!lockedBy) { throw new ServiceValidationError("Task run is not locked", 400); } - const queue = await findQueueInEnvironment( - taskRun.queue, - environment.id, - lockedBy.id, - lockedBy - ); + const queue = await findQueueInEnvironment(taskRun.queue, environment.id, lockedBy.id); if (!queue) { throw new ServiceValidationError("Queue not found", 404); @@ -275,13 +261,8 @@ async function getAuthenticatedEnvironmentFromRun( friendlyId: isFriendlyId ? friendlyId : undefined, }, { - include: { - runtimeEnvironment: { - include: { - organization: true, - project: true, - }, - }, + select: { + runtimeEnvironmentId: true, }, }, prismaClient ?? prisma @@ -291,5 +272,7 @@ async function getAuthenticatedEnvironmentFromRun( return; } - return taskRun?.runtimeEnvironment; + return ( + (await controlPlaneResolver.resolveAuthenticatedEnv(taskRun.runtimeEnvironmentId)) ?? 
undefined + ); } diff --git a/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts b/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts index 2e9d86916c0..9b78622a057 100644 --- a/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts +++ b/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts @@ -5,6 +5,7 @@ import { commonWorker } from "../commonWorker.server"; import { BaseService } from "./baseService.server"; import { enqueueRun } from "./enqueueRun.server"; import { ExpireEnqueuedRunService } from "./expireEnqueuedRun.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; import { isV3Disabled } from "../engineDeprecation.server"; export class EnqueueDelayedRunService extends BaseService { @@ -39,12 +40,6 @@ export class EnqueueDelayedRunService extends BaseService { }, { include: { - runtimeEnvironment: { - include: { - organization: true, - project: true, - }, - }, dependency: { include: { dependentBatchRun: { @@ -82,6 +77,13 @@ export class EnqueueDelayedRunService extends BaseService { return; } + const env = await controlPlaneResolver.resolveAuthenticatedEnv(run.runtimeEnvironmentId); + + if (!env) { + logger.debug("EnqueueDelayedRunService: environment not found", { runId }); + return; + } + if (run.status !== "DELAYED") { logger.debug("Delayed run cannot be enqueued because it's not in DELAYED status", { run, @@ -109,7 +111,7 @@ export class EnqueueDelayedRunService extends BaseService { } await enqueueRun({ - env: run.runtimeEnvironment, + env, run: run, dependentRun: run.dependency?.dependentAttempt?.taskRun ?? 
diff --git a/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts b/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts index b6c25db43ed..ea4c24514f3 100644 --- a/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts +++ b/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts @@ -1,3 +1,4 @@ +import { isClassifiable, ownerEngine } from "@trigger.dev/core/v3/isomorphic"; import { env } from "~/env.server"; import { logger } from "~/services/logger.server"; import { marqs } from "~/v3/marqs/index.server"; @@ -69,11 +70,27 @@ export class ExecuteTasksWaitingForDeployService extends BaseService { return; } - // Clear any runs awaiting deployment for execution + // NEW-1 defense-in-depth: the open-predicate findRuns fan-out can select runs from + // either DB, but the status flip below is a single control-plane updateMany. A + // ksuid (NEW-resident) run can only reach WAITING_FOR_DEPLOY via a misconfiguration + // (it is a V1/cuid-only status — V2 uses PENDING_VERSION). Surface it loudly rather + // than silently strand the run, and only mutate the LEGACY-resident runs the + // control-plane client can actually reach. 
+ const newResidentRuns = runsWaitingForDeploy.filter( + (run) => isClassifiable(run.id) && ownerEngine(run.id) === "NEW" + ); + if (newResidentRuns.length) { + logger.error( + "WAITING_FOR_DEPLOY selected NEW-resident runs; skipping their control-plane status flip", + { runIds: newResidentRuns.map((run) => run.id) } + ); + } + const legacyRuns = runsWaitingForDeploy.filter((run) => !newResidentRuns.includes(run)); + const pendingRuns = await this._prisma.taskRun.updateMany({ where: { id: { - in: runsWaitingForDeploy.map((run) => run.id), + in: legacyRuns.map((run) => run.id), }, }, data: { @@ -83,12 +100,14 @@ export class ExecuteTasksWaitingForDeployService extends BaseService { if (pendingRuns.count) { logger.debug("Task runs waiting for deploy are now ready for execution", { - tasks: runsWaitingForDeploy.map((run) => run.id), + tasks: legacyRuns.map((run) => run.id), total: pendingRuns.count, }); } - for (const run of runsWaitingForDeploy) { + // Only enqueue the runs whose status was actually flipped (the legacy set) — never + // marqs-enqueue a NEW-resident run we couldn't transition out of WAITING_FOR_DEPLOY. 
+ for (const run of legacyRuns) { await marqs?.enqueueMessage( backgroundWorker.runtimeEnvironment, run.queue, diff --git a/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts b/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts index 809c1bb49e3..06b4db57ec4 100644 --- a/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts +++ b/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts @@ -5,6 +5,7 @@ import { BaseService } from "./baseService.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; import { tryCatch } from "@trigger.dev/core/utils"; import { getEventRepositoryForStore } from "../eventRepository/index.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; import { isV3Disabled } from "../engineDeprecation.server"; export class ExpireEnqueuedRunService extends BaseService { @@ -29,13 +30,24 @@ export class ExpireEnqueuedRunService extends BaseService { id: runId, }, { - include: { - runtimeEnvironment: { - include: { - organization: true, - project: true, - }, - }, + select: { + id: true, + status: true, + engine: true, + lockedAt: true, + ttl: true, + taskEventStore: true, + runtimeEnvironmentId: true, + friendlyId: true, + traceId: true, + spanId: true, + parentSpanId: true, + createdAt: true, + completedAt: true, + taskIdentifier: true, + projectId: true, + organizationId: true, + isTest: true, }, }, this._prisma @@ -55,6 +67,13 @@ export class ExpireEnqueuedRunService extends BaseService { return; } + const env = await controlPlaneResolver.resolveEnv(run.runtimeEnvironmentId); + + if (!env) { + logger.debug("ExpireEnqueuedRunService: environment not found", { runId }); + return; + } + if (run.status !== "PENDING") { logger.debug("Run cannot be expired because it's not in PENDING status", { run, @@ -90,7 +109,7 @@ export class ExpireEnqueuedRunService extends BaseService { const eventRepository = await getEventRepositoryForStore( run.taskEventStore, - 
run.runtimeEnvironment.organization.id + env.organizationId ); if (run.ttl) { diff --git a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts index d6d35d0e4ca..1443a6b7a0d 100644 --- a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts +++ b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts @@ -19,6 +19,7 @@ import { completeBatchTaskRunItemV3 } from "./batchTriggerV3.server"; import { ExpireEnqueuedRunService } from "./expireEnqueuedRun.server"; import { ResumeBatchRunService } from "./resumeBatchRun.server"; import { ResumeDependentParentsService } from "./resumeDependentParents.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; type BaseInput = { id: string; @@ -92,10 +93,8 @@ export class FinalizeTaskRunService extends BaseService { } } - // I moved the error update here for two reasons: - // - A single update is more efficient than two - // - If the status updates to a final status, realtime will receive that status and then shut down the stream - // before the error is updated, which would cause the error to be lost + // Error is written in the same update as the status: a separate later write races realtime, + // which shuts the stream down on the final status before the error lands, losing it. const taskRunError = error ? 
sanitizeError(error) : undefined; const run = await this._prisma.taskRun.update({ @@ -131,7 +130,6 @@ export class FinalizeTaskRunService extends BaseService { }); } - //resume any dependencies const resumeService = new ResumeDependentParentsService(this._prisma); const result = await resumeService.call({ id: run.id }); @@ -144,7 +142,6 @@ export class FinalizeTaskRunService extends BaseService { }); } - //enqueue alert if (isFailedRunStatus(run.status)) { await PerformTaskRunAlertsService.enqueue(run.id); } @@ -157,22 +154,23 @@ export class FinalizeTaskRunService extends BaseService { { select: { id: true, - lockedToVersion: { - select: { - supportsLazyAttempts: true, - }, - }, - runtimeEnvironment: { - select: { - type: true, - }, - }, + runtimeEnvironmentId: true, + lockedToVersionId: true, }, }, this._prisma ); - if (extendedRun && extendedRun.runtimeEnvironment.type !== "DEVELOPMENT") { + const extendedEnv = extendedRun + ? await controlPlaneResolver.resolveEnv(extendedRun.runtimeEnvironmentId) + : null; + const extendedLockedWorker = extendedRun + ? await controlPlaneResolver.resolveRunLockedWorker({ + lockedToVersionId: extendedRun.lockedToVersionId, + }) + : null; + + if (extendedRun && extendedEnv && extendedEnv.type !== "DEVELOPMENT") { logger.warn("FinalizeTaskRunService: Fatal status, requesting worker exit", { runId: run.id, status: run.status, @@ -183,7 +181,9 @@ export class FinalizeTaskRunService extends BaseService { version: "v1", runId: run.id, // Give the run a few seconds to exit to complete any flushing etc - delayInMs: extendedRun.lockedToVersion?.supportsLazyAttempts ? 5_000 : undefined, + delayInMs: extendedLockedWorker?.lockedToVersion?.supportsLazyAttempts + ? 
5_000 + : undefined, }); } } @@ -247,7 +247,6 @@ export class FinalizeTaskRunService extends BaseService { await completeBatchTaskRunItemV3(item.id, item.batchTaskRunId, this._prisma); } else { // THIS IS DEPRECATED and only happens with batchVersion != v3 - // Update the item to complete await this._prisma.batchTaskRunItem.update({ where: { id: item.id, diff --git a/apps/webapp/app/v3/services/resumeBatchRun.server.ts b/apps/webapp/app/v3/services/resumeBatchRun.server.ts index fb3e24d1340..81a59f8a073 100644 --- a/apps/webapp/app/v3/services/resumeBatchRun.server.ts +++ b/apps/webapp/app/v3/services/resumeBatchRun.server.ts @@ -3,34 +3,31 @@ import { commonWorker } from "../commonWorker.server"; import { marqs } from "~/v3/marqs/index.server"; import { BaseService } from "./baseService.server"; import { logger } from "~/services/logger.server"; -import type { BatchTaskRun } from "@trigger.dev/database"; +import type { BatchTaskRun, Prisma } from "@trigger.dev/database"; +import { findEnvironmentById } from "~/models/runtimeEnvironment.server"; +import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { workerQueue } from "~/services/worker.server"; import { isV3Disabled } from "../engineDeprecation.server"; const finishedBatchRunStatuses = ["COMPLETED", "FAILED", "CANCELED"]; -type RetrieveBatchRunResult = NonNullable>>; +const BATCH_RUN_INCLUDE = { + items: { + select: { + status: true, + taskRunAttemptId: true, + }, + }, +} satisfies Prisma.BatchTaskRunInclude; + +type RetrieveBatchRunResult = Prisma.BatchTaskRunGetPayload<{ + include: typeof BATCH_RUN_INCLUDE; +}>; export class ResumeBatchRunService extends BaseService { public async call(batchRunId: string) { - const batchRun = await this._prisma.batchTaskRun.findFirst({ - where: { - id: batchRunId, - }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - }, - }, - items: { - select: { - status: true, - taskRunAttemptId: true, - }, - }, - }, + 
const batchRun = await this.runStore.findBatchTaskRunById(batchRunId, { + include: BATCH_RUN_INCLUDE, }); if (!batchRun) { @@ -44,8 +41,21 @@ export class ResumeBatchRunService extends BaseService { return "ERROR"; } + // BatchTaskRun -> RuntimeEnvironment FK is dropped; resolve the env from the scalar id. + const environment = await findEnvironmentById(batchRun.runtimeEnvironmentId); + if (!environment) { + logger.error("ResumeBatchRunService: Environment not found", { + batchRunId, + runtimeEnvironmentId: batchRun.runtimeEnvironmentId, + }); + + return "ERROR"; + } + // v3 (engine V1) shutdown: don't resume batches for abandoned V1 projects. v4 is unaffected. - if (isV3Disabled() && batchRun.runtimeEnvironment.project.engine === "V1") { + // The BatchTaskRun -> RuntimeEnvironment relation is dropped, so read the engine from the + // resolved environment's project rather than the unloaded batchRun.runtimeEnvironment relation. + if (isV3Disabled() && environment.project.engine === "V1") { logger.debug("[ResumeBatchRunService] Skipping resume for shut-down v3 batch", { batchRunId, }); @@ -53,13 +63,13 @@ export class ResumeBatchRunService extends BaseService { } if (batchRun.batchVersion === "v3") { - return await this.#handleV3BatchRun(batchRun); + return await this.#handleV3BatchRun(batchRun, environment); } else { - return await this.#handleLegacyBatchRun(batchRun); + return await this.#handleLegacyBatchRun(batchRun, environment); } } - async #handleV3BatchRun(batchRun: RetrieveBatchRunResult) { + async #handleV3BatchRun(batchRun: RetrieveBatchRunResult, environment: AuthenticatedEnvironment) { // V3 batch runs should already be complete by the time this is called if (batchRun.status !== "COMPLETED") { logger.debug("ResumeBatchRunService: Batch run is already completed", { @@ -82,10 +92,17 @@ export class ResumeBatchRunService extends BaseService { return "ERROR"; } - return await this.#handleDependentTaskAttempt(batchRun, batchRun.dependentTaskAttemptId); + 
return await this.#handleDependentTaskAttempt( + batchRun, + batchRun.dependentTaskAttemptId, + environment + ); } - async #handleLegacyBatchRun(batchRun: RetrieveBatchRunResult) { + async #handleLegacyBatchRun( + batchRun: RetrieveBatchRunResult, + environment: AuthenticatedEnvironment + ) { if (batchRun.status === "COMPLETED") { logger.debug("ResumeBatchRunService: Batch run is already completed", { batchRunId: batchRun.id, @@ -99,7 +116,6 @@ export class ResumeBatchRunService extends BaseService { } if (batchRun.batchVersion === "v2") { - // Make sure batchRun.items.length is equal to or greater than batchRun.runCount if (batchRun.items.length < batchRun.runCount) { logger.debug("ResumeBatchRunService: All items aren't yet completed [v2]", { batchRunId: batchRun.id, @@ -128,26 +144,32 @@ export class ResumeBatchRunService extends BaseService { } // If we are in development, or there is no dependent attempt, we can just mark the batch as completed and return - if (batchRun.runtimeEnvironment.type === "DEVELOPMENT" || !batchRun.dependentTaskAttemptId) { + if (environment.type === "DEVELOPMENT" || !batchRun.dependentTaskAttemptId) { // We need to update the batchRun status so we don't resume it again - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batchRun.id, }, data: { status: "COMPLETED", }, + select: { id: true }, }); return "COMPLETED"; } - return await this.#handleDependentTaskAttempt(batchRun, batchRun.dependentTaskAttemptId); + return await this.#handleDependentTaskAttempt( + batchRun, + batchRun.dependentTaskAttemptId, + environment + ); } async #handleDependentTaskAttempt( batchRun: RetrieveBatchRunResult, - dependentTaskAttemptId: string + dependentTaskAttemptId: string, + environment: AuthenticatedEnvironment ) { const dependentTaskAttempt = await this._prisma.taskRunAttempt.findFirst({ where: { @@ -179,7 +201,6 @@ export class ResumeBatchRunService extends BaseService { } // This batch has a dependent 
attempt and just finalized, we should resume that attempt - const environment = batchRun.runtimeEnvironment; const dependentRun = dependentTaskAttempt.taskRun; if (dependentTaskAttempt.status === "PAUSED" && batchRun.checkpointEventId) { @@ -298,7 +319,7 @@ export class ResumeBatchRunService extends BaseService { async #setBatchToResumedOnce(batchRun: BatchTaskRun) { // v3 batches don't use the status for deciding whether a batch has been resumed if (batchRun.batchVersion === "v3") { - const result = await this._prisma.batchTaskRun.updateMany({ + const result = await this.runStore.updateManyBatchTaskRun({ where: { id: batchRun.id, resumedAt: null, @@ -308,16 +329,14 @@ export class ResumeBatchRunService extends BaseService { }, }); - // Check if any records were updated if (result.count > 0) { - // The status was changed, so we return true return true; } else { return false; } } - const result = await this._prisma.batchTaskRun.updateMany({ + const result = await this.runStore.updateManyBatchTaskRun({ where: { id: batchRun.id, status: { @@ -329,9 +348,7 @@ export class ResumeBatchRunService extends BaseService { }, }); - // Check if any records were updated if (result.count > 0) { - // The status was changed, so we return true return true; } else { return false; @@ -381,24 +398,3 @@ export class ResumeBatchRunService extends BaseService { } } -async function retrieveBatchRun(id: string, prisma: PrismaClientOrTransaction) { - return await prisma.batchTaskRun.findFirst({ - where: { - id, - }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - }, - }, - items: { - select: { - status: true, - taskRunAttemptId: true, - }, - }, - }, - }); -} diff --git a/apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts b/apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts new file mode 100644 index 00000000000..2f193f4c3b4 --- /dev/null +++ b/apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts @@ -0,0 +1,148 @@ +import { 
describe, expect, it, vi } from "vitest"; + +// Module-level db wiring is imported transitively by the service file. The mint +// helper under test never touches the DB (it is driven with injected deps), so +// these empty singletons only satisfy the import graph — same boundary pattern +// as triggerTask.server.test.ts and runEngineBatchTriggerStoreRouting.test.ts. +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, + runOpsNewReplica: {}, + runOpsLegacyReplica: {}, +})); + +import { BatchId, generateKsuidId, ownerEngine, RunId } from "@trigger.dev/core/v3/isomorphic"; +import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { BatchTriggerV3Service } from "~/v3/services/batchTriggerV3.server"; + +vi.setConfig({ testTimeout: 60_000 }); + +const CUID_LEN = 25; +const KSUID_LEN = 27; + +// Minimal AuthenticatedEnvironment — only the fields the mint path reads +// (organizationId, id, organization.featureFlags) need to be real. A root batch +// (no parentRunId) with no ksuid override mints cuid, which is the env-default +// branch we assert on below. +function fakeEnv(): AuthenticatedEnvironment { + return { + id: "env_123", + organizationId: "org_123", + organization: { featureFlags: {} }, + } as unknown as AuthenticatedEnvironment; +} + +// Build the service with the two mint deps injected so the test drives both +// inheritance branches without the split-store infrastructure. resolveRunIdMintKind +// is forced to "cuid" (its production default when split is off / org not cut over), +// proving the CHILD branch overrides the env default purely from the parent's id-shape. 
+function buildService(isKnownMigrated: (id: string) => Promise) { + return new BatchTriggerV3Service( + undefined, + undefined, + {} as any, + {} as any, + isKnownMigrated, + async () => "cuid" + ); +} + +describe("BatchTriggerV3Service child-residency inheritance", () => { + it("a ksuid parent yields ksuid (NEW) child friendlyIds", async () => { + const service = buildService(async () => false); + const parentFriendlyId = RunId.toFriendlyId( + // 27-char ksuid internal id → NEW residency parent + "a".repeat(KSUID_LEN) + ); + expect(ownerEngine(RunId.fromFriendlyId(parentFriendlyId))).toBe("NEW"); + + const childFriendlyId = await (service as any).mintChildFriendlyId( + fakeEnv(), + parentFriendlyId + ); + + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(KSUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("NEW"); + }); + + it("a cuid parent yields cuid (LEGACY) child friendlyIds", async () => { + const service = buildService(async () => false); + const parentFriendlyId = RunId.generate().friendlyId; // cuid (25) → LEGACY parent + expect(ownerEngine(RunId.fromFriendlyId(parentFriendlyId))).toBe("LEGACY"); + + const childFriendlyId = await (service as any).mintChildFriendlyId( + fakeEnv(), + parentFriendlyId + ); + + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(CUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("LEGACY"); + }); + + it("a cuid-shaped but migrated parent yields ksuid (NEW) children", async () => { + // Parent is legacy by id-shape but already swept to the NEW DB: the migrated + // check wins and children are born ksuid/NEW (mirrors triggerTask resolveInheritedMintKind). 
+ const service = buildService(async () => true); + const parentFriendlyId = RunId.generate().friendlyId; // cuid shape + + const childFriendlyId = await (service as any).mintChildFriendlyId( + fakeEnv(), + parentFriendlyId + ); + + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(KSUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("NEW"); + }); + + it("a ROOT batch (no parentRunId) mints by the env setting (cuid default here)", async () => { + const service = buildService(async () => false); + const childFriendlyId = await (service as any).mintChildFriendlyId( + fakeEnv(), + undefined + ); + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(CUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("LEGACY"); + }); + + // A root batch's children are anchored to the batch's friendlyId, NOT to a + // re-resolution of the per-org flag. Even with the env flag forced to "cuid" (a flip + // away from the batch's residency), a ksuid batch anchor yields ksuid children — so + // batch + children stay co-resident and TaskRun.batchId never crosses the seam. + it("a ksuid batch anchor yields ksuid children even when the env flag resolves cuid", async () => { + const service = buildService(async () => false); // resolveMintKind forced to "cuid" + const batchFriendlyId = BatchId.toFriendlyId(generateKsuidId()); // ksuid (NEW) batch + expect(ownerEngine(batchFriendlyId)).toBe("NEW"); + + const childFriendlyId = await (service as any).mintChildFriendlyId( + fakeEnv(), + batchFriendlyId + ); + + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(KSUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("NEW"); + }); + + // The cuid mirror: a cuid batch anchor yields cuid children even if the flag flipped ON. 
+ it("a cuid batch anchor yields cuid children even when the env flag resolves ksuid", async () => { + const service = new BatchTriggerV3Service( + undefined, + undefined, + {} as any, + {} as any, + async () => false, + async () => "ksuid" // env flag flipped ON mid-batch + ); + const batchFriendlyId = BatchId.generate().friendlyId; // cuid (LEGACY) batch + expect(ownerEngine(batchFriendlyId)).toBe("LEGACY"); + + const childFriendlyId = await (service as any).mintChildFriendlyId( + fakeEnv(), + batchFriendlyId + ); + + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(CUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("LEGACY"); + }); +}); diff --git a/apps/webapp/test/batchTriggerV3StoreRouting.test.ts b/apps/webapp/test/batchTriggerV3StoreRouting.test.ts new file mode 100644 index 00000000000..5e1f60d4de6 --- /dev/null +++ b/apps/webapp/test/batchTriggerV3StoreRouting.test.ts @@ -0,0 +1,252 @@ +import { heteroPostgresTest } from "@internal/testcontainers"; +import { PostgresRunStore } from "@internal/run-store"; +import { isUniqueConstraintError, type PrismaClient } from "@trigger.dev/database"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { describe, expect, vi } from "vitest"; + +vi.setConfig({ testTimeout: 60_000 }); + +// Proves BatchTriggerV3's three store seams (cached-run lookup, expired-key clear, +// membership write) route correctly against real PG14 (legacy) + PG17 (run-ops) +// containers, using the service's exact query shapes. The service methods are +// JS #-private, so the seam is driven directly — same approach as the sibling +// legacy-authority test. 
+ +async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `test-${suffix}`, + pkApiKey: `test-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, runtimeEnvironment }; +} + +async function seedRun( + prisma: PrismaClient, + args: { + runtimeEnvironmentId: string; + projectId: string; + organizationId: string; + taskIdentifier: string; + idempotencyKey?: string; + status?: "PENDING" | "EXECUTING" | "COMPLETED_SUCCESSFULLY" | "COMPLETED_WITH_ERRORS"; + idempotencyKeyExpiresAt?: Date; + } +) { + const runId = generateKsuidId(); + return prisma.taskRun.create({ + data: { + id: runId, + friendlyId: `run_${runId}`, + taskIdentifier: args.taskIdentifier, + idempotencyKey: args.idempotencyKey ?? null, + idempotencyKeyExpiresAt: args.idempotencyKeyExpiresAt ?? null, + status: args.status ?? 
"EXECUTING", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: args.runtimeEnvironmentId, + projectId: args.projectId, + organizationId: args.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); +} + +async function seedBatch(prisma: PrismaClient, runtimeEnvironmentId: string, suffix: string) { + const batchId = generateKsuidId(); + return prisma.batchTaskRun.create({ + data: { + id: batchId, + friendlyId: `batch_${suffix}_${batchId}`, + runtimeEnvironmentId, + }, + }); +} + +describe("BatchTriggerV3 · store-seam routing (cross-DB)", () => { + heteroPostgresTest( + "(A) cached-run reuse resolves via the legacy (PG14) authority; a PG17-only key is invisible", + async ({ prisma14, prisma17 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "batch-cached" + ); + const newSide = await seedOrgProjectEnv(prisma17, "batch-cached-new"); + + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + + const key1 = "idem-batch-1"; + const key2 = "idem-batch-2"; + const freshKey = "idem-batch-fresh"; + + const run1 = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: key1, + }); + const run2 = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: key2, + }); + + // A row with one of the SAME keys lives only on PG17 (run-ops). The + // legacy-pinned read must NOT see it. 
+ await seedRun(prisma17, { + runtimeEnvironmentId: newSide.runtimeEnvironment.id, + projectId: newSide.project.id, + organizationId: newSide.organization.id, + taskIdentifier: "my-task", + idempotencyKey: key1, + }); + + // The service's exact cached-run query shape, pinned to PG14. + const cachedRuns = await legacyStore.findRuns( + { + where: { + runtimeEnvironmentId: runtimeEnvironment.id, + taskIdentifier: "my-task", + idempotencyKey: { in: [key1, key2, freshKey] }, + }, + select: { + friendlyId: true, + idempotencyKey: true, + idempotencyKeyExpiresAt: true, + }, + }, + prisma14 + ); + + // Exactly the 2 seeded rows; the fresh key matches nothing. + expect(cachedRuns).toHaveLength(2); + const friendlyIds = cachedRuns.map((r) => r.friendlyId).sort(); + expect(friendlyIds).toEqual([run1.friendlyId, run2.friendlyId].sort()); + // Each friendlyId distinct, exactly one row per seeded key. + expect(new Set(friendlyIds).size).toBe(2); + expect(cachedRuns.filter((r) => r.idempotencyKey === key1)).toHaveLength(1); + expect(cachedRuns.filter((r) => r.idempotencyKey === key2)).toHaveLength(1); + } + ); + + heteroPostgresTest( + "(B) expired-key clear is routed to the legacy (PG14) authority and does not touch PG17", + async ({ prisma14, prisma17 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "batch-expired" + ); + const newSide = await seedOrgProjectEnv(prisma17, "batch-expired-new"); + + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + + const expiredKey = "idem-batch-expired"; + + const legacyRun = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: expiredKey, + idempotencyKeyExpiresAt: new Date(Date.now() - 60_000), + }); + + // A PG17 row with the same key, to prove the clear does not reach it. 
+ const newRun = await seedRun(prisma17, { + runtimeEnvironmentId: newSide.runtimeEnvironment.id, + projectId: newSide.project.id, + organizationId: newSide.organization.id, + taskIdentifier: "my-task", + idempotencyKey: expiredKey, + }); + + // The service's exact expired-key clear shape, pinned to PG14. + await legacyStore.clearIdempotencyKey({ byFriendlyIds: [legacyRun.friendlyId] }, prisma14); + + const cleared = await prisma14.taskRun.findFirst({ where: { id: legacyRun.id } }); + expect(cleared?.idempotencyKey).toBeNull(); + + // The PG17 row is untouched. + const untouched = await prisma17.taskRun.findFirst({ where: { id: newRun.id } }); + expect(untouched?.idempotencyKey).toBe(expiredKey); + } + ); + + heteroPostgresTest( + "(C) membership write lands on the run-ops (PG17) store; duplicate raises a unique-constraint error", + async ({ prisma17 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma17, + "batch-membership" + ); + + const runOpsStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + + const batch = await seedBatch(prisma17, runtimeEnvironment.id, "membership"); + const run = await seedRun(prisma17, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + }); + + await runOpsStore.createBatchTaskRunItem({ + batchTaskRunId: batch.id, + taskRunId: run.id, + status: "PENDING", + }); + + const item = await prisma17.batchTaskRunItem.findFirst({ + where: { batchTaskRunId: batch.id, taskRunId: run.id }, + }); + expect(item).not.toBeNull(); + expect(item?.status).toBe("PENDING"); + + // Re-calling with the SAME pair raises a unique-constraint error at the + // store layer (the service's try/catch is what swallows it). 
+ let caught: unknown; + try { + await runOpsStore.createBatchTaskRunItem({ + batchTaskRunId: batch.id, + taskRunId: run.id, + status: "PENDING", + }); + } catch (error) { + caught = error; + } + + expect(caught).toBeDefined(); + expect(isUniqueConstraintError(caught, ["batchTaskRunId", "taskRunId"])).toBe(true); + + // Still exactly one row. + const count = await prisma17.batchTaskRunItem.count({ + where: { batchTaskRunId: batch.id, taskRunId: run.id }, + }); + expect(count).toBe(1); + } + ); +}); diff --git a/apps/webapp/test/bulkActionV2ReadRouting.test.ts b/apps/webapp/test/bulkActionV2ReadRouting.test.ts new file mode 100644 index 00000000000..e8a51aeb3b3 --- /dev/null +++ b/apps/webapp/test/bulkActionV2ReadRouting.test.ts @@ -0,0 +1,242 @@ +// Service-level proof for bulk CANCEL/REPLAY member hydration across the run-ops seam. +// +// `BulkActionService.process()` builds its ClickHouse-backed RunsRepository internally and +// has no test seam to inject the member-id page, and driving it end-to-end would require a +// full ClickHouse replication stack just to make `listRunIds` return the seeded ids. The +// cross-DB hydration semantics — the DoD's core — are proven exhaustively at the adapter +// unit level (BulkActionV2.batchReadThrough.server.test.ts). Here we prove the SERVICE-level +// wiring by driving the exact closures `process()` passes to `hydrateRunsAcrossSeam` against +// REAL rows seeded on the two containers (PG14 legacy + PG17 new), so the PG14↔PG17 boundary +// is genuinely crossed and the full REPLAY row shape is exercised. We NEVER mock the DB. 
+import { heteroPostgresTest } from "@internal/testcontainers"; +import { describe, expect, vi } from "vitest"; +import type { PrismaClient } from "@trigger.dev/database"; +import type { PrismaReplicaClient } from "~/db.server"; +import { hydrateRunsAcrossSeam } from "~/v3/services/bulk/BulkActionV2.batchReadThrough.server"; + +vi.setConfig({ testTimeout: 60_000 }); + +// 27-char body → NEW residency (ksuid analog). 25-char body → LEGACY residency (cuid analog). +function newId(c: string) { + return "run_" + c.repeat(27); +} +function legacyId(c: string) { + return "run_" + c.repeat(25); +} + +// The exact closures BulkActionService.process() uses for each branch. +const cancelSelect = { + id: true, + engine: true, + friendlyId: true, + status: true, + createdAt: true, + completedAt: true, + taskEventStore: true, +} as const; + +function cancelReadNew(client: PrismaReplicaClient, ids: string[]) { + return client.taskRun.findMany({ where: { id: { in: ids } }, select: cancelSelect }); +} +function cancelReadLegacy(replica: PrismaReplicaClient, ids: string[]) { + return replica.taskRun.findMany({ where: { id: { in: ids } }, select: cancelSelect }); +} +function replayReadNew(client: PrismaReplicaClient, ids: string[]) { + return client.taskRun.findMany({ where: { id: { in: ids } } }); +} +function replayReadLegacy(replica: PrismaReplicaClient, ids: string[]) { + return replica.taskRun.findMany({ where: { id: { in: ids } } }); +} + +async function seedEnv(prisma: PrismaClient, slug: string) { + const user = await prisma.user.create({ + data: { email: `${slug}@test.com`, name: "t", authenticationMethod: "MAGIC_LINK" }, + }); + const organization = await prisma.organization.create({ + data: { + title: "Org", + slug: `org-${slug}`, + members: { create: { userId: user.id, role: "ADMIN" } }, + }, + }); + const project = await prisma.project.create({ + data: { + name: "Proj", + slug: `proj-${slug}`, + organizationId: organization.id, + externalRef: `ext-${slug}`, + }, + }); 
+ const environment = await prisma.runtimeEnvironment.create({ + data: { + slug: `env-${slug}`, + type: "PRODUCTION", + projectId: project.id, + organizationId: organization.id, + apiKey: `api-${slug}`, + pkApiKey: `pk-${slug}`, + shortcode: `sc-${slug}`, + }, + }); + return { organization, project, environment }; +} + +async function seedRun( + prisma: PrismaClient, + ctx: { organization: { id: string }; project: { id: string }; environment: { id: string } }, + id: string +) { + await prisma.taskRun.create({ + data: { + id, + friendlyId: id, + taskIdentifier: "t", + status: "EXECUTING", + payload: JSON.stringify({}), + payloadType: "application/json", + traceId: id, + spanId: id, + queue: "main", + runtimeEnvironmentId: ctx.environment.id, + projectId: ctx.project.id, + organizationId: ctx.organization.id, + environmentType: "PRODUCTION", + engine: "V2", + }, + }); +} + +describe("BulkActionService member hydration across the seam (PG14 legacy + PG17 new)", () => { + heteroPostgresTest( + "CANCEL across both DBs hydrates every member; the NEW id never hits the legacy replica", + async ({ prisma14, prisma17 }) => { + const newRunId = newId("a"); + const legacyRunId = legacyId("b"); + + const newCtx = await seedEnv(prisma17 as unknown as PrismaClient, "cancel-new"); + const legacyCtx = await seedEnv(prisma14 as unknown as PrismaClient, "cancel-legacy"); + await seedRun(prisma17 as unknown as PrismaClient, newCtx, newRunId); + await seedRun(prisma14 as unknown as PrismaClient, legacyCtx, legacyRunId); + + const legacySpy = vi.fn((replica: PrismaReplicaClient, ids: string[]) => { + if (ids.includes(newRunId)) { + throw new Error("legacy replica must never be probed for a NEW-residency id"); + } + return cancelReadLegacy(replica, ids); + }); + + const runs = await hydrateRunsAcrossSeam({ + runIds: [newRunId, legacyRunId], + readNew: cancelReadNew, + readLegacyReplica: legacySpy, + deps: { + splitEnabled: true, + newClient: prisma17 as unknown as PrismaReplicaClient, + 
legacyReplica: prisma14 as unknown as PrismaReplicaClient, + isKnownMigrated: async () => false, + }, + }); + + // Every member hydrated → every member reaches cancel (none dropped). + expect(runs.map((r) => r.id).sort()).toEqual([newRunId, legacyRunId].sort()); + expect(legacySpy.mock.calls[0][1]).toEqual([legacyRunId]); + } + ); + + heteroPostgresTest( + "REPLAY across both DBs hydrates every member as a FULL row", + async ({ prisma14, prisma17 }) => { + const newRunId = newId("c"); + const legacyRunId = legacyId("d"); + + const newCtx = await seedEnv(prisma17 as unknown as PrismaClient, "replay-new"); + const legacyCtx = await seedEnv(prisma14 as unknown as PrismaClient, "replay-legacy"); + await seedRun(prisma17 as unknown as PrismaClient, newCtx, newRunId); + await seedRun(prisma14 as unknown as PrismaClient, legacyCtx, legacyRunId); + + const runs = await hydrateRunsAcrossSeam({ + runIds: [newRunId, legacyRunId], + readNew: replayReadNew, + readLegacyReplica: replayReadLegacy, + deps: { + splitEnabled: true, + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + isKnownMigrated: async () => false, + }, + }); + + expect(runs.map((r) => r.id).sort()).toEqual([newRunId, legacyRunId].sort()); + // Full row, not a select projection: a non-selected column is populated. + const newRow = runs.find((r) => r.id === newRunId)!; + const legacyRow = runs.find((r) => r.id === legacyRunId)!; + expect(newRow.runtimeEnvironmentId).toBe(newCtx.environment.id); + expect(legacyRow.runtimeEnvironmentId).toBe(legacyCtx.environment.id); + } + ); + + heteroPostgresTest( + "known-migrated member is served from new and the legacy replica is never queried for it", + async ({ prisma14, prisma17 }) => { + // A legacy-classified id that lives on new (read hits it). Even with isKnownMigrated=true, + // because new HITS, legacy is never probed. 
+ const migratedRunId = legacyId("e"); + const newCtx = await seedEnv(prisma17 as unknown as PrismaClient, "migrated-new"); + await seedRun(prisma17 as unknown as PrismaClient, newCtx, migratedRunId); + + const throwingLegacy = vi.fn(() => { + throw new Error("legacy replica must never be queried for a known-migrated member"); + }); + + const runs = await hydrateRunsAcrossSeam({ + runIds: [migratedRunId], + readNew: cancelReadNew, + readLegacyReplica: throwingLegacy as never, + deps: { + splitEnabled: true, + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + isKnownMigrated: async () => true, + }, + }); + + expect(runs.map((r) => r.id)).toEqual([migratedRunId]); + expect(throwingLegacy).not.toHaveBeenCalled(); + } + ); + + heteroPostgresTest( + "single-DB passthrough hydrates all members from one client; legacy + filter never invoked", + async ({ prisma14, prisma17 }) => { + // In single-DB mode the service passes its _replica as newClient. Seed everything there. 
+ const idA = newId("f"); + const idB = legacyId("g"); + const ctx = await seedEnv(prisma17 as unknown as PrismaClient, "passthrough"); + await seedRun(prisma17 as unknown as PrismaClient, ctx, idA); + await seedRun(prisma17 as unknown as PrismaClient, ctx, idB); + + const throwingLegacy = vi.fn(() => { + throw new Error("legacy replica must never run in single-DB mode"); + }); + const throwingFilter = vi.fn(async () => { + throw new Error("isKnownMigrated must never run in single-DB mode"); + }); + + const runs = await hydrateRunsAcrossSeam({ + runIds: [idA, idB], + readNew: cancelReadNew, + readLegacyReplica: throwingLegacy as never, + deps: { + splitEnabled: false, + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + isKnownMigrated: throwingFilter, + }, + }); + + expect(runs.map((r) => r.id).sort()).toEqual([idA, idB].sort()); + expect(throwingLegacy).not.toHaveBeenCalled(); + expect(throwingFilter).not.toHaveBeenCalled(); + } + ); +}); diff --git a/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts b/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts new file mode 100644 index 00000000000..3da312a23c6 --- /dev/null +++ b/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts @@ -0,0 +1,245 @@ +// Real PG14 (legacy) + PG17 (new) proof for the dev-session-cancel TaskRun read. +// The DB is never mocked: reads hit the two real containers. Only pure boundaries +// (splitEnabled, isKnownMigrated) and recording client wrappers are injected. 
+import { heteroPostgresTest, postgresTest } from "@internal/testcontainers"; +import type { PrismaClient } from "@trigger.dev/database"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { describe, expect, vi } from "vitest"; +import type { PrismaReplicaClient } from "~/db.server"; +import { CancelDevSessionRunsService } from "~/v3/services/cancelDevSessionRuns.server"; + +vi.setConfig({ testTimeout: 60_000 }); + +// 25-char cuid body (length-disjoint from the 27-char KSUID) → LEGACY residency. +function generateLegacyCuid() { + const suffix = Array.from( + { length: 24 }, + () => "0123456789abcdefghijklmnopqrstuvwxyz"[Math.floor(Math.random() * 36)] + ).join(""); + return `c${suffix}`; +} + +async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `test-${suffix}`, + pkApiKey: `test-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, runtimeEnvironment }; +} + +async function seedRun( + prisma: PrismaClient, + ids: { id: string; friendlyId: string }, + env: { runtimeEnvironmentId: string; projectId: string; organizationId: string } +) { + return prisma.taskRun.create({ + data: { + id: ids.id, + friendlyId: ids.friendlyId, + taskIdentifier: "my-task", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: env.runtimeEnvironmentId, + projectId: env.projectId, + organizationId: 
env.organizationId, + environmentType: "DEVELOPMENT", + // V1 so the (best-effort, error-swallowed) cancel does not require the V2 engine; + // the unit under test is the READ resolution, not the cancel side effect. + engine: "V1", + status: "EXECUTING", + }, + }); +} + +// A read client whose taskRun.findFirst is recorded; throws if used after being marked +// forbidden, so we can prove a store was NEVER read. +function recording(client: PrismaClient, opts: { forbidden?: boolean } = {}) { + const calls: unknown[] = []; + const taskRun = { + findFirst: (args: unknown) => { + calls.push(args); + if (opts.forbidden) { + throw new Error("this store must never be read"); + } + return (client as unknown as PrismaReplicaClient).taskRun.findFirst(args as never); + }, + }; + return { handle: { ...client, taskRun } as unknown as PrismaReplicaClient, calls }; +} + +describe("CancelDevSessionRunsService store routing (hetero)", () => { + heteroPostgresTest( + "Test B: a NEW run (ksuid) resolves on the new store via read-through, by friendlyId and by id", + async ({ prisma17, prisma14 }) => { + const id = generateKsuidId(); + expect(id.length).toBe(27); + const friendlyId = `run_${id}`; + + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv(prisma17, "new"); + await seedRun( + prisma17, + { id, friendlyId }, + { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + } + ); + + // by friendlyId + { + const newClient = recording(prisma17); + const legacy = recording(prisma14, { forbidden: true }); + const service = new CancelDevSessionRunsService({ + prisma: prisma17, + readThroughDeps: { + splitEnabled: true, + newClient: newClient.handle, + legacyReplica: legacy.handle, + isKnownMigrated: async () => false, + }, + }); + await service.call({ + runIds: [friendlyId], + cancelledAt: new Date(), + reason: "test", + }); + // ksuid → NEW: new store served the read, legacy never touched. 
+ expect(newClient.calls.length).toBe(1); + expect(legacy.calls.length).toBe(0); + } + + // by internal id + { + const newClient = recording(prisma17); + const legacy = recording(prisma14, { forbidden: true }); + const service = new CancelDevSessionRunsService({ + prisma: prisma17, + readThroughDeps: { + splitEnabled: true, + newClient: newClient.handle, + legacyReplica: legacy.handle, + isKnownMigrated: async () => false, + }, + }); + await service.call({ + runIds: [id], + cancelledAt: new Date(), + reason: "test", + }); + expect(newClient.calls.length).toBe(1); + expect(legacy.calls.length).toBe(0); + } + } + ); + + heteroPostgresTest( + "Test C: an OLD in-retention run (cuid) resolves off the LEGACY replica, never a legacy primary", + async ({ prisma17, prisma14 }) => { + const id = generateLegacyCuid(); + expect(id.length).toBe(25); + const friendlyId = `run_${id}`; + + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "legacy" + ); + await seedRun( + prisma14, + { id, friendlyId }, + { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + } + ); + + const newClient = recording(prisma17); + const legacy = recording(prisma14); + const service = new CancelDevSessionRunsService({ + prisma: prisma14, + readThroughDeps: { + splitEnabled: true, + newClient: newClient.handle, + legacyReplica: legacy.handle, + isKnownMigrated: async () => false, + }, + }); + + await service.call({ + runIds: [id], + cancelledAt: new Date(), + reason: "test", + }); + + // NEW first (miss) → resolved off the LEGACY REPLICA handle (no primary handle exists). 
+ expect(newClient.calls.length).toBe(1); + expect(legacy.calls.length).toBe(1); + } + ); +}); + +describe("CancelDevSessionRunsService passthrough (single-DB)", () => { + postgresTest( + "Test A: with no read-through deps, the run is read from the single DB and session reads stay on it", + async ({ prisma }) => { + const id = generateKsuidId(); + const friendlyId = `run_${id}`; + + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv(prisma, "pt"); + await seedRun( + prisma, + { id, friendlyId }, + { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + } + ); + + const session = await prisma.runtimeEnvironmentSession.create({ + data: { environmentId: runtimeEnvironment.id, ipAddress: "127.0.0.1" }, + }); + + // splitEnabled=false → single plain read against the one client; the session + // control-plane read runs on the same prisma. + const service = new CancelDevSessionRunsService({ + prisma, + readThroughDeps: { splitEnabled: false, newClient: prisma as unknown as PrismaReplicaClient }, + }); + + await service.call({ + runIds: [id], + cancelledAt: new Date(), + reason: "test", + cancelledSessionId: session.id, + }); + + // Run found + handed to cancel against the single DB; confirm the row is present. 
+ const row = await prisma.taskRun.findFirst({ where: { id } }); + expect(row).not.toBeNull(); + expect(row?.friendlyId).toBe(friendlyId); + } + ); +}); diff --git a/apps/webapp/test/engine/streamBatchItems.test.ts b/apps/webapp/test/engine/streamBatchItems.test.ts index 48b0c3ccca1..038820ecbb8 100644 --- a/apps/webapp/test/engine/streamBatchItems.test.ts +++ b/apps/webapp/test/engine/streamBatchItems.test.ts @@ -15,6 +15,7 @@ vi.mock("~/services/platform.v3.server", async (importOriginal) => { }); import { RunEngine } from "@internal/run-engine"; +import { PostgresRunStore } from "@internal/run-store"; import { setupAuthenticatedEnvironment } from "@internal/run-engine/tests"; // Per-test redis isolation: each test runs its own RunEngine whose background work outlives the test // body. NoClickhouse because this suite never touches ClickHouse - skips the worker-scoped boot+migrate. @@ -41,9 +42,6 @@ import { setTimeout as sleep } from "node:timers/promises"; vi.setConfig({ testTimeout: 120_000 }); describe("StreamBatchItemsService", () => { - /** - * Helper to create a batch directly in the database - */ async function createBatch( prisma: PrismaClient, environmentId: string, @@ -74,9 +72,6 @@ describe("StreamBatchItemsService", () => { return batch; } - /** - * Helper to create an async iterable from items - */ async function* itemsToAsyncIterable( items: Array<{ task: string; payload: string; index: number }> ) { @@ -85,9 +80,6 @@ describe("StreamBatchItemsService", () => { } } - /** - * Build N valid batch items. - */ function makeItems(count: number, taskId = "test-task") { return Array.from({ length: count }, (_, index) => ({ task: taskId, @@ -660,6 +652,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. 
+ engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -784,6 +784,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. + engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -908,6 +916,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. + engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -1033,6 +1049,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. + engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -1245,6 +1269,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. 
+ engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -1376,6 +1408,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. + engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -1529,12 +1569,11 @@ describe("StreamBatchItemsService", () => { processingConcurrency: 10, }); - // Force the count-mismatch branch by leaving Redis at 0 items vs - // runCount=4. The pre-loop must see "initial" state (so it passes - // through to the loop), and the count-mismatch re-query must see - // "post-callback" state. Use a findFirst counter to flip the DB - // between those two reads, exactly matching the production timing - // where the callback fires while our loop is running. + // The pre-loop validate-find must see "initial" state (so it passes through to the + // loop), and the count-mismatch re-query must see "post-callback" state. Use a findFirst + // counter to flip the DB between those two reads, matching production timing where the + // callback fires while our loop runs. Both reads route through the engine's run-store + // (route-by-batch-id under the split), so the racing client backs the store. let findFirstCallCount = 0; const racingPrisma = { ...prisma, @@ -1543,10 +1582,6 @@ describe("StreamBatchItemsService", () => { findFirst: async (args: Parameters[0]) => { findFirstCallCount++; if (findFirstCallCount === 2) { - // The post-loop count-mismatch re-query: BatchQueue completed - // all items and the callback fired in the window before this - // read. 
Status stays PENDING (all runs created OK) but - // processingCompletedAt is now set. await prisma.batchTaskRun.update({ where: { id: batch.id }, data: { @@ -1562,6 +1597,11 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -1925,9 +1965,6 @@ describe("StreamBatchItemsService", () => { }); describe("createNdjsonParserStream", () => { - /** - * Helper to collect all items from a ReadableStream - */ async function collectStream(stream: ReadableStream): Promise { const results: T[] = []; for await (const item of streamToAsyncIterable(stream)) { @@ -1936,9 +1973,6 @@ describe("createNdjsonParserStream", () => { return results; } - /** - * Helper to create a ReadableStream from an array of Uint8Array chunks - */ function chunksToStream(chunks: Uint8Array[]): ReadableStream { let index = 0; return new ReadableStream({ diff --git a/apps/webapp/test/engine/triggerFailedTask.test.ts b/apps/webapp/test/engine/triggerFailedTask.test.ts new file mode 100644 index 00000000000..0e282e1931e --- /dev/null +++ b/apps/webapp/test/engine/triggerFailedTask.test.ts @@ -0,0 +1,309 @@ +import { describe, expect, vi } from "vitest"; + +import { RunEngine } from "@internal/run-engine"; +import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "@internal/run-engine/tests"; +import { containerTest } from "@internal/testcontainers"; +import { trace } from "@opentelemetry/api"; +import { RunId, classifyKind, generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { TriggerFailedTaskService } from "../../app/runEngine/services/triggerFailedTask.server"; + +vi.setConfig({ testTimeout: 60_000 }); + +function makeEngine(prisma: any, redisOptions: any) { + return new RunEngine({ + prisma, + worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10,
pollIntervalMs: 100 }, + queue: { redis: redisOptions }, + runLock: { redis: redisOptions }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); +} + +describe("TriggerFailedTaskService — failed run residency", () => { + containerTest( + "root failed run mints cuid when split is off (call)", + async ({ prisma, redisOptions }) => { + const engine = makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const service = new TriggerFailedTaskService({ + prisma, + engine, + isKnownMigrated: async () => false, + }); + + const friendlyId = await service.call({ + taskId: taskIdentifier, + environment, + payload: { test: "root" }, + errorMessage: "boom", + }); + + expect(friendlyId).toBeTruthy(); + expect(classifyKind(friendlyId!)).toBe("cuid"); + + // The failed run write must land (persistence) with no parent linkage. + const persisted = await prisma.taskRun.findFirst({ where: { friendlyId: friendlyId! 
} }); + expect(persisted).not.toBeNull(); + expect(persisted!.status).toBe("SYSTEM_FAILURE"); + expect(persisted!.depth).toBe(0); + expect(persisted!.parentTaskRunId).toBeNull(); + + await engine.quit(); + } + ); + + containerTest( + "failed child of a NEW (ksuid) parent mints ksuid (call)", + async ({ prisma, redisOptions }) => { + const engine = makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const parentFriendlyId = RunId.toFriendlyId(generateKsuidId()); + expect(classifyKind(parentFriendlyId)).toBe("ksuid"); + await engine.trigger( + { + friendlyId: parentFriendlyId, + environment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + traceId: "00000000000000000000000000000000", + spanId: "0000000000000000", + workerQueue: "main", + queue: `task/${taskIdentifier}`, + isTest: false, + tags: [], + } as any, + prisma + ); + + const service = new TriggerFailedTaskService({ + prisma, + engine, + isKnownMigrated: async () => false, + }); + + const friendlyId = await service.call({ + taskId: taskIdentifier, + environment, + payload: { test: "child" }, + errorMessage: "boom", + parentRunId: parentFriendlyId, + }); + + expect(classifyKind(friendlyId!)).toBe("ksuid"); + + // The failed run write must land (persistence) and link to the resolved parent. + const persisted = await prisma.taskRun.findFirst({ where: { friendlyId: friendlyId! } }); + expect(persisted).not.toBeNull(); + expect(persisted!.status).toBe("SYSTEM_FAILURE"); + + const parent = await prisma.taskRun.findFirst({ where: { friendlyId: parentFriendlyId } }); + expect(persisted!.parentTaskRunId).toBe(parent!.id); + expect(persisted!.depth).toBe(parent!.depth + 1); + expect(persisted!.rootTaskRunId).toBe(parent!.rootTaskRunId ?? 
parent!.id); + + await engine.quit(); + } + ); + + containerTest( + "failed child of a LEGACY (cuid) parent mints cuid (call)", + async ({ prisma, redisOptions }) => { + const engine = makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const parentFriendlyId = RunId.generate().friendlyId; // cuid → LEGACY + expect(classifyKind(parentFriendlyId)).toBe("cuid"); + await engine.trigger( + { + friendlyId: parentFriendlyId, + environment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + traceId: "00000000000000000000000000000000", + spanId: "0000000000000000", + workerQueue: "main", + queue: `task/${taskIdentifier}`, + isTest: false, + tags: [], + } as any, + prisma + ); + + const service = new TriggerFailedTaskService({ + prisma, + engine, + isKnownMigrated: async () => false, + }); + + const friendlyId = await service.call({ + taskId: taskIdentifier, + environment, + payload: { test: "child" }, + errorMessage: "boom", + parentRunId: parentFriendlyId, + }); + + expect(classifyKind(friendlyId!)).toBe("cuid"); + + await engine.quit(); + } + ); + + containerTest( + "failed child of a migrated LEGACY parent mints ksuid (call)", + async ({ prisma, redisOptions }) => { + const engine = makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const parentFriendlyId = RunId.generate().friendlyId; + expect(classifyKind(parentFriendlyId)).toBe("cuid"); + await engine.trigger( + { + friendlyId: parentFriendlyId, + environment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + traceId: "00000000000000000000000000000000", + spanId: "0000000000000000", + workerQueue: "main", + 
queue: `task/${taskIdentifier}`, + isTest: false, + tags: [], + } as any, + prisma + ); + + const service = new TriggerFailedTaskService({ + prisma, + engine, + isKnownMigrated: async (id: string) => id === parentFriendlyId, + }); + + const friendlyId = await service.call({ + taskId: taskIdentifier, + environment, + payload: { test: "child" }, + errorMessage: "boom", + parentRunId: parentFriendlyId, + }); + + expect(classifyKind(friendlyId!)).toBe("ksuid"); + + await engine.quit(); + } + ); + + containerTest( + "failed child of a NEW parent mints ksuid (callWithoutTraceEvents)", + async ({ prisma, redisOptions }) => { + const engine = makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const parentFriendlyId = RunId.toFriendlyId(generateKsuidId()); + await engine.trigger( + { + friendlyId: parentFriendlyId, + environment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + traceId: "00000000000000000000000000000000", + spanId: "0000000000000000", + workerQueue: "main", + queue: `task/${taskIdentifier}`, + isTest: false, + tags: [], + } as any, + prisma + ); + + const service = new TriggerFailedTaskService({ + prisma, + engine, + isKnownMigrated: async () => false, + }); + + const friendlyId = await service.callWithoutTraceEvents({ + environmentId: environment.id, + environmentType: environment.type, + projectId: environment.projectId, + organizationId: environment.organizationId, + taskId: taskIdentifier, + payload: { test: "child" }, + errorMessage: "boom", + parentRunId: parentFriendlyId, + }); + + expect(classifyKind(friendlyId!)).toBe("ksuid"); + + await engine.quit(); + } + ); + + containerTest( + "callWithoutTraceEvents returns null (best-effort) when the derived parent row is absent", + async ({ prisma, redisOptions }) => { + const engine = 
makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const service = new TriggerFailedTaskService({ + prisma, + engine, + isKnownMigrated: async () => false, + }); + + // A well-formed ksuid parent friendlyId that was NEVER triggered → no row. + // Exercises the missing-parent fallback in callWithoutTraceEvents. + const absentParentFriendlyId = RunId.toFriendlyId(generateKsuidId()); + + const friendlyId = await service.callWithoutTraceEvents({ + environmentId: environment.id, + environmentType: environment.type, + projectId: environment.projectId, + organizationId: environment.organizationId, + taskId: taskIdentifier, + payload: { test: "absent-parent" }, + errorMessage: "boom", + parentRunId: absentParentFriendlyId, + }); + + // Fallback derives parentTaskRunId from an id with no row; the parentTaskRunId FK rejects the create, so the method returns null instead of throwing. 
+ expect(friendlyId).toBeNull(); + const orphan = await prisma.taskRun.findFirst({ + where: { parentTaskRunId: RunId.fromFriendlyId(absentParentFriendlyId) }, + }); + expect(orphan).toBeNull(); + + await engine.quit(); + } + ); +}); diff --git a/apps/webapp/test/engine/triggerTask.test.ts b/apps/webapp/test/engine/triggerTask.test.ts index 235c67637b3..ffdb05fc192 100644 --- a/apps/webapp/test/engine/triggerTask.test.ts +++ b/apps/webapp/test/engine/triggerTask.test.ts @@ -19,6 +19,12 @@ import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "@internal/ import { assertNonNullable, containerTest } from "@internal/testcontainers"; import { trace } from "@opentelemetry/api"; import type { IOPacket } from "@trigger.dev/core/v3"; +import { + RunId, + classifyKind, + generateInternalId, + generateKsuidId, +} from "@trigger.dev/core/v3/isomorphic"; import type { TaskRun } from "@trigger.dev/database"; import { Redis } from "ioredis"; import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; @@ -42,7 +48,7 @@ import { RunEngineTriggerTaskService } from "../../app/runEngine/services/trigge import { promiseWithResolvers } from "@trigger.dev/core"; import { setTimeout } from "node:timers/promises"; -vi.setConfig({ testTimeout: 60_000 }); // 60 seconds timeout +vi.setConfig({ testTimeout: 60_000 }); class MockPayloadProcessor implements PayloadProcessor { async process(request: TriggerTaskRequest): Promise { @@ -211,7 +217,6 @@ describe("RunEngineTriggerTaskService", () => { const taskIdentifier = "test-task"; - //create background worker await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); const queuesManager = new DefaultQueueManager(prisma, engine); @@ -489,7 +494,6 @@ describe("RunEngineTriggerTaskService", () => { const taskIdentifier = "test-task"; - //create background worker await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); const queuesManager = new 
DefaultQueueManager(prisma, engine); @@ -605,13 +609,12 @@ describe("RunEngineTriggerTaskService", () => { const taskIdentifier = "test-task"; - //create background worker await setupBackgroundWorker(engine, authenticatedEnvironment, [parentTask, taskIdentifier]); const parentRun1 = await engine.trigger( { number: 1, - friendlyId: "run_p1", + friendlyId: "run_cmqxvncxq0000kaulzpafkicv", environment: authenticatedEnvironment, taskIdentifier: parentTask, payload: "{}", @@ -642,7 +645,7 @@ describe("RunEngineTriggerTaskService", () => { const parentRun2 = await engine.trigger( { number: 2, - friendlyId: "run_p2", + friendlyId: "run_cmqxvncxr0001kauldv9mqa9z", environment: authenticatedEnvironment, taskIdentifier: parentTask, payload: "{}", @@ -690,6 +693,7 @@ describe("RunEngineTriggerTaskService", () => { tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, // 1MB triggerRacepointSystem, + isKnownMigrated: async () => false, }); const idempotencyKey = "test-idempotency-key"; @@ -1116,7 +1120,7 @@ describe("RunEngineTriggerTaskService", () => { const parentRun1 = await engine.trigger( { number: 1, - friendlyId: "run_p1", + friendlyId: "run_cmqxvncxq0000kaulzpafkicv", environment: authenticatedEnvironment, taskIdentifier: parentTask, payload: "{}", @@ -1146,7 +1150,7 @@ describe("RunEngineTriggerTaskService", () => { const parentRun2 = await engine.trigger( { number: 2, - friendlyId: "run_p2", + friendlyId: "run_cmqxvncxr0001kauldv9mqa9z", environment: authenticatedEnvironment, taskIdentifier: parentTask, payload: "{}", @@ -1205,6 +1209,7 @@ describe("RunEngineTriggerTaskService", () => { tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, // 1MB triggerRacepointSystem, + isKnownMigrated: async () => false, }); const idempotencyKey = "test-preserve-friendly-id"; @@ -2276,3 +2281,299 @@ describe("DefaultQueueManager task metadata cache", () => { } ); }); + +describe("RunEngineTriggerTaskService — child run residency 
inheritance", () => { + // Helper: stand up an engine + service wired for a single (real) Postgres/Redis + // pair, with an injectable marker boundary so the migrated-parent case can be + // driven without the split-DB infrastructure. Returns the service plus the + // authenticated environment and a registered task identifier. + async function setupResidencyService( + prisma: any, + redisOptions: any, + opts?: { + isKnownMigrated?: (runId: string) => Promise; + isSplitEnabled?: () => Promise; + } + ) { + const engine = new RunEngine({ + prisma, + worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10, pollIntervalMs: 100 }, + queue: { redis: redisOptions }, + runLock: { redis: redisOptions }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "residency-task"; + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + const queuesManager = new DefaultQueueManager(prisma, engine); + const idempotencyKeyConcern = new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ); + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: queuesManager, + idempotencyKeyConcern, + validator: new MockTriggerTaskValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + // Default: nothing is migrated. Individual tests override. + isKnownMigrated: opts?.isKnownMigrated ?? (async () => false), + // Default split OFF in CI — matches split-off semantics and keeps the + // existing tests deterministic. Tests that exercise the marker pass true. 
+ isSplitEnabled: opts?.isSplitEnabled ?? (async () => false), + }); + + return { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService }; + } + + containerTest( + "root run mints by the env flag (cuid when split is off)", + async ({ prisma, redisOptions }) => { + const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = + await setupResidencyService(prisma, redisOptions); + + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "root" } }, + }); + + expect(result?.run.friendlyId).toBeDefined(); + // Split disabled in CI ⇒ flag resolves "cuid". + expect(classifyKind(result!.run.friendlyId)).toBe("cuid"); + + await engine.quit(); + } + ); + + containerTest( + "child of a LEGACY (cuid) parent is minted cuid (born LEGACY)", + async ({ prisma, redisOptions }) => { + const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = + await setupResidencyService(prisma, redisOptions); + + // Root parent — cuid in CI (split off). 
+ const parent = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "parent" } }, + }); + expect(classifyKind(parent!.run.friendlyId)).toBe("cuid"); + + const child = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "child" }, options: { parentRunId: parent!.run.friendlyId } }, + }); + + expect(classifyKind(child!.run.friendlyId)).toBe("cuid"); + + await engine.quit(); + } + ); + + containerTest( + "child of a NEW (ksuid) parent is minted ksuid (born NEW)", + async ({ prisma, redisOptions }) => { + const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = + await setupResidencyService(prisma, redisOptions); + + // Construct a NEW-resident parent directly by minting a ksuid friendlyId + // and creating its run row, so the child inherits NEW by id-shape alone + // (no marker needed). We trigger the parent with an explicit ksuid id via + // the runFriendlyId option so the row physically exists for the parent + // lookup the child path performs. 
+ const parentFriendlyId = RunId.toFriendlyId( + // 27-char ksuid → classifies NEW + generateKsuidId() + ); + expect(classifyKind(parentFriendlyId)).toBe("ksuid"); + + const parent = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "parent" } }, + options: { runFriendlyId: parentFriendlyId }, + }); + expect(parent!.run.friendlyId).toBe(parentFriendlyId); + + const child = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "child" }, options: { parentRunId: parentFriendlyId } }, + }); + + expect(classifyKind(child!.run.friendlyId)).toBe("ksuid"); + + await engine.quit(); + } + ); + + containerTest( + "child of a migrated LEGACY (cuid) parent is minted ksuid (born NEW)", + async ({ prisma, redisOptions }) => { + // The parent's id is a cuid (LEGACY by shape) but the migrated marker says + // it now lives on NEW. The child MUST inherit NEW. We drive this with an + // injected isKnownMigrated boundary that reports the parent as migrated.
+ let migratedParentFriendlyId = ""; + const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = + await setupResidencyService(prisma, redisOptions, { + isKnownMigrated: async (id: string) => id === migratedParentFriendlyId, + isSplitEnabled: async () => true, + }); + + const parent = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "parent" } }, + }); + migratedParentFriendlyId = parent!.run.friendlyId; + expect(classifyKind(migratedParentFriendlyId)).toBe("cuid"); // LEGACY by shape + + const child = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { + payload: { test: "child" }, + options: { parentRunId: migratedParentFriendlyId }, + }, + }); + + // Marker says parent is on NEW ⇒ child inherits NEW ⇒ ksuid. + expect(classifyKind(child!.run.friendlyId)).toBe("ksuid"); + + await engine.quit(); + } + ); + + containerTest( + "child trigger does not consult the migrated marker when split is off", + async ({ prisma, redisOptions }) => { + let consulted = false; + const spy = async (_id: string) => { + consulted = true; + return false; + }; + const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = + await setupResidencyService(prisma, redisOptions, { + isKnownMigrated: spy, + isSplitEnabled: async () => false, + }); + + const parent = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "parent" } }, + }); + expect(classifyKind(parent!.run.friendlyId)).toBe("cuid"); + + const child = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "child" }, options: { parentRunId: parent!.run.friendlyId } }, + }); + + // Split off ⇒ pure id-shape inheritance, byte-identical to today. 
+ expect(classifyKind(child!.run.friendlyId)).toBe("cuid"); + // The marker resolver was NOT called when split is off. + expect(consulted).toBe(false); + + await engine.quit(); + } + ); + + containerTest( + "child trigger consults the migrated marker when split is on", + async ({ prisma, redisOptions }) => { + let consulted = false; + let migratedParentFriendlyId = ""; + const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = + await setupResidencyService(prisma, redisOptions, { + isKnownMigrated: async (id: string) => { + consulted = true; + return id === migratedParentFriendlyId; + }, + isSplitEnabled: async () => true, + }); + + const parent = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "parent" } }, + }); + migratedParentFriendlyId = parent!.run.friendlyId; + expect(classifyKind(migratedParentFriendlyId)).toBe("cuid"); + + const child = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { + payload: { test: "child" }, + options: { parentRunId: migratedParentFriendlyId }, + }, + }); + + // Split on ⇒ marker consulted; marker says migrated ⇒ ksuid. + expect(consulted).toBe(true); + expect(classifyKind(child!.run.friendlyId)).toBe("ksuid"); + + await engine.quit(); + } + ); + + containerTest( + "caller-supplied runFriendlyId wins verbatim and skips residency inheritance", + async ({ prisma, redisOptions }) => { + let consulted = false; + const spy = async (_id: string) => { + consulted = true; + return false; + }; + const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = + await setupResidencyService(prisma, redisOptions, { + isKnownMigrated: spy, + isSplitEnabled: async () => true, + }); + + // Explicit cuid id for the run, and a ksuid/NEW parent id. 
+ const explicitFriendlyId = RunId.toFriendlyId(generateInternalId()); + const parentFriendlyId = RunId.toFriendlyId(generateKsuidId()); + expect(classifyKind(explicitFriendlyId)).toBe("cuid"); + expect(classifyKind(parentFriendlyId)).toBe("ksuid"); + + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "explicit" }, options: { parentRunId: parentFriendlyId } }, + options: { runFriendlyId: explicitFriendlyId }, + }); + + // Caller-supplied id wins verbatim — NOT re-minted to ksuid despite the NEW parent. + expect(result!.run.friendlyId).toBe(explicitFriendlyId); + // The supplied-id short-circuit skips the mint/marker entirely. + expect(consulted).toBe(false); + + await engine.quit(); + } + ); +}); diff --git a/apps/webapp/test/idempotencyDedupResidency.test.ts b/apps/webapp/test/idempotencyDedupResidency.test.ts new file mode 100644 index 00000000000..585503ebf93 --- /dev/null +++ b/apps/webapp/test/idempotencyDedupResidency.test.ts @@ -0,0 +1,179 @@ +import { heteroPostgresTest } from "@internal/testcontainers"; +import type { PrismaClient } from "@trigger.dev/database"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { describe, expect, vi } from "vitest"; + +// Stub so the runStore singleton doesn't eagerly connect at import. +vi.mock("~/db.server", () => ({ prisma: {}, $replica: {}, runOpsNewPrisma: {}, runOpsLegacyPrisma: {} })); +// Keep split off so resolveIdempotencyDedupClient returns this.prisma (the hetero fixture client). 
+vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ isSplitEnabled: async () => false })); + +import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; +import type { TriggerTaskRequest } from "~/runEngine/types"; + +vi.setConfig({ testTimeout: 60_000 }); + +function makeConcern(client: PrismaClient) { + return new IdempotencyKeyConcern(client as never, {} as never, {} as never); +} + +function makeRequest(opts: { + environmentId: string; + organizationId: string; + projectId: string; + taskId: string; + idempotencyKey: string; +}): TriggerTaskRequest { + return { + taskId: opts.taskId, + environment: { + id: opts.environmentId, + organizationId: opts.organizationId, + projectId: opts.projectId, + organization: { featureFlags: {} }, + }, + options: {}, + body: { options: { idempotencyKey: opts.idempotencyKey } }, + } as unknown as TriggerTaskRequest; +} + +async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `test-${suffix}`, + pkApiKey: `test-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, runtimeEnvironment }; +} + +async function seedRun( + prisma: PrismaClient, + args: { + runtimeEnvironmentId: string; + projectId: string; + organizationId: string; + taskIdentifier: string; + idempotencyKey: string; + status?: "PENDING" | "EXECUTING" | "COMPLETED_SUCCESSFULLY" | "COMPLETED_WITH_ERRORS"; + } +) { + const runId = generateKsuidId(); + return prisma.taskRun.create({ + 
data: { + id: runId, + friendlyId: `run_${runId}`, + taskIdentifier: args.taskIdentifier, + idempotencyKey: args.idempotencyKey, + idempotencyKeyExpiresAt: null, + status: args.status ?? "EXECUTING", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: args.runtimeEnvironmentId, + projectId: args.projectId, + organizationId: args.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); +} + +describe("IdempotencyKeyConcern · residency-routed dedup (cross-DB)", () => { + heteroPostgresTest( + "a would-be-new run resolves its key against the new (PG17) DB, not the legacy (PG14) DB", + async ({ prisma14, prisma17 }) => { + // Same env shape on both DBs. + const legacy = await seedOrgProjectEnv(prisma14, "resid-legacy"); + const next = await seedOrgProjectEnv(prisma17, "resid-new"); + + const key = "idem-resid-1"; + + const newRun = await seedRun(prisma17, { + runtimeEnvironmentId: next.runtimeEnvironment.id, + projectId: next.project.id, + organizationId: next.organization.id, + taskIdentifier: "my-task", + idempotencyKey: key, + status: "EXECUTING", + }); + + const concernOnNew = makeConcern(prisma17); + const hit = await concernOnNew.handleTriggerRequest( + makeRequest({ + environmentId: next.runtimeEnvironment.id, + organizationId: next.organization.id, + projectId: next.project.id, + taskId: "my-task", + idempotencyKey: key, + }), + undefined + ); + expect(hit.isCached).toBe(true); + if (hit.isCached === true) { + expect(hit.run.id).toBe(newRun.id); + } + + // The legacy DB holds no row for this key — a legacy-pinned probe would miss it. 
+ const legacyMatches = await prisma14.taskRun.count({ + where: { + runtimeEnvironmentId: legacy.runtimeEnvironment.id, + taskIdentifier: "my-task", + idempotencyKey: key, + }, + }); + expect(legacyMatches).toBe(0); + } + ); + + heteroPostgresTest( + "a would-be-legacy run still resolves its key against the legacy (PG14) DB", + async ({ prisma14 }) => { + const legacy = await seedOrgProjectEnv(prisma14, "resid-legacy-only"); + const key = "idem-resid-legacy"; + + const legacyRun = await seedRun(prisma14, { + runtimeEnvironmentId: legacy.runtimeEnvironment.id, + projectId: legacy.project.id, + organizationId: legacy.organization.id, + taskIdentifier: "my-task", + idempotencyKey: key, + status: "EXECUTING", + }); + + const concernOnLegacy = makeConcern(prisma14); + const hit = await concernOnLegacy.handleTriggerRequest( + makeRequest({ + environmentId: legacy.runtimeEnvironment.id, + organizationId: legacy.organization.id, + projectId: legacy.project.id, + taskId: "my-task", + idempotencyKey: key, + }), + undefined + ); + expect(hit.isCached).toBe(true); + if (hit.isCached === true) { + expect(hit.run.id).toBe(legacyRun.id); + } + } + ); +}); diff --git a/apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts b/apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts new file mode 100644 index 00000000000..fd007c8821b --- /dev/null +++ b/apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts @@ -0,0 +1,301 @@ +import { heteroPostgresTest } from "@internal/testcontainers"; +import type { PrismaClient } from "@trigger.dev/database"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { describe, expect, vi } from "vitest"; + +// Stub `~/db.server` so the `runStore` singleton doesn't eagerly connect at +// import. The concern passes its constructor `prisma` arg as the explicit +// client/tx to every store call, so the singleton's bound handles are never +// exercised — the passed client runs the query. 
Mirrors the shipped +// `mollifierClaimResolution` test: env-wiring mock only; the DB under test is +// the real PG14 + PG17 hetero-fixture containers. +vi.mock("~/db.server", () => ({ prisma: {}, $replica: {}, runOpsNewPrisma: {}, runOpsLegacyPrisma: {} })); +// Keep split off so resolveIdempotencyDedupClient returns this.prisma (the hetero fixture client). +vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ isSplitEnabled: async () => false })); + +import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; +import type { TriggerTaskRequest } from "~/runEngine/types"; + +vi.setConfig({ testTimeout: 60_000 }); + +// The constructor `prisma` arg is the client the four store sites execute +// against. With the run-ops split off (mocked above) the dedup resolver is a +// pass-through that returns this same client, so constructing with the PG14 or +// PG17 fixture client decides which DB the residency-routed dedup probe reads. +function makeConcern(client: PrismaClient) { + return new IdempotencyKeyConcern( + client as never, + {} as never, // engine — unused on the reuse / clear paths + {} as never // traceEventConcern — unused on the reuse / clear paths + ); +} + +function makeRequest(opts: { + environmentId: string; + organizationId: string; + projectId: string; + taskId: string; + idempotencyKey: string; +}): TriggerTaskRequest { + return { + taskId: opts.taskId, + environment: { + id: opts.environmentId, + organizationId: opts.organizationId, + projectId: opts.projectId, + // Leave the org mollifier flag unset so the pre-gate claim path is + // skipped — this test exercises the PG existing-run lookup + clear, + // not the Redis claim. (resolveOrgMollifierFlag returns falsy for an + // org with no mollifier flag, so claimEligible is false.) 
+ organization: { featureFlags: {} }, + }, + options: {}, + body: { options: { idempotencyKey: opts.idempotencyKey } }, + } as unknown as TriggerTaskRequest; +} + +async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `test-${suffix}`, + pkApiKey: `test-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, runtimeEnvironment }; +} + +async function seedRun( + prisma: PrismaClient, + args: { + runtimeEnvironmentId: string; + projectId: string; + organizationId: string; + taskIdentifier: string; + idempotencyKey: string; + status?: "PENDING" | "EXECUTING" | "COMPLETED_SUCCESSFULLY" | "COMPLETED_WITH_ERRORS"; + idempotencyKeyExpiresAt?: Date; + } +) { + const runId = generateKsuidId(); + return prisma.taskRun.create({ + data: { + id: runId, + friendlyId: `run_${runId}`, + taskIdentifier: args.taskIdentifier, + idempotencyKey: args.idempotencyKey, + idempotencyKeyExpiresAt: args.idempotencyKeyExpiresAt ?? null, + status: args.status ?? 
"EXECUTING", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: args.runtimeEnvironmentId, + projectId: args.projectId, + organizationId: args.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); +} + +describe("IdempotencyKeyConcern · residency-routed dedup (cross-DB)", () => { + heteroPostgresTest( + "resolves a legacy-resident key against the legacy DB; a key whose run lives on the new DB is resolved against the new DB", + async ({ prisma14, prisma17 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "legacy-auth" + ); + + // Seed the same org/project/env shape on the NEW (PG17) DB so we can + // place a row there for a *different* key — proving the legacy-pinned + // read does not see it. + const newSide = await seedOrgProjectEnv(prisma17, "new-side"); + + const reusedKey = "idem-reuse-1"; + + // The authoritative existing run lives on the LEGACY (PG14) DB. + const legacyRun = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: reusedKey, + status: "EXECUTING", + }); + + // A row for a DIFFERENT key lives only on the NEW (PG17) DB. + const newOnlyKey = "idem-new-only"; + await seedRun(prisma17, { + runtimeEnvironmentId: newSide.runtimeEnvironment.id, + projectId: newSide.project.id, + organizationId: newSide.organization.id, + taskIdentifier: "my-task", + idempotencyKey: newOnlyKey, + status: "EXECUTING", + }); + + const concern = makeConcern(prisma14); + + // (1) Reuse with the legacy key resolves the legacy-seeded run. 
+ const reuse = await concern.handleTriggerRequest( + makeRequest({ + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + projectId: project.id, + taskId: "my-task", + idempotencyKey: reusedKey, + }), + undefined + ); + expect(reuse.isCached).toBe(true); + if (reuse.isCached === true) { + expect(reuse.run.id).toBe(legacyRun.id); + } + + // Exactly one run matches the key on the legacy DB — no duplicate. + const legacyMatches = await prisma14.taskRun.count({ + where: { + runtimeEnvironmentId: runtimeEnvironment.id, + taskIdentifier: "my-task", + idempotencyKey: reusedKey, + }, + }); + expect(legacyMatches).toBe(1); + + // (2) A key whose run lives on the new DB is resolved against the new DB. + const concernOnNew = makeConcern(prisma17); + const newSideHit = await concernOnNew.handleTriggerRequest( + makeRequest({ + environmentId: newSide.runtimeEnvironment.id, + organizationId: newSide.organization.id, + projectId: newSide.project.id, + taskId: "my-task", + idempotencyKey: newOnlyKey, + }), + undefined + ); + expect(newSideHit.isCached).toBe(true); + if (newSideHit.isCached === true) { + expect(newSideHit.run.idempotencyKey).toBe(newOnlyKey); + } + + // (3) An unknown key on the legacy env does not wrongly return the + // stale legacy hit for a different key. 
+ const unknown = await concern.handleTriggerRequest( + makeRequest({ + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + projectId: project.id, + taskId: "my-task", + idempotencyKey: "idem-never-seen", + }), + undefined + ); + expect(unknown.isCached).toBe(false); + } + ); + + heteroPostgresTest( + "cleared-status reuse clears the key on the legacy (PG14) DB and proceeds with a fresh trigger", + async ({ prisma14 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "cleared-status" + ); + + const key = "idem-cleared-1"; + + // Existing run is in a failed (cleared) status — the concern must + // clear its key against the legacy authority and return isCached:false. + const legacyRun = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: key, + status: "COMPLETED_WITH_ERRORS", + }); + + const concern = makeConcern(prisma14); + + const result = await concern.handleTriggerRequest( + makeRequest({ + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + projectId: project.id, + taskId: "my-task", + idempotencyKey: key, + }), + undefined + ); + + // A fresh trigger proceeds (not cached). + expect(result.isCached).toBe(false); + + // The clear executed against the legacy (PG14) DB: re-query PG14 and + // assert the key + its expiry are now null on the seeded run. 
+ const cleared = await prisma14.taskRun.findFirst({ where: { id: legacyRun.id } }); + expect(cleared?.idempotencyKey).toBeNull(); + expect(cleared?.idempotencyKeyExpiresAt).toBeNull(); + } + ); + + heteroPostgresTest( + "expired idempotency key is cleared on the legacy (PG14) DB and a fresh trigger proceeds", + async ({ prisma14 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "expired-key" + ); + + const key = "idem-expired-1"; + + const legacyRun = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: key, + status: "EXECUTING", + idempotencyKeyExpiresAt: new Date(Date.now() - 60_000), // already expired + }); + + const concern = makeConcern(prisma14); + + const result = await concern.handleTriggerRequest( + makeRequest({ + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + projectId: project.id, + taskId: "my-task", + idempotencyKey: key, + }), + undefined + ); + + expect(result.isCached).toBe(false); + + const cleared = await prisma14.taskRun.findFirst({ where: { id: legacyRun.id } }); + expect(cleared?.idempotencyKey).toBeNull(); + expect(cleared?.idempotencyKeyExpiresAt).toBeNull(); + } + ); +}); diff --git a/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts new file mode 100644 index 00000000000..a34b7958279 --- /dev/null +++ b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts @@ -0,0 +1,349 @@ +// Real heterogeneous legacy + new Postgres proof for the alert-hydration TaskRun read. +// The DB is never mocked. A test-only RunStore wraps two real PostgresRunStore +// instances and routes findRun by id residency (ksuid → NEW, cuid → LEGACY), +// mirroring the sibling routing suite. The ProjectAlertChannel read must stay control-plane. 
+// +// The alert env-type read (parentEnvironment?.type ?? type) is resolved via the app +// ControlPlaneResolver over a control-plane client DISTINCT from the run-ops store, proving the +// cross-provider inversion. The prior version co-located env + run and masked it. +import { heteroPostgresTest, postgresTest } from "@internal/testcontainers"; +import { PostgresRunStore } from "@internal/run-store"; +import type { ReadClient, RunStore } from "@internal/run-store"; +import type { Prisma, PrismaClient } from "@trigger.dev/database"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { describe, expect, vi } from "vitest"; +import { ControlPlaneCache } from "~/v3/runOpsMigration/controlPlaneCache.server"; +import { ControlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { PerformTaskRunAlertsService } from "~/v3/services/alerts/performTaskRunAlerts.server"; + +function buildControlPlaneResolver(controlPlane: PrismaClient) { + return new ControlPlaneResolver({ + controlPlanePrimary: controlPlane, + controlPlaneReplica: controlPlane, + cache: new ControlPlaneCache({ ttlMs: 60_000, maxEntries: 100 }), + // Split OFF: plain control-plane query every call, byte-identical to the inline join. + splitEnabled: () => false, + }); +} + +vi.setConfig({ testTimeout: 60_000 }); + +// Test-only routing store: resolve findRun by id length (27 → NEW, else LEGACY), +// dropping any forwarded client so each inner store uses its OWN prisma. NOT a mock — +// real DB I/O against two PostgresRunStore instances. +class RoutingRunStore implements RunStore { + readonly #newStore: PostgresRunStore; + readonly #legacyStore: PostgresRunStore; + + constructor(newStore: PostgresRunStore, legacyStore: PostgresRunStore) { + this.#newStore = newStore; + this.#legacyStore = legacyStore; + } + + #resolveById(runId: string): PostgresRunStore { + return runId.length === 27 ?
this.#newStore : this.#legacyStore; + } + + #idFromWhere(where: Prisma.TaskRunWhereInput): string | undefined { + const id = (where as { id?: unknown }).id; + return typeof id === "string" ? id : undefined; + } + + async findRun( + where: Prisma.TaskRunWhereInput, + argsOrClient?: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude } | ReadClient, + _client?: ReadClient + ): Promise<any> { + const id = this.#idFromWhere(where); + if (id !== undefined) { + return (this.#resolveById(id).findRun as any)(where, argsOrClient); + } + const fromNew = await (this.#newStore.findRun as any)(where, argsOrClient); + return fromNew ?? (this.#legacyStore.findRun as any)(where, argsOrClient); + } + + // The remaining RunStore methods are not exercised here; delegate to NEW to satisfy + // the interface. + findRunOrThrow(...a: any[]): any { + return (this.#newStore.findRunOrThrow as any)(...a); + } + findRuns(...a: any[]): any { + return (this.#newStore.findRuns as any)(...a); + } + createRun(p: any, tx?: any): any { + return this.#resolveById(p.data.id).createRun(p, tx); + } + createCancelledRun(p: any, tx?: any): any { + return this.#resolveById(p.data.id).createCancelledRun(p, tx); + } + createFailedRun(p: any, tx?: any): any { + return this.#resolveById(p.data.id).createFailedRun(p, tx); + } + updateMetadata(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).updateMetadata as any)(...[runId, ...a]); + } + startAttempt(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).startAttempt as any)(runId, ...a); + } + completeAttemptSuccess(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).completeAttemptSuccess as any)(runId, ...a); + } + recordRetryOutcome(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).recordRetryOutcome as any)(runId, ...a); + } + requeueRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).requeueRun as any)(runId, ...a); + } +
recordBulkActionMembership(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).recordBulkActionMembership as any)(runId, ...a); + } + cancelRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).cancelRun as any)(runId, ...a); + } + failRunPermanently(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).failRunPermanently as any)(runId, ...a); + } + expireRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).expireRun as any)(runId, ...a); + } + expireRunsBatch(runIds: string[], ...a: any[]): any { + return (this.#resolveById(runIds[0] ?? "").expireRunsBatch as any)(runIds, ...a); + } + lockRunToWorker(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).lockRunToWorker as any)(runId, ...a); + } + parkPendingVersion(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).parkPendingVersion as any)(runId, ...a); + } + promotePendingVersionRuns(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).promotePendingVersionRuns as any)(runId, ...a); + } + suspendForCheckpoint(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).suspendForCheckpoint as any)(runId, ...a); + } + resumeFromCheckpoint(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).resumeFromCheckpoint as any)(runId, ...a); + } + rescheduleRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).rescheduleRun as any)(runId, ...a); + } + enqueueDelayedRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).enqueueDelayedRun as any)(runId, ...a); + } + rewriteDebouncedRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).rewriteDebouncedRun as any)(runId, ...a); + } + clearIdempotencyKey(params: any, tx?: any): any { + const runId = params?.byId?.runId ?? 
""; + return this.#resolveById(runId).clearIdempotencyKey(params, tx); + } + pushTags(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).pushTags as any)(runId, ...a); + } + pushRealtimeStream(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).pushRealtimeStream as any)(runId, ...a); + } +} + +function buildRoutingStore(prisma17: PrismaClient, prisma14: PrismaClient) { + const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + return new RoutingRunStore(newStore, legacyStore); +} + +async function seedProject(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "PRODUCTION", + projectId: project.id, + organizationId: organization.id, + apiKey: `test-${suffix}`, + pkApiKey: `test-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, runtimeEnvironment }; +} + +async function seedRun( + prisma: PrismaClient, + ids: { id: string; friendlyId: string }, + env: { runtimeEnvironmentId: string; projectId: string; organizationId: string } +) { + return prisma.taskRun.create({ + data: { + id: ids.id, + friendlyId: ids.friendlyId, + taskIdentifier: "my-task", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: env.runtimeEnvironmentId, + projectId: env.projectId, + organizationId: env.organizationId, + environmentType: "PRODUCTION", + engine: "V2", + status: 
"COMPLETED_SUCCESSFULLY", + }, + }); +} + +const TASK_RUN_CROSS_SEAM_FKS = [ + "TaskRun_runtimeEnvironmentId_fkey", + "TaskRun_projectId_fkey", + "TaskRun_organizationId_fkey", +] as const; + +async function dropTaskRunCrossSeamFks(prisma: PrismaClient) { + for (const constraint of TASK_RUN_CROSS_SEAM_FKS) { + await prisma.$executeRawUnsafe(`ALTER TABLE "TaskRun" DROP CONSTRAINT IF EXISTS "${constraint}"`); + } +} + +describe("PerformTaskRunAlertsService store routing (hetero)", () => { + heteroPostgresTest( + "Test B: env type resolves via the control-plane resolver (distinct DB) while the run resolves on the run-ops store", + async ({ prisma17, prisma14 }) => { + const id = generateKsuidId(); + const friendlyId = `run_${id}`; + + // Cloud shape: run-ops = the new DB (cross-seam FKs dropped), control-plane = the legacy DB. + // The control-plane ProjectAlert -> run-ops TaskRun FK is also dropped on the control-plane DB. + await dropTaskRunCrossSeamFks(prisma17); + await prisma14.$executeRawUnsafe( + `ALTER TABLE "ProjectAlert" DROP CONSTRAINT IF EXISTS "ProjectAlert_taskRunId_fkey"` + ); + + // Org/project/env + a PARENT env + the alert channel are control-plane → the control-plane DB. + const { project, organization, runtimeEnvironment } = await seedProject(prisma14, "cp"); + // A branch env whose parent type drives the channel filter (parentEnvironmentType ?? type). 
+ const parentEnv = await prisma14.runtimeEnvironment.create({ + data: { + slug: "cp-parent", + type: "PRODUCTION", + projectId: project.id, + organizationId: organization.id, + apiKey: "cp-parent", + pkApiKey: "cp-parent", + shortcode: "cp-parent", + }, + }); + const childEnv = await prisma14.runtimeEnvironment.create({ + data: { + slug: "cp-child", + type: "PREVIEW", + projectId: project.id, + organizationId: organization.id, + apiKey: "cp-child", + pkApiKey: "cp-child", + shortcode: "cp-child", + parentEnvironmentId: parentEnv.id, + }, + }); + + // The run-ops scalar row lives on the run-ops DB, referencing the child (preview) env on the control-plane DB. + await seedRun( + prisma17, + { id, friendlyId }, + { + runtimeEnvironmentId: childEnv.id, + projectId: project.id, + organizationId: organization.id, + } + ); + + // A channel scoped to the PARENT env's type (PRODUCTION). It matches only if the service + // computes parentEnvironmentType ?? type — i.e. the parent's PRODUCTION, not the run env's + // PREVIEW. This proves the resolver's parentEnvironmentType is honoured. + await prisma14.projectAlertChannel.create({ + data: { + friendlyId: `alert_${id}`, + name: "test-channel", + projectId: project.id, + alertTypes: ["TASK_RUN"], + environmentTypes: ["PRODUCTION"], + type: "EMAIL", + properties: { type: "EMAIL", email: "test@example.com" }, + enabled: true, + }, + }); + + // prisma (control-plane channel read) = the control-plane DB; the run-ops read is routed to + // the run-ops DB; the env type is resolved via the resolver over the control-plane client. + const service = new PerformTaskRunAlertsService({ + prisma: prisma14, + runStore: buildRoutingStore(prisma17, prisma14), + controlPlaneResolver: buildControlPlaneResolver(prisma14), + }); + + // The downstream DeliverAlertService.enqueue hits redis (absent here); the projectAlert row + // is created before that, so tolerate the enqueue rejection. 
+ await service.call(id).catch(() => {}); + + // The channel matched on the PARENT env type → a DeliverAlert row was created on the control-plane DB. + const delivered = await prisma14.projectAlert.findMany({ where: { projectId: project.id } }); + expect(delivered.length).toBe(1); + + // Inversion: the run-ops DB holds NO env rows; a co-located join would resolve null. + expect(await prisma17.runtimeEnvironment.count()).toBe(0); + // The run-ops store has the run; the control-plane DB never received it. + expect(await prisma14.taskRun.findFirst({ where: { id } })).toBeNull(); + } + ); +}); + +describe("PerformTaskRunAlertsService passthrough (single-DB)", () => { + postgresTest( + "Test A: with the default store, run read + alert-channel read both resolve on the single DB", + async ({ prisma }) => { + const id = generateKsuidId(); + const friendlyId = `run_${id}`; + + const { project, organization, runtimeEnvironment } = await seedProject(prisma, "pt"); + await seedRun( + prisma, + { id, friendlyId }, + { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + } + ); + await prisma.projectAlertChannel.create({ + data: { + friendlyId: `alert_${id}`, + name: "test-channel", + projectId: project.id, + alertTypes: ["TASK_RUN"], + environmentTypes: ["PRODUCTION"], + type: "EMAIL", + properties: { type: "EMAIL", email: "test@example.com" }, + enabled: true, + }, + }); + + const service = new PerformTaskRunAlertsService({ + prisma, + controlPlaneResolver: buildControlPlaneResolver(prisma), + }); + await service.call(id).catch(() => {}); + + const delivered = await prisma.projectAlert.findMany({ where: { projectId: project.id } }); + expect(delivered.length).toBe(1); + } + ); +}); diff --git a/apps/webapp/test/realtime/runReaderReadThrough.test.ts b/apps/webapp/test/realtime/runReaderReadThrough.test.ts new file mode 100644 index 00000000000..7f53b4c7d56 --- /dev/null +++ 
b/apps/webapp/test/realtime/runReaderReadThrough.test.ts @@ -0,0 +1,493 @@ +import { heteroPostgresTest, postgresTest } from "@internal/testcontainers"; +import { PostgresRunStore } from "@internal/run-store"; +import type { ReadClient, RunStore } from "@internal/run-store"; +import { ownerEngine, type Residency } from "@trigger.dev/core/v3/isomorphic"; +import type { Prisma, PrismaClient } from "@trigger.dev/database"; +import { describe, expect, vi } from "vitest"; +import { RunHydrator } from "~/services/realtime/runReader.server"; + +// Realtime read-route proof for the RunHydrator. +// +// On origin/main the realtime RunHydrator's two run-ops reads already flow through the runStore +// seam: `hydrateByIds` -> `runStore.findRuns(..., replica)` and `#fetch` -> `runStore.findRun(..., +// replica)`. The split-aware routing (new-DB-first, legacy READ REPLICA only for ids not +// known-migrated) is the store's job below the seam, so this file proves the hydrator *inherits* +// that routing — plus that the single-flight + short-TTL cache and the skipColumns projection +// (which live in the hydrator, not the store) are unaffected by the seam. +// +// The heterogeneous fixture gives real legacy + new Postgres containers; NO DB is mocked. The ONLY +// non-DB fake is the residency selector that the routing-shaped store uses (`ownerEngine`: ksuid -> +// NEW, cuid -> LEGACY), exactly the substrate the RoutingRunStore ships. Run ids are 25 chars (cuid +// -> LEGACY) or 27 chars (ksuid -> NEW) so the classifier routes them deterministically. + +// 25-char internal id -> cuid -> LEGACY; 27-char internal id -> ksuid -> NEW. The +// classifier strips a leading `_`, so these ids must carry NO underscore (a bare +// alphanumeric body of the exact length). 
+/** Deterministic 27-character id: "k" + the label's alphanumerics, right-padded with "0". */
+function newId(label: string): string {
+  return ("k" + label.replace(/[^a-z0-9]/gi, "")).padEnd(27, "0").slice(0, 27);
+}
+/** Deterministic 25-character id: "c" + the label's alphanumerics, right-padded with "0". */
+function legacyId(label: string): string {
+  return ("c" + label.replace(/[^a-z0-9]/gi, "")).padEnd(25, "0").slice(0, 25);
+}
+
+/**
+ * Creates an organization, a project under it and a DEVELOPMENT runtime environment under both,
+ * all on the given client. `slugSuffix` is embedded in every slug/key/shortcode the rows carry.
+ */
+async function seedEnvironment(prisma: PrismaClient, slugSuffix: string) {
+  const organization = await prisma.organization.create({
+    data: { title: `Org ${slugSuffix}`, slug: `org-${slugSuffix}` },
+  });
+  const project = await prisma.project.create({
+    data: {
+      name: `Project ${slugSuffix}`,
+      slug: `project-${slugSuffix}`,
+      externalRef: `proj_${slugSuffix}`,
+      organizationId: organization.id,
+    },
+  });
+  const environment = await prisma.runtimeEnvironment.create({
+    data: {
+      type: "DEVELOPMENT",
+      slug: "dev",
+      projectId: project.id,
+      organizationId: organization.id,
+      apiKey: `tr_dev_${slugSuffix}`,
+      pkApiKey: `pk_dev_${slugSuffix}`,
+      shortcode: `short_${slugSuffix}`,
+    },
+  });
+  return { organization, project, environment };
+}
+
+/**
+ * Inserts one PENDING V2 taskRun with the given id. `output`, `metadata` and `error` are only
+ * part of the create payload when the caller supplies them (an explicit `null` IS written);
+ * `payload` and `runTags` fall back to fixed defaults.
+ */
+async function seedRun(
+  prisma: PrismaClient,
+  params: {
+    runId: string;
+    organizationId: string;
+    projectId: string;
+    runtimeEnvironmentId: string;
+    payload?: string;
+    output?: string | null;
+    metadata?: string | null;
+    runTags?: string[];
+    error?: Prisma.InputJsonValue;
+  }
+) {
+  await prisma.taskRun.create({
+    data: {
+      id: params.runId,
+      engine: "V2",
+      status: "PENDING",
+      friendlyId: `run_friendly_${params.runId.slice(0, 8)}`,
+      runtimeEnvironmentId: params.runtimeEnvironmentId,
+      environmentType: "DEVELOPMENT",
+      organizationId: params.organizationId,
+      projectId: params.projectId,
+      taskIdentifier: "my-task",
+      payload: params.payload ?? '{"hello":"world"}',
+      payloadType: "application/json",
+      ...(params.output !== undefined && { output: params.output }),
+      outputType: "application/json",
+      ...(params.metadata !== undefined && { metadata: params.metadata }),
+      ...(params.error !== undefined && { error: params.error }),
+      traceContext: {},
+      traceId: `trace_${params.runId}`,
+      spanId: `span_${params.runId}`,
+      runTags: params.runTags ?? ["alpha", "beta"],
+      queue: "task/my-task",
+      isTest: false,
+      taskEventStore: "taskEvent",
+      depth: 0,
+    },
+  });
+}
+
+/**
+ * A routing-shaped RunStore: routes the single-run `findRun` by residency (the exact substrate
+ * the RoutingRunStore ships) and fans `findRuns` out across NEW + LEGACY, merging by id
+ * (the union/dedup the routing store owns; this hydrator inherits it). For not-known-migrated ids
+ * the read falls back to the LEGACY slot — which is wired over a READ REPLICA handle, never a
+ * writer. Only `findRun`/`findRuns` (the two reads this unit exercises) are implemented; the rest
+ * throw so any accidental call surfaces. The only non-DB fake here is the residency selector.
+ *
+ * By design the router ignores the explicit read `client` and reads off the selected slot's OWN
+ * configured replica, so the hydrator's `replica` arg is dropped here.
+ *
+ * When the same id is returned by both slots in `findRuns`, the NEW row wins (it is written into
+ * the merge map last).
+ */
+function makeRoutingShapedStore(options: {
+  newStore: PostgresRunStore;
+  legacyStore: PostgresRunStore;
+  classify?: (id: string) => Residency;
+}): RunStore {
+  const classify = options.classify ?? ownerEngine;
+  const route = (id: string | undefined): PostgresRunStore => {
+    if (typeof id !== "string") return options.legacyStore;
+    try {
+      return classify(id) === "NEW" ? options.newStore : options.legacyStore;
+    } catch {
+      // Not known-migrated / unclassifiable -> fall back to the LEGACY read replica only.
+      return options.legacyStore;
+    }
+  };
+
+  // Accepts both `{ id: "..." }` and `{ id: { equals: "..." } }`; anything else is unroutable.
+  const idFromWhere = (where: Prisma.TaskRunWhereInput): string | undefined => {
+    const id = where.id;
+    if (typeof id === "string") return id;
+    if (id && typeof id === "object" && "equals" in id && typeof id.equals === "string") {
+      return id.equals;
+    }
+    return undefined;
+  };
+
+  const handler: ProxyHandler<RunStore> = {
+    get(_target, prop) {
+      if (prop === "findRun") {
+        // Drop the explicit `client`: the selected slot reads off its OWN replica.
+        return (where: Prisma.TaskRunWhereInput, args: unknown, _client?: ReadClient) =>
+          (route(idFromWhere(where)).findRun as (...rest: unknown[]) => Promise<unknown>)(
+            where,
+            args
+          );
+      }
+      if (prop === "findRuns") {
+        return async (
+          args: { where: Prisma.TaskRunWhereInput; select: Prisma.TaskRunSelect },
+          _client?: ReadClient
+        ) => {
+          // Fan out across both slots (each on its OWN replica) and merge by id (the routing
+          // store's union/dedup contract).
+          const [fromNew, fromLegacy] = await Promise.all([
+            options.newStore.findRuns(args as never),
+            options.legacyStore.findRuns(args as never),
+          ]);
+          const byId = new Map<string, Record<string, unknown>>();
+          for (const row of [...fromLegacy, ...fromNew] as Record<string, unknown>[]) {
+            byId.set(row.id as string, row);
+          }
+          return [...byId.values()];
+        };
+      }
+      throw new Error(`routing-shaped store: ${String(prop)} not implemented in test`);
+    },
+  };
+
+  return new Proxy({} as RunStore, handler);
+}
+
+describe("RunHydrator read-route through the runStore seam (legacy + new)", () => {
+  // Realtime hydrate pulls run-ops rows from the run-ops replica. A split hydrate returns the
+  // union of NEW + LEGACY-replica rows, byte-identical to source, via both
+  // getRunById and hydrateByIds.
+  heteroPostgresTest(
+    "split hydrate returns the NEW + legacy-replica union, byte-identical",
+    { timeout: 60_000 },
+    async ({ prisma14, prisma17 }) => {
+      const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+      const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 });
+
+      const seed14 = await seedEnvironment(prisma14, "u14");
+      const seed17 = await seedEnvironment(prisma17, "u17");
+      // Both seed envs use the SAME runtimeEnvironmentId so the env-scoped `where` matches across
+      // the two physical DBs (each env row is local to its DB but carries the same id).
+      const envId = seed17.environment.id;
+      await prisma14.runtimeEnvironment.update({
+        where: { id: seed14.environment.id },
+        data: { id: envId },
+      });
+
+      const newRunId = newId("union_new");
+      const legacyRunId = legacyId("union_old");
+
+      // NEW-side run: every optional column populated.
+      await seedRun(prisma17, {
+        runId: newRunId,
+        organizationId: seed17.organization.id,
+        projectId: seed17.project.id,
+        runtimeEnvironmentId: envId,
+        payload: '{"side":"new"}',
+        output: '{"result":42}',
+        metadata: '{"m":1}',
+        runTags: ["new", "z"],
+        error: { type: "BUILT_IN_ERROR", name: "Boom", message: "new-side" },
+      });
+      // LEGACY-side run: output/metadata written as explicit nulls.
+      await seedRun(prisma14, {
+        runId: legacyRunId,
+        organizationId: seed14.organization.id,
+        projectId: seed14.project.id,
+        runtimeEnvironmentId: envId,
+        payload: '{"side":"legacy"}',
+        output: null,
+        metadata: null,
+        runTags: ["legacy", "a"],
+        error: { type: "STRING_ERROR", raw: "legacy-side" },
+      });
+
+      const runStore = makeRoutingShapedStore({ newStore, legacyStore });
+      const hydrator = new RunHydrator({ replica: prisma14, runStore });
+
+      const rows = await hydrator.hydrateByIds(envId, [newRunId, legacyRunId]);
+      expect(rows.map((r) => r.id).sort()).toEqual([legacyRunId, newRunId].sort());
+
+      // The id-set assertion above already proved both rows exist, so the non-null assertions hold.
+      const newRow = rows.find((r) => r.id === newRunId)!;
+      const legacyRow = rows.find((r) => r.id === legacyRunId)!;
+
+      // Byte-identical to source incl. JSON columns, runTags, error JSON.
+      expect(newRow.payload).toBe('{"side":"new"}');
+      expect(newRow.output).toBe('{"result":42}');
+      expect(newRow.metadata).toBe('{"m":1}');
+      expect(newRow.runTags).toEqual(["new", "z"]);
+      expect(newRow.error).toEqual({ type: "BUILT_IN_ERROR", name: "Boom", message: "new-side" });
+
+      expect(legacyRow.payload).toBe('{"side":"legacy"}');
+      expect(legacyRow.output).toBeNull();
+      expect(legacyRow.metadata).toBeNull();
+      expect(legacyRow.runTags).toEqual(["legacy", "a"]);
+      expect(legacyRow.error).toEqual({ type: "STRING_ERROR", raw: "legacy-side" });
+
+      // getRunById resolves each individual run from its correct source through the seam.
+      const newById = await hydrator.getRunById(envId, newRunId);
+      const legacyById = await hydrator.getRunById(envId, legacyRunId);
+      expect(newById?.payload).toBe('{"side":"new"}');
+      expect(legacyById?.payload).toBe('{"side":"legacy"}');
+    }
+  );
+
+  // A known-migrated (NEW-residency) run is NOT re-probed on the legacy replica.
+  heteroPostgresTest(
+    "known-migrated run is never probed on the legacy slot",
+    { timeout: 60_000 },
+    async ({ prisma14, prisma17 }) => {
+      const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+      const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 });
+      // Nothing is seeded on prisma14 in this test; the spy records any findRun on the legacy store.
+      const legacyFindRunSpy = vi.spyOn(legacyStore, "findRun");
+
+      const seed17 = await seedEnvironment(prisma17, "k17");
+      const envId = seed17.environment.id;
+      const migratedRunId = newId("known_mig");
+      await seedRun(prisma17, {
+        runId: migratedRunId,
+        organizationId: seed17.organization.id,
+        projectId: seed17.project.id,
+        runtimeEnvironmentId: envId,
+      });
+
+      const runStore = makeRoutingShapedStore({ newStore, legacyStore });
+      const hydrator = new RunHydrator({ replica: prisma14, runStore });
+
+      const row = await hydrator.getRunById(envId, migratedRunId);
+      expect(row?.id).toBe(migratedRunId);
+      // The NEW-residency id resolved against the NEW slot only — the legacy probe never ran.
+      expect(legacyFindRunSpy).not.toHaveBeenCalled();
+    }
+  );
+
+  // An old in-retention run is served from the LEGACY read replica (never a writer/primary path).
+  heteroPostgresTest(
+    "old in-retention run served from the legacy replica slot",
+    { timeout: 60_000 },
+    async ({ prisma14, prisma17 }) => {
+      const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+      // The LEGACY slot exposes only a read/replica handle: `prisma14` is wired as BOTH prisma and
+      // readOnlyPrisma, and the hydrator is constructed with it as `replica` — the routing-shaped
+      // store then ignores the explicit client argument and reads off the slot's own handle, so
+      // there is no legacy-writer read path on the read route (the replica-only invariant is
+      // structural in the store; asserted here as inheritance).
+      const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 });
+
+      const seed14 = await seedEnvironment(prisma14, "o14");
+      const envId = seed14.environment.id;
+      const oldRunId = legacyId("old_run");
+      await seedRun(prisma14, {
+        runId: oldRunId,
+        organizationId: seed14.organization.id,
+        projectId: seed14.project.id,
+        runtimeEnvironmentId: envId,
+        payload: '{"era":"old"}',
+      });
+
+      const runStore = makeRoutingShapedStore({ newStore, legacyStore });
+      const hydrator = new RunHydrator({ replica: prisma14, runStore });
+
+      // Single-run path.
+      const byId = await hydrator.getRunById(envId, oldRunId);
+      expect(byId?.payload).toBe('{"era":"old"}');
+
+      // Batch path, same run.
+      const [hydrated] = await hydrator.hydrateByIds(envId, [oldRunId]);
+      expect(hydrated.payload).toBe('{"era":"old"}');
+    }
+  );
+
+  // A live-migrated run continues streaming across the seam crossing with no gap.
+  heteroPostgresTest(
+    "live-migrated run continues streaming across the seam crossing",
+    { timeout: 60_000 },
+    async ({ prisma14, prisma17 }) => {
+      const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+      const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 });
+
+      const seed14 = await seedEnvironment(prisma14, "m14");
+      const seed17 = await seedEnvironment(prisma17, "m17");
+      const envId = seed17.environment.id;
+      await prisma14.runtimeEnvironment.update({
+        where: { id: seed14.environment.id },
+        data: { id: envId },
+      });
+
+      // The run starts life on LEGACY; the residency selector classifies it NEW once it migrates.
+      // We model the migration by seeding the same run id on LEGACY first, then on NEW, while
+      // flipping the classifier from LEGACY to NEW for that id at the seam crossing.
+      const runId = legacyId("migrating");
+      await seedRun(prisma14, {
+        runId,
+        organizationId: seed14.organization.id,
+        projectId: seed14.project.id,
+        runtimeEnvironmentId: envId,
+        payload: '{"home":"legacy"}',
+      });
+
+      // `migrated` is read on every classify() call, so flipping it later re-routes this id.
+      let migrated = false;
+      const classify = (id: string): Residency =>
+        id === runId && migrated ? "NEW" : ownerEngine(id);
+      const legacyFindRunSpy = vi.spyOn(legacyStore, "findRun");
+
+      // Use a 0ms TTL so each getRunById re-reads through the seam (no cached stale row across the
+      // crossing). Single-flight/TTL are proven separately below.
+      const runStore = makeRoutingShapedStore({ newStore, legacyStore, classify });
+      const hydrator = new RunHydrator({ replica: prisma14, runStore, cacheTtlMs: 0 });
+
+      // Before migration: served from LEGACY.
+      const before = await hydrator.getRunById(envId, runId);
+      expect(before?.payload).toBe('{"home":"legacy"}');
+      expect(legacyFindRunSpy).toHaveBeenCalled();
+
+      // Migrate: the run now lives on NEW and the classifier routes it NEW.
+      await seedRun(prisma17, {
+        runId,
+        organizationId: seed17.organization.id,
+        projectId: seed17.project.id,
+        runtimeEnvironmentId: envId,
+        payload: '{"home":"new"}',
+      });
+      migrated = true;
+      // Reset the spy's call history so the assertion below only covers post-migration reads.
+      legacyFindRunSpy.mockClear();
+
+      // After migration: served from NEW, with no gap and no legacy re-probe.
+      const after = await hydrator.getRunById(envId, runId);
+      expect(after?.payload).toBe('{"home":"new"}');
+      expect(after?.id).toBe(runId);
+      expect(legacyFindRunSpy).not.toHaveBeenCalled();
+    }
+  );
+});
+
+describe("RunHydrator single-flight + TTL cache intact across the seam", () => {
+  // The cache/single-flight live in the hydrator, independent of the storage seam. Proven in
+  // SPLIT mode here (a counting wrapper over the selected underlying store's read).
+  heteroPostgresTest(
+    "split mode: two concurrent getRunById -> one underlying read; repeat within TTL is cached",
+    { timeout: 60_000 },
+    async ({ prisma14, prisma17 }) => {
+      const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+      const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 });
+      // Count reads on the NEW store, where this 27-char id is expected to route.
+      const newFindRunSpy = vi.spyOn(newStore, "findRun");
+
+      const seed17 = await seedEnvironment(prisma17, "s17");
+      const envId = seed17.environment.id;
+      const runId = newId("cached_run");
+      await seedRun(prisma17, {
+        runId,
+        organizationId: seed17.organization.id,
+        projectId: seed17.project.id,
+        runtimeEnvironmentId: envId,
+      });
+
+      const runStore = makeRoutingShapedStore({ newStore, legacyStore });
+      // A long TTL keeps the entry alive for the whole test, so only caching can explain a
+      // call count that stays at 1.
+      const hydrator = new RunHydrator({ replica: prisma14, runStore, cacheTtlMs: 60_000 });
+
+      // Two concurrent calls -> single-flight collapses to ONE underlying read.
+      const [a, b] = await Promise.all([
+        hydrator.getRunById(envId, runId),
+        hydrator.getRunById(envId, runId),
+      ]);
+      expect(a?.id).toBe(runId);
+      expect(b?.id).toBe(runId);
+      expect(newFindRunSpy).toHaveBeenCalledTimes(1);
+
+      // A third call within the TTL returns the cached value with no new read.
+      const c = await hydrator.getRunById(envId, runId);
+      expect(c?.id).toBe(runId);
+      expect(newFindRunSpy).toHaveBeenCalledTimes(1);
+    }
+  );
+
+  // A cached `null` (missing run) is a valid not-found hit and is not re-read within the TTL.
+  heteroPostgresTest(
+    "split mode: a cached null (missing run) is not re-read within the TTL",
+    { timeout: 60_000 },
+    async ({ prisma14, prisma17 }) => {
+      const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+      const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 });
+      const newFindRunSpy = vi.spyOn(newStore, "findRun");
+
+      // Only the environment is seeded; no taskRun row is created for missingRunId.
+      const seed17 = await seedEnvironment(prisma17, "n17");
+      const envId = seed17.environment.id;
+      const missingRunId = newId("missing_run");
+
+      const runStore = makeRoutingShapedStore({ newStore, legacyStore });
+      const hydrator = new RunHydrator({ replica: prisma14, runStore, cacheTtlMs: 60_000 });
+
+      // First lookup goes to the store and comes back empty.
+      const first = await hydrator.getRunById(envId, missingRunId);
+      expect(first).toBeNull();
+      expect(newFindRunSpy).toHaveBeenCalledTimes(1);
+
+      const second = await hydrator.getRunById(envId, missingRunId);
+      expect(second).toBeNull();
+      // Still one read — the null was cached as a valid "not found" hit.
+      expect(newFindRunSpy).toHaveBeenCalledTimes(1);
+    }
+  );
+});
+
+describe("RunHydrator single-DB passthrough (one PostgresRunStore over one client)", () => {
+  // Passthrough: in single-DB the store is one PostgresRunStore over one client; the hydrator
+  // behaves byte-for-byte as today. No split branch, no legacy slot, no second connection.
+  postgresTest(
+    "single store: getRunById + hydrateByIds read from the one client, cache intact",
+    { timeout: 60_000 },
+    async ({ prisma }) => {
+      // The PostgresRunStore is handed to the hydrator directly — no routing wrapper in this suite.
+      const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma });
+      const findRunSpy = vi.spyOn(store, "findRun");
+
+      const seed = await seedEnvironment(prisma, "sd1");
+      const envId = seed.environment.id;
+      // One 27-char id and one 25-char id, both seeded into the same single database.
+      const runIdA = newId("single_a");
+      const runIdB = legacyId("single_b");
+      for (const runId of [runIdA, runIdB]) {
+        await seedRun(prisma, {
+          runId,
+          organizationId: seed.organization.id,
+          projectId: seed.project.id,
+          runtimeEnvironmentId: envId,
+          payload: `{"id":"${runId}"}`,
+        });
+      }
+
+      const hydrator = new RunHydrator({ replica: prisma, runStore: store, cacheTtlMs: 60_000 });
+
+      // hydrateByIds returns both rows from the single client.
+      const rows = await hydrator.hydrateByIds(envId, [runIdA, runIdB]);
+      expect(rows.map((r) => r.id).sort()).toEqual([runIdA, runIdB].sort());
+
+      // getRunById hydrates from the single store; the cache short-circuits a repeat read.
+      const a1 = await hydrator.getRunById(envId, runIdA);
+      const a2 = await hydrator.getRunById(envId, runIdA);
+      expect(a1?.payload).toBe(`{"id":"${runIdA}"}`);
+      expect(a2?.payload).toBe(`{"id":"${runIdA}"}`);
+      // The spy counts findRun only; the hydrateByIds call above is not part of this count.
+      expect(findRunSpy).toHaveBeenCalledTimes(1);
+    }
+  );
+
+  // Empty id-set short-circuits with no store call.
+  postgresTest("empty id-set returns [] without touching the store", async ({ prisma }) => {
+    const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma });
+    const findRunsSpy = vi.spyOn(store, "findRuns");
+    const hydrator = new RunHydrator({ replica: prisma, runStore: store });
+
+    // "env_none" is never seeded; with an empty id list no store read is expected at all.
+    const rows = await hydrator.hydrateByIds("env_none", []);
+    expect(rows).toEqual([]);
+    expect(findRunsSpy).not.toHaveBeenCalled();
+  });
+});
diff --git a/apps/webapp/test/realtime/streamRegistrationRouting.test.ts b/apps/webapp/test/realtime/streamRegistrationRouting.test.ts
new file mode 100644
index 00000000000..e086b6ce53d
--- /dev/null
+++ b/apps/webapp/test/realtime/streamRegistrationRouting.test.ts
@@ -0,0 +1,240 @@
+import { heteroPostgresTest, redisTest } from "@internal/testcontainers";
+import { PostgresRunStore } from "@internal/run-store";
+import type { PrismaClient } from "@trigger.dev/database";
+import Redis from "ioredis";
+import { describe, expect } from "vitest";
+import { RedisRealtimeStreams } from "~/services/realtime/redisRealtimeStreams.server.js";
+
+// Seeds organization -> project -> runtimeEnvironment -> taskRun on the given prisma client.
+// Mirrors the route's target run: a V2 run with an (optionally completed) lifecycle and an
+// initially-empty realtimeStreams array.
+async function seedRun(
+  prisma: PrismaClient,
+  params: {
+    runId: string;
+    slugSuffix: string;
+    completedAt?: Date;
+  }
+) {
+  const { runId, slugSuffix, completedAt } = params;
+
+  // Parent rows first, in dependency order: organization, then project, then environment.
+  const organization = await prisma.organization.create({
+    data: { title: "Test Organization", slug: `test-organization-${slugSuffix}` },
+  });
+
+  const project = await prisma.project.create({
+    data: {
+      name: "Test Project",
+      slug: `test-project-${slugSuffix}`,
+      externalRef: `proj_${slugSuffix}`,
+      organizationId: organization.id,
+    },
+  });
+
+  const environment = await prisma.runtimeEnvironment.create({
+    data: {
+      type: "DEVELOPMENT",
+      slug: "dev",
+      projectId: project.id,
+      organizationId: organization.id,
+      apiKey: `tr_dev_apikey_${slugSuffix}`,
+      pkApiKey: `pk_dev_apikey_${slugSuffix}`,
+      shortcode: `short_code_${slugSuffix}`,
+    },
+  });
+
+  // `completedAt` only becomes part of the create payload when the caller supplied one.
+  const lifecycle = completedAt === undefined ? {} : { completedAt };
+
+  await prisma.taskRun.create({
+    data: {
+      id: runId,
+      engine: "V2",
+      status: "PENDING",
+      friendlyId: `run_friendly_${slugSuffix}`,
+      runtimeEnvironmentId: environment.id,
+      environmentType: "DEVELOPMENT",
+      organizationId: organization.id,
+      projectId: project.id,
+      taskIdentifier: "my-task",
+      payload: "{}",
+      payloadType: "application/json",
+      traceContext: {},
+      traceId: `trace_${runId}`,
+      spanId: `span_${runId}`,
+      queue: "task/my-task",
+      isTest: false,
+      taskEventStore: "taskEvent",
+      depth: 0,
+      ...lifecycle,
+    },
+  });
+
+  return { organization, project, environment };
+}
+
+// The exact routed sequence performed by realtime.v1.streams.$runId.$target.$streamId(.append) PUT:
+// read the target via the store, then push the streamId iff it is not already present and the run
+// is not completed. Driving this against the store is the routed seam (no engine instance required).
+async function routedRegisterStream(
+  store: PostgresRunStore,
+  client: PrismaClient,
+  runId: string,
+  streamId: string
+): Promise<{ pushed: boolean }> {
+  const run = await store.findRun(
+    { id: runId },
+    {
+      select: {
+        id: true,
+        realtimeStreams: true,
+        realtimeStreamsVersion: true,
+        completedAt: true,
+      },
+    },
+    client
+  );
+
+  if (!run) {
+    throw new Error("Run not found");
+  }
+
+  // Guard 1: a completed run never accepts a new stream (the route answers 400 for this case).
+  if (run.completedAt) {
+    return { pushed: false };
+  }
+
+  // Guard 2: a streamId that is already registered is left alone — no second write.
+  if (run.realtimeStreams.includes(streamId)) {
+    return { pushed: false };
+  }
+
+  await store.pushRealtimeStream(run.id, streamId, client);
+  return { pushed: true };
+}
+
+describe("realtime stream registration — run-ops store routed writes", () => {
+  heteroPostgresTest(
+    "push routes to run-ops store for a run on the new DB",
+    { timeout: 60_000 },
+    async ({ prisma17, prisma14 }) => {
+      // The store under test is wired to the PG17 (new) DB for both reads and writes.
+      const store = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+
+      const runId = "run_routed_push_new_db";
+      await seedRun(prisma17, { runId, slugSuffix: "push17" });
+
+      const streamId = "stream-abc";
+      const outcome = await routedRegisterStream(store, prisma17, runId, streamId);
+
+      expect(outcome.pushed).toBe(true);
+
+      // The streamId is now present on the PG17 row.
+      const newDbRow = await prisma17.taskRun.findFirst({
+        where: { id: runId },
+        select: { realtimeStreams: true },
+      });
+      expect(newDbRow?.realtimeStreams).toContain(streamId);
+
+      // ...and no row on the legacy (PG14) DB carries that streamId.
+      const legacyDbRow = await prisma14.taskRun.findFirst({
+        where: { realtimeStreams: { has: streamId } },
+        select: { id: true },
+      });
+      expect(legacyDbRow).toBeNull();
+    }
+  );
+
+  heteroPostgresTest(
+    "idempotent — already-registered streamId issues no second write",
+    { timeout: 60_000 },
+    async ({ prisma17 }) => {
+      const store = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+
+      const runId = "run_routed_push_idempotent";
+      await seedRun(prisma17, { runId, slugSuffix: "idem17" });
+
+      const streamId = "stream-once";
+
+      const firstAttempt = await routedRegisterStream(store, prisma17, runId, streamId);
+      expect(firstAttempt.pushed).toBe(true);
+
+      const secondAttempt = await routedRegisterStream(store, prisma17, runId, streamId);
+      // The already-registered guard short-circuited the second push.
+      expect(secondAttempt.pushed).toBe(false);
+
+      const stored = await prisma17.taskRun.findFirst({
+        where: { id: runId },
+        select: { realtimeStreams: true },
+      });
+      // One entry only — the second attempt appended nothing.
+      expect(stored?.realtimeStreams).toEqual([streamId]);
+      expect(stored?.realtimeStreams).toHaveLength(1);
+    }
+  );
+
+  heteroPostgresTest(
+    "completed run guard issues no push",
+    { timeout: 60_000 },
+    async ({ prisma17 }) => {
+      const store = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+
+      const runId = "run_routed_push_completed";
+      await seedRun(prisma17, {
+        runId,
+        slugSuffix: "completed17",
+        completedAt: new Date("2026-06-01T00:00:00.000Z"),
+      });
+
+      const streamId = "stream-late";
+      const result = await routedRegisterStream(store, prisma17, runId, streamId);
+
+      // The completedAt guard blocks the push (route returns 400).
+      expect(result.pushed).toBe(false);
+
+      const row = await prisma17.taskRun.findFirst({
+        where: { id: runId },
+        select: { realtimeStreams: true },
+      });
+      expect(row?.realtimeStreams).toEqual([]);
+    }
+  );
+
+  redisTest(
+    "chunks flow — stream attaches and chunks are ingested",
+    { timeout: 30_000 },
+    async ({ redisOptions }) => {
+      const redis = new Redis(redisOptions);
+      const streams = new RedisRealtimeStreams({ redis: redisOptions });
+
+      const runId = "run_chunks_flow";
+      const streamId = "registered-stream";
+      const streamKey = `stream:${runId}:${streamId}`;
+
+      // Everything after the connection is opened runs inside try/finally so a failed assertion
+      // still deletes the stream key and closes the connection instead of leaking both.
+      try {
+        const chunks = [
+          JSON.stringify({ chunk: 0, data: "chunk 0" }),
+          JSON.stringify({ chunk: 1, data: "chunk 1" }),
+          JSON.stringify({ chunk: 2, data: "chunk 2" }),
+        ];
+
+        // Newline-delimited JSON, one chunk per enqueue.
+        const encoder = new TextEncoder();
+        const stream = new ReadableStream({
+          start(controller) {
+            for (const chunk of chunks) {
+              controller.enqueue(encoder.encode(chunk + "\n"));
+            }
+            controller.close();
+          },
+        });
+
+        const response = await streams.ingestData(stream, runId, streamId, "default");
+        expect(response.status).toBe(200);
+
+        const entries = await redis.xrange(streamKey, "-", "+");
+        expect(entries.length).toBe(3);
+
+        const lastChunkIndex = await streams.getLastChunkIndex(runId, streamId, "default");
+        expect(lastChunkIndex).toBe(2);
+      } finally {
+        try {
+          await redis.del(streamKey);
+        } finally {
+          // Close the connection even if the key cleanup itself failed.
+          await redis.quit();
+        }
+      }
+    }
+  );
+});
diff --git a/apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts b/apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts
new file mode 100644
index 00000000000..36e4c3f9e34
--- /dev/null
+++ b/apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts
@@ -0,0 +1,220 @@
+import { heteroPostgresTest } from "@internal/testcontainers";
+import type { PrismaClient } from "@trigger.dev/database";
+import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic";
+import { describe, expect, vi } from "vitest";
+
+// Stub these so the default singletons don't eagerly connect at import. The
+// reset service passes its `_prisma` arg as the explicit tx to every store
+// call, so the singleton handles are never exercised — the passed PG14 client
+// runs the query. The DB under test is the real PG14 + PG17 hetero fixture.
+vi.mock("~/db.server", () => ({ prisma: {}, $replica: {} }));
+vi.mock("~/v3/runEngine.server", () => ({ engine: {} }));
+
+// `getMollifierBuffer()` returns whatever `bufferMock.current` holds at call time. It is null by
+// default, so the PG clear path runs with no Redis surface; a test that needs the buffer path
+// assigns a stub to `bufferMock.current` and must reset it to null afterwards.
+const bufferMock: { current: unknown } = { current: null };
+vi.mock("~/v3/mollifier/mollifierBuffer.server", () => ({
+  getMollifierBuffer: () => bufferMock.current,
+}));
+
+import { PostgresRunStore } from "@internal/run-store";
+import { ResetIdempotencyKeyService } from "~/v3/services/resetIdempotencyKey.server";
+import type { AuthenticatedEnvironment } from "~/services/apiAuth.server";
+
+vi.setConfig({ testTimeout: 60_000 });
+
+// Builds the service with the SAME client in every slot: both constructor prisma args and the
+// run store's prisma/readOnlyPrisma all point at `legacyPrisma`.
+function makeService(legacyPrisma: PrismaClient) {
+  const legacyStore = new PostgresRunStore({
+    prisma: legacyPrisma,
+    readOnlyPrisma: legacyPrisma,
+  });
+  return new ResetIdempotencyKeyService(legacyPrisma as never, legacyPrisma as never, legacyStore);
+}
+
+// Minimal stand-in for an authenticated environment: only `id` and `organizationId` are set.
+function makeEnv(opts: { id: string; organizationId: string }): AuthenticatedEnvironment {
+  return { id: opts.id, organizationId: opts.organizationId } as unknown as AuthenticatedEnvironment;
+}
+
+// Seeds organization -> project -> DEVELOPMENT runtimeEnvironment; every slug/key is `test-<suffix>`.
+async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) {
+  const organization = await prisma.organization.create({
+    data: { title: `test-${suffix}`, slug: `test-${suffix}` },
+  });
+  const project = await prisma.project.create({
+    data: {
+      name: `test-${suffix}`,
+      slug: `test-${suffix}`,
+      organizationId: organization.id,
+      externalRef: `test-${suffix}`,
+    },
+  });
+  const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+    data: {
+      slug: `test-${suffix}`,
+      type: "DEVELOPMENT",
+      projectId: project.id,
+      organizationId: organization.id,
+      apiKey: `test-${suffix}`,
+      pkApiKey: `test-${suffix}`,
+      shortcode: `test-${suffix}`,
+    },
+  });
+  return { organization, project, runtimeEnvironment };
+}
+
+// Inserts one V2 taskRun carrying the given idempotency key. The run id comes from
+// generateKsuidId(); status defaults to "EXECUTING" and the key expiry to null.
+async function seedRun(
+  prisma: PrismaClient,
+  args: {
+    runtimeEnvironmentId: string;
+    projectId: string;
+    organizationId: string;
+    taskIdentifier: string;
+    idempotencyKey: string;
+    status?: "PENDING" | "EXECUTING" | "COMPLETED_SUCCESSFULLY" | "COMPLETED_WITH_ERRORS";
+    idempotencyKeyExpiresAt?: Date;
+  }
+) {
+  const runId = generateKsuidId();
+  return prisma.taskRun.create({
+    data: {
+      id: runId,
+      friendlyId: `run_${runId}`,
+      taskIdentifier: args.taskIdentifier,
+      idempotencyKey: args.idempotencyKey,
+      idempotencyKeyExpiresAt: args.idempotencyKeyExpiresAt ?? null,
+      status: args.status ?? "EXECUTING",
+      payload: JSON.stringify({ foo: "bar" }),
+      payloadType: "application/json",
+      traceId: "1234",
+      spanId: "1234",
+      queue: "test",
+      runtimeEnvironmentId: args.runtimeEnvironmentId,
+      projectId: args.projectId,
+      organizationId: args.organizationId,
+      environmentType: "DEVELOPMENT",
+      engine: "V2",
+    },
+  });
+}
+
+describe("ResetIdempotencyKeyService · legacy-authority pin (cross-DB)", () => {
+  heteroPostgresTest(
+    "clears the key on the legacy (PG14) authority only; a PG17-only same-key row is untouched, and reuse-after-reset finds no row",
+    async ({ prisma14, prisma17 }) => {
+      const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv(
+        prisma14,
+        "reset-legacy"
+      );
+      const newSide = await seedOrgProjectEnv(prisma17, "reset-new-side");
+
+      const key = "idem-reset-1";
+      const taskIdentifier = "my-task";
+
+      const legacyRun = await seedRun(prisma14, {
+        runtimeEnvironmentId: runtimeEnvironment.id,
+        projectId: project.id,
+        organizationId: organization.id,
+        taskIdentifier,
+        idempotencyKey: key,
+        status: "EXECUTING",
+        idempotencyKeyExpiresAt: new Date(Date.now() + 60_000),
+      });
+
+      // Same (task, key) tuple planted ONLY on PG17 — a legacy-pinned reset
+      // must not leak to it.
+      const newRun = await seedRun(prisma17, {
+        runtimeEnvironmentId: newSide.runtimeEnvironment.id,
+        projectId: newSide.project.id,
+        organizationId: newSide.organization.id,
+        taskIdentifier,
+        idempotencyKey: key,
+        status: "EXECUTING",
+        idempotencyKeyExpiresAt: new Date(Date.now() + 60_000),
+      });
+
+      const service = makeService(prisma14);
+
+      const result = await service.call(
+        key,
+        taskIdentifier,
+        makeEnv({ id: runtimeEnvironment.id, organizationId: organization.id })
+      );
+
+      expect(result).toEqual({ id: key });
+
+      // Cleared on legacy; run otherwise intact (not deleted).
+      const clearedLegacy = await prisma14.taskRun.findFirst({ where: { id: legacyRun.id } });
+      expect(clearedLegacy).not.toBeNull();
+      expect(clearedLegacy?.idempotencyKey).toBeNull();
+      expect(clearedLegacy?.idempotencyKeyExpiresAt).toBeNull();
+      expect(clearedLegacy?.id).toBe(legacyRun.id);
+      expect(clearedLegacy?.status).toBe("EXECUTING");
+
+      // PG17-only row untouched — no leak to the wrong DB.
+      const untouchedNew = await prisma17.taskRun.findFirst({ where: { id: newRun.id } });
+      expect(untouchedNew?.idempotencyKey).toBe(key);
+      expect(untouchedNew?.idempotencyKeyExpiresAt).not.toBeNull();
+
+      // Reuse-after-reset: no row resolves on legacy → a fresh run would mint.
+      const reusable = await prisma14.taskRun.findFirst({
+        where: {
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          taskIdentifier,
+          idempotencyKey: key,
+        },
+      });
+      expect(reusable).toBeNull();
+    }
+  );
+
+  heteroPostgresTest(
+    "handoff re-check (totalCount === 0 branch) clears a row that materialises on the legacy (PG14) authority after the initial clear",
+    async ({ prisma14 }) => {
+      const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv(
+        prisma14,
+        "reset-handoff"
+      );
+
+      const key = "idem-handoff-1";
+      const taskIdentifier = "my-task";
+
+      // Model the PG↔buffer race: initial clear sees no row (count 0), buffer
+      // reports no cleared run (totalCount 0), then the run materialises on
+      // legacy mid-call (drainer's engine.trigger) before the handoff re-check.
+      const resetIdempotency = vi.fn(async () => {
+        await seedRun(prisma14, {
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          projectId: project.id,
+          organizationId: organization.id,
+          taskIdentifier,
+          idempotencyKey: key,
+          status: "EXECUTING",
+        });
+        return { clearedRunId: null as string | null };
+      });
+      bufferMock.current = { resetIdempotency };
+
+      // The stub is module-level state shared by every test in this file, so it is removed in
+      // `finally`: a failing assertion must not leave it installed for later tests.
+      try {
+        const service = makeService(prisma14);
+
+        const result = await service.call(
+          key,
+          taskIdentifier,
+          makeEnv({ id: runtimeEnvironment.id, organizationId: organization.id })
+        );
+
+        // The row only materialises inside the stub; if the buffer was never consulted the
+        // assertions below would pass without the handoff re-check having run at all.
+        expect(resetIdempotency).toHaveBeenCalled();
+
+        // Handoff re-check cleared the materialised row on legacy → success.
+        expect(result).toEqual({ id: key });
+
+        const reusable = await prisma14.taskRun.findFirst({
+          where: {
+            runtimeEnvironmentId: runtimeEnvironment.id,
+            taskIdentifier,
+            idempotencyKey: key,
+          },
+        });
+        expect(reusable).toBeNull();
+      } finally {
+        bufferMock.current = null;
+      }
+    }
+  );
+});
diff --git a/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts
new file mode 100644
index 00000000000..d92497953c4
--- /dev/null
+++ b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts
@@ -0,0 +1,216 @@
+import { heteroRunOpsPostgresTest, postgresTest } from "@internal/testcontainers";
+import type { RunOpsPrismaClient } from "@internal/run-ops-database";
+import type { PrismaClient } from "@trigger.dev/database";
+import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic";
+import { describe, expect, vi } from "vitest";
+import type { PrismaReplicaClient } from "~/db.server";
+import { resolveWaitpointThroughReadThrough } from "~/runEngine/concerns/resolveWaitpointThroughReadThrough.server";
+
+vi.setConfig({ testTimeout: 60_000 });
+
+// 25-char cuid (length-disjoint from the 27-char KSUID) -> LEGACY residency.
+function generateLegacyCuid() { + const suffix = Array.from( + { length: 24 }, + () => "0123456789abcdefghijklmnopqrstuvwxyz"[Math.floor(Math.random() * 36)] + ).join(""); + return `c${suffix}`; +} + +function recording( + client: PrismaClient | RunOpsPrismaClient, + opts: { forbidden?: boolean } = {} +) { + const calls: unknown[] = []; + const waitpoint = { + findFirst: (args: unknown) => { + calls.push(args); + if (opts.forbidden) { + throw new Error("this store must never be read"); + } + return (client as unknown as PrismaReplicaClient).waitpoint.findFirst(args as never); + }, + }; + return { handle: { ...client, waitpoint } as unknown as PrismaReplicaClient, calls }; +} + +async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const environment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "PRODUCTION", + projectId: project.id, + organizationId: organization.id, + apiKey: `apikey-${suffix}`, + pkApiKey: `pk-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, environment }; +} + +async function seedWaitpoint( + prisma: PrismaClient | RunOpsPrismaClient, + id: string, + env: { id: string; projectId: string } +) { + return prisma.waitpoint.create({ + data: { + id, + friendlyId: `waitpoint_${id}`, + type: "MANUAL", + status: "PENDING", + idempotencyKey: `idem-${id}`, + userProvidedIdempotencyKey: false, + projectId: env.projectId, + environmentId: env.id, + }, + }); +} + +const read = (waitpointId: string, environmentId: string) => (client: PrismaReplicaClient) => + client.waitpoint.findFirst({ + where: { id: waitpointId, environmentId }, + select: { id: true, 
status: true, projectId: true, environmentId: true }, + }); + +describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run-ops PG17)", () => { + heteroRunOpsPostgresTest( + "ksuid waitpoint resolves on the dedicated run-ops client; legacy replica never touched", + async ({ prisma17, prisma14 }) => { + const id = generateKsuidId(); + expect(id.length).toBe(27); + + // [TEST-NEWSEED] The dedicated run-ops DB has no control-plane tables; the waitpoint's + // environment/project FKs are synthetic scalar ids. + const environmentId = generateKsuidId(); + const projectId = generateKsuidId(); + const seeded = await seedWaitpoint(prisma17, id, { id: environmentId, projectId }); + + const newClient = recording(prisma17); + const legacy = recording(prisma14, { forbidden: true }); + + const result = await resolveWaitpointThroughReadThrough({ + waitpointId: id, + environmentId, + read: read(id, environmentId), + deps: { + splitEnabled: true, + newClient: newClient.handle, + legacyReplica: legacy.handle, + isKnownMigrated: async () => false, + }, + }); + + expect(result).not.toBeNull(); + expect(result!.id).toBe(seeded.id); + expect(result!.projectId).toBe(projectId); + expect(result!.environmentId).toBe(environmentId); + expect(newClient.calls.length).toBe(1); + expect(legacy.calls.length).toBe(0); + } + ); + + heteroRunOpsPostgresTest( + "cuid waitpoint resolves off the LEGACY replica (new probed first, miss)", + async ({ prisma17, prisma14 }) => { + const id = generateLegacyCuid(); + expect(id.length).toBe(25); + + const { project, environment } = await seedOrgProjectEnv(prisma14, "legacy"); + const seeded = await seedWaitpoint(prisma14, id, { + id: environment.id, + projectId: project.id, + }); + + const newClient = recording(prisma17); + const legacy = recording(prisma14); + + const result = await resolveWaitpointThroughReadThrough({ + waitpointId: id, + environmentId: environment.id, + read: read(id, environment.id), + deps: { + splitEnabled: true, + 
newClient: newClient.handle, + legacyReplica: legacy.handle, + isKnownMigrated: async () => false, + }, + }); + + expect(result).not.toBeNull(); + expect(result!.id).toBe(seeded.id); + expect(newClient.calls.length).toBe(1); + expect(legacy.calls.length).toBe(1); + } + ); + + heteroRunOpsPostgresTest( + "not-found maps to null (no throw)", + async ({ prisma17, prisma14 }) => { + const id = generateLegacyCuid(); + const { environment } = await seedOrgProjectEnv(prisma14, "nf"); + + const result = await resolveWaitpointThroughReadThrough({ + waitpointId: id, + environmentId: environment.id, + read: read(id, environment.id), + deps: { + splitEnabled: true, + newClient: recording(prisma17).handle, + legacyReplica: recording(prisma14).handle, + isKnownMigrated: async () => false, + }, + }); + + expect(result).toBeNull(); + } + ); + + postgresTest( + "passthrough (single-DB): one plain read; legacy + isKnownMigrated never invoked", + async ({ prisma }) => { + const id = generateKsuidId(); + const { project, environment } = await seedOrgProjectEnv(prisma, "pt"); + const seeded = await seedWaitpoint(prisma, id, { + id: environment.id, + projectId: project.id, + }); + + const single = recording(prisma); + const legacy = recording(prisma, { forbidden: true }); + let knownMigratedInvoked = false; + + const result = await resolveWaitpointThroughReadThrough({ + waitpointId: id, + environmentId: environment.id, + read: read(id, environment.id), + deps: { + newClient: single.handle, + legacyReplica: legacy.handle, + isKnownMigrated: async () => { + knownMigratedInvoked = true; + return false; + }, + }, + }); + + expect(result).not.toBeNull(); + expect(result!.id).toBe(seeded.id); + expect(single.calls.length).toBe(1); + expect(legacy.calls.length).toBe(0); + expect(knownMigratedInvoked).toBe(false); + } + ); +}); diff --git a/apps/webapp/test/runEngineBatchTriggerStoreRouting.test.ts b/apps/webapp/test/runEngineBatchTriggerStoreRouting.test.ts new file mode 100644 index 
00000000000..0e0f0841f20 --- /dev/null +++ b/apps/webapp/test/runEngineBatchTriggerStoreRouting.test.ts @@ -0,0 +1,172 @@ +import { describe, expect, vi } from "vitest"; + +// Redirect the module-level db client to the per-test container prisma so the worker-path +// env resolution (`findEnvironmentById`/`controlPlaneResolver`, which read `~/db.server`) +// hits the real container DB. The DB itself is never mocked — only the module binding is +// pointed at the container client created by the fixture. +const dbHolder = vi.hoisted(() => ({ prisma: undefined as any })); +vi.mock("~/db.server", () => ({ + get prisma() { + return dbHolder.prisma; + }, + get $replica() { + return dbHolder.prisma; + }, +})); + +import { RunEngine } from "@internal/run-engine"; +import { setupAuthenticatedEnvironment } from "@internal/run-engine/tests"; +import { PostgresRunStore, RoutingRunStore } from "@internal/run-store"; +import { containerTestWithIsolatedRedisNoClickhouse as containerTest } from "@internal/testcontainers"; +import { trace } from "@opentelemetry/api"; +import { BatchId } from "@trigger.dev/core/v3/isomorphic"; +import type { PrismaClient } from "@trigger.dev/database"; +import { RunEngineBatchTriggerService } from "../app/runEngine/services/batchTrigger.server"; + +vi.setConfig({ testTimeout: 120_000 }); + +function buildEngine(prisma: PrismaClient, redisOptions: any, store?: RoutingRunStore) { + return new RunEngine({ + prisma, + ...(store ? 
{ store } : {}), + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + disabled: true, + }, + queue: { redis: redisOptions }, + runLock: { redis: redisOptions }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { name: "small-1x" as const, cpu: 0.5, memory: 0.5, centsPerMs: 0.0001 }, + }, + baseCostInCents: 0.0005, + }, + batchQueue: { redis: redisOptions }, + tracer: trace.getTracer("test", "0.0.0"), + }); +} + +function batchCreateData(params: { + id: string; + friendlyId: string; + runtimeEnvironmentId: string; + runCount: number; + payload: string; +}) { + return { + id: params.id, + friendlyId: params.friendlyId, + runtimeEnvironmentId: params.runtimeEnvironmentId, + runCount: params.runCount, + runIds: [] as string[], + payload: params.payload, + payloadType: "application/json", + options: {}, + batchVersion: "runengine:v1", + }; +} + +describe("RunEngineBatchTriggerService store routing", () => { + // The service issues BatchTaskRun create/find/update through `this._engine.runStore`. + // With an injected RoutingRunStore whose NEW slot is a PostgresRunStore, those calls + // land on the run-ops store (born on NEW), not on a separate `this._prisma` path. + containerTest( + "create/find/update route through the injected run-ops store", + async ({ prisma, redisOptions }) => { + dbHolder.prisma = prisma; + const runStore = new RoutingRunStore({ + new: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), + legacy: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), + }); + const engine = buildEngine(prisma, redisOptions, runStore); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new RunEngineBatchTriggerService("sequential", prisma, engine); + + // The service holds the injected routing store. + expect(service["_engine"].runStore).toBe(runStore); + + // (create) Born on the run-ops store and present in the DB. 
+ const { id, friendlyId } = BatchId.generate(); + const created = await service["_engine"].runStore.createBatchTaskRun( + batchCreateData({ + id, + friendlyId, + runtimeEnvironmentId: authenticatedEnvironment.id, + runCount: 1, + payload: "[]", + }) + ); + expect(created.id).toBe(id); + expect(await prisma.batchTaskRun.findUnique({ where: { id } })).not.toBeNull(); + + // (find + update) Drive the worker entrypoint with an empty payload so no child runs + // are triggered: the path exercises findBatchTaskRunById -> findEnvironmentById -> + // inline-payload parse -> updateBatchTaskRun, all through the store. + await service.processBatchTaskRun({ + batchId: id, + processingId: "0", + range: { start: 0, count: 50 }, + attemptCount: 0, + strategy: "sequential", + }); + + // The update routed through the store ran (processingJobsCount incremented by the 0 + // processed items; runIds untouched). The row is the one written to the run-ops DB. + const after = await prisma.batchTaskRun.findUnique({ where: { id } }); + expect(after).not.toBeNull(); + expect(after!.processingJobsCount).toBe(0); + expect(after!.runIds).toEqual([]); + + await engine.quit(); + } + ); + + // Single-DB passthrough (self-host collapse): with no `store` injected, the engine + // defaults to a PostgresRunStore over the one client, byte-identical to pre-routing. + containerTest( + "single-DB passthrough uses the default PostgresRunStore", + async ({ prisma, redisOptions }) => { + dbHolder.prisma = prisma; + const engine = buildEngine(prisma, redisOptions); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + + const service = new RunEngineBatchTriggerService("sequential", prisma, engine); + + // The default store is a plain PostgresRunStore (no RoutingRunStore, no second client). 
+ expect(service["_engine"].runStore).toBeInstanceOf(PostgresRunStore); + expect(service["_engine"].runStore).not.toBeInstanceOf(RoutingRunStore); + + const { id, friendlyId } = BatchId.generate(); + await service["_engine"].runStore.createBatchTaskRun( + batchCreateData({ + id, + friendlyId, + runtimeEnvironmentId: authenticatedEnvironment.id, + runCount: 1, + payload: "[]", + }) + ); + + await service.processBatchTaskRun({ + batchId: id, + processingId: "0", + range: { start: 0, count: 50 }, + attemptCount: 0, + strategy: "sequential", + }); + + const after = await prisma.batchTaskRun.findUnique({ where: { id } }); + expect(after).not.toBeNull(); + expect(after!.processingJobsCount).toBe(0); + + await engine.quit(); + } + ); +}); diff --git a/apps/webapp/test/runsRepository.readthrough.test.ts b/apps/webapp/test/runsRepository.readthrough.test.ts new file mode 100644 index 00000000000..706600db8fd --- /dev/null +++ b/apps/webapp/test/runsRepository.readthrough.test.ts @@ -0,0 +1,435 @@ +import { describe, expect, vi } from "vitest"; + +// The runsRepository module graph imports `~/v3/runStore.server`, which imports `~/db.server` +// at load. Stub it (the existing runsRepository.part*.test.ts do the same) — the repo under test +// is driven entirely through injected real containers, never the stubbed module singletons. 
+vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +import { PostgresRunStore } from "@internal/run-store"; +import { createPostgresContainer, replicationContainerTest } from "@internal/testcontainers"; +import { PrismaClient } from "@trigger.dev/database"; +import { setTimeout } from "node:timers/promises"; +import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; +import { setupClickhouseReplication } from "./utils/replicationUtils"; + +vi.setConfig({ testTimeout: 90_000 }); + +type SeedContext = { + organizationId: string; + projectId: string; + environmentId: string; +}; + +/** + * Creates the org/project/env parents on a single prisma client. TaskRun FKs require + * these to exist on every DB a run is hydrated from, so we seed identical parents + * (same ids) on both the legacy (PG14) and new (PG17) databases. + */ +async function seedParents(prisma: PrismaClient, slug: string): Promise<SeedContext> { + const organization = await prisma.organization.create({ + data: { title: `org-${slug}`, slug: `org-${slug}` }, + }); + const project = await prisma.project.create({ + data: { + name: `proj-${slug}`, + slug: `proj-${slug}`, + organizationId: organization.id, + externalRef: `proj-${slug}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `env-${slug}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `tr_dev_${slug}`, + pkApiKey: `pk_dev_${slug}`, + shortcode: `sc-${slug}`, + }, + }); + + return { + organizationId: organization.id, + projectId: project.id, + environmentId: runtimeEnvironment.id, + }; +} + +/** Mirrors the org/project/env parents onto a second DB with the SAME ids.
*/ +async function mirrorParents(prisma: PrismaClient, ctx: SeedContext, slug: string): Promise<void> { + await prisma.organization.create({ + data: { id: ctx.organizationId, title: `org-${slug}`, slug: `org-${slug}` }, + }); + await prisma.project.create({ + data: { + id: ctx.projectId, + name: `proj-${slug}`, + slug: `proj-${slug}`, + organizationId: ctx.organizationId, + externalRef: `proj-${slug}`, + }, + }); + await prisma.runtimeEnvironment.create({ + data: { + id: ctx.environmentId, + slug: `env-${slug}`, + type: "DEVELOPMENT", + projectId: ctx.projectId, + organizationId: ctx.organizationId, + apiKey: `tr_dev_${slug}_b`, + pkApiKey: `pk_dev_${slug}_b`, + shortcode: `sc-${slug}-b`, + }, + }); +} + +async function createRun( + prisma: PrismaClient, + ctx: SeedContext, + run: { + friendlyId: string; + taskIdentifier?: string; + status?: any; + runTags?: string[]; + createdAt?: Date; + } +) { + return prisma.taskRun.create({ + data: { + friendlyId: run.friendlyId, + taskIdentifier: run.taskIdentifier ?? "my-task", + status: run.status ?? "PENDING", + payload: JSON.stringify({ foo: run.friendlyId }), + traceId: run.friendlyId, + spanId: run.friendlyId, + queue: "test", + runTags: run.runTags ?? [], + runtimeEnvironmentId: ctx.environmentId, + projectId: ctx.projectId, + organizationId: ctx.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + ...(run.createdAt ?
{ createdAt: run.createdAt } : {}), + }, + }); +} + +const neverCalled = + (label: string) => + async (id: string): Promise<boolean> => { + throw new Error(`${label} must not be invoked (called with ${id})`); + }; + +describe("RunsRepository read-through id-set hydrate (PG14 legacy + PG17 new)", () => { + // --- DoD line + e2e #6: split fan-out across new + legacy-replica with known-migrated filter --- + replicationContainerTest( + "split mode hydrates the CH id-set as the union of NEW + legacy-replica rows, byte-identical and id-desc ordered", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma, network }) => { + // The fixture's PG14 container is the LEGACY read replica AND the replication source that + // feeds the ClickHouse id-set. The dedicated PG17 container is the NEW run-ops DB. + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const { url: newUrl } = await createPostgresContainer(network, { + imageTag: "docker.io/postgres:17", + }); + const prismaNew = new PrismaClient({ datasources: { db: { url: newUrl } } }); + + try { + const ctx = await seedParents(prisma, "split1"); + await mirrorParents(prismaNew, ctx, "split1"); + + // Seed all four runs on PG14 (legacy + replication source -> CH gets the full id-set). + const legacyOnlyA = await createRun(prisma, ctx, { friendlyId: "run_legacyA" }); + const legacyOnlyB = await createRun(prisma, ctx, { friendlyId: "run_legacyB" }); + const migratedA = await createRun(prisma, ctx, { friendlyId: "run_newA" }); + const migratedB = await createRun(prisma, ctx, { friendlyId: "run_newB" }); + + // The two "migrated" runs ALSO live on the NEW DB (authoritative during retention). + // Same ids so set-membership and ordering line up with the CH id-set.
+ await createRun(prismaNew, { ...ctx }, { friendlyId: "run_newA" }); + await createRun(prismaNew, { ...ctx }, { friendlyId: "run_newB" }); + // Force the NEW rows to share the legacy ids exactly. + await prismaNew.taskRun.update({ + where: { friendlyId: "run_newA" }, + data: { id: migratedA.id }, + }); + await prismaNew.taskRun.update({ + where: { friendlyId: "run_newB" }, + data: { id: migratedB.id }, + }); + + await setTimeout(1500); + + const runsRepository = new RunsRepository({ + prisma, // single-DB default handle (unused on the split path here) + clickhouse, + runStore: new PostgresRunStore({ prisma: prismaNew, readOnlyPrisma: prismaNew }), + readThrough: { + splitEnabled: true, + newClient: prismaNew, + legacyReplica: prisma, + // legacy-only ids are NOT known-migrated -> the legacy replica IS probed for them. + isKnownMigrated: async () => false, + }, + }); + + const { runs } = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + + // Union of all four, id-desc ordered. + const expectedIds = [migratedA.id, migratedB.id, legacyOnlyA.id, legacyOnlyB.id].sort( + (a, b) => (a < b ? 1 : a > b ? -1 : 0) + ); + expect(runs.map((r) => r.id)).toEqual(expectedIds); + + // Byte-identity for a NEW-served row (from PG17) and a legacy-served row (from PG14). + const newRow = runs.find((r) => r.id === migratedA.id)!; + expect(newRow.friendlyId).toBe("run_newA"); + expect(newRow.taskIdentifier).toBe("my-task"); + const legacyRow = runs.find((r) => r.id === legacyOnlyA.id)!; + expect(legacyRow.friendlyId).toBe("run_legacyA"); + + // Order parity with single-DB: a pure id-desc sort of the same ids. + expect(runs.map((r) => r.id)).toEqual( + [...runs.map((r) => r.id)].sort((a, b) => (a < b ? 1 : a > b ? 
-1 : 0)) + ); + } finally { + await prismaNew.$disconnect(); + } + } + ); + + // --- Known-migrated filter avoids re-probing legacy --- + replicationContainerTest( + "a known-migrated id missing from the new probe is NOT re-probed against the legacy replica", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma, network }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const { url: newUrl } = await createPostgresContainer(network, { + imageTag: "docker.io/postgres:17", + }); + const prismaNew = new PrismaClient({ datasources: { db: { url: newUrl } } }); + + try { + const ctx = await seedParents(prisma, "migfilter"); + await mirrorParents(prismaNew, ctx, "migfilter"); + + // Seed the run on the LEGACY/source DB (so CH has the id) but withhold it from NEW, + // simulating replication lag where the new probe misses a freshly-migrated row. + const migrated = await createRun(prisma, ctx, { friendlyId: "run_migrated" }); + + await setTimeout(1500); + + // legacyReplica hydrate must NEVER run for this id because isKnownMigrated is true. 
+ const legacySpyPrisma = new Proxy(prisma, { + get(target, prop) { + if (prop === "taskRun") { + return new Proxy((target as any).taskRun, { + get(trTarget, trProp) { + if (trProp === "findMany") { + return async () => { + throw new Error("legacy replica hydrate must not be invoked for migrated id"); + }; + } + return (trTarget as any)[trProp]; + }, + }); + } + return (target as any)[prop]; + }, + }) as unknown as PrismaClient; + + const runsRepository = new RunsRepository({ + prisma, + clickhouse, + runStore: new PostgresRunStore({ prisma: prismaNew, readOnlyPrisma: prismaNew }), + readThrough: { + splitEnabled: true, + newClient: prismaNew, + legacyReplica: legacySpyPrisma, + isKnownMigrated: async (id) => id === migrated.id, + }, + }); + + const { runs } = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + + // Not on NEW, known-migrated -> served from neither => not-found (filtered). + expect(runs).toHaveLength(0); + } finally { + await prismaNew.$disconnect(); + } + } + ); + + // --- Passthrough (single-DB): one plain store read, legacy + isKnownMigrated never touched --- + replicationContainerTest( + "single-DB passthrough hydrates from one store read and never touches the legacy/known-migrated boundaries", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const ctx = await seedParents(prisma, "passthrough"); + const run = await createRun(prisma, ctx, { friendlyId: "run_passthrough" }); + + await setTimeout(1500); + + // No readThrough (splitEnabled defaults false). Inject throwing boundaries to prove the + // split branch is never entered. 
+ const runsRepository = new RunsRepository({ + prisma, + clickhouse, + readThrough: { + splitEnabled: false, + legacyReplica: prisma, + isKnownMigrated: neverCalled("isKnownMigrated"), + }, + }); + + const { runs } = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + + expect(runs).toHaveLength(1); + expect(runs[0].id).toBe(run.id); + + const friendlyIds = await runsRepository.listFriendlyRunIds({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + expect(friendlyIds).toEqual(["run_passthrough"]); + } + ); + + // --- Ordering: the hydrated page follows the ClickHouse keyset (created_at desc), NOT raw id --- + replicationContainerTest( + "listRuns orders by the ClickHouse created_at keyset, not by raw id", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const ctx = await seedParents(prisma, "ordering"); + // Make chronological order the OPPOSITE of id order: the run created FIRST (smaller + // time-prefixed cuid id) is given the MOST-RECENT created_at. A correct list returns + // [mostRecent, oldest] (created_at desc); the old id-desc hydrate would invert it. + // created_at is set at insert time (not via update) so ClickHouse never holds a second + // ReplacingMergeTree version that could surface as a duplicate. 
+ const now = Date.now(); + const mostRecent = await createRun(prisma, ctx, { + friendlyId: "run_orderA", + createdAt: new Date(now), + }); + const oldest = await createRun(prisma, ctx, { + friendlyId: "run_orderB", + createdAt: new Date(now - 3_600_000), + }); + expect(mostRecent.id < oldest.id).toBe(true); // raw id-desc would yield [oldest, mostRecent] + + await setTimeout(1500); + + const runsRepository = new RunsRepository({ prisma, clickhouse }); + const { runs } = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + + expect(runs.map((r) => r.id)).toEqual([mostRecent.id, oldest.id]); + } + ); + + // --- listFriendlyRunIds parity: split union, id projected away to a plain string[] --- + replicationContainerTest( + "listFriendlyRunIds returns the union of friendly ids across new + legacy, projecting id away", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma, network }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const { url: newUrl } = await createPostgresContainer(network, { + imageTag: "docker.io/postgres:17", + }); + const prismaNew = new PrismaClient({ datasources: { db: { url: newUrl } } }); + + try { + const ctx = await seedParents(prisma, "friendly"); + await mirrorParents(prismaNew, ctx, "friendly"); + + const legacy = await createRun(prisma, ctx, { friendlyId: "run_fLegacy" }); + const migrated = await createRun(prisma, ctx, { friendlyId: "run_fNew" }); + await createRun(prismaNew, ctx, { friendlyId: "run_fNew" }); + await prismaNew.taskRun.update({ + where: { friendlyId: "run_fNew" }, + data: { id: migrated.id }, + }); + + await setTimeout(1500); + + const runsRepository = new RunsRepository({ + prisma, + clickhouse, + runStore: new PostgresRunStore({ 
prisma: prismaNew, readOnlyPrisma: prismaNew }), + readThrough: { + splitEnabled: true, + newClient: prismaNew, + legacyReplica: prisma, + isKnownMigrated: async () => false, + }, + }); + + const friendlyIds = await runsRepository.listFriendlyRunIds({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + + expect(friendlyIds.every((f) => typeof f === "string")).toBe(true); + expect([...friendlyIds].sort()).toEqual(["run_fLegacy", "run_fNew"]); + // id projected away: a friendlyId is never a run internal id. + expect(friendlyIds).not.toContain(legacy.id); + } finally { + await prismaNew.$disconnect(); + } + } + ); +}); diff --git a/apps/webapp/test/runsRepositoryCpres.test.ts b/apps/webapp/test/runsRepositoryCpres.test.ts new file mode 100644 index 00000000000..e43f9d96b93 --- /dev/null +++ b/apps/webapp/test/runsRepositoryCpres.test.ts @@ -0,0 +1,260 @@ +import { describe, expect, vi } from "vitest"; + +// The runsRepository module graph imports `~/v3/runStore.server`, which imports `~/db.server` +// at load. Stub it (the existing runsRepository.*.test.ts do the same) — the function under +// test is driven entirely through a RunStore built from the injected real containers, never +// the stubbed module singletons. 
+vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +import { heteroRunOpsPostgresTest } from "@internal/testcontainers"; +import { buildRunStore } from "~/v3/runStore.server"; +import type { RunOpsPrismaClient } from "@internal/run-ops-database"; +import { PrismaClient } from "@trigger.dev/database"; +import { BulkActionId, RunId } from "@trigger.dev/core/v3/isomorphic"; +import { convertRunListInputOptionsToFilterRunsOptions } from "~/services/runsRepository/runsRepository.server"; + +vi.setConfig({ testTimeout: 90_000 }); + +type SeedContext = { + organizationId: string; + projectId: string; + environmentId: string; +}; + +/** Seeds org/project/env parents on the control-plane client. */ +async function seedParents(prisma: PrismaClient, slug: string): Promise<SeedContext> { + const organization = await prisma.organization.create({ + data: { title: `org-${slug}`, slug: `org-${slug}` }, + }); + const project = await prisma.project.create({ + data: { + name: `proj-${slug}`, + slug: `proj-${slug}`, + organizationId: organization.id, + externalRef: `proj-${slug}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `env-${slug}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `tr_dev_${slug}`, + pkApiKey: `pk_dev_${slug}`, + shortcode: `sc-${slug}`, + }, + }); + + return { + organizationId: organization.id, + projectId: project.id, + environmentId: runtimeEnvironment.id, + }; +} + +/** A batch on the NEW (dedicated run-ops) DB — the residency the single control-plane client silently missed. */ +async function seedNewBatch( + prisma: RunOpsPrismaClient, + friendlyId: string, + runtimeEnvironmentId: string +) { + return prisma.batchTaskRun.create({ data: { friendlyId, runtimeEnvironmentId } }); +} + +/** A batch on the LEGACY (control-plane) DB.
*/ +async function seedLegacyBatch( + prisma: PrismaClient, + friendlyId: string, + runtimeEnvironmentId: string +) { + return prisma.batchTaskRun.create({ data: { friendlyId, runtimeEnvironmentId } }); +} + +async function seedSchedule(prisma: PrismaClient, friendlyId: string, projectId: string) { + return prisma.taskSchedule.create({ + data: { friendlyId, projectId, taskIdentifier: "my-task", generatorExpression: "* * * * *" }, + }); +} + +describe("convertRunListInputOptionsToFilterRunsOptions cross-DB filter resolution (control-plane + run-ops)", () => { + // --- A NEW-resident batch must resolve via the store's NEW->LEGACY probe --- + // Previously the single control-plane client missed it, leaving the friendlyId in the + // ClickHouse `batch_id` filter -> zero runs. Schedule (control-plane) resolves off prisma14. + heteroRunOpsPostgresTest( + "split: a NEW-resident batch resolves via the run-ops store; schedule resolves on control-plane", + async ({ prisma14, prisma17 }) => { + const ctx = await seedParents(prisma14, "test1"); + + const batch = await seedNewBatch(prisma17, "batch_test1", ctx.environmentId); + const schedule = await seedSchedule(prisma14, "sched_test1", ctx.projectId); + + const store = buildRunStore({ + splitEnabled: true, + newWriter: prisma17, + newReplica: prisma17, + legacyWriter: prisma14, + legacyReplica: prisma14, + singleWriter: prisma14, + singleReplica: prisma14, + }); + + const result = await convertRunListInputOptionsToFilterRunsOptions( + { + organizationId: ctx.organizationId, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + batchId: "batch_test1", + scheduleId: "sched_test1", + }, + prisma14, // control-plane client (used for the schedule lookup) + store + ); + + expect(result.batchId).toBe(batch.id); + expect(result.scheduleId).toBe(schedule.id); + } + ); + + // --- A LEGACY-resident batch still resolves via the NEW->LEGACY fallback --- + heteroRunOpsPostgresTest( + "split: a LEGACY-resident batch resolves via 
the store's legacy fallback", + async ({ prisma14, prisma17 }) => { + const ctx = await seedParents(prisma14, "test2"); + + const batch = await seedLegacyBatch(prisma14, "batch_test2", ctx.environmentId); + + const store = buildRunStore({ + splitEnabled: true, + newWriter: prisma17, + newReplica: prisma17, + legacyWriter: prisma14, + legacyReplica: prisma14, + singleWriter: prisma14, + singleReplica: prisma14, + }); + + const result = await convertRunListInputOptionsToFilterRunsOptions( + { + organizationId: ctx.organizationId, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + batchId: "batch_test2", + }, + prisma14, + store + ); + + expect(result.batchId).toBe(batch.id); + } + ); + + // --- An unknown batch friendlyId is retained unchanged (no spurious match) --- + heteroRunOpsPostgresTest( + "split: an unknown batch friendlyId is retained (resolves on neither DB)", + async ({ prisma14, prisma17 }) => { + const ctx = await seedParents(prisma14, "test2b"); + + const store = buildRunStore({ + splitEnabled: true, + newWriter: prisma17, + newReplica: prisma17, + legacyWriter: prisma14, + legacyReplica: prisma14, + singleWriter: prisma14, + singleReplica: prisma14, + }); + + const result = await convertRunListInputOptionsToFilterRunsOptions( + { + organizationId: ctx.organizationId, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + batchId: "batch_missing", + }, + prisma14, + store + ); + + expect(result.batchId).toBe("batch_missing"); + } + ); + + // --- Single-DB passthrough: a passthrough store resolves the batch off the one client --- + heteroRunOpsPostgresTest( + "single-DB passthrough: the batch + schedule resolve off the one client", + async ({ prisma14 }) => { + const ctx = await seedParents(prisma14, "test3"); + const batch = await seedLegacyBatch(prisma14, "batch_test3", ctx.environmentId); + const schedule = await seedSchedule(prisma14, "sched_test3", ctx.projectId); + + const store = buildRunStore({ + splitEnabled: false, + 
singleWriter: prisma14, + singleReplica: prisma14, + }); + + const result = await convertRunListInputOptionsToFilterRunsOptions( + { + organizationId: ctx.organizationId, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + batchId: "batch_test3", + scheduleId: "sched_test3", + }, + prisma14, + store + ); + + expect(result.batchId).toBe(batch.id); + expect(result.scheduleId).toBe(schedule.id); + } + ); + + // --- Pure-conversion non-regression (period, bulkId, runId, rootOnly) --- + heteroRunOpsPostgresTest( + "pure conversions unchanged: period, bulkId, runId, rootOnly in a single-DB call", + async ({ prisma14 }) => { + const ctx = await seedParents(prisma14, "test4"); + const batch = await seedLegacyBatch(prisma14, "batch_test4", ctx.environmentId); + + const bulkFriendly = BulkActionId.generate().friendlyId; // real "bulk_..." friendlyId + const internalRunId = RunId.generate().id; // internal id to be converted to a friendlyId + + const store = buildRunStore({ + splitEnabled: false, + singleWriter: prisma14, + singleReplica: prisma14, + }); + + const result = await convertRunListInputOptionsToFilterRunsOptions( + { + organizationId: ctx.organizationId, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + period: "1h", + bulkId: bulkFriendly, + runId: [internalRunId], + batchId: "batch_test4", + rootOnly: true, + }, + prisma14, + store + ); + + // period: "1h" -> 3600000 ms via parseDuration. + expect(result.period).toBe(3600000); + // bulkId: round-tripped through BulkActionId.toId. + expect(result.bulkId).toBe(BulkActionId.toId(bulkFriendly)); + // runId: each element mapped via RunId.toFriendlyId. + expect(result.runId).toEqual([RunId.toFriendlyId(internalRunId)]); + // batchId still resolved off the single client. + expect(result.batchId).toBe(batch.id); + // rootOnly forced false because batchId/runId are present (even though caller passed true). 
+ expect(result.rootOnly).toBe(false); + } + ); +}); diff --git a/apps/webapp/test/sessions.readthrough.test.ts b/apps/webapp/test/sessions.readthrough.test.ts new file mode 100644 index 00000000000..02835d5976f --- /dev/null +++ b/apps/webapp/test/sessions.readthrough.test.ts @@ -0,0 +1,347 @@ +import { describe, expect, vi } from "vitest"; + +// The sessions.server module graph imports `~/db.server` (and the run-store +// singleton) at load. Stub `~/db.server` so importing the module under test does +// not construct the real boot clients — the serializer is driven entirely through +// an explicitly injected RunStore built from the real test containers. +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +import { heteroRunOpsPostgresTest, postgresTest } from "@internal/testcontainers"; +import { buildRunStore } from "~/v3/runStore.server"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import type { RunOpsPrismaClient } from "@internal/run-ops-database"; +import { PrismaClient } from "@trigger.dev/database"; +import { + resolveSessionByIdOrExternalId, + serializeSessionsWithFriendlyRunIds, + serializeSessionWithFriendlyRunId, +} from "~/services/realtime/sessions.server"; + +vi.setConfig({ testTimeout: 90_000 }); + +type SeedContext = { + organizationId: string; + projectId: string; + environmentId: string; +}; + +/** + * Creates the org/project/env parents on the control-plane client. `Session` + * and the legacy `TaskRun` both need these FK parents; the dedicated run-ops + * schema (`prisma17`) is FK-free, so NEW runs only need the scalar tenant ids. 
+ */ +async function seedParents(prisma: PrismaClient, slug: string): Promise<SeedContext> { + const organization = await prisma.organization.create({ + data: { title: `org-${slug}`, slug: `org-${slug}` }, + }); + const project = await prisma.project.create({ + data: { + name: `proj-${slug}`, + slug: `proj-${slug}`, + organizationId: organization.id, + externalRef: `proj-${slug}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `env-${slug}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `tr_dev_${slug}`, + pkApiKey: `pk_dev_${slug}`, + shortcode: `sc-${slug}`, + }, + }); + + return { + organizationId: organization.id, + projectId: project.id, + environmentId: runtimeEnvironment.id, + }; +} + +/** + * Create a legacy (control-plane) TaskRun. A default cuid id classifies LEGACY. + */ +async function createLegacyRun( + prisma: PrismaClient, + ctx: SeedContext, + run: { friendlyId: string } +) { + return prisma.taskRun.create({ + data: { + friendlyId: run.friendlyId, + taskIdentifier: "my-task", + status: "PENDING", + payload: JSON.stringify({ foo: run.friendlyId }), + traceId: run.friendlyId, + spanId: run.friendlyId, + queue: "test", + runtimeEnvironmentId: ctx.environmentId, + projectId: ctx.projectId, + organizationId: ctx.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); +} + +/** + * Create a NEW (dedicated run-ops) TaskRun with a ksuid id — classifies NEW and + * lives only on the run-ops DB. Scalar tenant columns only (the subset schema is + * FK-free, so no org/project/env rows are required here). 
+ */ +async function createNewRun( + prisma: RunOpsPrismaClient, + ctx: SeedContext, + run: { friendlyId: string; id: string } +) { + return prisma.taskRun.create({ + data: { + id: run.id, + friendlyId: run.friendlyId, + taskIdentifier: "my-task", + status: "PENDING", + payload: JSON.stringify({ foo: run.friendlyId }), + traceId: run.friendlyId, + spanId: run.friendlyId, + queue: "test", + runtimeEnvironmentId: ctx.environmentId, + projectId: ctx.projectId, + organizationId: ctx.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); +} + +async function createSession( + prisma: PrismaClient, + ctx: SeedContext, + session: { friendlyId: string; externalId?: string; currentRunId?: string | null } +) { + return prisma.session.create({ + data: { + friendlyId: session.friendlyId, + externalId: session.externalId, + type: "chat", + projectId: ctx.projectId, + runtimeEnvironmentId: ctx.environmentId, + environmentType: "DEVELOPMENT", + organizationId: ctx.organizationId, + taskIdentifier: "my-task", + triggerConfig: {}, + currentRunId: session.currentRunId ?? 
null, + }, + }); +} + +describe("sessions serializer currentRunId resolution", () => { + // --- Passthrough single-run (single-DB) --- + postgresTest( + "single-run passthrough resolves currentRunId -> friendlyId; null stays null", + async ({ prisma }) => { + const ctx = await seedParents(prisma as PrismaClient, "single-pass"); + const run = await createLegacyRun(prisma as PrismaClient, ctx, { friendlyId: "run_single" }); + const session = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_single", + currentRunId: run.id, + }); + const nullSession = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_null", + currentRunId: null, + }); + + const store = buildRunStore({ + splitEnabled: false, + singleWriter: prisma as PrismaClient, + singleReplica: prisma as PrismaClient, + }); + + const item = await serializeSessionWithFriendlyRunId(session, store); + expect(item.currentRunId).toBe("run_single"); + + const nullItem = await serializeSessionWithFriendlyRunId(nullSession, store); + expect(nullItem.currentRunId).toBeNull(); + } + ); + + // --- Passthrough batched (single-DB) + tenant scope --- + postgresTest( + "batched passthrough resolves each currentRunId; null stays null; cross-env is dropped", + async ({ prisma }) => { + const ctx = await seedParents(prisma as PrismaClient, "batch-pass"); + const otherCtx = await seedParents(prisma as PrismaClient, "batch-pass-other"); + + const runA = await createLegacyRun(prisma as PrismaClient, ctx, { friendlyId: "run_A" }); + const runB = await createLegacyRun(prisma as PrismaClient, ctx, { friendlyId: "run_B" }); + // A run in a DIFFERENT env — pointer must not resolve under our scope. 
+ const crossEnvRun = await createLegacyRun(prisma as PrismaClient, otherCtx, { + friendlyId: "run_cross", + }); + + const sessionA = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_A", + currentRunId: runA.id, + }); + const sessionB = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_B", + currentRunId: runB.id, + }); + const sessionNull = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_n", + currentRunId: null, + }); + const sessionCross = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_x", + currentRunId: crossEnvRun.id, + }); + + const store = buildRunStore({ + splitEnabled: false, + singleWriter: prisma as PrismaClient, + singleReplica: prisma as PrismaClient, + }); + + const items = await serializeSessionsWithFriendlyRunIds( + [sessionA, sessionB, sessionNull, sessionCross], + { projectId: ctx.projectId, runtimeEnvironmentId: ctx.environmentId }, + store + ); + + const byFriendly = new Map(items.map((i) => [i.id, i.currentRunId])); + expect(byFriendly.get("session_A")).toBe("run_A"); + expect(byFriendly.get("session_B")).toBe("run_B"); + expect(byFriendly.get("session_n")).toBeNull(); + // cross-env run exists, but the tenant-scoped find drops it -> null. 
+ expect(byFriendly.get("session_x")).toBeNull(); + } + ); + + // --- Control-plane Session resolve is not routed --- + postgresTest( + "resolveSessionByIdOrExternalId resolves the Session row by friendlyId and by externalId", + async ({ prisma }) => { + const ctx = await seedParents(prisma as PrismaClient, "controlplane"); + const session = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_cp", + externalId: "ext-cp-1", + currentRunId: null, + }); + + const byFriendly = await resolveSessionByIdOrExternalId( + prisma as PrismaClient, + ctx.environmentId, + session.friendlyId + ); + expect(byFriendly?.id).toBe(session.id); + + const byExternal = await resolveSessionByIdOrExternalId( + prisma as PrismaClient, + ctx.environmentId, + "ext-cp-1" + ); + expect(byExternal?.id).toBe(session.id); + } + ); + + // --- Split single-run across two physical DBs (the production-shaped break) --- + // ksuid (NEW-DB) session run must serialize a non-null friendlyId, and a cuid + // (LEGACY) run must still resolve — proving the asymmetry is gone. 
+ heteroRunOpsPostgresTest( + "split single-run resolves a NEW-ksuid run from the run-ops DB and a LEGACY-cuid run from control-plane", + async ({ prisma14, prisma17 }) => { + const ctx = await seedParents(prisma14, "split-single"); + + const newRun = await createNewRun(prisma17, ctx, { + friendlyId: "run_new", + id: generateKsuidId(), + }); + const legacyRun = await createLegacyRun(prisma14, ctx, { friendlyId: "run_legacy" }); + + const newSession = await createSession(prisma14, ctx, { + friendlyId: "session_new", + currentRunId: newRun.id, + }); + const legacySession = await createSession(prisma14, ctx, { + friendlyId: "session_legacy", + currentRunId: legacyRun.id, + }); + + const store = buildRunStore({ + splitEnabled: true, + newWriter: prisma17, + newReplica: prisma17, + legacyWriter: prisma14, + legacyReplica: prisma14, + singleWriter: prisma14, + singleReplica: prisma14, + }); + + const newItem = await serializeSessionWithFriendlyRunId(newSession, store); + expect(newItem.currentRunId).toBe("run_new"); + + const legacyItem = await serializeSessionWithFriendlyRunId(legacySession, store); + expect(legacyItem.currentRunId).toBe("run_legacy"); + } + ); + + // --- Split batched — NEW + legacy union; null + cross-env dropped --- + heteroRunOpsPostgresTest( + "split batched resolves runs across NEW + legacy; null stays null; cross-env dropped", + async ({ prisma14, prisma17 }) => { + const ctx = await seedParents(prisma14, "split-batch"); + const otherCtx = await seedParents(prisma14, "split-batch-other"); + + const newRun = await createNewRun(prisma17, ctx, { + friendlyId: "run_bnew", + id: generateKsuidId(), + }); + const legacyRun = await createLegacyRun(prisma14, ctx, { friendlyId: "run_blegacy" }); + const crossEnvRun = await createLegacyRun(prisma14, otherCtx, { friendlyId: "run_bcross" }); + + const sessionNew = await createSession(prisma14, ctx, { + friendlyId: "session_bnew", + currentRunId: newRun.id, + }); + const sessionLegacy = await 
createSession(prisma14, ctx, { + friendlyId: "session_blegacy", + currentRunId: legacyRun.id, + }); + const sessionNull = await createSession(prisma14, ctx, { + friendlyId: "session_bnull", + currentRunId: null, + }); + const sessionCross = await createSession(prisma14, ctx, { + friendlyId: "session_bcross", + currentRunId: crossEnvRun.id, + }); + + const store = buildRunStore({ + splitEnabled: true, + newWriter: prisma17, + newReplica: prisma17, + legacyWriter: prisma14, + legacyReplica: prisma14, + singleWriter: prisma14, + singleReplica: prisma14, + }); + + const items = await serializeSessionsWithFriendlyRunIds( + [sessionNew, sessionLegacy, sessionNull, sessionCross], + { projectId: ctx.projectId, runtimeEnvironmentId: ctx.environmentId }, + store + ); + + const byFriendly = new Map(items.map((i) => [i.id, i.currentRunId])); + expect(byFriendly.get("session_bnew")).toBe("run_bnew"); + expect(byFriendly.get("session_blegacy")).toBe("run_blegacy"); + expect(byFriendly.get("session_bnull")).toBeNull(); + expect(byFriendly.get("session_bcross")).toBeNull(); + } + ); +}); diff --git a/apps/webapp/test/streamLoader.controlPlane.test.ts b/apps/webapp/test/streamLoader.controlPlane.test.ts new file mode 100644 index 00000000000..fe9a0c42912 --- /dev/null +++ b/apps/webapp/test/streamLoader.controlPlane.test.ts @@ -0,0 +1,114 @@ +// Dedicated run-ops proof for the run-detail realtime stream loader after dropping its cross-DB +// control-plane include. The TaskRun scalar row lives on the dedicated run-ops client (PG17, subset +// schema, no control-plane tables); env lives on PG14. The DB is never mocked; the .count() proof +// shows the run does not exist on the control-plane side. 
+import { heteroRunOpsPostgresTest } from "@internal/testcontainers"; +import type { RunOpsPrismaClient } from "@internal/run-ops-database"; +import type { PrismaClient } from "@trigger.dev/database"; +import { describe, expect, vi } from "vitest"; +import { ControlPlaneCache } from "~/v3/runOpsMigration/controlPlaneCache.server"; +import { ControlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { resolveStreamBasin } from "~/services/realtime/v1StreamsGlobal.server"; + +vi.setConfig({ testTimeout: 60_000, hookTimeout: 60_000 }); + +let n = 0; // per-file counter: each seed call takes the next value as a unique suffix +async function seedControlPlane(prisma: PrismaClient) { + const s = n++; + const organization = await prisma.organization.create({ + data: { title: `Org ${s}`, slug: `org-${s}`, streamBasinName: `basin-${s}` }, + }); + const project = await prisma.project.create({ + data: { name: `P ${s}`, slug: `p-${s}`, externalRef: `proj_${s}`, organizationId: organization.id }, + }); + const environment = await prisma.runtimeEnvironment.create({ + data: { + type: "PRODUCTION", + slug: `prod-${s}`, + projectId: project.id, + organizationId: organization.id, + apiKey: `tr_${s}`, + pkApiKey: `pk_${s}`, + shortcode: `sc_${s}`, + }, + }); + return { organization, project, environment }; +} + +// [TEST-NEWSEED] The run lives on the dedicated run-ops client; control-plane FKs are synthetic +// scalar ids pointing at PG14 rows (the dedicated DB has no control-plane tables). 
+async function seedRunOpsRun( + prisma: RunOpsPrismaClient, + ctx: { organizationId: string; projectId: string; environmentId: string } +) { + const s = n++; + return prisma.taskRun.create({ + data: { + friendlyId: `run_2abc${s}defghijklmnopqrst`, + taskIdentifier: "my-task", + status: "PENDING", + payload: "{}", + payloadType: "application/json", + traceId: `trace_${s}`, + spanId: `span_${s}`, + queue: "task/my-task", + runtimeEnvironmentId: ctx.environmentId, + projectId: ctx.projectId, + organizationId: ctx.organizationId, + environmentType: "PRODUCTION", + engine: "V2", + realtimeStreamsVersion: "v1", + streamBasinName: null, + }, + }); +} + +describe("run-detail stream loader cross-DB read-through (dedicated run-ops client)", () => { + heteroRunOpsPostgresTest( + "run-ops scalars resolve from the dedicated run-ops DB; env (slug/org/basin) resolves from control-plane with no cross-join", + async ({ prisma14, prisma17 }) => { + const cp = await seedControlPlane(prisma14 as unknown as PrismaClient); + const run = await seedRunOpsRun(prisma17, { + organizationId: cp.organization.id, + projectId: cp.project.id, + environmentId: cp.environment.id, + }); + + const found = await prisma17.taskRun.findFirst({ + where: { friendlyId: run.friendlyId, projectId: cp.project.id }, + select: { + id: true, + friendlyId: true, + realtimeStreamsVersion: true, + streamBasinName: true, + runtimeEnvironmentId: true, + projectId: true, + }, + }); + expect(found).not.toBeNull(); + expect(found!.friendlyId).toBe(run.friendlyId); + expect(found!.runtimeEnvironmentId).toBe(cp.environment.id); + + const resolver = new ControlPlaneResolver({ + controlPlanePrimary: prisma14 as unknown as PrismaClient, + controlPlaneReplica: prisma14 as unknown as PrismaClient, + cache: new ControlPlaneCache(), + splitEnabled: () => false, + }); + const environment = await resolver.resolveAuthenticatedEnv(found!.runtimeEnvironmentId); + expect(environment).not.toBeNull(); + 
expect(environment!.slug).toBe(cp.environment.slug); + expect(environment!.organization.id).toBe(cp.organization.id); + expect(environment!.organization.streamBasinName).toBe(cp.organization.streamBasinName); + + const basin = resolveStreamBasin({ + run: { streamBasinName: found!.streamBasinName }, + organization: { streamBasinName: environment!.organization.streamBasinName }, + }); + expect(basin).toBe(cp.organization.streamBasinName); + + // Inversion proof: no run on PG14 (control-plane). + expect(await (prisma14 as unknown as PrismaClient).taskRun.count()).toBe(0); + } + ); +}); From efb35600b280fe0aefbf0ed48ab18687b8c5da22 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Wed, 1 Jul 2026 18:27:37 +0100 Subject: [PATCH 02/15] fix(run-ops split): resolve parent run through an injectable run store in TriggerFailedTaskService TriggerFailedTaskService read the parent run via the ambient module-singleton store while the engine wrote the run through its own store, so a ksuid parent's row was not found and parentTaskRunId came back null. Add an optional injected runStore (defaults to the shared singleton, preserving production behaviour) and resolve the parent through it at both call sites, mirroring triggerTask.server.ts. Align the three affected webapp tests to read through the same store the engine wrote to: triggerFailedTask.test.ts passes engine.runStore; performTaskRunAlerts routing passes a passthrough store over the seeded container; triggerTask.test.ts stubs the run-ops db handles and pins split mode off so the idempotency dedup uses the container client. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../runEngine/services/triggerFailedTask.server.ts | 13 ++++++++++--- apps/webapp/test/engine/triggerFailedTask.test.ts | 12 ++++++++++++ apps/webapp/test/engine/triggerTask.test.ts | 9 ++++++++- .../test/performTaskRunAlertsStoreRouting.test.ts | 4 ++++ 4 files changed, 34 insertions(+), 4 deletions(-) diff --git a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts index 30dd587aeeb..47d2478603e 100644 --- a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts @@ -13,7 +13,8 @@ import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/ import { isSplitEnabled as defaultIsSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedMintKind.server"; import { getEventRepository } from "~/v3/eventRepository/index.server"; -import { runStore } from "~/v3/runStore.server"; +import { runStore as defaultRunStore } from "~/v3/runStore.server"; +import type { RunStore } from "@internal/run-store"; import { PerformTaskRunAlertsService } from "~/v3/services/alerts/performTaskRunAlerts.server"; import { DefaultQueueManager } from "../concerns/queues.server"; import type { TriggerTaskRequest } from "../types"; @@ -68,6 +69,10 @@ export class TriggerFailedTaskService { // Injected so the migrated-marker read stays off the hot path when split is off // (same guard as RunEngineTriggerTaskService); defaults to the live resolver. private readonly isSplitEnabled: () => Promise; + // Resolves the parent run for depth/root/parent linkage. Defaults to the shared + // singleton (in production the same store the engine writes through). Injected in + // tests so the read resolves on the same store the engine wrote to. 
+ private readonly runStore: RunStore; constructor(opts: { prisma: PrismaClientOrTransaction; @@ -75,12 +80,14 @@ export class TriggerFailedTaskService { replicaPrisma?: PrismaClientOrTransaction; isKnownMigrated?: (runId: string) => Promise; isSplitEnabled?: () => Promise; + runStore?: RunStore; }) { this.prisma = opts.prisma; this.replicaPrisma = opts.replicaPrisma ?? opts.prisma; this.engine = opts.engine; this.isKnownMigrated = opts.isKnownMigrated ?? defaultIsKnownMigrated; this.isSplitEnabled = opts.isSplitEnabled ?? defaultIsSplitEnabled; + this.runStore = opts.runStore ?? defaultRunStore; } // Mint a failed run's friendlyId. The id-kind decides which store the run is @@ -138,7 +145,7 @@ export class TriggerFailedTaskService { // Resolve parent run for rootTaskRunId and depth (same as triggerTask.server.ts) const parentRun = request.parentRunId - ? await runStore.findRun( + ? await this.runStore.findRun( { id: RunId.fromFriendlyId(request.parentRunId), runtimeEnvironmentId: request.environment.id, @@ -344,7 +351,7 @@ export class TriggerFailedTaskService { let depth = 0; if (opts.parentRunId) { - const parentRun = await runStore.findRun( + const parentRun = await this.runStore.findRun( { id: RunId.fromFriendlyId(opts.parentRunId), runtimeEnvironmentId: opts.environmentId, diff --git a/apps/webapp/test/engine/triggerFailedTask.test.ts b/apps/webapp/test/engine/triggerFailedTask.test.ts index 0e282e1931e..73216114ed9 100644 --- a/apps/webapp/test/engine/triggerFailedTask.test.ts +++ b/apps/webapp/test/engine/triggerFailedTask.test.ts @@ -43,6 +43,8 @@ describe("TriggerFailedTaskService — failed run residency", () => { const service = new TriggerFailedTaskService({ prisma, engine, + // Read the parent through the same store the engine wrote it to. 
+ runStore: engine.runStore, isKnownMigrated: async () => false, }); @@ -97,6 +99,8 @@ describe("TriggerFailedTaskService — failed run residency", () => { const service = new TriggerFailedTaskService({ prisma, engine, + // Read the parent through the same store the engine wrote it to. + runStore: engine.runStore, isKnownMigrated: async () => false, }); @@ -154,6 +158,8 @@ describe("TriggerFailedTaskService — failed run residency", () => { const service = new TriggerFailedTaskService({ prisma, engine, + // Read the parent through the same store the engine wrote it to. + runStore: engine.runStore, isKnownMigrated: async () => false, }); @@ -201,6 +207,8 @@ describe("TriggerFailedTaskService — failed run residency", () => { const service = new TriggerFailedTaskService({ prisma, engine, + // Read the parent through the same store the engine wrote it to. + runStore: engine.runStore, isKnownMigrated: async (id: string) => id === parentFriendlyId, }); @@ -247,6 +255,8 @@ describe("TriggerFailedTaskService — failed run residency", () => { const service = new TriggerFailedTaskService({ prisma, engine, + // Read the parent through the same store the engine wrote it to. + runStore: engine.runStore, isKnownMigrated: async () => false, }); @@ -278,6 +288,8 @@ describe("TriggerFailedTaskService — failed run residency", () => { const service = new TriggerFailedTaskService({ prisma, engine, + // Read the parent through the same store the engine wrote it to. + runStore: engine.runStore, isKnownMigrated: async () => false, }); diff --git a/apps/webapp/test/engine/triggerTask.test.ts b/apps/webapp/test/engine/triggerTask.test.ts index ffdb05fc192..a4131935070 100644 --- a/apps/webapp/test/engine/triggerTask.test.ts +++ b/apps/webapp/test/engine/triggerTask.test.ts @@ -1,11 +1,18 @@ import { describe, expect, vi } from "vitest"; -// Mock the db prisma client +// Mock the db prisma client. 
The run-ops handles are stubbed so the idempotency +// dedup import resolves; with split off (below) they are never used — the concern's +// constructor prisma is passed through to every store call. vi.mock("~/db.server", () => ({ prisma: {}, $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, })); +// Keep split off so resolveIdempotencyDedupClient returns the passed container client. +vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ isSplitEnabled: async () => false })); + vi.mock("~/services/platform.v3.server", async (importOriginal) => { const actual = (await importOriginal()) as Record; return { diff --git a/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts index a34b7958279..03cab6a1d3a 100644 --- a/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts +++ b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts @@ -338,6 +338,10 @@ describe("PerformTaskRunAlertsService passthrough (single-DB)", () => { const service = new PerformTaskRunAlertsService({ prisma, + // The single-DB default store: a passthrough PostgresRunStore over the one + // container. Injected explicitly so the read resolves on the container the run + // was seeded into, not the ambient module singleton. + runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), controlPlaneResolver: buildControlPlaneResolver(prisma), }); await service.call(id).catch(() => {}); From 1bb77ad366fc13a302eea1b3226951ccbe651521 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Thu, 2 Jul 2026 12:08:21 +0100 Subject: [PATCH 03/15] refactor(run-ops): drop known-migrated from write-path + read repos; id-shape only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Migration is deferred, so child/batch residency is a pure id-shape check. 
Remove the isKnownMigrated (and mint-only isSplitEnabled) deps from the mint sites (triggerTask, triggerFailedTask, batchTriggerV3) and call the now- synchronous resolveInheritedMintKind(parentFriendlyId) with no deps arg. Read paths: drop the isKnownMigrated re-probe-avoidance from the ClickHouse runs hydrate (probe all missing on legacy), the runsRepository readThrough options type, resolveWaitpointThroughReadThrough deps, and the BulkActionV2 batch seam adapter — keeping the genuine cross-seam fallback that reads NEW first for unclassifiable/legacy-candidate ids. Delete the injected-marker test cases; the remaining residency tests assert pure id-shape inheritance. Co-Authored-By: Claude Opus 4.8 (1M context) --- ...solveWaitpointThroughReadThrough.server.ts | 3 - .../services/triggerFailedTask.server.ts | 17 +- .../services/triggerTask.server.test.ts | 16 +- .../runEngine/services/triggerTask.server.ts | 26 +--- .../clickhouseRunsRepository.server.ts | 10 +- .../runsRepository/runsRepository.server.ts | 1 - .../app/v3/services/batchTriggerV3.server.ts | 23 +-- ...lkActionV2.batchReadThrough.server.test.ts | 38 +---- .../BulkActionV2.batchReadThrough.server.ts | 29 ++-- ...batchTriggerV3ResidencyInheritance.test.ts | 67 ++------ .../test/bulkActionV2ReadRouting.test.ts | 39 +---- .../cancelDevSessionRunsStoreRouting.test.ts | 17 +- .../test/engine/triggerFailedTask.test.ts | 54 ------- apps/webapp/test/engine/triggerTask.test.ts | 145 +----------------- ...ointThroughReadThrough.readthrough.test.ts | 53 +++---- .../test/runsRepository.readthrough.test.ts | 89 +---------- 16 files changed, 80 insertions(+), 547 deletions(-) diff --git a/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts b/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts index d1bcaea9b6e..ffa3c4cb068 100644 --- a/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts +++ 
b/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts @@ -1,13 +1,11 @@ import type { PrismaReplicaClient } from "~/db.server"; import { $replica } from "~/db.server"; import { readThroughRun } from "~/v3/runOpsMigration/readThrough.server"; -import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; type ResolveWaitpointDeps = { newClient?: PrismaReplicaClient; legacyReplica?: PrismaReplicaClient; splitEnabled?: boolean; - isKnownMigrated?: (id: string) => Promise; isPastRetention?: (id: string) => boolean; }; @@ -26,7 +24,6 @@ export async function resolveWaitpointThroughReadThrough(opts: { splitEnabled: opts.deps?.splitEnabled, newClient: opts.deps?.newClient ?? $replica, legacyReplica: opts.deps?.legacyReplica ?? $replica, - isKnownMigrated: opts.deps?.isKnownMigrated ?? defaultIsKnownMigrated, isPastRetention: opts.deps?.isPastRetention, }, }); diff --git a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts index 47d2478603e..e42f80fcc1a 100644 --- a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts @@ -9,8 +9,6 @@ import type { import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; import { resolveRunIdMintKind } from "~/v3/engineVersion.server"; -import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; -import { isSplitEnabled as defaultIsSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedMintKind.server"; import { getEventRepository } from "~/v3/eventRepository/index.server"; import { runStore as defaultRunStore } from "~/v3/runStore.server"; @@ -63,12 +61,6 @@ export class TriggerFailedTaskService { private readonly 
prisma: PrismaClientOrTransaction; private readonly replicaPrisma: PrismaClientOrTransaction; private readonly engine: RunEngine; - // Reports whether a run that is legacy by id-shape has already been moved to - // the new store. Injected for tests; defaults to the live resolver. - private readonly isKnownMigrated: (runId: string) => Promise; - // Injected so the migrated-marker read stays off the hot path when split is off - // (same guard as RunEngineTriggerTaskService); defaults to the live resolver. - private readonly isSplitEnabled: () => Promise; // Resolves the parent run for depth/root/parent linkage. Defaults to the shared // singleton (in production the same store the engine writes through). Injected in // tests so the read resolves on the same store the engine wrote to. @@ -78,15 +70,11 @@ export class TriggerFailedTaskService { prisma: PrismaClientOrTransaction; engine: RunEngine; replicaPrisma?: PrismaClientOrTransaction; - isKnownMigrated?: (runId: string) => Promise; - isSplitEnabled?: () => Promise; runStore?: RunStore; }) { this.prisma = opts.prisma; this.replicaPrisma = opts.replicaPrisma ?? opts.prisma; this.engine = opts.engine; - this.isKnownMigrated = opts.isKnownMigrated ?? defaultIsKnownMigrated; - this.isSplitEnabled = opts.isSplitEnabled ?? defaultIsSplitEnabled; this.runStore = opts.runStore ?? defaultRunStore; } @@ -101,10 +89,7 @@ export class TriggerFailedTaskService { parentRunFriendlyId?: string; }): Promise { const mintKind = args.parentRunFriendlyId - ? await resolveInheritedMintKind(args.parentRunFriendlyId, { - isSplitEnabled: this.isSplitEnabled, - isKnownMigrated: this.isKnownMigrated, - }) + ? 
resolveInheritedMintKind(args.parentRunFriendlyId) : await resolveRunIdMintKind({ organizationId: args.organizationId, id: args.environmentId, diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.test.ts b/apps/webapp/app/runEngine/services/triggerTask.server.test.ts index 931d012acee..4b6a49a4755 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.test.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.test.ts @@ -207,9 +207,6 @@ describe("RunEngineTriggerTaskService parent + locked-worker reads", () => { traceEventConcern: new MockTraceEventConcern(), tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, - // Keep the migrated-parent resolver from reaching the empty ~/db.server - // mock; mint-kind inheritance is not under test here. - isKnownMigrated: async () => false, }); // Trigger a ROOT run first to create a real parent TaskRun. @@ -312,7 +309,6 @@ describe("RunEngineTriggerTaskService parent + locked-worker reads", () => { traceEventConcern: new MockTraceEventConcern(), tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, - isKnownMigrated: async () => false, }); // A real parent run in envA. 
@@ -415,7 +411,6 @@ describe("RunEngineTriggerTaskService parent + locked-worker reads", () => { traceEventConcern: new MockTraceEventConcern(), tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, - isKnownMigrated: async () => false, }); const result = await triggerTaskService.call({ @@ -533,7 +528,6 @@ describe("RunEngineTriggerTaskService parent + locked-worker reads", () => { traceEventConcern: new MockTraceEventConcern(), tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, - isKnownMigrated: async () => false, }); // ROOT parent first (uses the unproxied prisma via a separate service so @@ -552,7 +546,6 @@ describe("RunEngineTriggerTaskService parent + locked-worker reads", () => { traceEventConcern: new MockTraceEventConcern(), tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, - isKnownMigrated: async () => false, }); const parentResult = await parentService.call({ taskId: taskIdentifier, @@ -655,7 +648,6 @@ describe("RunEngineTriggerTaskService parent + locked-worker reads", () => { traceEventConcern: new MockTraceEventConcern(), tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, - isKnownMigrated: async () => false, }); const bogusVersion = "v-does-not-exist-0000"; @@ -757,7 +749,6 @@ describe("RunEngineTriggerTaskService parent + locked-worker reads", () => { traceEventConcern: new MockTraceEventConcern(), tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, - isKnownMigrated: async () => false, }); // Trigger in envB locking to the shared version string. 
@@ -799,12 +790,15 @@ describe("RunEngineTriggerTaskService parent + locked-worker reads", () => { prisma, payloadProcessor: new MockPayloadProcessor(), queueConcern: new DefaultQueueManager(prisma, engine), - idempotencyKeyConcern: new IdempotencyKeyConcern(prisma, engine, new MockTraceEventConcern()), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), validator, traceEventConcern: new MockTraceEventConcern(), tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, - isKnownMigrated: async () => false, }); // Trigger with NO parentRunId. diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index 4d7d58f4ecf..a86cb9e0eda 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -27,8 +27,6 @@ import { parseDelay } from "~/utils/delays"; import { handleMetadataPacket } from "~/utils/packets"; import { startSpan } from "~/v3/tracing.server"; import { resolveRunIdMintKind } from "~/v3/engineVersion.server"; -import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; -import { isSplitEnabled as defaultIsSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedMintKind.server"; import type { TriggerTaskServiceOptions, @@ -98,14 +96,6 @@ export class RunEngineTriggerTaskService { private readonly evaluateGate: MollifierEvaluateGate; private readonly getMollifierBuffer: MollifierGetBuffer; private readonly isMollifierGloballyEnabled: () => boolean; - // Resolves whether a run that classifies as legacy-by-id-shape has already - // been moved to the new store. Injected so tests can drive the migrated-parent - // case without the split-store infrastructure; defaults to the live resolver. 
- private readonly isKnownMigrated: (runId: string) => Promise; - // Gates whether the marker-aware inheritance branch runs. With split OFF the - // child residency is a pure id-shape check — zero I/O on the hot path, - // byte-identical to today. Injected so tests can drive split on/off. - private readonly isSplitEnabled: () => Promise; constructor(opts: { prisma: PrismaClientOrTransaction; @@ -121,8 +111,6 @@ export class RunEngineTriggerTaskService { evaluateGate?: MollifierEvaluateGate; getMollifierBuffer?: MollifierGetBuffer; isMollifierGloballyEnabled?: () => boolean; - isKnownMigrated?: (runId: string) => Promise; - isSplitEnabled?: () => Promise; }) { this.prisma = opts.prisma; this.engine = opts.engine; @@ -138,8 +126,6 @@ export class RunEngineTriggerTaskService { this.getMollifierBuffer = opts.getMollifierBuffer ?? defaultGetMollifierBuffer; this.isMollifierGloballyEnabled = opts.isMollifierGloballyEnabled ?? (() => env.TRIGGER_MOLLIFIER_ENABLED === "1"); - this.isKnownMigrated = opts.isKnownMigrated ?? defaultIsKnownMigrated; - this.isSplitEnabled = opts.isSplitEnabled ?? defaultIsSplitEnabled; } // Mint a new run's friendlyId. The id-kind decides which store the run is born @@ -147,19 +133,15 @@ export class RunEngineTriggerTaskService { // must agree. Two cases: // // - ROOT run (no parent): mint by the environment's cutover setting. - // - CHILD run (has a parent): inherit the parent's CURRENT residency, so a - // parent and child never split across stores. A parent that is legacy by - // id-shape but has already been moved to the new store (reported by the - // migrated check) yields a new-store (ksuid) child. + // - CHILD run (has a parent): inherit the parent's residency by id-shape, so a + // parent and child never split across stores (ksuid parent → ksuid child, + // cuid parent → cuid child). private async mintRunFriendlyId( environment: AuthenticatedEnvironment, parentRunFriendlyId?: string ): Promise { const mintKind = parentRunFriendlyId - ? 
await resolveInheritedMintKind(parentRunFriendlyId, { - isSplitEnabled: this.isSplitEnabled, - isKnownMigrated: this.isKnownMigrated, - }) + ? resolveInheritedMintKind(parentRunFriendlyId) : await resolveRunIdMintKind({ organizationId: environment.organizationId, id: environment.id, diff --git a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts index 2cecb790361..65df2b41215 100644 --- a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts @@ -14,7 +14,6 @@ import parseDuration from "parse-duration"; import { decodeRunsCursor, encodeRunsCursor } from "./runsCursor.server"; import { runStore } from "~/v3/runStore.server"; import { type PrismaClientOrTransaction } from "~/db.server"; -import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; type RunCursorRow = { runId: string; createdAt: number }; @@ -181,18 +180,13 @@ export class ClickHouseRunsRepository implements IRunsRepository { } else { const newClient = this.options.readThrough?.newClient ?? this.options.prisma; const legacyReplica = this.options.readThrough?.legacyReplica ?? this.options.prisma; - const isKnownMigrated = this.options.readThrough?.isKnownMigrated ?? defaultIsKnownMigrated; const newRows = await hydrate(newClient, runIds); const foundIds = new Set(newRows.map((r) => r.id)); const missing = runIds.filter((id) => !foundIds.has(id)); - const toProbeLegacy: string[] = []; - for (const id of missing) { - if (!(await isKnownMigrated(id))) { - toProbeLegacy.push(id); - } - } + // Any id not hydrated from the new store is probed on the legacy replica. + const toProbeLegacy = missing; const legacyRows = toProbeLegacy.length ? 
await hydrate(legacyReplica, toProbeLegacy) : []; rows = [...newRows, ...legacyRows]; diff --git a/apps/webapp/app/services/runsRepository/runsRepository.server.ts b/apps/webapp/app/services/runsRepository/runsRepository.server.ts index b477ae492de..3e3dbc5f82a 100644 --- a/apps/webapp/app/services/runsRepository/runsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/runsRepository.server.ts @@ -32,7 +32,6 @@ export type RunsRepositoryOptions = { legacyReplica?: PrismaClientOrTransaction; // Resolved boot constant; when false the split branch is never entered. splitEnabled?: boolean; - isKnownMigrated?: (runId: string) => Promise; }; }; diff --git a/apps/webapp/app/v3/services/batchTriggerV3.server.ts b/apps/webapp/app/v3/services/batchTriggerV3.server.ts index 1e7f563adf5..62778778969 100644 --- a/apps/webapp/app/v3/services/batchTriggerV3.server.ts +++ b/apps/webapp/app/v3/services/batchTriggerV3.server.ts @@ -25,8 +25,6 @@ import { logger } from "~/services/logger.server"; import { getEntitlement } from "~/services/platform.v3.server"; import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; import { resolveRunIdMintKind, type RunIdMintKind } from "~/v3/engineVersion.server"; -import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; -import { isSplitEnabled as defaultIsSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedMintKind.server"; import { mintBatchFriendlyId } from "~/v3/runOpsMigration/mintBatchFriendlyId.server"; import { batchTriggerWorker } from "../batchTriggerWorker.server"; @@ -112,20 +110,13 @@ export class BatchTriggerV3Service extends BaseService { asyncBatchProcessSizeThreshold: number = ASYNC_BATCH_PROCESS_SIZE_THRESHOLD, protected readonly _prisma: PrismaClientOrTransaction = prisma, protected readonly runStore: RunStore = defaultRunStore, - // Injected so 
tests drive the migrated-parent branch without the split-store - // infrastructure; defaults to the live resolver (same pattern as - // RunEngineTriggerTaskService). - private readonly isKnownMigrated: (runId: string) => Promise = defaultIsKnownMigrated, // Injected so tests force the env-default branch deterministically; defaults // to the live per-env mint resolver. private readonly resolveMintKind: (environment: { organizationId: string; id: string; orgFeatureFlags?: unknown; - }) => Promise = resolveRunIdMintKind, - // Injected so the migrated-marker read stays off the hot path when split is off - // (same guard as RunEngineTriggerTaskService); defaults to the live resolver. - private readonly isSplitEnabled: () => Promise = defaultIsSplitEnabled + }) => Promise = resolveRunIdMintKind ) { super(_prisma); @@ -354,19 +345,15 @@ export class BatchTriggerV3Service extends BaseService { // Mint a child run's friendlyId so it lands in the SAME physical store as its // residency anchor. The caller passes the batch's friendlyId, so a ksuid - // anchor (or a cuid-shaped one already migrated to the new store) yields a ksuid - // (NEW) child and a cuid anchor yields a cuid (LEGACY) child. With no anchor it - // falls back to the env's cutover setting. Mirrors - // RunEngineTriggerTaskService.mintRunFriendlyId. + // (NEW) anchor yields a ksuid (NEW) child and a cuid anchor yields a cuid + // (LEGACY) child. With no anchor it falls back to the env's cutover setting. + // Mirrors RunEngineTriggerTaskService.mintRunFriendlyId. private async mintChildFriendlyId( environment: AuthenticatedEnvironment, anchorFriendlyId?: string ): Promise { const mintKind = anchorFriendlyId - ? await resolveInheritedMintKind(anchorFriendlyId, { - isSplitEnabled: this.isSplitEnabled, - isKnownMigrated: this.isKnownMigrated, - }) + ? 
resolveInheritedMintKind(anchorFriendlyId) : await this.resolveMintKind({ organizationId: environment.organizationId, id: environment.id, diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts index 0bb3291f633..df6b74753ee 100644 --- a/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts @@ -1,9 +1,8 @@ // Real PG14 (legacy replica) + PG17 (new) proof for the bulk batch read-through adapter. // We NEVER mock the DB: each closure runs a real `$queryRaw` against the passed container // (crossing the actual PG14↔PG17 boundary) then filters an in-memory seeded set by id — -// mirroring readThrough.server.test.ts's `realRead`. The only injected fakes are the pure -// boundaries the plan allows (`isKnownMigrated`) plus throwing spies asserting a store was -// NEVER touched. +// mirroring readThrough.server.test.ts's `realRead`. The only injected fakes are throwing +// spies asserting a store was NEVER touched. 
import { heteroPostgresTest } from "@internal/testcontainers"; import { describe, expect, vi } from "vitest"; import type { PrismaReplicaClient } from "~/db.server"; @@ -51,7 +50,6 @@ describe("hydrateRunsAcrossSeam (PG14 legacy replica + PG17 new)", () => { splitEnabled: true, newClient: prisma17 as unknown as PrismaReplicaClient, legacyReplica: prisma14 as unknown as PrismaReplicaClient, - isKnownMigrated: async () => false, }, }); @@ -64,40 +62,12 @@ describe("hydrateRunsAcrossSeam (PG14 legacy replica + PG17 new)", () => { ); heteroPostgresTest( - "(b) known-migrated short-circuit: legacy-classified id missed by new is not probed and is omitted", - async ({ prisma14, prisma17 }) => { - const onNew = new Set(); // new misses it - const throwingLegacy = vi.fn(async (): Promise => { - throw new Error("readLegacyReplica must never run for a known-migrated id"); - }); - - const rows = await hydrateRunsAcrossSeam({ - runIds: [LEGACY_RUN_ID], - readNew: (client, ids) => realReadFiltered(client, ids, onNew), - readLegacyReplica: throwingLegacy, - deps: { - splitEnabled: true, - newClient: prisma17 as unknown as PrismaReplicaClient, - legacyReplica: prisma14 as unknown as PrismaReplicaClient, - isKnownMigrated: async () => true, - }, - }); - - expect(rows).toEqual([]); - expect(throwingLegacy).not.toHaveBeenCalled(); - } - ); - - heteroPostgresTest( - "(c) passthrough: splitEnabled false reads only the single client; legacy + filter never touched", + "(c) passthrough: splitEnabled false reads only the single client; legacy never touched", async ({ prisma14, prisma17 }) => { const onNew = new Set([NEW_RUN_ID, LEGACY_RUN_ID]); const throwingLegacy = vi.fn(async (): Promise => { throw new Error("readLegacyReplica must never run in single-DB mode"); }); - const throwingFilter = vi.fn(async (): Promise => { - throw new Error("isKnownMigrated must never run in single-DB mode"); - }); const readNew = vi.fn((client: PrismaReplicaClient, ids: string[]) => 
realReadFiltered(client, ids, onNew) ); @@ -111,7 +81,6 @@ describe("hydrateRunsAcrossSeam (PG14 legacy replica + PG17 new)", () => { // single collapsed store (use prisma17 here as the "new"/primary analog) newClient: prisma17 as unknown as PrismaReplicaClient, legacyReplica: prisma14 as unknown as PrismaReplicaClient, - isKnownMigrated: throwingFilter, }, }); @@ -119,7 +88,6 @@ describe("hydrateRunsAcrossSeam (PG14 legacy replica + PG17 new)", () => { expect(ids).toEqual([LEGACY_RUN_ID, NEW_RUN_ID].sort()); expect(readNew).toHaveBeenCalledTimes(1); expect(throwingLegacy).not.toHaveBeenCalled(); - expect(throwingFilter).not.toHaveBeenCalled(); } ); }); diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts index 53ff0e1304d..05e2965287e 100644 --- a/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts @@ -5,16 +5,15 @@ * per-id read-through ordering as SET reads: * * 1. single-DB passthrough (splitEnabled === false): ONE read against the collapsed - * store, no residency classification, no legacy probe, no known-migrated filter. + * store, no residency classification, no legacy probe. * 2. split on: classify each id's residency via `ownerEngine`, read NEW for every id * that could be on new (residency NEW *and* legacy-candidates — read-through is - * new-FIRST for legacy too), apply the known-migrated short-circuit only to - * legacy-candidates the new read missed, then probe the LEGACY READ REPLICA ONLY - * for whatever remains. + * new-FIRST for legacy too), then probe the LEGACY READ REPLICA ONLY for the + * legacy-candidates the new read missed. * * Like the per-id layer this NEVER touches a legacy primary/writer — there is no such * handle. 
An id is read from new OR legacy, never both: legacy is only probed for ids - * new missed AND that are not known-migrated, so the returned set needs no dedupe. + * new missed, so the returned set needs no dedupe. */ import type { PrismaReplicaClient } from "~/db.server"; import { @@ -22,7 +21,6 @@ import { runOpsNewReplica as defaultNewClient, } from "~/db.server"; import { ownerEngine, UnclassifiableRunId } from "@trigger.dev/core/v3/isomorphic"; -import { isKnownMigrated as defaultIsKnownMigrated } from "~/v3/runOpsMigration/knownMigratedFilter.server"; export type SeamReadDeps = { /** @@ -30,7 +28,6 @@ export type SeamReadDeps = { * request via `isSplitEnabled()`; this adapter never awaits it itself. */ splitEnabled: boolean; - isKnownMigrated?: (runId: string) => Promise; newClient?: PrismaReplicaClient; legacyReplica?: PrismaReplicaClient; logger?: { warn: (m: string, meta?: unknown) => void }; @@ -48,9 +45,7 @@ function getId(row: unknown): string { return (row as { id: string }).id; } -export async function hydrateRunsAcrossSeam( - input: HydrateRunsAcrossSeamInput -): Promise { +export async function hydrateRunsAcrossSeam(input: HydrateRunsAcrossSeamInput): Promise { const { runIds, deps } = input; if (runIds.length === 0) { @@ -60,9 +55,8 @@ export async function hydrateRunsAcrossSeam( const newClient = deps.newClient ?? defaultNewClient; // Passthrough: one plain read against the single collapsed store. No residency - // classification, no legacy probe, no known-migrated filter, no second connection. - // When the caller passes its own `_replica` as `newClient`, this is byte-identical to - // the pre-migration single-DB read. + // classification, no legacy probe, no second connection. When the caller passes its + // own `_replica` as `newClient`, this is byte-identical to the pre-migration single-DB read. 
if (deps.splitEnabled === false) { return input.readNew(newClient, runIds); } @@ -98,13 +92,8 @@ export async function hydrateRunsAcrossSeam( const newRows = await input.readNew(newClient, [...newIds, ...legacyCandidateIds]); const foundOnNew = new Set(newRows.map(getId)); - // Legacy-candidates the new read missed: apply the known-migrated short-circuit. A - // known-migrated id lives on new but the read missed it (lag) — drop it, do NOT probe - // legacy. Run the checks concurrently, and only for these missed candidates. - const isMigrated = deps.isKnownMigrated ?? defaultIsKnownMigrated; - const missedCandidates = legacyCandidateIds.filter((id) => !foundOnNew.has(id)); - const migratedFlags = await Promise.all(missedCandidates.map((id) => isMigrated(id))); - const legacyToProbe = missedCandidates.filter((_, i) => !migratedFlags[i]); + // Legacy-candidates the new read missed are probed on the legacy read replica. + const legacyToProbe = legacyCandidateIds.filter((id) => !foundOnNew.has(id)); // Legacy READ REPLICA only — never a legacy writer/primary (no such handle exists). // A member absent from both DBs is simply not hydrated (matching today's `findMany`, diff --git a/apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts b/apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts index 2f193f4c3b4..d5a4f0ccda9 100644 --- a/apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts +++ b/apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts @@ -34,74 +34,42 @@ function fakeEnv(): AuthenticatedEnvironment { } as unknown as AuthenticatedEnvironment; } -// Build the service with the two mint deps injected so the test drives both -// inheritance branches without the split-store infrastructure. resolveRunIdMintKind -// is forced to "cuid" (its production default when split is off / org not cut over), -// proving the CHILD branch overrides the env default purely from the parent's id-shape. 
-function buildService(isKnownMigrated: (id: string) => Promise) { - return new BatchTriggerV3Service( - undefined, - undefined, - {} as any, - {} as any, - isKnownMigrated, - async () => "cuid" - ); +// Build the service with resolveMintKind forced to "cuid" (its production default +// when split is off / org not cut over), proving the CHILD branch overrides the env +// default purely from the parent's id-shape. +function buildService() { + return new BatchTriggerV3Service(undefined, undefined, {} as any, {} as any, async () => "cuid"); } describe("BatchTriggerV3Service child-residency inheritance", () => { it("a ksuid parent yields ksuid (NEW) child friendlyIds", async () => { - const service = buildService(async () => false); + const service = buildService(); const parentFriendlyId = RunId.toFriendlyId( // 27-char ksuid internal id → NEW residency parent "a".repeat(KSUID_LEN) ); expect(ownerEngine(RunId.fromFriendlyId(parentFriendlyId))).toBe("NEW"); - const childFriendlyId = await (service as any).mintChildFriendlyId( - fakeEnv(), - parentFriendlyId - ); + const childFriendlyId = await (service as any).mintChildFriendlyId(fakeEnv(), parentFriendlyId); expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(KSUID_LEN); expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("NEW"); }); it("a cuid parent yields cuid (LEGACY) child friendlyIds", async () => { - const service = buildService(async () => false); + const service = buildService(); const parentFriendlyId = RunId.generate().friendlyId; // cuid (25) → LEGACY parent expect(ownerEngine(RunId.fromFriendlyId(parentFriendlyId))).toBe("LEGACY"); - const childFriendlyId = await (service as any).mintChildFriendlyId( - fakeEnv(), - parentFriendlyId - ); + const childFriendlyId = await (service as any).mintChildFriendlyId(fakeEnv(), parentFriendlyId); expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(CUID_LEN); expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("LEGACY"); }); - it("a 
cuid-shaped but migrated parent yields ksuid (NEW) children", async () => { - // Parent is legacy by id-shape but already swept to the NEW DB: the migrated - // check wins and children are born ksuid/NEW (mirrors triggerTask resolveInheritedMintKind). - const service = buildService(async () => true); - const parentFriendlyId = RunId.generate().friendlyId; // cuid shape - - const childFriendlyId = await (service as any).mintChildFriendlyId( - fakeEnv(), - parentFriendlyId - ); - - expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(KSUID_LEN); - expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("NEW"); - }); - it("a ROOT batch (no parentRunId) mints by the env setting (cuid default here)", async () => { - const service = buildService(async () => false); - const childFriendlyId = await (service as any).mintChildFriendlyId( - fakeEnv(), - undefined - ); + const service = buildService(); + const childFriendlyId = await (service as any).mintChildFriendlyId(fakeEnv(), undefined); expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(CUID_LEN); expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("LEGACY"); }); @@ -111,14 +79,11 @@ describe("BatchTriggerV3Service child-residency inheritance", () => { // away from the batch's residency), a ksuid batch anchor yields ksuid children — so // batch + children stay co-resident and TaskRun.batchId never crosses the seam. 
it("a ksuid batch anchor yields ksuid children even when the env flag resolves cuid", async () => { - const service = buildService(async () => false); // resolveMintKind forced to "cuid" + const service = buildService(); // resolveMintKind forced to "cuid" const batchFriendlyId = BatchId.toFriendlyId(generateKsuidId()); // ksuid (NEW) batch expect(ownerEngine(batchFriendlyId)).toBe("NEW"); - const childFriendlyId = await (service as any).mintChildFriendlyId( - fakeEnv(), - batchFriendlyId - ); + const childFriendlyId = await (service as any).mintChildFriendlyId(fakeEnv(), batchFriendlyId); expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(KSUID_LEN); expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("NEW"); @@ -131,16 +96,12 @@ describe("BatchTriggerV3Service child-residency inheritance", () => { undefined, {} as any, {} as any, - async () => false, async () => "ksuid" // env flag flipped ON mid-batch ); const batchFriendlyId = BatchId.generate().friendlyId; // cuid (LEGACY) batch expect(ownerEngine(batchFriendlyId)).toBe("LEGACY"); - const childFriendlyId = await (service as any).mintChildFriendlyId( - fakeEnv(), - batchFriendlyId - ); + const childFriendlyId = await (service as any).mintChildFriendlyId(fakeEnv(), batchFriendlyId); expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(CUID_LEN); expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("LEGACY"); diff --git a/apps/webapp/test/bulkActionV2ReadRouting.test.ts b/apps/webapp/test/bulkActionV2ReadRouting.test.ts index e8a51aeb3b3..314a6df6ca2 100644 --- a/apps/webapp/test/bulkActionV2ReadRouting.test.ts +++ b/apps/webapp/test/bulkActionV2ReadRouting.test.ts @@ -133,7 +133,6 @@ describe("BulkActionService member hydration across the seam (PG14 legacy + PG17 splitEnabled: true, newClient: prisma17 as unknown as PrismaReplicaClient, legacyReplica: prisma14 as unknown as PrismaReplicaClient, - isKnownMigrated: async () => false, }, }); @@ -162,7 +161,6 @@ 
describe("BulkActionService member hydration across the seam (PG14 legacy + PG17 splitEnabled: true, newClient: prisma17 as unknown as PrismaReplicaClient, legacyReplica: prisma14 as unknown as PrismaReplicaClient, - isKnownMigrated: async () => false, }, }); @@ -176,37 +174,7 @@ describe("BulkActionService member hydration across the seam (PG14 legacy + PG17 ); heteroPostgresTest( - "known-migrated member is served from new and the legacy replica is never queried for it", - async ({ prisma14, prisma17 }) => { - // A legacy-classified id that lives on new (read hits it). Even with isKnownMigrated=true, - // because new HITS, legacy is never probed. - const migratedRunId = legacyId("e"); - const newCtx = await seedEnv(prisma17 as unknown as PrismaClient, "migrated-new"); - await seedRun(prisma17 as unknown as PrismaClient, newCtx, migratedRunId); - - const throwingLegacy = vi.fn(() => { - throw new Error("legacy replica must never be queried for a known-migrated member"); - }); - - const runs = await hydrateRunsAcrossSeam({ - runIds: [migratedRunId], - readNew: cancelReadNew, - readLegacyReplica: throwingLegacy as never, - deps: { - splitEnabled: true, - newClient: prisma17 as unknown as PrismaReplicaClient, - legacyReplica: prisma14 as unknown as PrismaReplicaClient, - isKnownMigrated: async () => true, - }, - }); - - expect(runs.map((r) => r.id)).toEqual([migratedRunId]); - expect(throwingLegacy).not.toHaveBeenCalled(); - } - ); - - heteroPostgresTest( - "single-DB passthrough hydrates all members from one client; legacy + filter never invoked", + "single-DB passthrough hydrates all members from one client; legacy never invoked", async ({ prisma14, prisma17 }) => { // In single-DB mode the service passes its _replica as newClient. Seed everything there. 
const idA = newId("f"); @@ -218,9 +186,6 @@ describe("BulkActionService member hydration across the seam (PG14 legacy + PG17 const throwingLegacy = vi.fn(() => { throw new Error("legacy replica must never run in single-DB mode"); }); - const throwingFilter = vi.fn(async () => { - throw new Error("isKnownMigrated must never run in single-DB mode"); - }); const runs = await hydrateRunsAcrossSeam({ runIds: [idA, idB], @@ -230,13 +195,11 @@ describe("BulkActionService member hydration across the seam (PG14 legacy + PG17 splitEnabled: false, newClient: prisma17 as unknown as PrismaReplicaClient, legacyReplica: prisma14 as unknown as PrismaReplicaClient, - isKnownMigrated: throwingFilter, }, }); expect(runs.map((r) => r.id).sort()).toEqual([idA, idB].sort()); expect(throwingLegacy).not.toHaveBeenCalled(); - expect(throwingFilter).not.toHaveBeenCalled(); } ); }); diff --git a/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts b/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts index 3da312a23c6..81bec60318b 100644 --- a/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts +++ b/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts @@ -1,6 +1,6 @@ // Real PG14 (legacy) + PG17 (new) proof for the dev-session-cancel TaskRun read. -// The DB is never mocked: reads hit the two real containers. Only pure boundaries -// (splitEnabled, isKnownMigrated) and recording client wrappers are injected. +// The DB is never mocked: reads hit the two real containers. Only the pure +// splitEnabled boundary and recording client wrappers are injected. 
import { heteroPostgresTest, postgresTest } from "@internal/testcontainers"; import type { PrismaClient } from "@trigger.dev/database"; import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; @@ -96,7 +96,10 @@ describe("CancelDevSessionRunsService store routing (hetero)", () => { expect(id.length).toBe(27); const friendlyId = `run_${id}`; - const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv(prisma17, "new"); + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma17, + "new" + ); await seedRun( prisma17, { id, friendlyId }, @@ -117,7 +120,6 @@ describe("CancelDevSessionRunsService store routing (hetero)", () => { splitEnabled: true, newClient: newClient.handle, legacyReplica: legacy.handle, - isKnownMigrated: async () => false, }, }); await service.call({ @@ -140,7 +142,6 @@ describe("CancelDevSessionRunsService store routing (hetero)", () => { splitEnabled: true, newClient: newClient.handle, legacyReplica: legacy.handle, - isKnownMigrated: async () => false, }, }); await service.call({ @@ -183,7 +184,6 @@ describe("CancelDevSessionRunsService store routing (hetero)", () => { splitEnabled: true, newClient: newClient.handle, legacyReplica: legacy.handle, - isKnownMigrated: async () => false, }, }); @@ -226,7 +226,10 @@ describe("CancelDevSessionRunsService passthrough (single-DB)", () => { // control-plane read runs on the same prisma. 
const service = new CancelDevSessionRunsService({ prisma, - readThroughDeps: { splitEnabled: false, newClient: prisma as unknown as PrismaReplicaClient }, + readThroughDeps: { + splitEnabled: false, + newClient: prisma as unknown as PrismaReplicaClient, + }, }); await service.call({ diff --git a/apps/webapp/test/engine/triggerFailedTask.test.ts b/apps/webapp/test/engine/triggerFailedTask.test.ts index 73216114ed9..21fbaeb83e2 100644 --- a/apps/webapp/test/engine/triggerFailedTask.test.ts +++ b/apps/webapp/test/engine/triggerFailedTask.test.ts @@ -45,7 +45,6 @@ describe("TriggerFailedTaskService — failed run residency", () => { engine, // Read the parent through the same store the engine wrote it to. runStore: engine.runStore, - isKnownMigrated: async () => false, }); const friendlyId = await service.call({ @@ -101,7 +100,6 @@ describe("TriggerFailedTaskService — failed run residency", () => { engine, // Read the parent through the same store the engine wrote it to. runStore: engine.runStore, - isKnownMigrated: async () => false, }); const friendlyId = await service.call({ @@ -160,7 +158,6 @@ describe("TriggerFailedTaskService — failed run residency", () => { engine, // Read the parent through the same store the engine wrote it to. 
runStore: engine.runStore, - isKnownMigrated: async () => false, }); const friendlyId = await service.call({ @@ -177,55 +174,6 @@ describe("TriggerFailedTaskService — failed run residency", () => { } ); - containerTest( - "failed child of a migrated LEGACY parent mints ksuid (call)", - async ({ prisma, redisOptions }) => { - const engine = makeEngine(prisma, redisOptions); - const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); - const taskIdentifier = "failed-residency-task"; - await setupBackgroundWorker(engine, environment, taskIdentifier); - - const parentFriendlyId = RunId.generate().friendlyId; - expect(classifyKind(parentFriendlyId)).toBe("cuid"); - await engine.trigger( - { - friendlyId: parentFriendlyId, - environment, - taskIdentifier, - payload: "{}", - payloadType: "application/json", - traceId: "00000000000000000000000000000000", - spanId: "0000000000000000", - workerQueue: "main", - queue: `task/${taskIdentifier}`, - isTest: false, - tags: [], - } as any, - prisma - ); - - const service = new TriggerFailedTaskService({ - prisma, - engine, - // Read the parent through the same store the engine wrote it to. - runStore: engine.runStore, - isKnownMigrated: async (id: string) => id === parentFriendlyId, - }); - - const friendlyId = await service.call({ - taskId: taskIdentifier, - environment, - payload: { test: "child" }, - errorMessage: "boom", - parentRunId: parentFriendlyId, - }); - - expect(classifyKind(friendlyId!)).toBe("ksuid"); - - await engine.quit(); - } - ); - containerTest( "failed child of a NEW parent mints ksuid (callWithoutTraceEvents)", async ({ prisma, redisOptions }) => { @@ -257,7 +205,6 @@ describe("TriggerFailedTaskService — failed run residency", () => { engine, // Read the parent through the same store the engine wrote it to. 
runStore: engine.runStore, - isKnownMigrated: async () => false, }); const friendlyId = await service.callWithoutTraceEvents({ @@ -290,7 +237,6 @@ describe("TriggerFailedTaskService — failed run residency", () => { engine, // Read the parent through the same store the engine wrote it to. runStore: engine.runStore, - isKnownMigrated: async () => false, }); // A well-formed ksuid parent friendlyId that was NEVER triggered → no row. diff --git a/apps/webapp/test/engine/triggerTask.test.ts b/apps/webapp/test/engine/triggerTask.test.ts index a4131935070..ba1452abceb 100644 --- a/apps/webapp/test/engine/triggerTask.test.ts +++ b/apps/webapp/test/engine/triggerTask.test.ts @@ -700,7 +700,6 @@ describe("RunEngineTriggerTaskService", () => { tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, // 1MB triggerRacepointSystem, - isKnownMigrated: async () => false, }); const idempotencyKey = "test-idempotency-key"; @@ -1216,7 +1215,6 @@ describe("RunEngineTriggerTaskService", () => { tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, // 1MB triggerRacepointSystem, - isKnownMigrated: async () => false, }); const idempotencyKey = "test-preserve-friendly-id"; @@ -2291,17 +2289,9 @@ describe("DefaultQueueManager task metadata cache", () => { describe("RunEngineTriggerTaskService — child run residency inheritance", () => { // Helper: stand up an engine + service wired for a single (real) Postgres/Redis - // pair, with an injectable marker boundary so the migrated-parent case can be - // driven without the split-DB infrastructure. Returns the service plus the - // authenticated environment and a registered task identifier. - async function setupResidencyService( - prisma: any, - redisOptions: any, - opts?: { - isKnownMigrated?: (runId: string) => Promise; - isSplitEnabled?: () => Promise; - } - ) { + // pair. Returns the service plus the authenticated environment and a registered + // task identifier. 
+ async function setupResidencyService(prisma: any, redisOptions: any) { const engine = new RunEngine({ prisma, worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10, pollIntervalMs: 100 }, @@ -2343,11 +2333,6 @@ describe("RunEngineTriggerTaskService — child run residency inheritance", () = traceEventConcern: new MockTraceEventConcern(), tracer: trace.getTracer("test", "0.0.0"), metadataMaximumSize: 1024 * 1024 * 1, - // Default: nothing is migrated. Individual tests override. - isKnownMigrated: opts?.isKnownMigrated ?? (async () => false), - // Default split OFF in CI — matches split-off semantics and keeps the - // existing tests deterministic. Tests that exercise the marker pass true. - isSplitEnabled: opts?.isSplitEnabled ?? (async () => false), }); return { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService }; @@ -2436,131 +2421,11 @@ describe("RunEngineTriggerTaskService — child run residency inheritance", () = } ); - containerTest( - "child of a migrated LEGACY (cuid) parent is minted ksuid (born NEW)", - async ({ prisma, redisOptions }) => { - // The parent's id is a cuid (LEGACY by shape) but the migrated marker says - // it now lives on NEW. The child MUST inherit NEW. We drive this with an - // injected isKnownMigrated boundary that reports the parent as migrated. 
- let migratedParentFriendlyId = ""; - const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = - await setupResidencyService(prisma, redisOptions, { - isKnownMigrated: async (id: string) => id === migratedParentFriendlyId, - isSplitEnabled: async () => true, - }); - - const parent = await triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { payload: { test: "parent" } }, - }); - migratedParentFriendlyId = parent!.run.friendlyId; - expect(classifyKind(migratedParentFriendlyId)).toBe("cuid"); // LEGACY by shape - - const child = await triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { - payload: { test: "child" }, - options: { parentRunId: migratedParentFriendlyId }, - }, - }); - - // Marker says parent is on NEW ⇒ child inherits NEW ⇒ ksuid. - expect(classifyKind(child!.run.friendlyId)).toBe("ksuid"); - - await engine.quit(); - } - ); - - containerTest( - "child trigger does not consult the migrated marker when split is off", - async ({ prisma, redisOptions }) => { - let consulted = false; - const spy = async (_id: string) => { - consulted = true; - return false; - }; - const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = - await setupResidencyService(prisma, redisOptions, { - isKnownMigrated: spy, - isSplitEnabled: async () => false, - }); - - const parent = await triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { payload: { test: "parent" } }, - }); - expect(classifyKind(parent!.run.friendlyId)).toBe("cuid"); - - const child = await triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { payload: { test: "child" }, options: { parentRunId: parent!.run.friendlyId } }, - }); - - // Split off ⇒ pure id-shape inheritance, byte-identical to today. 
- expect(classifyKind(child!.run.friendlyId)).toBe("cuid"); - // The marker resolver was NOT called when split is off. - expect(consulted).toBe(false); - - await engine.quit(); - } - ); - - containerTest( - "child trigger consults the migrated marker when split is on", - async ({ prisma, redisOptions }) => { - let consulted = false; - let migratedParentFriendlyId = ""; - const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = - await setupResidencyService(prisma, redisOptions, { - isKnownMigrated: async (id: string) => { - consulted = true; - return id === migratedParentFriendlyId; - }, - isSplitEnabled: async () => true, - }); - - const parent = await triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { payload: { test: "parent" } }, - }); - migratedParentFriendlyId = parent!.run.friendlyId; - expect(classifyKind(migratedParentFriendlyId)).toBe("cuid"); - - const child = await triggerTaskService.call({ - taskId: taskIdentifier, - environment: authenticatedEnvironment, - body: { - payload: { test: "child" }, - options: { parentRunId: migratedParentFriendlyId }, - }, - }); - - // Split on ⇒ marker consulted; marker says migrated ⇒ ksuid. - expect(consulted).toBe(true); - expect(classifyKind(child!.run.friendlyId)).toBe("ksuid"); - - await engine.quit(); - } - ); - containerTest( "caller-supplied runFriendlyId wins verbatim and skips residency inheritance", async ({ prisma, redisOptions }) => { - let consulted = false; - const spy = async (_id: string) => { - consulted = true; - return false; - }; const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = - await setupResidencyService(prisma, redisOptions, { - isKnownMigrated: spy, - isSplitEnabled: async () => true, - }); + await setupResidencyService(prisma, redisOptions); // Explicit cuid id for the run, and a ksuid/NEW parent id. 
const explicitFriendlyId = RunId.toFriendlyId(generateInternalId()); @@ -2577,8 +2442,6 @@ describe("RunEngineTriggerTaskService — child run residency inheritance", () = // Caller-supplied id wins verbatim — NOT re-minted to ksuid despite the NEW parent. expect(result!.run.friendlyId).toBe(explicitFriendlyId); - // The supplied-id short-circuit skips the mint/marker entirely. - expect(consulted).toBe(false); await engine.quit(); } diff --git a/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts index d92497953c4..ada61193074 100644 --- a/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts +++ b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts @@ -17,10 +17,7 @@ function generateLegacyCuid() { return `c${suffix}`; } -function recording( - client: PrismaClient | RunOpsPrismaClient, - opts: { forbidden?: boolean } = {} -) { +function recording(client: PrismaClient | RunOpsPrismaClient, opts: { forbidden?: boolean } = {}) { const calls: unknown[] = []; const waitpoint = { findFirst: (args: unknown) => { @@ -109,7 +106,6 @@ describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run splitEnabled: true, newClient: newClient.handle, legacyReplica: legacy.handle, - isKnownMigrated: async () => false, }, }); @@ -145,7 +141,6 @@ describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run splitEnabled: true, newClient: newClient.handle, legacyReplica: legacy.handle, - isKnownMigrated: async () => false, }, }); @@ -156,30 +151,26 @@ describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run } ); - heteroRunOpsPostgresTest( - "not-found maps to null (no throw)", - async ({ prisma17, prisma14 }) => { - const id = generateLegacyCuid(); - const { environment } = await seedOrgProjectEnv(prisma14, "nf"); - - const result = await resolveWaitpointThroughReadThrough({ - waitpointId: id, - 
environmentId: environment.id, - read: read(id, environment.id), - deps: { - splitEnabled: true, - newClient: recording(prisma17).handle, - legacyReplica: recording(prisma14).handle, - isKnownMigrated: async () => false, - }, - }); - - expect(result).toBeNull(); - } - ); + heteroRunOpsPostgresTest("not-found maps to null (no throw)", async ({ prisma17, prisma14 }) => { + const id = generateLegacyCuid(); + const { environment } = await seedOrgProjectEnv(prisma14, "nf"); + + const result = await resolveWaitpointThroughReadThrough({ + waitpointId: id, + environmentId: environment.id, + read: read(id, environment.id), + deps: { + splitEnabled: true, + newClient: recording(prisma17).handle, + legacyReplica: recording(prisma14).handle, + }, + }); + + expect(result).toBeNull(); + }); postgresTest( - "passthrough (single-DB): one plain read; legacy + isKnownMigrated never invoked", + "passthrough (single-DB): one plain read; legacy never invoked", async ({ prisma }) => { const id = generateKsuidId(); const { project, environment } = await seedOrgProjectEnv(prisma, "pt"); @@ -190,7 +181,6 @@ describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run const single = recording(prisma); const legacy = recording(prisma, { forbidden: true }); - let knownMigratedInvoked = false; const result = await resolveWaitpointThroughReadThrough({ waitpointId: id, @@ -199,10 +189,6 @@ describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run deps: { newClient: single.handle, legacyReplica: legacy.handle, - isKnownMigrated: async () => { - knownMigratedInvoked = true; - return false; - }, }, }); @@ -210,7 +196,6 @@ describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run expect(result!.id).toBe(seeded.id); expect(single.calls.length).toBe(1); expect(legacy.calls.length).toBe(0); - expect(knownMigratedInvoked).toBe(false); } ); }); diff --git a/apps/webapp/test/runsRepository.readthrough.test.ts 
b/apps/webapp/test/runsRepository.readthrough.test.ts index 706600db8fd..e5dc97f2d8b 100644 --- a/apps/webapp/test/runsRepository.readthrough.test.ts +++ b/apps/webapp/test/runsRepository.readthrough.test.ts @@ -118,12 +118,6 @@ async function createRun( }); } -const neverCalled = - (label: string) => - async (id: string): Promise => { - throw new Error(`${label} must not be invoked (called with ${id})`); - }; - describe("RunsRepository read-through id-set hydrate (PG14 legacy + PG17 new)", () => { // --- DoD line + e2e #6: split fan-out across new + legacy-replica with known-migrated filter --- replicationContainerTest( @@ -177,8 +171,6 @@ describe("RunsRepository read-through id-set hydrate (PG14 legacy + PG17 new)", splitEnabled: true, newClient: prismaNew, legacyReplica: prisma, - // legacy-only ids are NOT known-migrated -> the legacy replica IS probed for them. - isKnownMigrated: async () => false, }, }); @@ -212,81 +204,9 @@ describe("RunsRepository read-through id-set hydrate (PG14 legacy + PG17 new)", } ); - // --- Known-migrated filter avoids re-probing legacy --- - replicationContainerTest( - "a known-migrated id missing from the new probe is NOT re-probed against the legacy replica", - async ({ clickhouseContainer, redisOptions, postgresContainer, prisma, network }) => { - const { clickhouse } = await setupClickhouseReplication({ - prisma, - databaseUrl: postgresContainer.getConnectionUri(), - clickhouseUrl: clickhouseContainer.getConnectionUrl(), - redisOptions, - }); - - const { url: newUrl } = await createPostgresContainer(network, { - imageTag: "docker.io/postgres:17", - }); - const prismaNew = new PrismaClient({ datasources: { db: { url: newUrl } } }); - - try { - const ctx = await seedParents(prisma, "migfilter"); - await mirrorParents(prismaNew, ctx, "migfilter"); - - // Seed the run on the LEGACY/source DB (so CH has the id) but withhold it from NEW, - // simulating replication lag where the new probe misses a freshly-migrated row. 
- const migrated = await createRun(prisma, ctx, { friendlyId: "run_migrated" }); - - await setTimeout(1500); - - // legacyReplica hydrate must NEVER run for this id because isKnownMigrated is true. - const legacySpyPrisma = new Proxy(prisma, { - get(target, prop) { - if (prop === "taskRun") { - return new Proxy((target as any).taskRun, { - get(trTarget, trProp) { - if (trProp === "findMany") { - return async () => { - throw new Error("legacy replica hydrate must not be invoked for migrated id"); - }; - } - return (trTarget as any)[trProp]; - }, - }); - } - return (target as any)[prop]; - }, - }) as unknown as PrismaClient; - - const runsRepository = new RunsRepository({ - prisma, - clickhouse, - runStore: new PostgresRunStore({ prisma: prismaNew, readOnlyPrisma: prismaNew }), - readThrough: { - splitEnabled: true, - newClient: prismaNew, - legacyReplica: legacySpyPrisma, - isKnownMigrated: async (id) => id === migrated.id, - }, - }); - - const { runs } = await runsRepository.listRuns({ - page: { size: 10 }, - projectId: ctx.projectId, - environmentId: ctx.environmentId, - organizationId: ctx.organizationId, - }); - - // Not on NEW, known-migrated -> served from neither => not-found (filtered). 
- expect(runs).toHaveLength(0); - } finally { - await prismaNew.$disconnect(); - } - } - ); - - // --- Passthrough (single-DB): one plain store read, legacy + isKnownMigrated never touched --- + // --- Passthrough (single-DB): one plain store read, legacy never touched --- replicationContainerTest( - "single-DB passthrough hydrates from one store read and never touches the legacy/known-migrated boundaries", + "single-DB passthrough hydrates from one store read and never touches the legacy boundary", async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { const { clickhouse } = await setupClickhouseReplication({ prisma, @@ -300,15 +220,13 @@ describe("RunsRepository read-through id-set hydrate (PG14 legacy + PG17 new)", await setTimeout(1500); - // No readThrough (splitEnabled defaults false). Inject throwing boundaries to prove the - // split branch is never entered. + // splitEnabled false → the split branch is never entered (one plain store read). const runsRepository = new RunsRepository({ prisma, clickhouse, readThrough: { splitEnabled: false, legacyReplica: prisma, - isKnownMigrated: neverCalled("isKnownMigrated"), }, }); @@ -412,7 +330,6 @@ describe("RunsRepository read-through id-set hydrate (PG14 legacy + PG17 new)", splitEnabled: true, newClient: prismaNew, legacyReplica: prisma, - isKnownMigrated: async () => false, }, }); From 804a6a6d6883e472f2e33367fb31075d98ea63c9 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Thu, 2 Jul 2026 14:03:09 +0100 Subject: [PATCH 04/15] chore(run-ops split): strip test-enumeration labels from pr07 comments and test names Review hygiene only: remove the NEW-1 label, Test X: name prefixes, and [TEST-NEWSEED] comment label. No product logic or test behavior changed. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts | 2 +- apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts | 6 +++--- apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts | 4 ++-- .../resolveWaitpointThroughReadThrough.readthrough.test.ts | 2 +- apps/webapp/test/streamLoader.controlPlane.test.ts | 2 +- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts b/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts index ea4c24514f3..f8d6dcc6555 100644 --- a/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts +++ b/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts @@ -70,7 +70,7 @@ export class ExecuteTasksWaitingForDeployService extends BaseService { return; } - // NEW-1 defense-in-depth: the open-predicate findRuns fan-out can select runs from + // Defense-in-depth: the open-predicate findRuns fan-out can select runs from // either DB, but the status flip below is a single control-plane updateMany. A // ksuid (NEW-resident) run can only reach WAITING_FOR_DEPLOY via a misconfiguration // (it is a V1/cuid-only status — V2 uses PENDING_VERSION). 
Surface it loudly rather diff --git a/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts b/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts index 81bec60318b..6f0abc432b8 100644 --- a/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts +++ b/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts @@ -90,7 +90,7 @@ function recording(client: PrismaClient, opts: { forbidden?: boolean } = {}) { describe("CancelDevSessionRunsService store routing (hetero)", () => { heteroPostgresTest( - "Test B: a NEW run (ksuid) resolves on the new store via read-through, by friendlyId and by id", + "a NEW run (ksuid) resolves on the new store via read-through, by friendlyId and by id", async ({ prisma17, prisma14 }) => { const id = generateKsuidId(); expect(id.length).toBe(27); @@ -156,7 +156,7 @@ describe("CancelDevSessionRunsService store routing (hetero)", () => { ); heteroPostgresTest( - "Test C: an OLD in-retention run (cuid) resolves off the LEGACY replica, never a legacy primary", + "an OLD in-retention run (cuid) resolves off the LEGACY replica, never a legacy primary", async ({ prisma17, prisma14 }) => { const id = generateLegacyCuid(); expect(id.length).toBe(25); @@ -202,7 +202,7 @@ describe("CancelDevSessionRunsService store routing (hetero)", () => { describe("CancelDevSessionRunsService passthrough (single-DB)", () => { postgresTest( - "Test A: with no read-through deps, the run is read from the single DB and session reads stay on it", + "with no read-through deps, the run is read from the single DB and session reads stay on it", async ({ prisma }) => { const id = generateKsuidId(); const friendlyId = `run_${id}`; diff --git a/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts index 03cab6a1d3a..26f13b1c767 100644 --- a/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts +++ b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts @@ -216,7 +216,7 @@ async function 
dropTaskRunCrossSeamFks(prisma: PrismaClient) { describe("PerformTaskRunAlertsService store routing (hetero)", () => { heteroPostgresTest( - "Test B: env type resolves via the control-plane resolver (distinct DB) while the run resolves on the run-ops store", + "env type resolves via the control-plane resolver (distinct DB) while the run resolves on the run-ops store", async ({ prisma17, prisma14 }) => { const id = generateKsuidId(); const friendlyId = `run_${id}`; @@ -308,7 +308,7 @@ describe("PerformTaskRunAlertsService store routing (hetero)", () => { describe("PerformTaskRunAlertsService passthrough (single-DB)", () => { postgresTest( - "Test A: with the default store, run read + alert-channel read both resolve on the single DB", + "with the default store, run read + alert-channel read both resolve on the single DB", async ({ prisma }) => { const id = generateKsuidId(); const friendlyId = `run_${id}`; diff --git a/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts index ada61193074..e580023c1b2 100644 --- a/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts +++ b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts @@ -89,7 +89,7 @@ describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run const id = generateKsuidId(); expect(id.length).toBe(27); - // [TEST-NEWSEED] The dedicated run-ops DB has no control-plane tables; the waitpoint's + // The dedicated run-ops DB has no control-plane tables; the waitpoint's // environment/project FKs are synthetic scalar ids. 
const environmentId = generateKsuidId(); const projectId = generateKsuidId(); diff --git a/apps/webapp/test/streamLoader.controlPlane.test.ts b/apps/webapp/test/streamLoader.controlPlane.test.ts index fe9a0c42912..0c7e4c2be6c 100644 --- a/apps/webapp/test/streamLoader.controlPlane.test.ts +++ b/apps/webapp/test/streamLoader.controlPlane.test.ts @@ -35,7 +35,7 @@ async function seedControlPlane(prisma: PrismaClient) { return { organization, project, environment }; } -// [TEST-NEWSEED] The run lives on the dedicated run-ops client; control-plane FKs are synthetic +// The run lives on the dedicated run-ops client; control-plane FKs are synthetic // scalar ids pointing at PG14 rows (the dedicated DB has no control-plane tables). async function seedRunOpsRun( prisma: RunOpsPrismaClient, From 8ff98c283949bf3408a5754f22f3ee687ef1b1f7 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Thu, 2 Jul 2026 15:32:15 +0100 Subject: [PATCH 05/15] style(run-ops): apply oxfmt Co-Authored-By: Claude Opus 4.8 (1M context) --- .../v3/services/bulk/BulkActionV2.server.ts | 5 +-- .../app/v3/services/resumeBatchRun.server.ts | 1 - .../test/idempotencyDedupResidency.test.ts | 7 +++- ...empotencyKeyConcernLegacyAuthority.test.ts | 7 +++- .../performTaskRunAlertsStoreRouting.test.ts | 4 +- .../streamRegistrationRouting.test.ts | 40 +++++++++---------- ...resetIdempotencyKeyLegacyAuthority.test.ts | 5 ++- .../test/streamLoader.controlPlane.test.ts | 7 +++- 8 files changed, 46 insertions(+), 30 deletions(-) diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts index cb5c7cf0b98..093c9408eb7 100644 --- a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts @@ -27,10 +27,7 @@ import { formatDateTime } from "~/components/primitives/DateTime"; import pMap from "p-map"; import { type PrismaReplicaClient } from "~/db.server"; import { isSplitEnabled } from 
"~/v3/runOpsMigration/splitMode.server"; -import { - hydrateRunsAcrossSeam, - type SeamReadDeps, -} from "./BulkActionV2.batchReadThrough.server"; +import { hydrateRunsAcrossSeam, type SeamReadDeps } from "./BulkActionV2.batchReadThrough.server"; export type ProcessToCompletionOptions = { /** Absolute timestamp (ms) after which processing stops and returns incomplete. */ diff --git a/apps/webapp/app/v3/services/resumeBatchRun.server.ts b/apps/webapp/app/v3/services/resumeBatchRun.server.ts index 81a59f8a073..a7e42407d34 100644 --- a/apps/webapp/app/v3/services/resumeBatchRun.server.ts +++ b/apps/webapp/app/v3/services/resumeBatchRun.server.ts @@ -397,4 +397,3 @@ export class ResumeBatchRunService extends BaseService { } } } - diff --git a/apps/webapp/test/idempotencyDedupResidency.test.ts b/apps/webapp/test/idempotencyDedupResidency.test.ts index 585503ebf93..d8ab8d934cb 100644 --- a/apps/webapp/test/idempotencyDedupResidency.test.ts +++ b/apps/webapp/test/idempotencyDedupResidency.test.ts @@ -4,7 +4,12 @@ import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; import { describe, expect, vi } from "vitest"; // Stub so the runStore singleton doesn't eagerly connect at import. -vi.mock("~/db.server", () => ({ prisma: {}, $replica: {}, runOpsNewPrisma: {}, runOpsLegacyPrisma: {} })); +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, +})); // Keep split off so resolveIdempotencyDedupClient returns this.prisma (the hetero fixture client). 
vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ isSplitEnabled: async () => false })); diff --git a/apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts b/apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts index fd007c8821b..5434567d42e 100644 --- a/apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts +++ b/apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts @@ -9,7 +9,12 @@ import { describe, expect, vi } from "vitest"; // exercised — the passed client runs the query. Mirrors the shipped // `mollifierClaimResolution` test: env-wiring mock only; the DB under test is // the real PG14 + PG17 hetero-fixture containers. -vi.mock("~/db.server", () => ({ prisma: {}, $replica: {}, runOpsNewPrisma: {}, runOpsLegacyPrisma: {} })); +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, +})); // Keep split off so resolveIdempotencyDedupClient returns this.prisma (the hetero fixture client). vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ isSplitEnabled: async () => false })); diff --git a/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts index 26f13b1c767..e6ef8105579 100644 --- a/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts +++ b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts @@ -210,7 +210,9 @@ const TASK_RUN_CROSS_SEAM_FKS = [ async function dropTaskRunCrossSeamFks(prisma: PrismaClient) { for (const constraint of TASK_RUN_CROSS_SEAM_FKS) { - await prisma.$executeRawUnsafe(`ALTER TABLE "TaskRun" DROP CONSTRAINT IF EXISTS "${constraint}"`); + await prisma.$executeRawUnsafe( + `ALTER TABLE "TaskRun" DROP CONSTRAINT IF EXISTS "${constraint}"` + ); } } diff --git a/apps/webapp/test/realtime/streamRegistrationRouting.test.ts b/apps/webapp/test/realtime/streamRegistrationRouting.test.ts index e086b6ce53d..6a4f5960f80 100644 --- 
a/apps/webapp/test/realtime/streamRegistrationRouting.test.ts +++ b/apps/webapp/test/realtime/streamRegistrationRouting.test.ts @@ -174,26 +174,26 @@ describe("realtime stream registration — run-ops store routed writes", () => { "completed run guard issues no push", { timeout: 60_000 }, async ({ prisma17 }) => { - const store = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); - - const runId = "run_routed_push_completed"; - await seedRun(prisma17, { - runId, - slugSuffix: "completed17", - completedAt: new Date("2026-06-01T00:00:00.000Z"), - }); - - const streamId = "stream-late"; - const result = await routedRegisterStream(store, prisma17, runId, streamId); - - // The completedAt guard blocks the push (route returns 400). - expect(result.pushed).toBe(false); - - const row = await prisma17.taskRun.findFirst({ - where: { id: runId }, - select: { realtimeStreams: true }, - }); - expect(row?.realtimeStreams).toEqual([]); + const store = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + + const runId = "run_routed_push_completed"; + await seedRun(prisma17, { + runId, + slugSuffix: "completed17", + completedAt: new Date("2026-06-01T00:00:00.000Z"), + }); + + const streamId = "stream-late"; + const result = await routedRegisterStream(store, prisma17, runId, streamId); + + // The completedAt guard blocks the push (route returns 400). 
+ expect(result.pushed).toBe(false); + + const row = await prisma17.taskRun.findFirst({ + where: { id: runId }, + select: { realtimeStreams: true }, + }); + expect(row?.realtimeStreams).toEqual([]); } ); diff --git a/apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts b/apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts index 36e4c3f9e34..04c442ca187 100644 --- a/apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts +++ b/apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts @@ -32,7 +32,10 @@ function makeService(legacyPrisma: PrismaClient) { } function makeEnv(opts: { id: string; organizationId: string }): AuthenticatedEnvironment { - return { id: opts.id, organizationId: opts.organizationId } as unknown as AuthenticatedEnvironment; + return { + id: opts.id, + organizationId: opts.organizationId, + } as unknown as AuthenticatedEnvironment; } async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) { diff --git a/apps/webapp/test/streamLoader.controlPlane.test.ts b/apps/webapp/test/streamLoader.controlPlane.test.ts index 0c7e4c2be6c..372ecda31c7 100644 --- a/apps/webapp/test/streamLoader.controlPlane.test.ts +++ b/apps/webapp/test/streamLoader.controlPlane.test.ts @@ -19,7 +19,12 @@ async function seedControlPlane(prisma: PrismaClient) { data: { title: `Org ${s}`, slug: `org-${s}`, streamBasinName: `basin-${s}` }, }); const project = await prisma.project.create({ - data: { name: `P ${s}`, slug: `p-${s}`, externalRef: `proj_${s}`, organizationId: organization.id }, + data: { + name: `P ${s}`, + slug: `p-${s}`, + externalRef: `proj_${s}`, + organizationId: organization.id, + }, }); const environment = await prisma.runtimeEnvironment.create({ data: { From 0d687d3728cd0fc6319427ab756694aeffe7c6bd Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Thu, 2 Jul 2026 18:05:01 +0100 Subject: [PATCH 06/15] fix(run-ops split): self-default resolveWaitpointThroughReadThrough to safe run-ops clients MIME-Version: 1.0 Content-Type: 
text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The read-through concern defaulted both newClient and legacyReplica to $replica (control-plane), so a bare caller that omits `deps` — the waitpoints wait route — never queried the dedicated run-ops replica. A co-located, NEW-resident waitpoint minted by streams.input().wait() lives on the run-ops-new DB, so the read missed, returned null, and the route 404'd (re-serialized to 500). Match the deps the complete/callback routes pass: default newClient to runOpsNewReplica, legacyReplica to $replica, and splitEnabled to runOpsSplitReadEnabled — mirroring readThroughRun's own self-defaulting. This immunizes any bare caller (present or future) against the control-plane pin, without touching the wait route. The wait/complete/callback call sites live on a higher branch and are unchanged; complete/callback keep their explicit deps (now redundant but harmless). Adds a heteroRunOps regression case driving the concern with no `deps` via the `defaults` DI seam: proves the old $replica default misses a NEW-resident waitpoint (null) while the safe run-ops default finds it. No mocks; the fallback is exercised against real PG14/PG17 containers. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- ...solveWaitpointThroughReadThrough.server.ts | 29 +++++++++++-- ...ointThroughReadThrough.readthrough.test.ts | 43 +++++++++++++++++++ 2 files changed, 68 insertions(+), 4 deletions(-) diff --git a/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts b/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts index ffa3c4cb068..284f842f569 100644 --- a/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts +++ b/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts @@ -1,5 +1,9 @@ import type { PrismaReplicaClient } from "~/db.server"; -import { $replica } from "~/db.server"; +import { + $replica as defaultLegacyReplica, + runOpsNewReplica as defaultNewClient, + runOpsSplitReadEnabled as defaultSplitReadEnabled, +} from "~/db.server"; import { readThroughRun } from "~/v3/runOpsMigration/readThrough.server"; type ResolveWaitpointDeps = { @@ -9,21 +13,38 @@ type ResolveWaitpointDeps = { isPastRetention?: (id: string) => boolean; }; +// Safe defaults matching the deps `complete`/`callback` pass, so a bare caller still fans +// out to the dedicated run-ops replica (NEW-resident waitpoints) before control-plane. +export type ResolveWaitpointReadThroughDefaults = { + newClient: PrismaReplicaClient; + legacyReplica: PrismaReplicaClient; + splitEnabled: boolean; +}; + +const productionDefaults: ResolveWaitpointReadThroughDefaults = { + newClient: defaultNewClient, + legacyReplica: defaultLegacyReplica, + splitEnabled: defaultSplitReadEnabled, +}; + export async function resolveWaitpointThroughReadThrough(opts: { waitpointId: string; environmentId: string; read: (client: PrismaReplicaClient) => Promise; deps?: ResolveWaitpointDeps; + defaults?: ResolveWaitpointReadThroughDefaults; }): Promise { + const defaults = opts.defaults ?? 
productionDefaults; + const result = await readThroughRun({ runId: opts.waitpointId, environmentId: opts.environmentId, readNew: (client) => opts.read(client), readLegacy: (replica) => opts.read(replica), deps: { - splitEnabled: opts.deps?.splitEnabled, - newClient: opts.deps?.newClient ?? $replica, - legacyReplica: opts.deps?.legacyReplica ?? $replica, + splitEnabled: opts.deps?.splitEnabled ?? defaults.splitEnabled, + newClient: opts.deps?.newClient ?? defaults.newClient, + legacyReplica: opts.deps?.legacyReplica ?? defaults.legacyReplica, isPastRetention: opts.deps?.isPastRetention, }, }); diff --git a/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts index e580023c1b2..c6df1474886 100644 --- a/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts +++ b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts @@ -151,6 +151,49 @@ describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run } ); + heteroRunOpsPostgresTest( + "bare caller (no deps) resolves a NEW-resident waitpoint via the safe run-ops defaults", + async ({ prisma17, prisma14 }) => { + // The bare wait route passes NO `deps`; the `defaults` DI seam models old vs new + // fallback against containers, avoiding the real db.server topology. + const id = generateKsuidId(); + expect(id.length).toBe(27); + const environmentId = generateKsuidId(); + const projectId = generateKsuidId(); + const seeded = await seedWaitpoint(prisma17, id, { id: environmentId, projectId }); + + // FAIL-BEFORE: old default pinned newClient to control-plane ($replica ≈ prisma14) → miss. 
+ const oldDefaultResult = await resolveWaitpointThroughReadThrough({ + waitpointId: id, + environmentId, + read: read(id, environmentId), + defaults: { + newClient: prisma14 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + splitEnabled: true, + }, + }); + expect(oldDefaultResult).toBeNull(); + + // PASS-AFTER: safe default routes newClient to the run-ops replica (runOpsNewReplica ≈ prisma17). + const safeDefaultResult = await resolveWaitpointThroughReadThrough({ + waitpointId: id, + environmentId, + read: read(id, environmentId), + defaults: { + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + splitEnabled: true, + }, + }); + + expect(safeDefaultResult).not.toBeNull(); + expect(safeDefaultResult!.id).toBe(seeded.id); + expect(safeDefaultResult!.projectId).toBe(projectId); + expect(safeDefaultResult!.environmentId).toBe(environmentId); + } + ); + heteroRunOpsPostgresTest("not-found maps to null (no throw)", async ({ prisma17, prisma14 }) => { const id = generateLegacyCuid(); const { environment } = await seedOrgProjectEnv(prisma14, "nf"); From 1fc0e353dcb2af55bc896a58b0536a7c4b6ab839 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Thu, 2 Jul 2026 20:08:45 +0100 Subject: [PATCH 07/15] fix(run-ops split): route idempotent-run waitpoint block and batch-service prisma to the resolved store - Block the idempotent parent run's waitpoint via the residency-resolved dedup client instead of the fallback prisma, so the write lands on the store that owns the parent run. - Pass the caller-provided _prisma into WithRunEngine so a custom store isn't silently overridden by the module singleton. - Throw when a run-backed alert's environment can't be resolved instead of marking it SENT, so a transient replica miss doesn't permanently suppress the alert. 
- Pin splitEnabled:false in the waitpoint passthrough test so it exercises single-DB behaviour rather than relying on ksuid residency. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../concerns/idempotencyKeys.server.ts | 2 +- .../runEngine/services/batchTrigger.server.ts | 2 +- .../v3/services/alerts/deliverAlert.server.ts | 35 +++++++++++-------- ...ointThroughReadThrough.readthrough.test.ts | 1 + 4 files changed, 23 insertions(+), 17 deletions(-) diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 1a50942b4e5..c856f67af08 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -287,7 +287,7 @@ export class IdempotencyKeyConcern { : undefined, projectId: request.environment.projectId, organizationId: request.environment.organizationId, - tx: this.prisma, + tx: dedupClient, }); } ); diff --git a/apps/webapp/app/runEngine/services/batchTrigger.server.ts b/apps/webapp/app/runEngine/services/batchTrigger.server.ts index 1a54815a26b..772770becc8 100644 --- a/apps/webapp/app/runEngine/services/batchTrigger.server.ts +++ b/apps/webapp/app/runEngine/services/batchTrigger.server.ts @@ -71,7 +71,7 @@ export class RunEngineBatchTriggerService extends WithRunEngine { protected readonly _prisma: PrismaClientOrTransaction = prisma, engine?: RunEngine ) { - super({ prisma, engine }); + super({ prisma: _prisma, engine }); // Eric note: We need to force sequential processing because when doing parallel, we end up with high-contention on the parent run lock // becuase we are triggering a lot of runs at once, and each one is trying to lock the parent run. 
diff --git a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts index 94e9ab8fc31..86140b4d557 100644 --- a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts +++ b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts @@ -190,26 +190,31 @@ export class DeliverAlertService extends BaseService { const env = await this.#controlPlaneResolver.resolveAuthenticatedEnv( resolvedTaskRun.runtimeEnvironmentId ); + + if (!env) { + throw new Error( + `Could not resolve environment ${resolvedTaskRun.runtimeEnvironmentId} for alert ${alertId}` + ); + } + const lockedWorker = await this.#controlPlaneResolver.resolveRunLockedWorker({ lockedById: resolvedTaskRun.lockedById, lockedToVersionId: resolvedTaskRun.lockedToVersionId, }); - if (env) { - taskRun = { - ...resolvedTaskRun, - runtimeEnvironment: { type: env.type, branchName: env.branchName }, - lockedBy: lockedWorker?.lockedBy - ? { - filePath: lockedWorker.lockedBy.filePath, - exportName: lockedWorker.lockedBy.exportName, - } - : null, - lockedToVersion: lockedWorker?.lockedToVersion - ? { version: lockedWorker.lockedToVersion.version } - : null, - }; - } + taskRun = { + ...resolvedTaskRun, + runtimeEnvironment: { type: env.type, branchName: env.branchName }, + lockedBy: lockedWorker?.lockedBy + ? { + filePath: lockedWorker.lockedBy.filePath, + exportName: lockedWorker.lockedBy.exportName, + } + : null, + lockedToVersion: lockedWorker?.lockedToVersion + ? 
{ version: lockedWorker.lockedToVersion.version } + : null, + }; } } diff --git a/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts index c6df1474886..3b3ec417895 100644 --- a/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts +++ b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts @@ -230,6 +230,7 @@ describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run environmentId: environment.id, read: read(id, environment.id), deps: { + splitEnabled: false, newClient: single.handle, legacyReplica: legacy.handle, }, From c34fa097f8ed792e0fb6b28128751c3ff54c75e9 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Thu, 2 Jul 2026 21:20:58 +0100 Subject: [PATCH 08/15] test(run-ops split): repair mollifier claim-resolution db.server mock The write-path split added static `runOpsLegacyPrisma`/`runOpsNewPrisma` imports to idempotencyKeys.server.ts, which this test loads. vitest validates every named import against the `~/db.server` mock, so the mock now errored on the missing run-ops singletons. Add the four run-ops exports (empty stubs, same boundary pattern as the batchTriggerV3 residency test) and pin isSplitEnabled() to false so the dedup routing deterministically returns the injected fake prisma regardless of the ambient RUN_OPS_SPLIT_ENABLED. 
Co-Authored-By: Claude Opus 4.8 (1M context) --- .../test/mollifierClaimResolution.test.ts | 20 +++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/apps/webapp/test/mollifierClaimResolution.test.ts b/apps/webapp/test/mollifierClaimResolution.test.ts index 7a2a0c1e546..c35c24c1c84 100644 --- a/apps/webapp/test/mollifierClaimResolution.test.ts +++ b/apps/webapp/test/mollifierClaimResolution.test.ts @@ -3,8 +3,17 @@ import { describe, expect, it, vi } from "vitest"; // Stub `~/db.server` before importing the concern — the real module // eagerly calls `prisma.$connect()` at singleton construction, which // would fail without a database. The concern under test receives its -// prisma via the constructor, so the stub is never used by the code path. -vi.mock("~/db.server", () => ({ prisma: {}, $replica: {} })); +// prisma via the constructor, so these empty stubs are never used by the +// tested path; the run-ops singletons only satisfy the concern's static +// imports (vitest validates every named import against the mock). +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, + runOpsNewReplica: {}, + runOpsLegacyReplica: {}, +})); // The IdempotencyKeyConcern resolves the pre-gate claim through the // global mollifier buffer (`getMollifierBuffer`), shared by both @@ -22,6 +31,13 @@ vi.mock("~/v3/mollifier/mollifierBuffer.server", () => ({ vi.mock("~/v3/mollifier/mollifierGate.server", () => ({ makeResolveMollifierFlag: () => async () => h.orgFlag, })); +// Pin the idempotency dedup routing to the injected fake prisma: split OFF +// makes resolveIdempotencyDedupClient return the concern's constructor client, +// so these tests exercise claim resolution deterministically regardless of the +// ambient RUN_OPS_SPLIT_ENABLED (the split path routes to the empty runOps mocks). 
+vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ + isSplitEnabled: async () => false, +})); import type { MollifierBuffer } from "@trigger.dev/redis-worker"; import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; From 43426e9dec85d702f7180045f62975da98b32378 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Thu, 2 Jul 2026 22:41:37 +0100 Subject: [PATCH 09/15] test(webapp): mock eager-Redis import-time singletons in global test setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Worker/engine/marqs/pubsub/socket singletons each construct an ioredis client at import time (singleton() + no lazyConnect), so any test importing the service graph opened real Redis connections on import. In CI there is no Redis, so these accumulate infinite-retry clients across a shard and take the suite down (locally they pass only because dev Redis is up). Globally mock the eager-Redis modules to no-op stubs in test/setup.ts: commonWorker, batchTriggerWorker, legacyRunEngineWorker, alertsWorker, the RunEngine and MarQS singletons, devPubSub and the socket.io server. Only these singletons are mocked — never the run store (~/v3/runStore.server, ~/db.server), which store-routing/residency tests need real against testcontainer Postgres. Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/webapp/test/setup.ts | 45 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/apps/webapp/test/setup.ts b/apps/webapp/test/setup.ts index 607ad78f3a9..6acf266c082 100644 --- a/apps/webapp/test/setup.ts +++ b/apps/webapp/test/setup.ts @@ -2,5 +2,50 @@ // EnvironmentSchema.parse(process.env) succeeds in vitest workers. 
import { config } from "dotenv"; import path from "node:path"; +import { vi } from "vitest"; config({ path: path.resolve(__dirname, "../.env") }); + +// Worker singletons construct a RedisWorker at import time whose ioredis client +// connects eagerly, so any test importing the service graph opens real Redis +// connections on import — which floods and fails in CI (no Redis). Mock them to +// no-op stubs. Only the worker modules are mocked, never the run store +// (~/v3/runStore.server, ~/db.server), which store-routing tests need real. +function createWorkerStub() { + return { + start: vi.fn(), + stop: vi.fn(), + enqueue: vi.fn().mockResolvedValue(undefined), + enqueueOnce: vi.fn().mockResolvedValue(undefined), + reschedule: vi.fn().mockResolvedValue(undefined), + cancel: vi.fn().mockResolvedValue(undefined), + ack: vi.fn().mockResolvedValue(undefined), + }; +} + +vi.mock("~/v3/commonWorker.server", () => ({ commonWorker: createWorkerStub() })); +vi.mock("~/v3/batchTriggerWorker.server", () => ({ batchTriggerWorker: createWorkerStub() })); +vi.mock("~/v3/legacyRunEngineWorker.server", () => ({ + legacyRunEngineWorker: createWorkerStub(), +})); +vi.mock("~/v3/alertsWorker.server", () => ({ alertsWorker: createWorkerStub() })); + +// RunEngine, MarQS, devPubSub and the socket.io server are further singletons +// that open eager ioredis connections at import via the same pattern. No test +// uses these app-level singletons directly (store-routing tests build their own +// engine and run store), so stub them to no-op proxies. 
+const noopProxy = () => + new Proxy( + {}, + { + get: () => vi.fn().mockResolvedValue(undefined), + } + ); + +vi.mock("~/v3/runEngine.server", () => ({ engine: noopProxy() })); +vi.mock("~/v3/marqs/index.server", () => ({ marqs: noopProxy(), MarQS: class {} })); +vi.mock("~/v3/marqs/devPubSub.server", () => ({ devPubSub: noopProxy() })); +vi.mock("~/v3/handleSocketIo.server", () => ({ + socketIo: noopProxy(), + roomFromFriendlyRunId: (id: string) => `room:${id}`, +})); From 3c57e4391a4bc6ebe97fc198746886652b088d45 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Thu, 2 Jul 2026 23:55:15 +0100 Subject: [PATCH 10/15] =?UTF-8?q?test(webapp):=20stop=20CI-fatal=20env-Red?= =?UTF-8?q?is=20dials=20in=20unit=20tests=20=E2=80=94=20force=20lazyConnec?= =?UTF-8?q?t=20+=20stub=20runtime=20Redis=20singletons?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The setup-file mocks of the six eager worker/engine singletons were not enough: CI shards still flooded ECONNREFUSED/maxRetries. Two further classes of env-Redis usage survived them, reproduced locally by running the failing shards with REDIS_PORT pointed at a dead port: 1. Import-time construction: ~15 more singletons (platform cache, billing-limit reconcile queue, alerts rate limiter, DevPresence, auto-increment counter, s2 token cache, v1 streams cache, ...) build ioredis clients at module import, and ioredis dials on construction. A global ioredis mock now forces lazyConnect: true so clients only dial on first command — testcontainer-backed tests are unaffected (their first command connects as before). 2. Runtime commands inside code under test: tracePubSub.publish() (eventRepository writes), alertsRateLimiter.check() (deliverAlert) and the task metadata cache each issue commands against env-configured Redis mid-test; every command burns ~20 reconnect cycles before its error surfaces, which times the tests out. 
These three modules are now stubbed (metadata cache pinned to its Noop implementation, which is what CI's unset env resolves to anyway). Verified: webapp shards 2/5/6/8 (the ones failing on the pr06+ stack) run green with Redis pointed at a dead port, and shards 2/8 stay green against live Redis (store-routing suites still exercise the real run store). Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/webapp/test/setup.ts | 85 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/apps/webapp/test/setup.ts b/apps/webapp/test/setup.ts index 6acf266c082..421ab44d45f 100644 --- a/apps/webapp/test/setup.ts +++ b/apps/webapp/test/setup.ts @@ -42,6 +42,91 @@ const noopProxy = () => } ); +// Beyond the modules mocked above, dozens more app modules construct an +// ioredis client at import time pointed at env-configured Redis, and ioredis +// dials on construction — in CI (no Redis service) that floods ECONNREFUSED at +// shard scale. Force `lazyConnect: true` on every client instead: import-time +// singletons construct but never dial, while anything that actually issues a +// command (tests against live testcontainers) connects on first command +// exactly as before. +vi.mock("ioredis", async (importOriginal) => { + const actual = await importOriginal(); + + // Normalize ioredis's overloaded ctor args — (), (port), (path), + // (port, host), (opts), (port, opts), (port, host, opts), (path, opts) — + // so lazyConnect lands in the options object in every form. 
+ function withLazyConnect(args: unknown[]): unknown[] { + if (args.length === 0) { + return [{ lazyConnect: true }]; + } + const last = args[args.length - 1]; + if (typeof last === "object" && last !== null) { + return [...args.slice(0, -1), { ...last, lazyConnect: true }]; + } + return [...args, { lazyConnect: true }]; + } + + class LazyRedis extends actual.Redis { + constructor(...args: unknown[]) { + // @ts-expect-error – forwarding ioredis's overloaded ctor args + super(...withLazyConnect(args)); + } + } + + class LazyCluster extends actual.Cluster { + constructor(startupNodes: unknown, options?: Record) { + // @ts-expect-error – forwarding ioredis's ctor args + super(startupNodes, { ...options, lazyConnect: true }); + } + } + + // Keep the `Redis.Cluster` static alias (`new Redis.Cluster(...)`) working. + // The base class exposes `Cluster` as a getter-only static, so define our + // own property rather than assigning through the inherited getter. + Object.defineProperty(LazyRedis, "Cluster", { value: LazyCluster }); + + return { + ...actual, + default: LazyRedis, + Redis: LazyRedis, + Cluster: LazyCluster, + }; +}); + +// alertsRateLimiter.check() is invoked at runtime by deliverAlert; against +// env-configured Redis each check burns ~20 reconnect cycles before its +// caught error, stalling alert-path tests into timeouts. Allow everything. +vi.mock("~/v3/alertsRateLimiter.server", () => ({ + alertsRateLimiter: { check: vi.fn().mockResolvedValue({ allowed: true }) }, +})); + +// tracePubSub.publish() runs inside eventRepository writes; each publish to +// env-configured Redis stalls ~20 reconnect cycles (errors are allSettled- +// swallowed but awaited), timing out any test that records trace events. 
+vi.mock("~/v3/services/tracePubSub.server", async () => { + const { EventEmitter } = await import("node:events"); + return { + tracePubSub: { + publish: vi.fn().mockResolvedValue(undefined), + subscribeToTrace: vi.fn().mockResolvedValue({ + unsubscribe: vi.fn().mockResolvedValue(undefined), + eventEmitter: new EventEmitter(), + }), + }, + TracePubSub: class {}, + }; +}); + +// Same runtime-stall shape for the task metadata cache (queues concern). CI +// leaves TASK_META_CACHE_REDIS_HOST unset and gets the Noop implementation; +// pin the Noop cache here so env-configured local runs behave identically. +vi.mock("~/services/taskMetadataCacheInstance.server", async () => { + const { NoopTaskMetadataCache } = await vi.importActual< + typeof import("~/services/taskMetadataCache.server") + >("~/services/taskMetadataCache.server"); + return { taskMetadataCacheInstance: new NoopTaskMetadataCache() }; +}); + vi.mock("~/v3/runEngine.server", () => ({ engine: noopProxy() })); vi.mock("~/v3/marqs/index.server", () => ({ marqs: noopProxy(), MarQS: class {} })); vi.mock("~/v3/marqs/devPubSub.server", () => ({ devPubSub: noopProxy() })); From 89b4ea9e39fb7e921309ebbf4cb6d29fe4d38da3 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Fri, 3 Jul 2026 00:53:58 +0100 Subject: [PATCH 11/15] test(webapp): stop unit tests reaching env-configured Redis/Postgres in CI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI runners have no .env, no REDIS_HOST/REDIS_PORT, and no Postgres at localhost:5432, which surfaced two failure layers that local runs mask (the dev stack answers on both): - suites transitively importing triggerTaskV1.server failed to collect because autoIncrementCounter.server.ts throws at import when REDIS_HOST/REDIS_PORT are unset (shards 2/5/6). Default the pair in test/setup.ts — the global ioredis lazyConnect mock means nothing dials. 
- TriggerFailedTaskService.call() resolved its event repository via getEventRepository → global prisma (feature-flag read + Prisma event repo), so in CI the swallowed connect error returned null friendlyIds (shard 8). Allow injecting the repository/store pair and bind the test to an EventRepository over the testcontainer DB. - once the cancelDevSessionRuns suite could collect, findLatestSession's hardwired global $replica was the next masked layer; give it an injectable client (defaulting to $replica) and pass the service's _replica through. Verified by replaying the exact CI env locally (.env hidden, workflow env vars, dead localhost DB, GITHUB_ACTIONS set): all four failing suites and full shards 2/5/6/8 reproduce the CI failures before and pass after. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../app/models/runtimeEnvironment.server.ts | 7 ++- .../services/triggerFailedTask.server.ts | 18 ++++-- .../services/cancelDevSessionRuns.server.ts | 2 +- .../cancelDevSessionRunsStoreRouting.test.ts | 1 + .../test/engine/triggerFailedTask.test.ts | 57 +++++++++---------- apps/webapp/test/setup.ts | 6 ++ 6 files changed, 53 insertions(+), 38 deletions(-) diff --git a/apps/webapp/app/models/runtimeEnvironment.server.ts b/apps/webapp/app/models/runtimeEnvironment.server.ts index 5e6974cb0f1..987394ea40c 100644 --- a/apps/webapp/app/models/runtimeEnvironment.server.ts +++ b/apps/webapp/app/models/runtimeEnvironment.server.ts @@ -358,8 +358,11 @@ export async function disconnectSession(environmentId: string) { return session; } -export async function findLatestSession(environmentId: string) { - const session = await $replica.runtimeEnvironmentSession.findFirst({ +export async function findLatestSession( + environmentId: string, + client: PrismaClientOrTransaction = $replica +) { + const session = await client.runtimeEnvironmentSession.findFirst({ where: { environmentId, }, diff --git a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts 
b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts index e42f80fcc1a..811cefd3501 100644 --- a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts @@ -13,6 +13,7 @@ import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedM import { getEventRepository } from "~/v3/eventRepository/index.server"; import { runStore as defaultRunStore } from "~/v3/runStore.server"; import type { RunStore } from "@internal/run-store"; +import type { IEventRepository } from "~/v3/eventRepository/eventRepository.types"; import { PerformTaskRunAlertsService } from "~/v3/services/alerts/performTaskRunAlerts.server"; import { DefaultQueueManager } from "../concerns/queues.server"; import type { TriggerTaskRequest } from "../types"; @@ -65,17 +66,22 @@ export class TriggerFailedTaskService { // singleton (in production the same store the engine writes through). Injected in // tests so the read resolves on the same store the engine wrote to. private readonly runStore: RunStore; + // Defaults to getEventRepository's org-flag resolution, which reads through the + // global prisma client; tests inject a repository bound to their testcontainer DB. + private readonly eventRepository?: { repository: IEventRepository; store: string }; constructor(opts: { prisma: PrismaClientOrTransaction; engine: RunEngine; replicaPrisma?: PrismaClientOrTransaction; runStore?: RunStore; + eventRepository?: { repository: IEventRepository; store: string }; }) { this.prisma = opts.prisma; this.replicaPrisma = opts.replicaPrisma ?? opts.prisma; this.engine = opts.engine; this.runStore = opts.runStore ?? defaultRunStore; + this.eventRepository = opts.eventRepository; } // Mint a failed run's friendlyId. 
The id-kind decides which store the run is @@ -122,11 +128,13 @@ export class TriggerFailedTaskService { }); mintedFriendlyId = failedRunFriendlyId; - const { repository, store } = await getEventRepository( - request.environment.organization.id, - request.environment.organization.featureFlags as Record, - undefined - ); + const { repository, store } = + this.eventRepository ?? + (await getEventRepository( + request.environment.organization.id, + request.environment.organization.featureFlags as Record, + undefined + )); // Resolve parent run for rootTaskRunId and depth (same as triggerTask.server.ts) const parentRun = request.parentRunId diff --git a/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts b/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts index 39271889b56..3575a750521 100644 --- a/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts +++ b/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts @@ -43,7 +43,7 @@ export class CancelDevSessionRunsService extends BaseService { : undefined; if (cancelledSession) { - const latestSession = await findLatestSession(cancelledSession.environmentId); + const latestSession = await findLatestSession(cancelledSession.environmentId, this._replica); if ( latestSession && diff --git a/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts b/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts index 6f0abc432b8..ea29821fd19 100644 --- a/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts +++ b/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts @@ -226,6 +226,7 @@ describe("CancelDevSessionRunsService passthrough (single-DB)", () => { // control-plane read runs on the same prisma. 
const service = new CancelDevSessionRunsService({ prisma, + replica: prisma, readThroughDeps: { splitEnabled: false, newClient: prisma as unknown as PrismaReplicaClient, diff --git a/apps/webapp/test/engine/triggerFailedTask.test.ts b/apps/webapp/test/engine/triggerFailedTask.test.ts index 21fbaeb83e2..ab6951a5701 100644 --- a/apps/webapp/test/engine/triggerFailedTask.test.ts +++ b/apps/webapp/test/engine/triggerFailedTask.test.ts @@ -6,9 +6,31 @@ import { containerTest } from "@internal/testcontainers"; import { trace } from "@opentelemetry/api"; import { RunId, classifyKind, generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; import { TriggerFailedTaskService } from "../../app/runEngine/services/triggerFailedTask.server"; +import { EventRepository } from "../../app/v3/eventRepository/eventRepository.server"; vi.setConfig?.({ testTimeout: 60_000 }); +// Bind the service's trace-event writes to the testcontainer DB. Without this, +// call() resolves the repository via getEventRepository → global prisma, which +// points at a database that doesn't exist in CI. +function makeService(prisma: any, engine: RunEngine) { + return new TriggerFailedTaskService({ + prisma, + engine, + // Read the parent through the same store the engine wrote it to. + runStore: engine.runStore, + eventRepository: { + repository: new EventRepository(prisma, prisma, { + batchSize: 100, + batchInterval: 1000, + retentionInDays: 30, + partitioningEnabled: false, + }), + store: "taskEvent", + }, + }); +} + function makeEngine(prisma: any, redisOptions: any) { return new RunEngine({ prisma, @@ -40,12 +62,7 @@ describe("TriggerFailedTaskService — failed run residency", () => { const taskIdentifier = "failed-residency-task"; await setupBackgroundWorker(engine, environment, taskIdentifier); - const service = new TriggerFailedTaskService({ - prisma, - engine, - // Read the parent through the same store the engine wrote it to. 
- runStore: engine.runStore, - }); + const service = makeService(prisma, engine); const friendlyId = await service.call({ taskId: taskIdentifier, @@ -95,12 +112,7 @@ describe("TriggerFailedTaskService — failed run residency", () => { prisma ); - const service = new TriggerFailedTaskService({ - prisma, - engine, - // Read the parent through the same store the engine wrote it to. - runStore: engine.runStore, - }); + const service = makeService(prisma, engine); const friendlyId = await service.call({ taskId: taskIdentifier, @@ -153,12 +165,7 @@ describe("TriggerFailedTaskService — failed run residency", () => { prisma ); - const service = new TriggerFailedTaskService({ - prisma, - engine, - // Read the parent through the same store the engine wrote it to. - runStore: engine.runStore, - }); + const service = makeService(prisma, engine); const friendlyId = await service.call({ taskId: taskIdentifier, @@ -200,12 +207,7 @@ describe("TriggerFailedTaskService — failed run residency", () => { prisma ); - const service = new TriggerFailedTaskService({ - prisma, - engine, - // Read the parent through the same store the engine wrote it to. - runStore: engine.runStore, - }); + const service = makeService(prisma, engine); const friendlyId = await service.callWithoutTraceEvents({ environmentId: environment.id, @@ -232,12 +234,7 @@ describe("TriggerFailedTaskService — failed run residency", () => { const taskIdentifier = "failed-residency-task"; await setupBackgroundWorker(engine, environment, taskIdentifier); - const service = new TriggerFailedTaskService({ - prisma, - engine, - // Read the parent through the same store the engine wrote it to. - runStore: engine.runStore, - }); + const service = makeService(prisma, engine); // A well-formed ksuid parent friendlyId that was NEVER triggered → no row. // Exercises the missing-parent fallback in callWithoutTraceEvents. 
diff --git a/apps/webapp/test/setup.ts b/apps/webapp/test/setup.ts index 421ab44d45f..1c445e1512f 100644 --- a/apps/webapp/test/setup.ts +++ b/apps/webapp/test/setup.ts @@ -6,6 +6,12 @@ import { vi } from "vitest"; config({ path: path.resolve(__dirname, "../.env") }); +// CI has no .env and no REDIS_HOST/REDIS_PORT, so import-time guards like +// autoIncrementCounter.server.ts throw and their suites fail to collect. Default +// the pair — the ioredis mock below forces lazyConnect, so nothing ever dials. +process.env.REDIS_HOST ??= "localhost"; +process.env.REDIS_PORT ??= "6379"; + // Worker singletons construct a RedisWorker at import time whose ioredis client // connects eagerly, so any test importing the service graph opens real Redis // connections on import — which floods and fails in CI (no Redis). Mock them to From fba183a8c0510dfda09062a160af90bab53d506c Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Fri, 3 Jul 2026 08:51:15 +0100 Subject: [PATCH 12/15] chore: add server-changes for pr06 Co-Authored-By: Claude Opus 4.8 (1M context) --- .server-changes/run-ops-split-webapp-write-path.md | 6 ++++++ 1 file changed, 6 insertions(+) create mode 100644 .server-changes/run-ops-split-webapp-write-path.md diff --git a/.server-changes/run-ops-split-webapp-write-path.md b/.server-changes/run-ops-split-webapp-write-path.md new file mode 100644 index 00000000000..70d97fd09b3 --- /dev/null +++ b/.server-changes/run-ops-split-webapp-write-path.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Route the webapp write path — trigger/batch run minting, idempotency-key resolution, and run lifecycle writes — through the run store so runs can be created and mutated on the dedicated run-ops database. 
From 30bfdda474257c5899a8db54170384b9faa48ebd Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Fri, 3 Jul 2026 11:23:20 +0100 Subject: [PATCH 13/15] chore(run-ops): fix lint/format for main lint rules Co-Authored-By: Claude Opus 4.8 (1M context) --- .../app/runEngine/services/createBatch.server.ts | 2 +- .../app/runEngine/services/triggerTask.server.test.ts | 7 +++---- .../services/runsRepository/runsRepository.server.ts | 2 +- .../app/v3/services/alerts/deliverAlert.server.ts | 9 ++++----- .../v3/services/alerts/performTaskRunAlerts.server.ts | 6 ++---- .../test/performTaskRunAlertsStoreRouting.test.ts | 2 +- apps/webapp/test/runsRepositoryCpres.test.ts | 2 +- apps/webapp/test/sessions.readthrough.test.ts | 2 +- apps/webapp/test/setup.ts | 10 ++++++---- 9 files changed, 20 insertions(+), 22 deletions(-) diff --git a/apps/webapp/app/runEngine/services/createBatch.server.ts b/apps/webapp/app/runEngine/services/createBatch.server.ts index f738b07997e..0095c48f2b5 100644 --- a/apps/webapp/app/runEngine/services/createBatch.server.ts +++ b/apps/webapp/app/runEngine/services/createBatch.server.ts @@ -1,6 +1,6 @@ import type { InitializeBatchOptions } from "@internal/run-engine"; import { type CreateBatchRequestBody, type CreateBatchResponse } from "@trigger.dev/core/v3"; -import { BatchId, RunId } from "@trigger.dev/core/v3/isomorphic"; +import { RunId } from "@trigger.dev/core/v3/isomorphic"; import { type BatchTaskRun, Prisma } from "@trigger.dev/database"; import { Evt } from "evt"; import { prisma, type PrismaClientOrTransaction } from "~/db.server"; diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.test.ts b/apps/webapp/app/runEngine/services/triggerTask.server.test.ts index 4b6a49a4755..31c624a3864 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.test.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.test.ts @@ -28,12 +28,11 @@ import { RunEngine } from "@internal/run-engine"; import { 
setupAuthenticatedEnvironment, setupBackgroundWorker } from "@internal/run-engine/tests"; import { assertNonNullable, containerTest } from "@internal/testcontainers"; import { trace } from "@opentelemetry/api"; -import { IOPacket } from "@trigger.dev/core/v3"; -import { RunId } from "@trigger.dev/core/v3/isomorphic"; -import { TaskRun } from "@trigger.dev/database"; +import type { IOPacket } from "@trigger.dev/core/v3"; +import type { TaskRun } from "@trigger.dev/database"; import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; import { DefaultQueueManager } from "~/runEngine/concerns/queues.server"; -import { +import type { EntitlementValidationParams, MaxAttemptsValidationParams, ParentRunValidationParams, diff --git a/apps/webapp/app/services/runsRepository/runsRepository.server.ts b/apps/webapp/app/services/runsRepository/runsRepository.server.ts index 3e3dbc5f82a..2fadb8c7108 100644 --- a/apps/webapp/app/services/runsRepository/runsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/runsRepository.server.ts @@ -8,7 +8,7 @@ import { type Prisma, TaskRunStatus } from "@trigger.dev/database"; import parseDuration from "parse-duration"; import { z } from "zod"; import { timeFilters } from "~/components/runs/v3/SharedFilters"; -import { type PrismaClient, type PrismaClientOrTransaction } from "~/db.server"; +import { type PrismaClientOrTransaction } from "~/db.server"; import { runStore as defaultRunStore } from "~/v3/runStore.server"; import { startActiveSpan } from "~/v3/tracer.server"; import { ClickHouseRunsRepository } from "./clickhouseRunsRepository.server"; diff --git a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts index 86140b4d557..845430735c8 100644 --- a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts +++ b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts @@ -50,10 +50,9 @@ import { generateFriendlyId } from 
"~/v3/friendlyIdentifiers"; import { fromPromise } from "neverthrow"; import { BaseService } from "../baseService.server"; import { CURRENT_API_VERSION } from "~/api/versions"; -import { - ControlPlaneResolver, - controlPlaneResolver as defaultControlPlaneResolver, -} from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import type { RunStore } from "@internal/run-store"; +import type { ControlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { controlPlaneResolver as defaultControlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; // Run-ops scalars read off `alert.taskRun` downstream. The control-plane fields (env type/branch, // lockedBy file/export, lockedToVersion version) are resolved via the resolver and stitched on @@ -137,7 +136,7 @@ export class DeliverAlertService extends BaseService { opts: { prisma?: PrismaClientOrTransaction; replica?: PrismaClientOrTransaction; - runStore?: import("@internal/run-store").RunStore; + runStore?: RunStore; controlPlaneResolver?: ControlPlaneResolver; } = {} ) { diff --git a/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts b/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts index 8fea2910f02..460ab5a91ed 100644 --- a/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts +++ b/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts @@ -2,10 +2,8 @@ import { type RunStore } from "@internal/run-store"; import { type Prisma, type ProjectAlertChannel } from "@trigger.dev/database"; import { type PrismaClientOrTransaction, type prisma } from "~/db.server"; import { alertsWorker } from "~/v3/alertsWorker.server"; -import { - ControlPlaneResolver, - controlPlaneResolver as defaultControlPlaneResolver, -} from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import type { ControlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { controlPlaneResolver as defaultControlPlaneResolver } 
from "~/v3/runOpsMigration/controlPlaneResolver.server"; import { BaseService } from "../baseService.server"; import { DeliverAlertService } from "./deliverAlert.server"; diff --git a/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts index e6ef8105579..707a3546cf8 100644 --- a/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts +++ b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts @@ -231,7 +231,7 @@ describe("PerformTaskRunAlertsService store routing (hetero)", () => { ); // Org/project/env + a PARENT env + the alert channel are control-plane → the control-plane DB. - const { project, organization, runtimeEnvironment } = await seedProject(prisma14, "cp"); + const { project, organization } = await seedProject(prisma14, "cp"); // A branch env whose parent type drives the channel filter (parentEnvironmentType ?? type). const parentEnv = await prisma14.runtimeEnvironment.create({ data: { diff --git a/apps/webapp/test/runsRepositoryCpres.test.ts b/apps/webapp/test/runsRepositoryCpres.test.ts index e43f9d96b93..29627f79ccd 100644 --- a/apps/webapp/test/runsRepositoryCpres.test.ts +++ b/apps/webapp/test/runsRepositoryCpres.test.ts @@ -12,7 +12,7 @@ vi.mock("~/db.server", () => ({ import { heteroRunOpsPostgresTest } from "@internal/testcontainers"; import { buildRunStore } from "~/v3/runStore.server"; import type { RunOpsPrismaClient } from "@internal/run-ops-database"; -import { PrismaClient } from "@trigger.dev/database"; +import type { PrismaClient } from "@trigger.dev/database"; import { BulkActionId, RunId } from "@trigger.dev/core/v3/isomorphic"; import { convertRunListInputOptionsToFilterRunsOptions } from "~/services/runsRepository/runsRepository.server"; diff --git a/apps/webapp/test/sessions.readthrough.test.ts b/apps/webapp/test/sessions.readthrough.test.ts index 02835d5976f..6496baeb16f 100644 --- a/apps/webapp/test/sessions.readthrough.test.ts +++ 
b/apps/webapp/test/sessions.readthrough.test.ts @@ -13,7 +13,7 @@ import { heteroRunOpsPostgresTest, postgresTest } from "@internal/testcontainers import { buildRunStore } from "~/v3/runStore.server"; import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; import type { RunOpsPrismaClient } from "@internal/run-ops-database"; -import { PrismaClient } from "@trigger.dev/database"; +import type { PrismaClient } from "@trigger.dev/database"; import { resolveSessionByIdOrExternalId, serializeSessionsWithFriendlyRunIds, diff --git a/apps/webapp/test/setup.ts b/apps/webapp/test/setup.ts index 1c445e1512f..ccdd9f1dd0c 100644 --- a/apps/webapp/test/setup.ts +++ b/apps/webapp/test/setup.ts @@ -3,6 +3,8 @@ import { config } from "dotenv"; import path from "node:path"; import { vi } from "vitest"; +import type * as IORedisModule from "ioredis"; +import type * as TaskMetadataCacheModule from "~/services/taskMetadataCache.server"; config({ path: path.resolve(__dirname, "../.env") }); @@ -56,7 +58,7 @@ const noopProxy = () => // command (tests against live testcontainers) connects on first command // exactly as before. vi.mock("ioredis", async (importOriginal) => { - const actual = await importOriginal(); + const actual = await importOriginal(); // Normalize ioredis's overloaded ctor args — (), (port), (path), // (port, host), (opts), (port, opts), (port, host, opts), (path, opts) — @@ -127,9 +129,9 @@ vi.mock("~/v3/services/tracePubSub.server", async () => { // leaves TASK_META_CACHE_REDIS_HOST unset and gets the Noop implementation; // pin the Noop cache here so env-configured local runs behave identically. 
vi.mock("~/services/taskMetadataCacheInstance.server", async () => { - const { NoopTaskMetadataCache } = await vi.importActual< - typeof import("~/services/taskMetadataCache.server") - >("~/services/taskMetadataCache.server"); + const { NoopTaskMetadataCache } = await vi.importActual( + "~/services/taskMetadataCache.server" + ); return { taskMetadataCacheInstance: new NoopTaskMetadataCache() }; }); From c1371f25fef4e8cd62263aadaa67bacca6dbe2f1 Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Fri, 3 Jul 2026 12:06:33 +0100 Subject: [PATCH 14/15] fix(run-ops test): make engine/marqs no-op mock recursive for nested method access Co-Authored-By: Claude Opus 4.8 (1M context) --- apps/webapp/test/setup.ts | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/apps/webapp/test/setup.ts b/apps/webapp/test/setup.ts index ccdd9f1dd0c..5824c15b900 100644 --- a/apps/webapp/test/setup.ts +++ b/apps/webapp/test/setup.ts @@ -42,13 +42,18 @@ vi.mock("~/v3/alertsWorker.server", () => ({ alertsWorker: createWorkerStub() }) // that open eager ioredis connections at import via the same pattern. No test // uses these app-level singletons directly (store-routing tests build their own // engine and run store), so stub them to no-op proxies. -const noopProxy = () => - new Proxy( - {}, - { - get: () => vi.fn().mockResolvedValue(undefined), - } - ); +// Recursive no-op proxy: property access at any depth returns another callable +// no-op proxy, so real service tests reaching nested singleton methods (e.g. +// engine.runQueue.updateEnvConcurrencyLimits) don't break on an intermediate stub. +type NoopProxyFn = ((...args: unknown[]) => Promise) & Record; + +const noopProxy = (): NoopProxyFn => { + const fn = () => Promise.resolve(undefined); + return new Proxy(fn, { + get: (_target, prop) => (prop === "then" ? 
undefined : noopProxy()), + apply: () => Promise.resolve(undefined), + }) as unknown as NoopProxyFn; +}; // Beyond the modules mocked above, dozens more app modules construct an // ioredis client at import time pointed at env-configured Redis, and ioredis From ea22f52567e137e8afbebe0252ca268692bcbdad Mon Sep 17 00:00:00 2001 From: Daniel Sutton Date: Fri, 3 Jul 2026 17:29:08 +0100 Subject: [PATCH 15/15] test(run-ops split): keyset-order hydrate + terminal-metadata read-seam regressions Two regression tests for the write-path read seams: - runsRepository: paginating the full keyset over interleaved cuid/ksuid runs enumerates every id once, no empty page, in ClickHouse (created_at DESC, run_id DESC) order -- fails if hydration reverts to lexical id desc across the id-space seam. - runReader: a NEW-resident (ksuid) run's terminal metadata hydrates through the owning store, never a generic legacy replica. Co-Authored-By: Claude Opus 4.8 (1M context) --- .../realtime/runReaderReadThrough.test.ts | 39 +++++++++ .../test/runsRepository.readthrough.test.ts | 84 +++++++++++++++++++ 2 files changed, 123 insertions(+) diff --git a/apps/webapp/test/realtime/runReaderReadThrough.test.ts b/apps/webapp/test/realtime/runReaderReadThrough.test.ts index 7f53b4c7d56..88c4e11e27c 100644 --- a/apps/webapp/test/realtime/runReaderReadThrough.test.ts +++ b/apps/webapp/test/realtime/runReaderReadThrough.test.ts @@ -310,6 +310,45 @@ describe("RunHydrator read-route through the runStore seam (legacy + new)", () = } ); + // Terminal-metadata read-seam: a NEW-resident (ksuid) run's final metadata is hydrated through + // the owning (NEW) store, not off a generic legacy replica. Asserts read-seam ROUTING for the + // terminal read; it is not a hard ordering/consistency guarantee about when the terminal marker + // and the row's terminal columns converge. 
+ heteroPostgresTest( + "terminal hydrate reads a NEW-resident run's final metadata through the owning store", + { timeout: 60_000 }, + async ({ prisma14, prisma17 }) => { + const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + const legacyFindRunSpy = vi.spyOn(legacyStore, "findRun"); + + const seed17 = await seedEnvironment(prisma17, "term17"); + const envId = seed17.environment.id; + const terminalRunId = newId("terminal_run"); + + // A terminal run with its final metadata persisted on the NEW store only. + await seedRun(prisma17, { + runId: terminalRunId, + organizationId: seed17.organization.id, + projectId: seed17.project.id, + runtimeEnvironmentId: envId, + output: '{"result":"final"}', + metadata: '{"done":true}', + }); + + // A generic legacy replica would miss the NEW row entirely — the metadata must come off NEW. + const runStore = makeRoutingShapedStore({ newStore, legacyStore }); + const hydrator = new RunHydrator({ replica: prisma14, runStore, cacheTtlMs: 0 }); + + const snapshot = await hydrator.getRunById(envId, terminalRunId); + expect(snapshot?.id).toBe(terminalRunId); + expect(snapshot?.metadata).toBe('{"done":true}'); + expect(snapshot?.output).toBe('{"result":"final"}'); + // The NEW-residency terminal read never touched the legacy slot. + expect(legacyFindRunSpy).not.toHaveBeenCalled(); + } + ); + // A live-migrated run continues streaming across the seam crossing with no gap. 
heteroPostgresTest( "live-migrated run continues streaming across the seam crossing", diff --git a/apps/webapp/test/runsRepository.readthrough.test.ts b/apps/webapp/test/runsRepository.readthrough.test.ts index e5dc97f2d8b..fd3f342f9c6 100644 --- a/apps/webapp/test/runsRepository.readthrough.test.ts +++ b/apps/webapp/test/runsRepository.readthrough.test.ts @@ -91,6 +91,7 @@ async function createRun( prisma: PrismaClient, ctx: SeedContext, run: { + id?: string; friendlyId: string; taskIdentifier?: string; status?: any; @@ -100,6 +101,7 @@ async function createRun( ) { return prisma.taskRun.create({ data: { + ...(run.id ? { id: run.id } : {}), friendlyId: run.friendlyId, taskIdentifier: run.taskIdentifier ?? "my-task", status: run.status ?? "PENDING", @@ -349,4 +351,86 @@ describe("RunsRepository read-through id-set hydrate (PG14 legacy + PG17 new)", } } ); + + // Full-keyset walk over interleaved cuid + ksuid ids: hydration must preserve the ClickHouse + // (created_at DESC, run_id DESC) order across the id-space seam. A hydrate that reverts to lexical + // `id desc` splits the two id-spaces into separate blocks, so it would fail this walk. + replicationContainerTest( + "paginating the full keyset enumerates every interleaved cuid/ksuid id once, in CH keyset order, with no empty page", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const ctx = await seedParents(prisma, "keysetwalk"); + + // cuid-shaped ids (25 chars, "c" prefix) and ksuid-shaped ids (27 chars, "2" prefix). Lexical + // `id desc` groups all "c" ids ahead of all "2" ids; the created_at order below interleaves + // them, so the two orders genuinely differ across the seam. 
+ const cuid = (n: number) => `c${String(n).padStart(24, "0")}`; + const ksuid = (n: number) => `2${String(n).padStart(26, "0")}`; + + // created_at DESC order (index 0 = most recent) interleaves the id-spaces: ksuid, cuid, + // ksuid, cuid, ksuid, cuid. + const now = Date.now(); + const seeds = [ + { id: ksuid(6), friendlyId: "run_k6", createdAt: new Date(now - 0 * 60_000) }, + { id: cuid(5), friendlyId: "run_c5", createdAt: new Date(now - 1 * 60_000) }, + { id: ksuid(4), friendlyId: "run_k4", createdAt: new Date(now - 2 * 60_000) }, + { id: cuid(3), friendlyId: "run_c3", createdAt: new Date(now - 3 * 60_000) }, + { id: ksuid(2), friendlyId: "run_k2", createdAt: new Date(now - 4 * 60_000) }, + { id: cuid(1), friendlyId: "run_c1", createdAt: new Date(now - 5 * 60_000) }, + ]; + for (const s of seeds) { + await createRun(prisma, ctx, s); + } + + await setTimeout(1500); + + const runsRepository = new RunsRepository({ prisma, clickhouse }); + + // The authoritative order the hydrate must reproduce: exactly the CH keyset the id-list scan + // returns (created_at DESC, run_id DESC). Lexical id-desc of the same ids differs from this. + const chOrder = await runsRepository.listRunIds({ + page: { size: 100 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + const expectedOrder = chOrder.runIds; + const lexicalIdDesc = [...expectedOrder].sort((a, b) => (a < b ? 1 : a > b ? -1 : 0)); + expect(expectedOrder).not.toEqual(lexicalIdDesc); // the seam actually separates the two orders + + // Walk the whole keyset a page at a time. 
+ const walked: string[] = []; + let cursor: string | undefined; + let pages = 0; + while (true) { + const { runs, pagination } = await runsRepository.listRuns({ + page: { size: 2, cursor }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + pages++; + expect(pages).toBeLessThan(20); // guard against a non-terminating walk + + for (const r of runs) walked.push(r.id); + + if (!pagination.nextCursor) break; + // No empty page may be returned while more pages exist. + expect(runs.length).toBeGreaterThan(0); + cursor = pagination.nextCursor; + } + + // Every seeded id enumerated exactly once. + expect(walked.slice().sort()).toEqual(seeds.map((s) => s.id).sort()); + expect(new Set(walked).size).toBe(seeds.length); + // The emitted order equals the CH keyset order across the id-space seam. + expect(walked).toEqual(expectedOrder); + } + ); });