diff --git a/.server-changes/run-ops-split-webapp-write-path.md b/.server-changes/run-ops-split-webapp-write-path.md new file mode 100644 index 00000000000..70d97fd09b3 --- /dev/null +++ b/.server-changes/run-ops-split-webapp-write-path.md @@ -0,0 +1,6 @@ +--- +area: webapp +type: feature +--- + +Route the webapp write path — trigger/batch run minting, idempotency-key resolution, and run lifecycle writes — through the run store so runs can be created and mutated on the dedicated run-ops database. diff --git a/apps/webapp/app/models/runtimeEnvironment.server.ts b/apps/webapp/app/models/runtimeEnvironment.server.ts index 5e6974cb0f1..987394ea40c 100644 --- a/apps/webapp/app/models/runtimeEnvironment.server.ts +++ b/apps/webapp/app/models/runtimeEnvironment.server.ts @@ -358,8 +358,11 @@ export async function disconnectSession(environmentId: string) { return session; } -export async function findLatestSession(environmentId: string) { - const session = await $replica.runtimeEnvironmentSession.findFirst({ +export async function findLatestSession( + environmentId: string, + client: PrismaClientOrTransaction = $replica +) { + const session = await client.runtimeEnvironmentSession.findFirst({ where: { environmentId, }, diff --git a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts index 3e049ceb37e..c856f67af08 100644 --- a/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts +++ b/apps/webapp/app/runEngine/concerns/idempotencyKeys.server.ts @@ -11,6 +11,10 @@ import { findRunByIdWithMollifierFallback } from "~/v3/mollifier/readFallback.se import { claimOrAwait } from "~/v3/mollifier/idempotencyClaim.server"; import { makeResolveMollifierFlag } from "~/v3/mollifier/mollifierGate.server"; import { runStore } from "~/v3/runStore.server"; +import { runOpsLegacyPrisma, runOpsNewPrisma } from "~/db.server"; +import { isSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; +import { 
resolveRunIdMintKind } from "~/v3/engineVersion.server"; +import { resolveIdempotencyDedupClient } from "./idempotencyResidency.server"; import type { TraceEventConcern, TriggerTaskRequest } from "../types"; // In-memory per-org mollifier-enabled check, shared with `evaluateGate` @@ -147,6 +151,28 @@ export class IdempotencyKeyConcern { return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; } + // Probe and clears must hit the DB where the would-be run will physically live. + const dedupClient = await resolveIdempotencyDedupClient( + { + environmentForMint: { + organizationId: request.environment.organizationId, + id: request.environment.id, + orgFeatureFlags: request.environment.organization?.featureFlags, + }, + parentRunFriendlyId: request.body.options?.parentRunId, + }, + { + isSplitEnabled, + fallbackClient: this.prisma, + newClient: runOpsNewPrisma, + legacyClient: runOpsLegacyPrisma, + resolveMintKind: resolveRunIdMintKind, + // `isMigrated` is intentionally omitted: until a child of a swept + // legacy-id parent can be born on the new DB, the swept-marker override + // would never change the answer, so a child routes by parent id-shape. + } + ); + const existingRun = idempotencyKey ? 
await runStore.findRun( { @@ -159,7 +185,7 @@ export class IdempotencyKeyConcern { associatedWaitpoint: true, }, }, - this.prisma + dedupClient ) : undefined; @@ -193,7 +219,7 @@ export class IdempotencyKeyConcern { // Update the existing run to remove the idempotency key await runStore.clearIdempotencyKey( { byId: { runId: existingRun.id, idempotencyKey } }, - this.prisma + dedupClient ); return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; @@ -210,7 +236,7 @@ export class IdempotencyKeyConcern { // Update the existing run to remove the idempotency key await runStore.clearIdempotencyKey( { byId: { runId: existingRun.id, idempotencyKey } }, - this.prisma + dedupClient ); return { isCached: false, idempotencyKey, idempotencyKeyExpiresAt }; @@ -249,7 +275,6 @@ export class IdempotencyKeyConcern { ? `${event.traceparent.spanId}:${event.spanId}` : event.spanId; - //block run with waitpoint await this.engine.blockRunWithWaitpoint({ runId: RunId.fromFriendlyId(parentRunId), waitpoints: associatedWaitpoint!.id, @@ -262,7 +287,7 @@ export class IdempotencyKeyConcern { : undefined, projectId: request.environment.projectId, organizationId: request.environment.organizationId, - tx: this.prisma, + tx: dedupClient, }); } ); @@ -277,24 +302,13 @@ export class IdempotencyKeyConcern { // (resumeParentOnCompletion) — that path bypasses the gate entirely // and its existing PG-side dedup is sufficient. // - // Also gated on the same per-org mollifier flag the gate uses: when - // `TRIGGER_MOLLIFIER_ENABLED=1` globally for staged rollout, the buffer - // singleton is constructed and `claimOrAwait` would otherwise issue a - // Redis SETNX for EVERY idempotency-keyed trigger — including orgs - // that haven't opted in. Those orgs never enter the mollify branch - // (the gate always returns pass_through for them), so there's no - // buffer activity to serialise against; PG's unique constraint - // already deduplicates concurrent same-key races. 
Resolving the org - // flag is a pure in-memory read of `Organization.featureFlags` — no - // DB query, same predicate the gate uses — keeping the claim's Redis - // RTT off the hot path for non-opted-in orgs during incremental - // rollout. - // Match the gate's bypass list (`mollifierGate.server.ts:158-175`). - // debounce + oneTimeUseToken triggers always return pass_through from - // the gate, so claiming a Redis SETNX here is wasted RTT on the - // trigger hot path. Excluding them keeps the claim aligned with the - // gate — if the gate would never mollify the request, there's no - // buffer to serialise against. + // Gated on the same per-org mollifier flag the gate uses, and the same + // bypass list (debounce + oneTimeUseToken): if the gate would never mollify + // the request, there's no buffer to serialise against and PG's unique + // constraint already deduplicates concurrent same-key races. Skipping the + // claim's Redis SETNX keeps its RTT off the hot path for those requests + // during staged rollout. The org-flag check is a pure in-memory read of + // `Organization.featureFlags`, no DB query. const claimEligible = !request.body.options?.resumeParentOnCompletion && !request.body.options?.debounce && @@ -336,7 +350,7 @@ export class IdempotencyKeyConcern { taskIdentifier: request.taskId, }, { include: { associatedWaitpoint: true } }, - this.prisma + dedupClient ); if (writerRun) { return { isCached: true, run: writerRun }; @@ -350,27 +364,18 @@ export class IdempotencyKeyConcern { if (buffered) { return { isCached: true, run: buffered }; } - // Claim resolved to a runId nothing can find — the run was - // genuinely lost (claimant errored after publish, drain failed, - // or both the PG row and buffer entry TTL'd out). This is - // terminal, not transient: `lookupIdempotency` self-heals a - // dangling pointer, and `ack` keeps the entry hash as a - // read-fallback past the PG write, so re-polling cannot conjure - // a run that is gone. 
Falling through to a fresh trigger is the - // correct recovery. + // Claim resolved to a runId nothing can find — the run was genuinely + // lost (claimant errored after publish, or both the PG row and buffer + // entry TTL'd out). Terminal, not transient, so falling through to a + // fresh trigger is the correct recovery. // - // Why falling through claimless is safe (no duplicate runs): - // concurrent triggers that also fall through here converge on a - // single run via the same dedup backstops the claim layer relies - // on — the PG unique constraint on the idempotency key - // (RunDuplicateIdempotencyKeyError → retry resolves to the - // winner) for the pass-through path, and `accept`'s idempotency - // SETNX (`duplicate_idempotency`) for the mollify path. Once the - // first fall-through commits a run, later callers find it via the - // writer-PG / buffer lookups above despite the stale `resolved:` - // slot, which the slot's TTL clears within ~30s. The residual - // cost is a few redundant (deduped) trigger attempts in that - // window, not duplicate runs. + // Falling through claimless doesn't duplicate runs: concurrent + // fall-throughs converge on one run via the same dedup backstops the + // claim layer relies on — PG's unique constraint on the idempotency key + // (pass-through path) and `accept`'s SETNX (mollify path). Once the + // first commits, later callers find it via the writer-PG / buffer + // lookups above despite the stale `resolved:` slot (cleared by its ~30s + // TTL). Residual cost is a few deduped trigger attempts, not dup runs. 
logger.warn("idempotency claim resolved but runId not findable", { envId: request.environment.id, taskIdentifier: request.taskId, diff --git a/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.test.ts b/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.test.ts new file mode 100644 index 00000000000..ee128224d10 --- /dev/null +++ b/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.test.ts @@ -0,0 +1,100 @@ +import { describe, expect, it } from "vitest"; +import { RunId } from "@trigger.dev/core/v3/isomorphic"; +import { + resolveIdempotencyDedupClient, + type ResolveIdempotencyClientDeps, +} from "./idempotencyResidency.server"; + +// Distinct sentinel objects so we can assert WHICH client was selected by reference. +const FALLBACK = { __tag: "fallback" } as never; +const NEW_CLIENT = { __tag: "new" } as never; +const LEGACY_CLIENT = { __tag: "legacy" } as never; + +function makeDeps(over: Partial<ResolveIdempotencyClientDeps>): ResolveIdempotencyClientDeps { + return { + isSplitEnabled: async () => true, + fallbackClient: FALLBACK, + newClient: NEW_CLIENT, + legacyClient: LEGACY_CLIENT, + resolveMintKind: async () => "ksuid", + classify: (id) => { + if (id.length === 27) return "NEW"; + if (id.length === 25) return "LEGACY"; + throw new Error(`unclassifiable: ${id.length}`); + }, + isMigrated: undefined, + ...over, + }; +} + +const env = { organizationId: "org_1", id: "env_1", orgFeatureFlags: {} }; + +describe("resolveIdempotencyDedupClient", () => { + it("returns the fallback client unchanged when split is disabled", async () => { + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: undefined }, + makeDeps({ isSplitEnabled: async () => false }) + ); + expect(client).toBe(FALLBACK); + }); + + it("routes a root run to the NEW client when the env mints ksuid", async () => { + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: undefined }, + makeDeps({ 
resolveMintKind: async () => "ksuid" }) + ); + expect(client).toBe(NEW_CLIENT); + }); + + it("routes a root run to the LEGACY client when the env mints cuid", async () => { + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: undefined }, + makeDeps({ resolveMintKind: async () => "cuid" }) + ); + expect(client).toBe(LEGACY_CLIENT); + }); + + it("routes a child to the NEW client when the ksuid parent is NEW-resident", async () => { + const ksuidParent = RunId.toFriendlyId("a".repeat(27)); + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: ksuidParent }, + makeDeps({ resolveMintKind: async () => "cuid" }) // mint flag must NOT win for a child + ); + expect(client).toBe(NEW_CLIENT); + }); + + it("routes a child to the LEGACY client when the cuid parent is LEGACY-resident", async () => { + const cuidParent = RunId.toFriendlyId("b".repeat(25)); + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: cuidParent }, + makeDeps({ resolveMintKind: async () => "ksuid" }) // mint flag must NOT win for a child + ); + expect(client).toBe(LEGACY_CLIENT); + }); + + it("routes a swept (migrated) cuid-parent child to the NEW client", async () => { + const cuidParent = RunId.toFriendlyId("c".repeat(25)); + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: cuidParent }, + makeDeps({ isMigrated: async () => true }) + ); + expect(client).toBe(NEW_CLIENT); + }); + + it("routes a non-migrated cuid-parent child to the LEGACY client even when isMigrated is provided", async () => { + const cuidParent = RunId.toFriendlyId("d".repeat(25)); + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: cuidParent }, + makeDeps({ isMigrated: async () => false }) + ); + expect(client).toBe(LEGACY_CLIENT); + }); + + it("falls back to the fallback client 
when a present parent id is unclassifiable", async () => { + const client = await resolveIdempotencyDedupClient( + { environmentForMint: env, parentRunFriendlyId: "run_not-a-valid-length" }, + makeDeps({}) + ); + expect(client).toBe(FALLBACK); + }); +}); diff --git a/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.ts b/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.ts new file mode 100644 index 00000000000..38ef4755844 --- /dev/null +++ b/apps/webapp/app/runEngine/concerns/idempotencyResidency.server.ts @@ -0,0 +1,56 @@ +import { ownerEngine, RunId, type Residency } from "@trigger.dev/core/v3/isomorphic"; +import type { PrismaClientOrTransaction } from "@trigger.dev/database"; + +type MintKind = "cuid" | "ksuid"; + +export type ResolveIdempotencyClientDeps = { + isSplitEnabled: () => Promise<boolean>; + fallbackClient: PrismaClientOrTransaction; + newClient: PrismaClientOrTransaction; + legacyClient: PrismaClientOrTransaction; + resolveMintKind: (environment: { + organizationId: string; + id: string; + orgFeatureFlags?: unknown; + }) => Promise<MintKind>; + classify?: (id: string) => Residency; + isMigrated?: (id: string) => Promise<boolean>; +}; + +export async function resolveIdempotencyDedupClient( + args: { + environmentForMint: { organizationId: string; id: string; orgFeatureFlags?: unknown }; + parentRunFriendlyId: string | undefined; + }, + deps: ResolveIdempotencyClientDeps +): Promise<PrismaClientOrTransaction> { + if (!(await deps.isSplitEnabled())) { + return deps.fallbackClient; + } + + const classify = deps.classify ?? ownerEngine; + const clientFor = (residency: Residency): PrismaClientOrTransaction => + residency === "NEW" ? 
deps.newClient : deps.legacyClient; + + if (args.parentRunFriendlyId) { + let parentInternalId: string; + try { + parentInternalId = RunId.fromFriendlyId(args.parentRunFriendlyId); + } catch { + return deps.fallbackClient; + } + let residency: Residency; + try { + residency = classify(parentInternalId); + } catch { + return deps.fallbackClient; + } + if (residency === "LEGACY" && deps.isMigrated && (await deps.isMigrated(parentInternalId))) { + return deps.newClient; + } + return clientFor(residency); + } + + const kind = await deps.resolveMintKind(args.environmentForMint); + return clientFor(kind === "ksuid" ? "NEW" : "LEGACY"); +} diff --git a/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts b/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts new file mode 100644 index 00000000000..284f842f569 --- /dev/null +++ b/apps/webapp/app/runEngine/concerns/resolveWaitpointThroughReadThrough.server.ts @@ -0,0 +1,53 @@ +import type { PrismaReplicaClient } from "~/db.server"; +import { + $replica as defaultLegacyReplica, + runOpsNewReplica as defaultNewClient, + runOpsSplitReadEnabled as defaultSplitReadEnabled, +} from "~/db.server"; +import { readThroughRun } from "~/v3/runOpsMigration/readThrough.server"; + +type ResolveWaitpointDeps = { + newClient?: PrismaReplicaClient; + legacyReplica?: PrismaReplicaClient; + splitEnabled?: boolean; + isPastRetention?: (id: string) => boolean; +}; + +// Safe defaults matching the deps `complete`/`callback` pass, so a bare caller still fans +// out to the dedicated run-ops replica (NEW-resident waitpoints) before control-plane. 
+export type ResolveWaitpointReadThroughDefaults = { + newClient: PrismaReplicaClient; + legacyReplica: PrismaReplicaClient; + splitEnabled: boolean; +}; + +const productionDefaults: ResolveWaitpointReadThroughDefaults = { + newClient: defaultNewClient, + legacyReplica: defaultLegacyReplica, + splitEnabled: defaultSplitReadEnabled, +}; + +export async function resolveWaitpointThroughReadThrough<T>(opts: { + waitpointId: string; + environmentId: string; + read: (client: PrismaReplicaClient) => Promise<T | null>; + deps?: ResolveWaitpointDeps; + defaults?: ResolveWaitpointReadThroughDefaults; +}): Promise<T | null> { + const defaults = opts.defaults ?? productionDefaults; + + const result = await readThroughRun({ + runId: opts.waitpointId, + environmentId: opts.environmentId, + readNew: (client) => opts.read(client), + readLegacy: (replica) => opts.read(replica), + deps: { + splitEnabled: opts.deps?.splitEnabled ?? defaults.splitEnabled, + newClient: opts.deps?.newClient ?? defaults.newClient, + legacyReplica: opts.deps?.legacyReplica ?? defaults.legacyReplica, + isPastRetention: opts.deps?.isPastRetention, + }, + }); + + return result.source === "new" || result.source === "legacy-replica" ? 
result.value : null; +} diff --git a/apps/webapp/app/runEngine/services/batchTrigger.server.ts b/apps/webapp/app/runEngine/services/batchTrigger.server.ts index 54a819770ef..772770becc8 100644 --- a/apps/webapp/app/runEngine/services/batchTrigger.server.ts +++ b/apps/webapp/app/runEngine/services/batchTrigger.server.ts @@ -13,13 +13,17 @@ import { Evt } from "evt"; import { z } from "zod"; import { prisma, type PrismaClientOrTransaction } from "~/db.server"; import { env } from "~/env.server"; +import { findEnvironmentById } from "~/models/runtimeEnvironment.server"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; import { batchTriggerWorker } from "~/v3/batchTriggerWorker.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { mintBatchFriendlyId } from "~/v3/runOpsMigration/mintBatchFriendlyId.server"; import { downloadPacketFromObjectStore, uploadPacketToObjectStore, } from "../../v3/objectStore.server"; +import type { RunEngine } from "../../v3/runEngine.server"; import { ServiceValidationError, WithRunEngine } from "../../v3/services/baseService.server"; import { TriggerTaskService } from "../../v3/services/triggerTask.server"; import { startActiveSpan } from "../../v3/tracer.server"; @@ -64,9 +68,10 @@ export class RunEngineBatchTriggerService extends WithRunEngine { constructor( batchProcessingStrategy?: BatchProcessingStrategy, - protected readonly _prisma: PrismaClientOrTransaction = prisma + protected readonly _prisma: PrismaClientOrTransaction = prisma, + engine?: RunEngine ) { - super({ prisma }); + super({ prisma: _prisma, engine }); // Eric note: We need to force sequential processing because when doing parallel, we end up with high-contention on the parent run lock // becuase we are triggering a lot of runs at once, and each one is trying to lock the parent run. 
@@ -84,11 +89,17 @@ export class RunEngineBatchTriggerService extends WithRunEngine { "call()", environment, async (span) => { - const { id: _id, friendlyId } = BatchId.generate(); + const { friendlyId } = await mintBatchFriendlyId({ + environment: { + organizationId: environment.organizationId, + id: environment.id, + orgFeatureFlags: environment.organization.featureFlags, + }, + parentRunFriendlyId: body.parentRunId, + }); span.setAttribute("batchId", friendlyId); - // Upload to object store const payloadPacket = await this.#handlePayloadPacket( body.items, `batch/${friendlyId}`, @@ -155,20 +166,22 @@ export class RunEngineBatchTriggerService extends WithRunEngine { body: BatchTriggerTaskV2RequestBody, options: BatchTriggerTaskServiceOptions = {} ) { + // BatchTaskRun.runtimeEnvironmentId no longer has an FK into RuntimeEnvironment; + // validate env existence app-side (covers both create arms below). + await controlPlaneResolver.assertEnvExists(environment.id); + if (body.items.length <= ASYNC_BATCH_PROCESS_SIZE_THRESHOLD) { - const batch = await this._prisma.batchTaskRun.create({ - data: { - id: BatchId.fromFriendlyId(batchId), - friendlyId: batchId, - runtimeEnvironmentId: environment.id, - runCount: body.items.length, - runIds: [], - payload: payloadPacket.data, - payloadType: payloadPacket.dataType, - options, - batchVersion: "runengine:v1", - oneTimeUseToken: options.oneTimeUseToken, - }, + const batch = await this._engine.runStore.createBatchTaskRun({ + id: BatchId.fromFriendlyId(batchId), + friendlyId: batchId, + runtimeEnvironmentId: environment.id, + runCount: body.items.length, + runIds: [], + payload: payloadPacket.data, + payloadType: payloadPacket.dataType, + options, + batchVersion: "runengine:v1", + oneTimeUseToken: options.oneTimeUseToken, }); this.onBatchTaskRunCreated.post(batch); @@ -249,19 +262,17 @@ export class RunEngineBatchTriggerService extends WithRunEngine { } } } else { - const batch = await this._prisma.batchTaskRun.create({ - data: 
{ - id: BatchId.fromFriendlyId(batchId), - friendlyId: batchId, - runtimeEnvironmentId: environment.id, - runCount: body.items.length, - runIds: [], - payload: payloadPacket.data, - payloadType: payloadPacket.dataType, - options, - batchVersion: "runengine:v1", - oneTimeUseToken: options.oneTimeUseToken, - }, + const batch = await this._engine.runStore.createBatchTaskRun({ + id: BatchId.fromFriendlyId(batchId), + friendlyId: batchId, + runtimeEnvironmentId: environment.id, + runCount: body.items.length, + runIds: [], + payload: payloadPacket.data, + payloadType: payloadPacket.dataType, + options, + batchVersion: "runengine:v1", + oneTimeUseToken: options.oneTimeUseToken, }); this.onBatchTaskRunCreated.post(batch); @@ -336,7 +347,6 @@ export class RunEngineBatchTriggerService extends WithRunEngine { const $attemptCount = options.attemptCount + 1; - // Add early return if max attempts reached if ($attemptCount > MAX_ATTEMPTS) { logger.error("[RunEngineBatchTrigger][processBatchTaskRun] Max attempts reached", { options, @@ -346,23 +356,22 @@ export class RunEngineBatchTriggerService extends WithRunEngine { return; } - const batch = await this._prisma.batchTaskRun.findFirst({ - where: { id: options.batchId }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - }, - }, - }, - }); + const batch = await this._engine.runStore.findBatchTaskRunById(options.batchId); if (!batch) { return; } - // Check to make sure the currentIndex is not greater than the runCount + // BatchTaskRun -> RuntimeEnvironment FK is dropped; resolve the env from the scalar id. 
+ const environment = await findEnvironmentById(batch.runtimeEnvironmentId); + if (!environment) { + logger.error("[RunEngineBatchTrigger][processBatchTaskRun] Environment not found", { + batchId: batch.id, + runtimeEnvironmentId: batch.runtimeEnvironmentId, + }); + return; + } + if (options.range.start >= batch.runCount) { logger.debug( "[RunEngineBatchTrigger][processBatchTaskRun] currentIndex is greater than runCount", @@ -377,13 +386,12 @@ export class RunEngineBatchTriggerService extends WithRunEngine { return; } - // Resolve the payload const payloadPacket = await downloadPacketFromObjectStore( { data: batch.payload ?? undefined, dataType: batch.payloadType, }, - batch.runtimeEnvironment + environment ); const payload = await parsePacket(payloadPacket); @@ -404,7 +412,7 @@ export class RunEngineBatchTriggerService extends WithRunEngine { const result = await this.#processBatchTaskRunItems({ batch, - environment: batch.runtimeEnvironment, + environment, currentIndex: options.range.start, batchSize: options.range.count, items: $payload, @@ -609,8 +617,7 @@ export class RunEngineBatchTriggerService extends WithRunEngine { workingIndex++; } - //add the run ids to the batch - const updatedBatch = await this._prisma.batchTaskRun.update({ + const updatedBatch = await this._engine.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { runIds: { @@ -626,7 +633,6 @@ export class RunEngineBatchTriggerService extends WithRunEngine { }, }); - //triggered all the runs if (updatedBatch.processingJobsCount >= updatedBatch.runCount) { logger.debug("[RunEngineBatchTrigger][processBatchTaskRun] All runs created", { batchId: batch.friendlyId, diff --git a/apps/webapp/app/runEngine/services/createBatch.server.ts b/apps/webapp/app/runEngine/services/createBatch.server.ts index 0653e1ef1c2..0095c48f2b5 100644 --- a/apps/webapp/app/runEngine/services/createBatch.server.ts +++ b/apps/webapp/app/runEngine/services/createBatch.server.ts @@ -1,11 +1,13 @@ import type { 
InitializeBatchOptions } from "@internal/run-engine"; import { type CreateBatchRequestBody, type CreateBatchResponse } from "@trigger.dev/core/v3"; -import { BatchId, RunId } from "@trigger.dev/core/v3/isomorphic"; +import { RunId } from "@trigger.dev/core/v3/isomorphic"; import { type BatchTaskRun, Prisma } from "@trigger.dev/database"; import { Evt } from "evt"; import { prisma, type PrismaClientOrTransaction } from "~/db.server"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { mintBatchFriendlyId } from "~/v3/runOpsMigration/mintBatchFriendlyId.server"; import { ServiceValidationError, WithRunEngine } from "../../v3/services/baseService.server"; import { BatchRateLimitExceededError, getBatchLimits } from "../concerns/batchLimits.server"; import { DefaultQueueManager } from "../concerns/queues.server"; @@ -58,12 +60,18 @@ export class CreateBatchService extends WithRunEngine { "createBatch()", environment, async (span) => { - const { id, friendlyId } = BatchId.generate(); + const { id, friendlyId } = await mintBatchFriendlyId({ + environment: { + organizationId: environment.organizationId, + id: environment.id, + orgFeatureFlags: environment.organization.featureFlags, + }, + parentRunFriendlyId: body.parentRunId, + }); span.setAttribute("batchId", friendlyId); span.setAttribute("runCount", body.runCount); - // Validate entitlement const entitlementValidation = await this.validator.validateEntitlement({ environment, }); @@ -72,14 +80,11 @@ export class CreateBatchService extends WithRunEngine { throw entitlementValidation.error; } - // Extract plan type from entitlement validation for billing tracking const planType = entitlementValidation.plan?.type; - // Get batch limits for this organization const { config, rateLimiter } = await getBatchLimits(environment.organization); - // Check rate 
limit BEFORE creating the batch - // This prevents burst creation of batches that exceed the rate limit + // Rate-limit before creating the batch, to stop bursts exceeding the limit. const rateResult = await rateLimiter.limit(environment.id, body.runCount); if (!rateResult.success) { @@ -94,23 +99,23 @@ export class CreateBatchService extends WithRunEngine { // Note: Queue size limits are validated per-queue when batch items are processed, // since we don't know which queues items will go to until they're streamed. - // Create BatchTaskRun in Postgres with PENDING status - // The batch will be sealed (status -> PROCESSING) when items are streamed - const batch = await this._prisma.batchTaskRun.create({ - data: { - id, - friendlyId, - runtimeEnvironmentId: environment.id, - status: "PENDING", - runCount: body.runCount, - expectedCount: body.runCount, - runIds: [], - batchVersion: "runengine:v2", // 2-phase streaming batch API - oneTimeUseToken: options.oneTimeUseToken, - idempotencyKey: body.idempotencyKey, - // Not sealed yet - will be sealed when items stream completes - sealed: false, - }, + // BatchTaskRun.runtimeEnvironmentId no longer has an FK into RuntimeEnvironment; + // validate env existence app-side (passthrough when split is off). + await controlPlaneResolver.assertEnvExists(environment.id); + + // Created PENDING; sealed (status -> PROCESSING) once items are streamed. 
+ const batch = await this._engine.runStore.createBatchTaskRun({ + id, + friendlyId, + runtimeEnvironmentId: environment.id, + status: "PENDING", + runCount: body.runCount, + expectedCount: body.runCount, + runIds: [], + batchVersion: "runengine:v2", // 2-phase streaming batch API + oneTimeUseToken: options.oneTimeUseToken, + idempotencyKey: body.idempotencyKey, + sealed: false, }); this.onBatchTaskRunCreated.post(batch); diff --git a/apps/webapp/app/runEngine/services/streamBatchItems.server.ts b/apps/webapp/app/runEngine/services/streamBatchItems.server.ts index fd229777c10..0011975d6d0 100644 --- a/apps/webapp/app/runEngine/services/streamBatchItems.server.ts +++ b/apps/webapp/app/runEngine/services/streamBatchItems.server.ts @@ -13,7 +13,7 @@ import { ServiceValidationError, WithRunEngine } from "../../v3/services/baseSer import { BatchPayloadProcessor } from "../concerns/batchPayloads.server"; /** - * Phase 2 retry idempotency check (TRI-9944). + * Phase 2 retry idempotency check. * * Returns true when the batch is in a state that means the Phase 2 stream's * job has already been done — every item has a TaskRun record (real or @@ -128,24 +128,12 @@ export class StreamBatchItemsService extends WithRunEngine { // Convert friendly ID to internal ID const batchId = this.parseBatchFriendlyId(batchFriendlyId); - // Validate batch exists and belongs to this environment - const batch = await this._prisma.batchTaskRun.findFirst({ - where: { - id: batchId, - runtimeEnvironmentId: environment.id, - }, - select: { - id: true, - friendlyId: true, - status: true, - runCount: true, - sealed: true, - batchVersion: true, - processingCompletedAt: true, - }, - }); + // Validate batch exists and belongs to this environment. Routed by batch id so a + // ksuid (NEW-resident) batch is found on the owning DB; the env-ownership check that + // was in the where clause is enforced app-side below. 
+ const batch = await this._engine.runStore.findBatchTaskRunById(batchId); - if (!batch) { + if (!batch || batch.runtimeEnvironmentId !== environment.id) { throw new ServiceValidationError(`Batch ${batchFriendlyId} not found`); } @@ -215,10 +203,7 @@ export class StreamBatchItemsService extends WithRunEngine { // milliseconds between the loop ending and getBatchEnqueuedCount() being called. // Check both sealed (sealed by this endpoint on a concurrent request) and // COMPLETED (sealed by the BatchQueue completion path before we got here). - const currentBatch = await this._prisma.batchTaskRun.findFirst({ - where: { id: batchId }, - select: { sealed: true, status: true, processingCompletedAt: true }, - }); + const currentBatch = await this._engine.runStore.findBatchTaskRunById(batchId); if ( isIdempotentRetrySuccess( @@ -279,7 +264,7 @@ export class StreamBatchItemsService extends WithRunEngine { // Seal the batch - use conditional update to prevent TOCTOU race // Another concurrent request may have already sealed this batch const now = new Date(); - const sealResult = await this._prisma.batchTaskRun.updateMany({ + const sealResult = await this._engine.runStore.updateManyBatchTaskRun({ where: { id: batchId, sealed: false, @@ -306,16 +291,7 @@ export class StreamBatchItemsService extends WithRunEngine { // batch-queue/index.ts. // Either way the goal — a durable batch that the SDK stops retrying — // has been achieved, so we return sealed: true. 
- const currentBatch = await this._prisma.batchTaskRun.findFirst({ - where: { id: batchId }, - select: { - id: true, - friendlyId: true, - status: true, - sealed: true, - processingCompletedAt: true, - }, - }); + const currentBatch = await this._engine.runStore.findBatchTaskRunById(batchId); if ( isIdempotentRetrySuccess( diff --git a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts index 2fc0fb750b9..811cefd3501 100644 --- a/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerFailedTask.server.ts @@ -1,6 +1,6 @@ import type { RunEngine } from "@internal/run-engine"; import { TaskRunErrorCodes, type TaskRunError } from "@trigger.dev/core/v3"; -import { RunId } from "@trigger.dev/core/v3/isomorphic"; +import { RunId, generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; import type { PrismaClientOrTransaction, RuntimeEnvironmentType, @@ -8,8 +8,12 @@ import type { } from "@trigger.dev/database"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; +import { resolveRunIdMintKind } from "~/v3/engineVersion.server"; +import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedMintKind.server"; import { getEventRepository } from "~/v3/eventRepository/index.server"; -import { runStore } from "~/v3/runStore.server"; +import { runStore as defaultRunStore } from "~/v3/runStore.server"; +import type { RunStore } from "@internal/run-store"; +import type { IEventRepository } from "~/v3/eventRepository/eventRepository.types"; import { PerformTaskRunAlertsService } from "~/v3/services/alerts/performTaskRunAlerts.server"; import { DefaultQueueManager } from "../concerns/queues.server"; import type { TriggerTaskRequest } from "../types"; @@ -58,35 +62,83 @@ export class TriggerFailedTaskService { private readonly prisma: PrismaClientOrTransaction; 
private readonly replicaPrisma: PrismaClientOrTransaction; private readonly engine: RunEngine; + // Resolves the parent run for depth/root/parent linkage. Defaults to the shared + // singleton (in production the same store the engine writes through). Injected in + // tests so the read resolves on the same store the engine wrote to. + private readonly runStore: RunStore; + // Defaults to getEventRepository's org-flag resolution, which reads through the + // global prisma client; tests inject a repository bound to their testcontainer DB. + private readonly eventRepository?: { repository: IEventRepository; store: string }; constructor(opts: { prisma: PrismaClientOrTransaction; engine: RunEngine; replicaPrisma?: PrismaClientOrTransaction; + runStore?: RunStore; + eventRepository?: { repository: IEventRepository; store: string }; }) { this.prisma = opts.prisma; this.replicaPrisma = opts.replicaPrisma ?? opts.prisma; this.engine = opts.engine; + this.runStore = opts.runStore ?? defaultRunStore; + this.eventRepository = opts.eventRepository; + } + + // Mint a failed run's friendlyId. The id-kind decides which store the run is + // born in (cuid → legacy store, ksuid → new store); the whole subgraph of a + // run must agree. Root failed runs mint by the environment's setting; child + // failed runs inherit the parent's current store so they never split. + private async mintFailedRunFriendlyId(args: { + organizationId: string; + environmentId: string; + orgFeatureFlags?: unknown; + parentRunFriendlyId?: string; + }): Promise<string> { + const mintKind = args.parentRunFriendlyId + ? resolveInheritedMintKind(args.parentRunFriendlyId) + : await resolveRunIdMintKind({ + organizationId: args.organizationId, + id: args.environmentId, + orgFeatureFlags: args.orgFeatureFlags, + }); + + return mintKind === "ksuid" + ? 
RunId.toFriendlyId(generateKsuidId()) + : RunId.generate().friendlyId; } async call(request: TriggerFailedTaskRequest): Promise<string | null> { - const failedRunFriendlyId = RunId.generate().friendlyId; const taskRunError: TaskRunError = { type: "INTERNAL_ERROR" as const, code: request.errorCode ?? TaskRunErrorCodes.UNSPECIFIED_ERROR, message: request.errorMessage, }; + // Held for the catch's log line; the in-try `const` is what consumers use. + let mintedFriendlyId: string | undefined; + try { - const { repository, store } = await getEventRepository( - request.environment.organization.id, - request.environment.organization.featureFlags as Record<string, unknown>, - undefined - ); + // Mint inside the try: classifying a user-supplied parentRunId throws on + // an unclassifiable id, so keep it within the catch's null-return contract. + const failedRunFriendlyId = await this.mintFailedRunFriendlyId({ + organizationId: request.environment.organizationId, + environmentId: request.environment.id, + orgFeatureFlags: request.environment.organization.featureFlags, + parentRunFriendlyId: request.parentRunId, + }); + mintedFriendlyId = failedRunFriendlyId; + + const { repository, store } = + this.eventRepository ?? + (await getEventRepository( + request.environment.organization.id, + request.environment.organization.featureFlags as Record<string, unknown>, + undefined + )); // Resolve parent run for rootTaskRunId and depth (same as triggerTask.server.ts) const parentRun = request.parentRunId - ? await runStore.findRun( + ? await this.runStore.findRun( { id: RunId.fromFriendlyId(request.parentRunId), runtimeEnvironmentId: request.environment.id, @@ -243,7 +295,7 @@ export class TriggerFailedTaskService { createError instanceof Error ? 
createError.message : String(createError); logger.error("TriggerFailedTaskService: failed to create pre-failed TaskRun", { taskId: request.taskId, - friendlyId: failedRunFriendlyId, + friendlyId: mintedFriendlyId, originalError: request.errorMessage, createError: createErrorMsg, }); @@ -270,16 +322,29 @@ export class TriggerFailedTaskService { batch?: { id: string; index: number }; errorCode?: TaskRunErrorCodes; }): Promise { - const failedRunFriendlyId = RunId.generate().friendlyId; + // Held for the catch's log line; the in-try `const` is what consumers use. + let mintedFriendlyId: string | undefined; try { + // Mint inside the try: classifying a user-supplied parentRunId throws on + // an unclassifiable id, so keep it within the catch's null-return contract. + const failedRunFriendlyId = await this.mintFailedRunFriendlyId({ + organizationId: opts.organizationId, + environmentId: opts.environmentId, + // No loaded org flags in this path; resolveRunIdMintKind falls back to a + // single replica lookup by organizationId only when there is no parent. + orgFeatureFlags: undefined, + parentRunFriendlyId: opts.parentRunId, + }); + mintedFriendlyId = failedRunFriendlyId; + // Best-effort parent run lookup for rootTaskRunId/depth let parentTaskRunId: string | undefined; let rootTaskRunId: string | undefined; let depth = 0; if (opts.parentRunId) { - const parentRun = await runStore.findRun( + const parentRun = await this.runStore.findRun( { id: RunId.fromFriendlyId(opts.parentRunId), runtimeEnvironmentId: opts.environmentId, @@ -347,7 +412,7 @@ export class TriggerFailedTaskService { } catch (createError) { logger.error("TriggerFailedTaskService: failed to create pre-failed TaskRun (no trace)", { taskId: opts.taskId, - friendlyId: failedRunFriendlyId, + friendlyId: mintedFriendlyId, originalError: opts.errorMessage, createError: createError instanceof Error ? 
createError.message : String(createError), }); diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.test.ts b/apps/webapp/app/runEngine/services/triggerTask.server.test.ts new file mode 100644 index 00000000000..31c624a3864 --- /dev/null +++ b/apps/webapp/app/runEngine/services/triggerTask.server.test.ts @@ -0,0 +1,825 @@ +import { describe, expect, vi } from "vitest"; + +// Mock the db prisma client. The service is constructed against a real +// testcontainer prisma instead — these empty singletons only satisfy the +// module-level imports of the production wiring (infrastructure boundary). +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, + runOpsNewReplica: {}, + runOpsLegacyReplica: {}, +})); +// Inherited harness boilerplate. The parent read under test takes the +// findRun(where, client) overload with this.prisma, so it does not consult this +// flag; the mock only satisfies other wiring imported transitively. 
+vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ isSplitEnabled: async () => false })); + +vi.mock("~/services/platform.v3.server", async (importOriginal) => { + const actual = (await importOriginal()) as Record<string, unknown>; + return { + ...actual, + getEntitlement: vi.fn(), + }; +}); + +import { RunEngine } from "@internal/run-engine"; +import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "@internal/run-engine/tests"; +import { assertNonNullable, containerTest } from "@internal/testcontainers"; +import { trace } from "@opentelemetry/api"; +import type { IOPacket } from "@trigger.dev/core/v3"; +import type { TaskRun } from "@trigger.dev/database"; +import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; +import { DefaultQueueManager } from "~/runEngine/concerns/queues.server"; +import type { + EntitlementValidationParams, + MaxAttemptsValidationParams, + ParentRunValidationParams, + PayloadProcessor, + TagValidationParams, + TracedEventSpan, + TraceEventConcern, + TriggerTaskRequest, + TriggerTaskValidator, + ValidationResult, +} from "~/runEngine/types"; +import { RunEngineTriggerTaskService } from "./triggerTask.server"; + +vi.setConfig({ testTimeout: 60_000 }); // 60 seconds timeout + +class MockPayloadProcessor implements PayloadProcessor { + async process(request: TriggerTaskRequest): Promise<IOPacket> { + return { + data: JSON.stringify(request.body.payload), + dataType: "application/json", + }; + } +} + +// Captures the `parentRun` the service resolved (via runStore.findRun) and +// passed into validation, so a test can assert on the resolved parent without +// mocking the read itself. Returns ok so the child triggers regardless.
+class CapturingParentRunValidator implements TriggerTaskValidator { + public capturedParentRun: ParentRunValidationParams["parentRun"] | "unset" = "unset"; + + validateTags(_params: TagValidationParams): ValidationResult { + return { ok: true }; + } + validateEntitlement(_params: EntitlementValidationParams): Promise<ValidationResult> { + return Promise.resolve({ ok: true }); + } + validateMaxAttempts(_params: MaxAttemptsValidationParams): ValidationResult { + return { ok: true }; + } + validateParentRun(params: ParentRunValidationParams): ValidationResult { + this.capturedParentRun = params.parentRun; + return { ok: true }; + } +} + +class MockTraceEventConcern implements TraceEventConcern { + async traceRun<T>( + _request: TriggerTaskRequest, + _parentStore: string | undefined, + callback: (span: TracedEventSpan, store: string) => Promise<T> + ): Promise<T> { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } + + async traceIdempotentRun<T>( + _request: TriggerTaskRequest, + _parentStore: string | undefined, + _options: { + existingRun: TaskRun; + idempotencyKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise<T> + ): Promise<T> { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + stop: () => {}, + }, + "test" + ); + } + + async traceDebouncedRun<T>( + _request: TriggerTaskRequest, + _parentStore: string | undefined, + _options: { + existingRun: TaskRun; + debounceKey: string; + incomplete: boolean; + isError: boolean; + }, + callback: (span: TracedEventSpan, store: string) => Promise<T> + ): Promise<T> { + return await callback( + { + traceId: "test", + spanId: "test", + traceContext: {}, + traceparent: undefined, + setAttribute: () => {}, + failWithError: () => {}, + 
stop: () => {}, + }, + "test" + ); + } +} + +function buildEngine(prisma: any, redisOptions: any) { + return new RunEngine({ + prisma, + worker: { + redis: redisOptions, + workers: 1, + tasksPerWorker: 10, + pollIntervalMs: 100, + }, + queue: { + redis: redisOptions, + }, + runLock: { + redis: redisOptions, + }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); +} + +describe("RunEngineTriggerTaskService parent + locked-worker reads", () => { + containerTest( + "resolves the parent run through the run-ops store by minted run id", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const validator = new CapturingParentRunValidator(); + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator, + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + // Trigger a ROOT run first to create a real parent TaskRun. + const parentResult = await triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { payload: { kind: "parent" } }, + }); + assertNonNullable(parentResult); + + // Trigger a CHILD pointing at the parent's friendlyId. The service must + // resolve the parent via runStore.findRun (minted RunId, env-scoped). 
+ const childResult = await triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { + payload: { kind: "child" }, + options: { parentRunId: parentResult.run.friendlyId }, + }, + }); + assertNonNullable(childResult); + + // The capturing validator observed the resolved parent — proving the + // read ran (against the container DB) and returned the right row. + expect(validator.capturedParentRun).not.toBe("unset"); + const capturedParent = validator.capturedParentRun; + assertNonNullable(capturedParent); + expect(capturedParent.id).toBe(parentResult.run.id); + expect(capturedParent.friendlyId).toBe(parentResult.run.friendlyId); + + // depth and root carry through — proving parentRun.depth and the parent + // id were read off the resolved row and threaded into the child. + const parentRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: parentResult.run.id }, + }); + const childRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: childResult.run.id }, + }); + + expect(childRow.depth).toBe(parentRow.depth + 1); + expect(childRow.parentTaskRunId).toBe(parentRow.id); + expect(childRow.rootTaskRunId).toBe(parentRow.id); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "scopes the parent lookup to the run's environment (cross-env parent is not resolved)", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + // Two independent authenticated environments. The setup helper hardcodes + // several globally-unique fields (org/project slug, env apiKey/pkApiKey, + // worker-group token hash), so rename envA's before the second call to + // avoid unique-constraint collisions. 
+ const envA = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + await prisma.organization.update({ + where: { id: envA.organizationId }, + data: { slug: `${envA.organization.slug}-a` }, + }); + await prisma.project.update({ + where: { id: envA.projectId }, + data: { slug: `${envA.project.slug}-a`, externalRef: `${envA.project.externalRef}-a` }, + }); + await prisma.runtimeEnvironment.update({ + where: { id: envA.id }, + data: { apiKey: `${envA.apiKey}-a`, pkApiKey: `${envA.pkApiKey}-a` }, + }); + await prisma.workerGroupToken.updateMany({ + where: { tokenHash: "token_hash" }, + data: { tokenHash: "token_hash_a" }, + }); + await prisma.workerInstanceGroup.updateMany({ + where: { masterQueue: "default" }, + data: { masterQueue: "default_a" }, + }); + const envB = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + expect(envA.id).not.toBe(envB.id); + expect(envA.organizationId).not.toBe(envB.organizationId); + + const taskIdentifier = "test-task"; + await setupBackgroundWorker(engine, envA, taskIdentifier); + await setupBackgroundWorker(engine, envB, taskIdentifier); + + const validator = new CapturingParentRunValidator(); + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator, + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + // A real parent run in envA. + const parentResult = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: envA, + body: { payload: { kind: "parent" } }, + }); + assertNonNullable(parentResult); + + // Trigger a child in envB pointing at the envA parent's friendlyId. 
The + // env guard in runStore.findRun's `where` rejects the cross-env parent + // in a single query, so the resolved parentRun is null. + const childResult = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: envB, + body: { + payload: { kind: "child" }, + options: { parentRunId: parentResult.run.friendlyId }, + }, + }); + assertNonNullable(childResult); + + // validateParentRun was called with no resolved parent. + expect(validator.capturedParentRun).not.toBe("unset"); + expect(validator.capturedParentRun ?? null).toBeNull(); + + // The child still triggered, at the root depth with no parent linkage — + // confirming the cross-env parent was dropped, not silently joined. + const childRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: childResult.run.id }, + }); + expect(childRow.depth).toBe(0); + expect(childRow.parentTaskRunId).toBeNull(); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "resolves the locked background worker on the control-plane client with no cross-DB join", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + const { worker } = await setupBackgroundWorker(engine, environment, taskIdentifier); + + // Read the seeded worker row to get its real version/id. + const workerRow = await prisma.backgroundWorker.findUniqueOrThrow({ + where: { id: worker.id }, + }); + + // Counting proxy over the control-plane client. `this.prisma` is ALWAYS + // the control-plane client; the locked-worker lookup is a DIRECT + // backgroundWorker.findFirst on it. The parent read uses a DIFFERENT + // call (runStore.findRun → taskRun), so a single call() issues two + // separate single-table reads — never one cross-seam join. Here we count + // the findFirst calls and capture their args to assert no include/join. 
+ let backgroundWorkerFindFirstCalls = 0; + const findFirstArgs: any[] = []; + const countingPrisma = new Proxy(prisma, { + get(target, prop, receiver) { + if (prop === "backgroundWorker") { + const delegate = Reflect.get(target, prop, receiver); + return new Proxy(delegate, { + get(bwTarget, bwProp, bwReceiver) { + if (bwProp === "findFirst") { + return async (args: any) => { + backgroundWorkerFindFirstCalls += 1; + findFirstArgs.push(args); + return (delegate as any).findFirst(args); + }; + } + const value = Reflect.get(bwTarget, bwProp, bwReceiver); + return typeof value === "function" ? value.bind(bwTarget) : value; + }, + }); + } + const value = Reflect.get(target, prop, receiver); + return typeof value === "function" ? value.bind(target) : value; + }, + }) as typeof prisma; + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma: countingPrisma, + payloadProcessor: new MockPayloadProcessor(), + // The queue manager gets the real (unproxied) prisma so the counting + // proxy only observes reads issued by the service itself. + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator: new CapturingParentRunValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { + payload: { kind: "locked" }, + options: { lockToVersion: workerRow.version }, + }, + }); + assertNonNullable(result); + + // Observable proof the locked worker was resolved on the control-plane + // client: the created run records the worker id in lockedToVersionId. 
+ const runRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: result.run.id }, + }); + expect(runRow.lockedToVersionId).toBe(workerRow.id); + expect(runRow.taskVersion).toBe(workerRow.version); + + // Exactly one backgroundWorker.findFirst fired for the locked-worker read. + expect(backgroundWorkerFindFirstCalls).toBe(1); + + // NO-JOIN assertion: the read referenced ONLY the backgroundWorker table. + // No `include` (which would join into another table); the `select` lists + // only backgroundWorker scalar columns. + const args = findFirstArgs[0]; + assertNonNullable(args); + expect(args.include).toBeUndefined(); + expect(Object.keys(args.select ?? {}).sort()).toEqual([ + "cliVersion", + "id", + "sdkVersion", + "version", + ]); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "issues two independent single-table reads when one call supplies both parentRunId and lockToVersion", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + const { worker } = await setupBackgroundWorker(engine, environment, taskIdentifier); + + const workerRow = await prisma.backgroundWorker.findUniqueOrThrow({ + where: { id: worker.id }, + }); + + // Count BOTH reads issued by the service on the control-plane client: + // the parent read (runStore.findRun → taskRun.findFirst) and the + // locked-worker read (backgroundWorker.findFirst). Capture every + // findFirst arg so we can assert no read carries a cross-seam include. 
+ let taskRunFindFirstCalls = 0; + let backgroundWorkerFindFirstCalls = 0; + const findFirstArgs: any[] = []; + const countingPrisma = new Proxy(prisma, { + get(target, prop, receiver) { + if (prop === "backgroundWorker") { + const delegate = Reflect.get(target, prop, receiver); + return new Proxy(delegate, { + get(bwTarget, bwProp, bwReceiver) { + if (bwProp === "findFirst") { + return async (args: any) => { + backgroundWorkerFindFirstCalls += 1; + findFirstArgs.push(args); + return (delegate as any).findFirst(args); + }; + } + const value = Reflect.get(bwTarget, bwProp, bwReceiver); + return typeof value === "function" ? value.bind(bwTarget) : value; + }, + }); + } + if (prop === "taskRun") { + const delegate = Reflect.get(target, prop, receiver); + return new Proxy(delegate, { + get(trTarget, trProp, trReceiver) { + if (trProp === "findFirst") { + return async (args: any) => { + taskRunFindFirstCalls += 1; + findFirstArgs.push(args); + return (delegate as any).findFirst(args); + }; + } + const value = Reflect.get(trTarget, trProp, trReceiver); + return typeof value === "function" ? value.bind(trTarget) : value; + }, + }); + } + const value = Reflect.get(target, prop, receiver); + return typeof value === "function" ? value.bind(target) : value; + }, + }) as typeof prisma; + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma: countingPrisma, + payloadProcessor: new MockPayloadProcessor(), + // queueConcern/idempotency get the real unproxied prisma so the + // counting proxy only observes reads issued by the service itself. 
+ queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator: new CapturingParentRunValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + // ROOT parent first (uses the unproxied prisma via a separate service so + // its internal reads don't pollute the child's counts). + const parentService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator: new CapturingParentRunValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + const parentResult = await parentService.call({ + taskId: taskIdentifier, + environment, + body: { payload: { kind: "parent" } }, + }); + assertNonNullable(parentResult); + + // CHILD supplying BOTH parentRunId AND lockToVersion in one call. + const childResult = await triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { + payload: { kind: "child" }, + options: { + parentRunId: parentResult.run.friendlyId, + lockToVersion: workerRow.version, + }, + }, + }); + assertNonNullable(childResult); + + const parentRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: parentResult.run.id }, + }); + const childRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: childResult.run.id }, + }); + + // Child resolved the parent (single-table parent read). + expect(childRow.parentTaskRunId).toBe(parentRow.id); + expect(childRow.depth).toBe(parentRow.depth + 1); + + // Child locked to the worker (single-table worker read). 
+ expect(childRow.lockedToVersionId).toBe(workerRow.id); + expect(childRow.taskVersion).toBe(workerRow.version); + + // Exactly one backgroundWorker.findFirst fired for the locked-worker read, + // and at least one taskRun.findFirst fired for the parent read. + expect(backgroundWorkerFindFirstCalls).toBe(1); + expect(taskRunFindFirstCalls).toBeGreaterThanOrEqual(1); + + // NO-JOIN proof: no captured read carried an `include` joining + // taskRun <-> backgroundWorker. Every findFirst arg has include undefined. + for (const args of findFirstArgs) { + expect(args?.include).toBeUndefined(); + } + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "lockToVersion matching no worker rejects the trigger after a single scalar-only worker read", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + let backgroundWorkerFindFirstCalls = 0; + const findFirstArgs: any[] = []; + const countingPrisma = new Proxy(prisma, { + get(target, prop, receiver) { + if (prop === "backgroundWorker") { + const delegate = Reflect.get(target, prop, receiver); + return new Proxy(delegate, { + get(bwTarget, bwProp, bwReceiver) { + if (bwProp === "findFirst") { + return async (args: any) => { + backgroundWorkerFindFirstCalls += 1; + findFirstArgs.push(args); + return (delegate as any).findFirst(args); + }; + } + const value = Reflect.get(bwTarget, bwProp, bwReceiver); + return typeof value === "function" ? value.bind(bwTarget) : value; + }, + }); + } + const value = Reflect.get(target, prop, receiver); + return typeof value === "function" ? 
value.bind(target) : value; + }, + }) as typeof prisma; + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma: countingPrisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator: new CapturingParentRunValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + const bogusVersion = "v-does-not-exist-0000"; + // The no-match worker read returns null; the queue concern then rejects + // the trigger rather than silently locking the run to a phantom version. + await expect( + triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { + payload: { kind: "locked" }, + options: { lockToVersion: bogusVersion }, + }, + }) + ).rejects.toThrow(/no worker found with that version/); + + // No run was locked to the bogus version (none was created). + const lockedRuns = await prisma.taskRun.findMany({ + where: { runtimeEnvironmentId: environment.id, taskVersion: bogusVersion }, + }); + expect(lockedRuns).toEqual([]); + + // The lone worker read fired exactly once with the scalar-only select and + // no cross-seam include. + expect(backgroundWorkerFindFirstCalls).toBe(1); + const args = findFirstArgs[0]; + assertNonNullable(args); + expect(args.include).toBeUndefined(); + expect(Object.keys(args.select ?? {}).sort()).toEqual([ + "cliVersion", + "id", + "sdkVersion", + "version", + ]); + } finally { + await engine.quit(); + } + } + ); + + containerTest( + "does not resolve a locked worker from a different environment", + async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + // Two independent authenticated environments. 
Rename envA's globally-unique + // fields before the second setup call to avoid unique-constraint collisions. + const envA = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + await prisma.organization.update({ + where: { id: envA.organizationId }, + data: { slug: `${envA.organization.slug}-a` }, + }); + await prisma.project.update({ + where: { id: envA.projectId }, + data: { slug: `${envA.project.slug}-a`, externalRef: `${envA.project.externalRef}-a` }, + }); + await prisma.runtimeEnvironment.update({ + where: { id: envA.id }, + data: { apiKey: `${envA.apiKey}-a`, pkApiKey: `${envA.pkApiKey}-a` }, + }); + await prisma.workerGroupToken.updateMany({ + where: { tokenHash: "token_hash" }, + data: { tokenHash: "token_hash_a" }, + }); + await prisma.workerInstanceGroup.updateMany({ + where: { masterQueue: "default" }, + data: { masterQueue: "default_a" }, + }); + const envB = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + expect(envA.id).not.toBe(envB.id); + expect(envA.organizationId).not.toBe(envB.organizationId); + + const taskIdentifier = "test-task"; + const { worker: workerA } = await setupBackgroundWorker(engine, envA, taskIdentifier); + const { worker: workerB } = await setupBackgroundWorker(engine, envB, taskIdentifier); + + const workerARow = await prisma.backgroundWorker.findUniqueOrThrow({ + where: { id: workerA.id }, + }); + const workerBRow = await prisma.backgroundWorker.findUniqueOrThrow({ + where: { id: workerB.id }, + }); + // Both seeded workers share the same version string. 
+ expect(workerARow.version).toBe(workerBRow.version); + expect(workerARow.id).not.toBe(workerBRow.id); + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator: new CapturingParentRunValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + // Trigger in envB locking to the shared version string. + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: envB, + body: { + payload: { kind: "locked" }, + options: { lockToVersion: workerBRow.version }, + }, + }); + assertNonNullable(result); + + const runRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: result.run.id }, + }); + // The projectId + runtimeEnvironmentId guard in the single-table worker + // read resolves envB's worker, never envA's same-version worker. 
+ expect(runRow.lockedToVersionId).toBe(workerBRow.id); + expect(runRow.lockedToVersionId).not.toBe(workerARow.id); + expect(runRow.taskVersion).toBe(workerBRow.version); + } finally { + await engine.quit(); + } + } + ); + + containerTest("a root trigger issues no parent lookup", async ({ prisma, redisOptions }) => { + const engine = buildEngine(prisma, redisOptions); + + try { + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "test-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const validator = new CapturingParentRunValidator(); + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: new DefaultQueueManager(prisma, engine), + idempotencyKeyConcern: new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ), + validator, + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + // Trigger with NO parentRunId. + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment, + body: { payload: { kind: "root" } }, + }); + assertNonNullable(result); + + // The validator ran but received no resolved parent: the parent read was + // skipped because no parentRunId was supplied. 
+ expect(validator.capturedParentRun).not.toBe("unset"); + expect(validator.capturedParentRun).toBeUndefined(); + + const runRow = await prisma.taskRun.findUniqueOrThrow({ + where: { id: result.run.id }, + }); + expect(runRow.depth).toBe(0); + expect(runRow.parentTaskRunId).toBeNull(); + } finally { + await engine.quit(); + } + }); +}); diff --git a/apps/webapp/app/runEngine/services/triggerTask.server.ts b/apps/webapp/app/runEngine/services/triggerTask.server.ts index e8eb9945f38..a86cb9e0eda 100644 --- a/apps/webapp/app/runEngine/services/triggerTask.server.ts +++ b/apps/webapp/app/runEngine/services/triggerTask.server.ts @@ -14,6 +14,7 @@ import { TriggerTraceContext, } from "@trigger.dev/core/v3"; import { + generateKsuidId, parseTraceparent, RunId, serializeTraceparent, @@ -25,6 +26,8 @@ import { logger } from "~/services/logger.server"; import { parseDelay } from "~/utils/delays"; import { handleMetadataPacket } from "~/utils/packets"; import { startSpan } from "~/v3/tracing.server"; +import { resolveRunIdMintKind } from "~/v3/engineVersion.server"; +import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedMintKind.server"; import type { TriggerTaskServiceOptions, TriggerTaskServiceResult, @@ -125,6 +128,31 @@ export class RunEngineTriggerTaskService { opts.isMollifierGloballyEnabled ?? (() => env.TRIGGER_MOLLIFIER_ENABLED === "1"); } + // Mint a new run's friendlyId. The id-kind decides which store the run is born + // in (cuid → legacy store, ksuid → new store), so the whole subgraph of a run + // must agree. Two cases: + // + // - ROOT run (no parent): mint by the environment's cutover setting. + // - CHILD run (has a parent): inherit the parent's residency by id-shape, so a + // parent and child never split across stores (ksuid parent → ksuid child, + // cuid parent → cuid child). 
+ private async mintRunFriendlyId( + environment: AuthenticatedEnvironment, + parentRunFriendlyId?: string + ): Promise { + const mintKind = parentRunFriendlyId + ? resolveInheritedMintKind(parentRunFriendlyId) + : await resolveRunIdMintKind({ + organizationId: environment.organizationId, + id: environment.id, + orgFeatureFlags: environment.organization.featureFlags, + }); + + return mintKind === "ksuid" + ? RunId.toFriendlyId(generateKsuidId()) + : RunId.generate().friendlyId; + } + public async call({ taskId, environment, @@ -150,7 +178,12 @@ export class RunEngineTriggerTaskService { span.setAttribute("taskId", taskId); span.setAttribute("attempt", attempt); - const runFriendlyId = options?.runFriendlyId ?? RunId.generate().friendlyId; + // Mint the run id. A caller-supplied id (idempotent retry) wins; + // otherwise mint by residency — inheriting the parent's store when a + // parent is present, else the environment's setting. + const runFriendlyId = + options?.runFriendlyId ?? + (await this.mintRunFriendlyId(environment, body.options?.parentRunId)); const triggerRequest = { taskId, friendlyId: runFriendlyId, @@ -159,7 +192,6 @@ export class RunEngineTriggerTaskService { options, } satisfies TriggerTaskRequest; - // Validate max attempts const maxAttemptsValidation = this.validator.validateMaxAttempts({ taskId, attempt, @@ -169,7 +201,6 @@ export class RunEngineTriggerTaskService { throw maxAttemptsValidation.error; } - // Validate tags const tagValidation = this.validator.validateTags({ tags: body.options?.tags, }); @@ -178,7 +209,6 @@ export class RunEngineTriggerTaskService { throw tagValidation.error; } - // Validate entitlement (unless skipChecks is enabled) let planType: string | undefined; if (!options.skipChecks) { @@ -190,7 +220,6 @@ export class RunEngineTriggerTaskService { throw entitlementValidation.error; } - // Extract plan type from entitlement response planType = entitlementValidation.plan?.type; } else { // When skipChecks is enabled, planType 
should be passed via options @@ -239,7 +268,6 @@ export class RunEngineTriggerTaskService { } } - // Get parent run if specified const parentRun = body.options?.parentRunId ? await runStore.findRun( { @@ -250,7 +278,6 @@ export class RunEngineTriggerTaskService { ) : undefined; - // Validate parent run const parentRunValidation = this.validator.validateParentRun({ taskId, parentRun: parentRun ?? undefined, @@ -390,7 +417,6 @@ export class RunEngineTriggerTaskService { envType: environment.type, }); - // Build annotations for this run const triggerSource = options.triggerSource ?? "api"; const triggerAction = options.triggerAction ?? "trigger"; const parentAnnotations = RunAnnotations.safeParse(parentRun?.annotations).data; diff --git a/apps/webapp/app/services/archiveBranch.server.ts b/apps/webapp/app/services/archiveBranch.server.ts index 3372ac87229..c7c8af5860b 100644 --- a/apps/webapp/app/services/archiveBranch.server.ts +++ b/apps/webapp/app/services/archiveBranch.server.ts @@ -81,6 +81,9 @@ export class ArchiveBranchService { }; } + // Branch archive is a SOFT update — do NOT hard-delete run-ops rows here (it would destroy a + // retained branch's history). RunOpsCascadeCleanupService.cleanupEnvironment belongs on the + // env hard-delete/purge path (owned by the cloud env-purge runbook), which has no site today. 
const slug = `${environment.slug}-${nanoid(6)}`; const shortcode = slug; diff --git a/apps/webapp/app/services/dashboardAgent.server.ts b/apps/webapp/app/services/dashboardAgent.server.ts index a66882b72cc..14eb51fcbef 100644 --- a/apps/webapp/app/services/dashboardAgent.server.ts +++ b/apps/webapp/app/services/dashboardAgent.server.ts @@ -3,6 +3,7 @@ import { TriggerClient } from "@trigger.dev/sdk"; import { chat } from "@trigger.dev/sdk/ai"; import { prisma } from "~/db.server"; import { env } from "~/env.server"; +import { runStore } from "~/v3/runStore.server"; import { githubApp } from "./gitHub.server"; import { logger } from "./logger.server"; @@ -211,10 +212,10 @@ export async function resolveRunCommit( environmentId: string, runFriendlyId: string ): Promise<{ sha: string; version: string; dirty: boolean } | null> { - const run = await prisma.taskRun.findFirst({ - where: { friendlyId: runFriendlyId, runtimeEnvironmentId: environmentId }, - select: { lockedToVersionId: true }, - }); + const run = await runStore.findRun( + { friendlyId: runFriendlyId, runtimeEnvironmentId: environmentId }, + { select: { lockedToVersionId: true } } + ); if (!run?.lockedToVersionId) return null; const deployment = await prisma.workerDeployment.findFirst({ diff --git a/apps/webapp/app/services/deleteProject.server.ts b/apps/webapp/app/services/deleteProject.server.ts index bbce896a57f..8db0a6ede04 100644 --- a/apps/webapp/app/services/deleteProject.server.ts +++ b/apps/webapp/app/services/deleteProject.server.ts @@ -3,6 +3,7 @@ import { prisma } from "~/db.server"; import { marqs } from "~/v3/marqs/index.server"; import { engine } from "~/v3/runEngine.server"; import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { RunOpsCascadeCleanupService } from "~/v3/runOpsMigration/runOpsCascadeCleanup.server"; type Options = ({ projectId: string } | { projectSlug: string }) & { userId: string; @@ -50,6 +51,10 @@ export class DeleteProjectService 
{ }); } + // Hard-delete the project's run-ops rows across both run-ops DBs (replaces the cloud-only + // dropped cross-seam FK cascades). Idempotent; uses the run-ops writers, not #prismaClient. + await new RunOpsCascadeCleanupService().cleanupProject(project.id); + // Mark the project as deleted (do this last because it makes it impossible to try again) // - This disables all API keys // - This disables all schedules from being scheduled diff --git a/apps/webapp/app/services/realtime/runReader.server.ts b/apps/webapp/app/services/realtime/runReader.server.ts index 952280e7749..c6a34d7de7b 100644 --- a/apps/webapp/app/services/realtime/runReader.server.ts +++ b/apps/webapp/app/services/realtime/runReader.server.ts @@ -95,7 +95,7 @@ export type RunHydratorOptions = { const DEFAULT_CACHE_TTL_MS = 250; const DEFAULT_MAX_CACHE_ENTRIES = 5_000; -/** Hydrates runs by id from the read replica, projected to the realtime columns; concurrent same-run refetches are single-flighted + short-TTL cached. */ +/** Hydrates runs by id through the runStore seam (split routing lives in the store, below this file), projected to the realtime columns; concurrent same-run refetches are single-flighted + short-TTL cached. 
*/ export class RunHydrator { readonly #inflight = new Map>(); readonly #cache: BoundedTtlCache; diff --git a/apps/webapp/app/services/realtime/sessions.server.ts b/apps/webapp/app/services/realtime/sessions.server.ts index a7129830e71..71170c322f2 100644 --- a/apps/webapp/app/services/realtime/sessions.server.ts +++ b/apps/webapp/app/services/realtime/sessions.server.ts @@ -1,7 +1,8 @@ import type { PrismaClient, Session } from "@trigger.dev/database"; import type { SessionItem } from "@trigger.dev/core/v3"; +import type { RunStore } from "@internal/run-store"; import { $replica, prisma } from "~/db.server"; -import { runStore } from "~/v3/runStore.server"; +import { runStore as defaultRunStore } from "~/v3/runStore.server"; /** * Prefix that {@link SessionId.generate} attaches to every Session friendlyId. @@ -18,6 +19,9 @@ const SESSION_FRIENDLY_ID_PREFIX = "session_"; * friendlyIds, anything else is looked up against `externalId` scoped to * the caller's environment. */ +// CONTROL-PLANE: `Session` lives on the control-plane DB; these reads are NOT +// routed to run-ops read-through — only the `TaskRun` currentRunId resolves in +// this file are run-ops read-through routed. export async function resolveSessionByIdOrExternalId( prisma: Pick, runtimeEnvironmentId: string, @@ -119,18 +123,27 @@ export function serializeSession(session: Session): SessionItem { * this so the wire-side `currentRunId` is consistent with the rest of * the public API (which only accepts friendlyIds for run lookups). * - * Skips the lookup when `currentRunId` is null. The read goes through - * `$replica` — a TaskRun's `friendlyId` is immutable so replica lag is - * harmless, and serializing on the writer would just add hot-path load. + * Skips the lookup when `currentRunId` is null. + * + * Resolves `currentRunId` -> `friendlyId` through `runStore.findRun` so a + * ksuid (NEW-DB) session run resolves from its owning store rather than the + * control-plane replica. 
Mirrors `sessionRunManager.server.ts`. + * Tenant-scoped because `Session.currentRunId` is a no-FK pointer. */ -export async function serializeSessionWithFriendlyRunId(session: Session): Promise { +export async function serializeSessionWithFriendlyRunId( + session: Session, + runStore: RunStore = defaultRunStore +): Promise { const base = serializeSession(session); if (!session.currentRunId) return base; const run = await runStore.findRun( - { id: session.currentRunId }, - { select: { friendlyId: true } }, - $replica + { + id: session.currentRunId, + projectId: session.projectId, + runtimeEnvironmentId: session.runtimeEnvironmentId, + }, + { select: { friendlyId: true } } ); return { @@ -148,27 +161,28 @@ export async function serializeSessionWithFriendlyRunId(session: Session): Promi */ export async function serializeSessionsWithFriendlyRunIds( sessions: Session[], - scope: { projectId: string; runtimeEnvironmentId: string } + scope: { projectId: string; runtimeEnvironmentId: string }, + runStore: RunStore = defaultRunStore ): Promise { const runIds = [ ...new Set(sessions.map((s) => s.currentRunId).filter((id): id is string => !!id)), ]; - // `currentRunId` is a plain string pointer (no FK), so scope the lookup to - // the caller's tenant — a stale value must not resolve a run in another env. - const runs = runIds.length - ? await runStore.findRuns( - { + // `runStore.findRuns` fans out across both stores under split (NEW + LEGACY + // replica merge) and is a plain `$replica` find when split is off. Tenant- + // scoped: `Session.currentRunId` is a no-FK pointer, so a stale id must never + // resolve a run in another env. + const runs = + runIds.length > 0 + ? 
await runStore.findRuns({ where: { id: { in: runIds }, projectId: scope.projectId, runtimeEnvironmentId: scope.runtimeEnvironmentId, }, select: { id: true, friendlyId: true }, - }, - $replica - ) - : []; + }) + : []; const friendlyIdByRunId = new Map(runs.map((run) => [run.id, run.friendlyId])); return sessions.map((session) => ({ diff --git a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts index 9bf67314779..65df2b41215 100644 --- a/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/clickhouseRunsRepository.server.ts @@ -13,9 +13,21 @@ import { import parseDuration from "parse-duration"; import { decodeRunsCursor, encodeRunsCursor } from "./runsCursor.server"; import { runStore } from "~/v3/runStore.server"; +import { type PrismaClientOrTransaction } from "~/db.server"; type RunCursorRow = { runId: string; createdAt: number }; +/** + * Hydrates a set of rows for a ClickHouse-derived run-id set against the given + * read client. The closure MUST select `id` so `#hydrateRunsByIds` can key + * set-membership and re-impose ordering; the call site projects `id` away if its + * result type excludes it. + */ +type HydrateFn = ( + client: PrismaClientOrTransaction, + ids: string[] +) => Promise; + export class ClickHouseRunsRepository implements IRunsRepository { constructor(private readonly options: RunsRepositoryOptions) {} @@ -38,7 +50,11 @@ export class ClickHouseRunsRepository implements IRunsRepository { const queryBuilder = this.options.clickhouse.taskRuns.queryBuilder(); applyRunFiltersToQueryBuilder( queryBuilder, - await convertRunListInputOptionsToFilterRunsOptions(options, this.options.prisma) + await convertRunListInputOptionsToFilterRunsOptions( + options, + this.options.prisma, + this.options.runStore ?? 
runStore + ) ); const forward = options.page.direction === "forward" || !options.page.direction; @@ -140,6 +156,51 @@ export class ClickHouseRunsRepository implements IRunsRepository { return { runIds, pagination: { nextCursor, previousCursor } }; } + /** + * Hydrates a ClickHouse-derived run-id set from the run-ops store. + * Split ON: new run-ops client first, then the LEGACY RUN-OPS READ REPLICA ONLY + * for ids not known-migrated — never the legacy primary. The mixed-residency + * fan-out lives here because `RoutingRunStore.findRuns` punts it. + * Split OFF (single-DB / self-host): one plain `store.findRuns(args, prisma)` + * (passthrough) — no legacy read, no known-migrated probe, no second connection. + */ + async #hydrateRunsByIds( + runIds: string[], + hydrate: HydrateFn + ): Promise { + if (runIds.length === 0) { + return []; + } + + const splitEnabled = this.options.readThrough?.splitEnabled ?? false; + + let rows: T[]; + if (!splitEnabled) { + rows = await hydrate(this.options.prisma, runIds); + } else { + const newClient = this.options.readThrough?.newClient ?? this.options.prisma; + const legacyReplica = this.options.readThrough?.legacyReplica ?? this.options.prisma; + + const newRows = await hydrate(newClient, runIds); + const foundIds = new Set(newRows.map((r) => r.id)); + const missing = runIds.filter((id) => !foundIds.has(id)); + + // Any id not hydrated from the new store is probed on the legacy replica. + const toProbeLegacy = missing; + + const legacyRows = toProbeLegacy.length ? await hydrate(legacyReplica, toProbeLegacy) : []; + rows = [...newRows, ...legacyRows]; + } + + // Preserve the ClickHouse keyset order (created_at desc, run_id desc) by re-ordering the + // hydrated rows to match the input `runIds`. Sorting by raw `id` was only ~chronological + // when every id was a time-prefixed cuid; a mixed cuid/ksuid page sorts the two id-spaces + // into separate blocks, burying recent runs. Rows whose PG row is gone (e.g. 
past + // retention) drop out, exactly as before. + const byId = new Map(rows.map((r) => [r.id, r] as const)); + return runIds.map((id) => byId.get(id)).filter((r): r is T => r !== undefined); + } + async listFriendlyRunIds(options: ListRunsOptions) { // First get internal IDs from ClickHouse const { runIds } = await this.listRunIds(options); @@ -148,19 +209,18 @@ export class ClickHouseRunsRepository implements IRunsRepository { return []; } - // Then get friendly IDs from Prisma - const runs = await runStore.findRuns( - { - where: { - id: { - in: runIds, - }, - }, - select: { - friendlyId: true, + const store = this.options.runStore ?? runStore; + + // Then get friendly IDs from the run-ops store (id added for set-membership; + // projected away below so the returned shape stays `string[]`). + const runs = await this.#hydrateRunsByIds(runIds, (client, ids) => + store.findRuns( + { + where: { id: { in: ids } }, + select: { id: true, friendlyId: true }, }, - }, - this.options.prisma + client + ) ); return runs.map((run) => run.friendlyId); @@ -169,51 +229,55 @@ export class ClickHouseRunsRepository implements IRunsRepository { async listRuns(options: ListRunsOptions) { const { runIds, pagination } = await this.listRunIds(options); - let runs = await runStore.findRuns( - { - where: { - id: { - in: runIds, + const store = this.options.runStore ?? 
runStore; + + let runs = await this.#hydrateRunsByIds(runIds, (client, ids) => + store.findRuns( + { + where: { + id: { + in: ids, + }, + }, + orderBy: { + id: "desc", + }, + select: { + id: true, + friendlyId: true, + taskIdentifier: true, + taskVersion: true, + runtimeEnvironmentId: true, + status: true, + createdAt: true, + startedAt: true, + lockedAt: true, + delayUntil: true, + updatedAt: true, + completedAt: true, + isTest: true, + spanId: true, + idempotencyKey: true, + ttl: true, + expiredAt: true, + costInCents: true, + baseCostInCents: true, + usageDurationMs: true, + runTags: true, + depth: true, + rootTaskRunId: true, + batchId: true, + metadata: true, + metadataType: true, + machinePreset: true, + queue: true, + workerQueue: true, + region: true, + annotations: true, }, }, - orderBy: { - id: "desc", - }, - select: { - id: true, - friendlyId: true, - taskIdentifier: true, - taskVersion: true, - runtimeEnvironmentId: true, - status: true, - createdAt: true, - startedAt: true, - lockedAt: true, - delayUntil: true, - updatedAt: true, - completedAt: true, - isTest: true, - spanId: true, - idempotencyKey: true, - ttl: true, - expiredAt: true, - costInCents: true, - baseCostInCents: true, - usageDurationMs: true, - runTags: true, - depth: true, - rootTaskRunId: true, - batchId: true, - metadata: true, - metadataType: true, - machinePreset: true, - queue: true, - workerQueue: true, - region: true, - annotations: true, - }, - }, - this.options.prisma + client + ) ); // ClickHouse is slightly delayed, so we're going to do in-memory status filtering too @@ -231,7 +295,11 @@ export class ClickHouseRunsRepository implements IRunsRepository { const queryBuilder = this.options.clickhouse.taskRuns.countQueryBuilder(); applyRunFiltersToQueryBuilder( queryBuilder, - await convertRunListInputOptionsToFilterRunsOptions(options, this.options.prisma) + await convertRunListInputOptionsToFilterRunsOptions( + options, + this.options.prisma, + this.options.runStore ?? 
runStore + ) ); const [queryError, result] = await queryBuilder.execute(); diff --git a/apps/webapp/app/services/runsRepository/runsRepository.server.ts b/apps/webapp/app/services/runsRepository/runsRepository.server.ts index b256738c465..2fadb8c7108 100644 --- a/apps/webapp/app/services/runsRepository/runsRepository.server.ts +++ b/apps/webapp/app/services/runsRepository/runsRepository.server.ts @@ -1,4 +1,5 @@ import { type ClickHouse } from "@internal/clickhouse"; +import { type RunStore } from "@internal/run-store"; import { type Tracer } from "@internal/tracing"; import { type Logger, type LogLevel } from "@trigger.dev/core/logger"; import { MachinePresetName } from "@trigger.dev/core/v3"; @@ -8,6 +9,7 @@ import parseDuration from "parse-duration"; import { z } from "zod"; import { timeFilters } from "~/components/runs/v3/SharedFilters"; import { type PrismaClientOrTransaction } from "~/db.server"; +import { runStore as defaultRunStore } from "~/v3/runStore.server"; import { startActiveSpan } from "~/v3/tracer.server"; import { ClickHouseRunsRepository } from "./clickhouseRunsRepository.server"; @@ -17,6 +19,20 @@ export type RunsRepositoryOptions = { logger?: Logger; logLevel?: LogLevel; tracer?: Tracer; + + // Injectable run-ops store; defaults to the `~/v3/runStore.server` singleton + // (passthrough). The list-hydrate fan-out below does not depend on the store + // routing mixed-residency id sets — it applies the read-through fan-out itself. + runStore?: RunStore; + + // Run-ops read-through wiring for the list hydrate. Omitted => passthrough. + readThrough?: { + // `legacyReplica` is a READ REPLICA handle only — there is no legacy-primary field. + newClient?: PrismaClientOrTransaction; + legacyReplica?: PrismaClientOrTransaction; + // Resolved boot constant; when false the split branch is never entered. 
+ splitEnabled?: boolean; + }; }; const RunStatus = z.enum(Object.values(TaskRunStatus) as [TaskRunStatus, ...TaskRunStatus[]]); @@ -195,6 +211,7 @@ export class RunsRepository implements IRunsRepository { { attributes: { "repository.name": "clickhouse", + "readThrough.split": Boolean(this.options.readThrough?.splitEnabled), organizationId: options.organizationId, projectId: options.projectId, environmentId: options.environmentId, @@ -216,6 +233,7 @@ export class RunsRepository implements IRunsRepository { { attributes: { "repository.name": "clickhouse", + "readThrough.split": Boolean(this.options.readThrough?.splitEnabled), organizationId: options.organizationId, projectId: options.projectId, environmentId: options.environmentId, @@ -261,7 +279,8 @@ export function parseRunListInputOptions(data: any): RunListInputOptions { export async function convertRunListInputOptionsToFilterRunsOptions( options: RunListInputOptions, - prisma: RunsRepositoryOptions["prisma"] + prisma: RunsRepositoryOptions["prisma"], + store: RunStore = defaultRunStore ): Promise { const convertedOptions: FilterRunsOptions = { ...options, @@ -276,24 +295,20 @@ export async function convertRunListInputOptionsToFilterRunsOptions( }); convertedOptions.period = time.period ? (parseDuration(time.period) ?? undefined) : undefined; - // Batch friendlyId to id + // Cross-DB resolution: BatchTaskRun is a RUN-OPS table. A ksuid batch resident on the + // dedicated run-ops DB must resolve via the store's NEW->LEGACY probe — a single control-plane + // client would miss it and leave the friendlyId in the ClickHouse `batch_id` filter, matching + // nothing. Split off / self-host: the store is a passthrough over the one client. 
if (options.batchId && options.batchId.startsWith("batch_")) { - const batch = await prisma.batchTaskRun.findFirst({ - select: { - id: true, - }, - where: { - friendlyId: options.batchId, - runtimeEnvironmentId: options.environmentId, - }, - }); + const batch = await store.findBatchTaskRunByFriendlyId(options.batchId, options.environmentId); if (batch) { convertedOptions.batchId = batch.id; } } - // ScheduleId can be a friendlyId + // ScheduleId can be a friendlyId. TaskSchedule is a CONTROL-PLANE table, so this stays on + // the passed `prisma` (the control-plane client) in both single-DB and split modes. if (options.scheduleId && options.scheduleId.startsWith("sched_")) { const schedule = await prisma.taskSchedule.findFirst({ select: { diff --git a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts index 5982fd4460c..845430735c8 100644 --- a/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts +++ b/apps/webapp/app/v3/services/alerts/deliverAlert.server.ts @@ -15,7 +15,11 @@ import { type RunFailedWebhook, TaskRunError, } from "@trigger.dev/core/v3"; -import { type ProjectAlertChannelType, type ProjectAlertType } from "@trigger.dev/database"; +import { + type ProjectAlertChannelType, + type ProjectAlertType, + type RuntimeEnvironmentType, +} from "@trigger.dev/database"; import assertNever from "assert-never"; import { subtle } from "crypto"; import { environmentTitle } from "~/components/environments/EnvironmentLabel"; @@ -46,6 +50,44 @@ import { generateFriendlyId } from "~/v3/friendlyIdentifiers"; import { fromPromise } from "neverthrow"; import { BaseService } from "../baseService.server"; import { CURRENT_API_VERSION } from "~/api/versions"; +import type { RunStore } from "@internal/run-store"; +import type { ControlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { controlPlaneResolver as defaultControlPlaneResolver } from 
"~/v3/runOpsMigration/controlPlaneResolver.server"; + +// Run-ops scalars read off `alert.taskRun` downstream. The control-plane fields (env type/branch, +// lockedBy file/export, lockedToVersion version) are resolved via the resolver and stitched on +// below, so the run-ops findRun selects scalars only. +const taskRunAlertSelect = { + id: true, + friendlyId: true, + taskIdentifier: true, + taskVersion: true, + sdkVersion: true, + cliVersion: true, + status: true, + number: true, + isTest: true, + createdAt: true, + startedAt: true, + completedAt: true, + idempotencyKey: true, + runTags: true, + machinePreset: true, + error: true, + runtimeEnvironmentId: true, + lockedById: true, + lockedToVersionId: true, +} satisfies Prisma.TaskRunSelect; + +type ResolvedAlertTaskRun = Prisma.Result< + typeof prisma.taskRun, + { select: typeof taskRunAlertSelect }, + "findUniqueOrThrow" +> & { + runtimeEnvironment: { type: RuntimeEnvironmentType; branchName: string | null }; + lockedBy: { filePath: string; exportName: string | null } | null; + lockedToVersion: { version: string } | null; +}; type FoundAlert = Prisma.Result< typeof prisma.projectAlert, @@ -58,18 +100,6 @@ type FoundAlert = Prisma.Result< }; }; environment: true; - taskRun: { - include: { - lockedBy: true; - lockedToVersion: true; - runtimeEnvironment: { - select: { - type: true; - branchName: true; - }; - }; - }; - }; workerDeployment: { include: { worker: { @@ -88,7 +118,9 @@ type FoundAlert = Prisma.Result< }; }, "findUniqueOrThrow" ->; +> & { + taskRun: ResolvedAlertTaskRun | null; +}; class SkipRetryError extends Error {} @@ -98,6 +130,20 @@ type DeploymentIntegrationMetadata = { }; export class DeliverAlertService extends BaseService { + #controlPlaneResolver: ControlPlaneResolver; + + constructor( + opts: { + prisma?: PrismaClientOrTransaction; + replica?: PrismaClientOrTransaction; + runStore?: RunStore; + controlPlaneResolver?: ControlPlaneResolver; + } = {} + ) { + super(opts.prisma, opts.replica, 
opts.runStore); + this.#controlPlaneResolver = opts.controlPlaneResolver ?? defaultControlPlaneResolver; + } + public async call(alertId: string) { const alertWithoutRun = await this._prisma.projectAlert.findFirst({ where: { id: alertId }, @@ -133,22 +179,42 @@ export class DeliverAlertService extends BaseService { let taskRun: FoundAlert["taskRun"] = null; if (alertWithoutRun.taskRunId) { - taskRun = await this.runStore.findRun( + const resolvedTaskRun = await this.runStore.findRun( { id: alertWithoutRun.taskRunId }, - { - include: { - lockedBy: true, - lockedToVersion: true, - runtimeEnvironment: { - select: { - type: true, - branchName: true, - }, - }, - }, - }, + { select: taskRunAlertSelect }, this._prisma ); + + if (resolvedTaskRun) { + const env = await this.#controlPlaneResolver.resolveAuthenticatedEnv( + resolvedTaskRun.runtimeEnvironmentId + ); + + if (!env) { + throw new Error( + `Could not resolve environment ${resolvedTaskRun.runtimeEnvironmentId} for alert ${alertId}` + ); + } + + const lockedWorker = await this.#controlPlaneResolver.resolveRunLockedWorker({ + lockedById: resolvedTaskRun.lockedById, + lockedToVersionId: resolvedTaskRun.lockedToVersionId, + }); + + taskRun = { + ...resolvedTaskRun, + runtimeEnvironment: { type: env.type, branchName: env.branchName }, + lockedBy: lockedWorker?.lockedBy + ? { + filePath: lockedWorker.lockedBy.filePath, + exportName: lockedWorker.lockedBy.exportName, + } + : null, + lockedToVersion: lockedWorker?.lockedToVersion + ? { version: lockedWorker.lockedToVersion.version } + : null, + }; + } } const alert: FoundAlert = { ...alertWithoutRun, taskRun }; @@ -686,7 +752,6 @@ export class DeliverAlertService extends BaseService { return; } - // Get the org integration const integration = slackProperties.data.integrationId ? 
await this._prisma.organizationIntegration.findFirst({ where: { @@ -793,7 +858,6 @@ export class DeliverAlertService extends BaseService { ], }); - // Upsert the storage if (message.ts) { if (storage) { await this._prisma.projectAlertStorage.update({ @@ -969,7 +1033,6 @@ export class DeliverAlertService extends BaseService { const signature = await subtle.sign("HMAC", key, hashPayload); const signatureHex = Buffer.from(signature).toString("hex"); - // Send the webhook to the URL specified in webhook.url const response = await fetch(webhook.url, { method: "POST", headers: { diff --git a/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts b/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts index 31912c39fd0..460ab5a91ed 100644 --- a/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts +++ b/apps/webapp/app/v3/services/alerts/performTaskRunAlerts.server.ts @@ -1,27 +1,44 @@ +import { type RunStore } from "@internal/run-store"; import { type Prisma, type ProjectAlertChannel } from "@trigger.dev/database"; -import { type prisma } from "~/db.server"; +import { type PrismaClientOrTransaction, type prisma } from "~/db.server"; import { alertsWorker } from "~/v3/alertsWorker.server"; +import type { ControlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { controlPlaneResolver as defaultControlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; import { BaseService } from "../baseService.server"; import { DeliverAlertService } from "./deliverAlert.server"; +// The alert hydration reads only run-ops scalars (id/projectId/runtimeEnvironmentId); the env's +// type (and its parent's) is resolved via the control-plane resolver so the run-ops DB can split +// without a cross-provider join. The prior `lockedBy` + `runtimeEnvironment` includes were unused. 
type FoundRun = Prisma.Result< typeof prisma.taskRun, - { include: { lockedBy: true; runtimeEnvironment: true } }, + { select: { id: true; projectId: true; runtimeEnvironmentId: true } }, "findUniqueOrThrow" >; export class PerformTaskRunAlertsService extends BaseService { + #controlPlaneResolver: ControlPlaneResolver; + + constructor( + opts: { + prisma?: PrismaClientOrTransaction; + replica?: PrismaClientOrTransaction; + runStore?: RunStore; + controlPlaneResolver?: ControlPlaneResolver; + } = {} + ) { + super(opts.prisma, opts.replica, opts.runStore); + this.#controlPlaneResolver = opts.controlPlaneResolver ?? defaultControlPlaneResolver; + } + public async call(runId: string) { const run = await this.runStore.findRun( { id: runId }, { - include: { - lockedBy: true, - runtimeEnvironment: { - include: { - parentEnvironment: true, - }, - }, + select: { + id: true, + projectId: true, + runtimeEnvironmentId: true, }, }, this._prisma @@ -31,7 +48,12 @@ export class PerformTaskRunAlertsService extends BaseService { return; } - // Find all the alert channels + const env = await this.#controlPlaneResolver.resolveEnv(run.runtimeEnvironmentId); + + if (!env) { + return; + } + const alertChannels = await this._prisma.projectAlertChannel.findMany({ where: { projectId: run.projectId, @@ -39,7 +61,7 @@ export class PerformTaskRunAlertsService extends BaseService { has: "TASK_RUN", }, environmentTypes: { - has: run.runtimeEnvironment.parentEnvironment?.type ?? run.runtimeEnvironment.type, + has: env.parentEnvironmentType ?? 
env.type, }, enabled: true, }, diff --git a/apps/webapp/app/v3/services/batchTriggerV3.server.ts b/apps/webapp/app/v3/services/batchTriggerV3.server.ts index ae7fcd83d44..62778778969 100644 --- a/apps/webapp/app/v3/services/batchTriggerV3.server.ts +++ b/apps/webapp/app/v3/services/batchTriggerV3.server.ts @@ -11,16 +11,23 @@ import { isUniqueConstraintError, Prisma, } from "@trigger.dev/database"; +import type { RunStore } from "@internal/run-store"; +import { generateKsuidId, RunId } from "@trigger.dev/core/v3/isomorphic"; import { z } from "zod"; import type { PrismaClientOrTransaction } from "~/db.server"; import { prisma } from "~/db.server"; +import { runStore as defaultRunStore } from "~/v3/runStore.server"; import { env } from "~/env.server"; +import { findEnvironmentById } from "~/models/runtimeEnvironment.server"; import { batchTaskRunItemStatusForRunStatus } from "~/models/taskRun.server"; import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { logger } from "~/services/logger.server"; import { getEntitlement } from "~/services/platform.v3.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { resolveRunIdMintKind, type RunIdMintKind } from "~/v3/engineVersion.server"; +import { resolveInheritedMintKind } from "~/v3/runOpsMigration/resolveInheritedMintKind.server"; +import { mintBatchFriendlyId } from "~/v3/runOpsMigration/mintBatchFriendlyId.server"; import { batchTriggerWorker } from "../batchTriggerWorker.server"; -import { generateFriendlyId } from "../friendlyIdentifiers"; import { legacyRunEngineWorker } from "../legacyRunEngineWorker.server"; import { marqs } from "../marqs/index.server"; import { guardQueueSizeLimitsForEnv } from "../queueSizeLimits.server"; @@ -101,7 +108,15 @@ export class BatchTriggerV3Service extends BaseService { constructor( batchProcessingStrategy?: BatchProcessingStrategy, asyncBatchProcessSizeThreshold: number = 
ASYNC_BATCH_PROCESS_SIZE_THRESHOLD, - protected readonly _prisma: PrismaClientOrTransaction = prisma + protected readonly _prisma: PrismaClientOrTransaction = prisma, + protected readonly runStore: RunStore = defaultRunStore, + // Injected so tests force the env-default branch deterministically; defaults + // to the live per-env mint resolver. + private readonly resolveMintKind: (environment: { + organizationId: string; + id: string; + orgFeatureFlags?: unknown; + }) => Promise = resolveRunIdMintKind ) { super(_prisma); @@ -123,13 +138,15 @@ export class BatchTriggerV3Service extends BaseService { throw new ServiceValidationError("A batch trigger must have at least one item"); } + // BatchTaskRun.runtimeEnvironmentId no longer has an FK into RuntimeEnvironment; + // validate env existence app-side before any create arm (passthrough when split is off). + await controlPlaneResolver.assertEnvExists(environment.id); + const existingBatch = options.idempotencyKey - ? await this._prisma.batchTaskRun.findFirst({ - where: { - runtimeEnvironmentId: environment.id, - idempotencyKey: options.idempotencyKey, - }, - }) + ? 
await this.runStore.findBatchTaskRunByIdempotencyKey( + environment.id, + options.idempotencyKey + ) : undefined; if (existingBatch) { @@ -149,9 +166,10 @@ export class BatchTriggerV3Service extends BaseService { }); // Update the existing batch to remove the idempotency key - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: existingBatch.id }, data: { idempotencyKey: null }, + select: { id: true }, }); // Don't return, just continue with the batch trigger @@ -162,7 +180,14 @@ export class BatchTriggerV3Service extends BaseService { } } - const batchId = generateFriendlyId("batch"); + const { id: batchInternalId, friendlyId: batchId } = await mintBatchFriendlyId({ + environment: { + organizationId: environment.organizationId, + id: environment.id, + orgFeatureFlags: environment.organization.featureFlags, + }, + parentRunFriendlyId: body.parentRunId, + }); span.setAttribute("batchId", batchId); @@ -202,9 +227,8 @@ export class BatchTriggerV3Service extends BaseService { } } - const runs = await this.#prepareRunData(environment, body); + const runs = await this.#prepareRunData(environment, body, batchId); - // Calculate how many new runs we need to create const newRunCount = runs.filter((r) => !r.isCached).length; if (newRunCount === 0) { @@ -212,19 +236,18 @@ export class BatchTriggerV3Service extends BaseService { batchId, }); - await this._prisma.batchTaskRun.create({ - data: { - friendlyId: batchId, - runtimeEnvironmentId: environment.id, - idempotencyKey: options.idempotencyKey, - idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, - dependentTaskAttemptId: dependentAttempt?.id, - runCount: body.items.length, - runIds: runs.map((r) => r.id), - status: "COMPLETED", - batchVersion: "v3", - oneTimeUseToken: options.oneTimeUseToken, - }, + await this.runStore.createBatchTaskRun({ + id: batchInternalId, + friendlyId: batchId, + runtimeEnvironmentId: environment.id, + idempotencyKey: options.idempotencyKey, + 
idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, + dependentTaskAttemptId: dependentAttempt?.id, + runCount: body.items.length, + runIds: runs.map((r) => r.id), + status: "COMPLETED", + batchVersion: "v3", + oneTimeUseToken: options.oneTimeUseToken, }); return { @@ -265,6 +288,7 @@ export class BatchTriggerV3Service extends BaseService { const batch = await this.#createAndProcessBatchTaskRun( batchId, + batchInternalId, runs, payloadPacket, newRunCount, @@ -319,18 +343,48 @@ export class BatchTriggerV3Service extends BaseService { } } + // Mint a child run's friendlyId so it lands in the SAME physical store as its + // residency anchor. The caller passes the batch's friendlyId, so a ksuid + // (NEW) anchor yields a ksuid (NEW) child and a cuid anchor yields a cuid + // (LEGACY) child. With no anchor it falls back to the env's cutover setting. + // Mirrors RunEngineTriggerTaskService.mintRunFriendlyId. + private async mintChildFriendlyId( + environment: AuthenticatedEnvironment, + anchorFriendlyId?: string + ): Promise { + const mintKind = anchorFriendlyId + ? resolveInheritedMintKind(anchorFriendlyId) + : await this.resolveMintKind({ + organizationId: environment.organizationId, + id: environment.id, + orgFeatureFlags: environment.organization.featureFlags, + }); + + return mintKind === "ksuid" + ? RunId.toFriendlyId(generateKsuidId()) + : RunId.generate().friendlyId; + } + async #prepareRunData( environment: AuthenticatedEnvironment, - body: BatchTriggerTaskV2RequestBody + body: BatchTriggerTaskV2RequestBody, + batchFriendlyId: string ): Promise> { + // Anchor every child to the batch's residency: the batch friendlyId is + // minted once, so deriving each child's id-kind from it — rather than re-resolving + // the per-org flag, which can flip mid-batch — keeps batch + children co-resident. 
+ const childAnchor = batchFriendlyId; + // batchTriggerAndWait cannot have cached runs because that does not work in run engine v1 and is not available in the client if (body?.dependentAttempt) { - return body.items.map((item) => ({ - id: generateFriendlyId("run"), - isCached: false, - idempotencyKey: undefined, - taskIdentifier: item.task, - })); + return Promise.all( + body.items.map(async (item) => ({ + id: await this.mintChildFriendlyId(environment, childAnchor), + isCached: false, + idempotencyKey: undefined, + taskIdentifier: item.task, + })) + ); } // Group items by taskIdentifier @@ -374,42 +428,42 @@ export class BatchTriggerV3Service extends BaseService { ) ).then((results) => results.flat()); - // Now we need to create an array of all the run IDs, in order - // If we have a cached run, that isn't expired, we should use that run ID - // If we have a cached run, that is expired, we should generate a new run ID and save that cached run ID to a set of expired run IDs - // If we don't have a cached run, we should generate a new run ID + // Build the run IDs in order: reuse an unexpired cached id, else mint a new id (and record any + // expired cached id so its idempotency key can be cleared below). 
const expiredRunIds = new Set(); - const runs = body.items.map((item) => { - const cachedRun = cachedRuns.find((r) => r.idempotencyKey === item.options?.idempotencyKey); + const runs = await Promise.all( + body.items.map(async (item) => { + const cachedRun = cachedRuns.find((r) => r.idempotencyKey === item.options?.idempotencyKey); + + if (cachedRun) { + if (cachedRun.idempotencyKeyExpiresAt && cachedRun.idempotencyKeyExpiresAt < new Date()) { + expiredRunIds.add(cachedRun.friendlyId); - if (cachedRun) { - if (cachedRun.idempotencyKeyExpiresAt && cachedRun.idempotencyKeyExpiresAt < new Date()) { - expiredRunIds.add(cachedRun.friendlyId); + return { + id: await this.mintChildFriendlyId(environment, childAnchor), + isCached: false, + idempotencyKey: item.options?.idempotencyKey ?? undefined, + taskIdentifier: item.task, + }; + } return { - id: generateFriendlyId("run"), - isCached: false, + id: cachedRun.friendlyId, + isCached: true, idempotencyKey: item.options?.idempotencyKey ?? undefined, taskIdentifier: item.task, }; } return { - id: cachedRun.friendlyId, - isCached: true, + id: await this.mintChildFriendlyId(environment, childAnchor), + isCached: false, idempotencyKey: item.options?.idempotencyKey ?? undefined, taskIdentifier: item.task, }; - } - - return { - id: generateFriendlyId("run"), - isCached: false, - idempotencyKey: item.options?.idempotencyKey ?? 
undefined, - taskIdentifier: item.task, - }; - }); + }) + ); // Expire the cached runs that are no longer valid if (expiredRunIds.size) { @@ -424,6 +478,7 @@ export class BatchTriggerV3Service extends BaseService { async #createAndProcessBatchTaskRun( batchId: string, + batchInternalId: string, runs: Array, payloadPacket: IOPacket, newRunCount: number, @@ -433,21 +488,20 @@ export class BatchTriggerV3Service extends BaseService { dependentAttempt?: TaskRunAttempt ) { if (runs.length <= this._asyncBatchProcessSizeThreshold) { - const batch = await this._prisma.batchTaskRun.create({ - data: { - friendlyId: batchId, - runtimeEnvironmentId: environment.id, - idempotencyKey: options.idempotencyKey, - idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, - dependentTaskAttemptId: dependentAttempt?.id, - runCount: runs.length, - runIds: runs.map((r) => r.id), - payload: payloadPacket.data, - payloadType: payloadPacket.dataType, - options, - batchVersion: "v3", - oneTimeUseToken: options.oneTimeUseToken, - }, + const batch = await this.runStore.createBatchTaskRun({ + id: batchInternalId, + friendlyId: batchId, + runtimeEnvironmentId: environment.id, + idempotencyKey: options.idempotencyKey, + idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, + dependentTaskAttemptId: dependentAttempt?.id, + runCount: runs.length, + runIds: runs.map((r) => r.id), + payload: payloadPacket.data, + payloadType: payloadPacket.dataType, + options, + batchVersion: "v3", + oneTimeUseToken: options.oneTimeUseToken, }); const result = await this.#processBatchTaskRunItems( @@ -466,42 +520,40 @@ export class BatchTriggerV3Service extends BaseService { error: result.error, }); - await this._prisma.batchTaskRun.update({ - where: { - id: batch.id, - }, + await this.runStore.updateBatchTaskRun({ + where: { id: batch.id }, data: { status: "ABORTED", completedAt: new Date(), }, + select: { id: true }, }); throw result.error; } - // Update the batch to be sealed - await 
this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { sealed: true, sealedAt: new Date() }, + select: { id: true }, }); return batch; } else { - const batch = await this._prisma.batchTaskRun.create({ - data: { - friendlyId: batchId, - runtimeEnvironmentId: environment.id, - idempotencyKey: options.idempotencyKey, - idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, - dependentTaskAttemptId: dependentAttempt?.id, - runCount: body.items.length, - runIds: runs.map((r) => r.id), - payload: payloadPacket.data, - payloadType: payloadPacket.dataType, - options, - batchVersion: "v3", - oneTimeUseToken: options.oneTimeUseToken, - }, + const batch = await this.runStore.createBatchTaskRun({ + id: batchInternalId, + friendlyId: batchId, + runtimeEnvironmentId: environment.id, + idempotencyKey: options.idempotencyKey, + idempotencyKeyExpiresAt: options.idempotencyKeyExpiresAt, + dependentTaskAttemptId: dependentAttempt?.id, + runCount: body.items.length, + runIds: runs.map((r) => r.id), + payload: payloadPacket.data, + payloadType: payloadPacket.dataType, + options, + batchVersion: "v3", + oneTimeUseToken: options.oneTimeUseToken, }); switch (this._batchProcessingStrategy) { @@ -524,11 +576,12 @@ export class BatchTriggerV3Service extends BaseService { count: PROCESSING_BATCH_SIZE, })); - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { processingJobsExpectedCount: ranges.length, }, + select: { id: true }, }); await Promise.all( @@ -594,33 +647,30 @@ export class BatchTriggerV3Service extends BaseService { const $attemptCount = options.attemptCount + 1; - // Add early return if max attempts reached if ($attemptCount > MAX_ATTEMPTS) { logger.error("[BatchTriggerV2][processBatchTaskRun] Max attempts reached", { options, attemptCount: $attemptCount, }); - // You might want to update the batch status to failed here return; } - const batch = await 
this._prisma.batchTaskRun.findFirst({ - where: { id: options.batchId }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - }, - }, - }, - }); + const batch = await this.runStore.findBatchTaskRunById(options.batchId); if (!batch) { return; } - // Check to make sure the currentIndex is not greater than the runCount + // BatchTaskRun -> RuntimeEnvironment FK is dropped; resolve the env from the scalar id. + const environment = await findEnvironmentById(batch.runtimeEnvironmentId); + if (!environment) { + logger.error("[BatchTriggerV2][processBatchTaskRun] Environment not found", { + batchId: batch.id, + runtimeEnvironmentId: batch.runtimeEnvironmentId, + }); + return; + } + if (options.range.start >= batch.runCount) { logger.debug("[BatchTriggerV2][processBatchTaskRun] currentIndex is greater than runCount", { options, @@ -638,7 +688,7 @@ export class BatchTriggerV3Service extends BaseService { data: batch.payload ?? undefined, dataType: batch.payloadType, }, - batch.runtimeEnvironment + environment ); const payload = await parsePacket(payloadPacket); @@ -659,7 +709,7 @@ export class BatchTriggerV3Service extends BaseService { const result = await this.#processBatchTaskRunItems( batch, - batch.runtimeEnvironment, + environment, options.range.start, options.range.count, $payload, @@ -695,12 +745,12 @@ export class BatchTriggerV3Service extends BaseService { switch (options.strategy) { case "sequential": { - // We can tell if we are done by checking if the result.workingIndex is equal or greater than the runCount + // Done once we've walked past the last item in the batch if (result.workingIndex >= batch.runCount) { - // Update the batch to be sealed - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { sealed: true, sealedAt: new Date() }, + select: { id: true }, }); logger.debug("[BatchTriggerV2][processBatchTaskRun] Batch processing complete", { @@ -710,7 +760,6 
@@ export class BatchTriggerV3Service extends BaseService { attemptCount: $attemptCount, }); } else { - // Requeue the next batch of processing await this.#enqueueBatchTaskRun({ batchId: batch.id, processingId: options.processingId, @@ -726,9 +775,9 @@ export class BatchTriggerV3Service extends BaseService { break; } case "parallel": { - // We need to increment the processingJobsCount and check if we are done + // Each processing job increments the count; the last one to arrive seals the batch const { processingJobsCount, processingJobsExpectedCount } = - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { processingJobsCount: { @@ -742,10 +791,10 @@ export class BatchTriggerV3Service extends BaseService { }); if (processingJobsCount >= processingJobsExpectedCount) { - // Update the batch to be sealed - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { sealed: true, sealedAt: new Date() }, + select: { id: true }, }); logger.debug("[BatchTriggerV2][processBatchTaskRun] Batch processing complete", { @@ -766,7 +815,6 @@ export class BatchTriggerV3Service extends BaseService { items: BatchTriggerTaskV2RequestBody["items"], options?: BatchTriggerTaskServiceOptions ): Promise<{ workingIndex: number; error?: Error }> { - // Grab the next PROCESSING_BATCH_SIZE runIds const runIds = batch.runIds.slice(currentIndex, currentIndex + batchSize); logger.debug("[BatchTriggerV2][processBatchTaskRun] Processing batch items", { @@ -776,7 +824,7 @@ export class BatchTriggerV3Service extends BaseService { runCount: batch.runCount, }); - // Combine the "window" between currentIndex and currentIndex + PROCESSING_BATCH_SIZE with the runId and the item in the payload which is an array + // Pair each runId in this window with its item from the payload array const itemsToProcess = runIds.map((runId, index) => ({ runId, item: items[index + currentIndex], @@ 
-815,13 +863,14 @@ export class BatchTriggerV3Service extends BaseService { } if (expectedCount > 0) { - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batch.id }, data: { expectedCount: { increment: expectedCount, }, }, + select: { id: true }, }); } @@ -873,12 +922,10 @@ export class BatchTriggerV3Service extends BaseService { if (!result.isCached) { try { - await this._prisma.batchTaskRunItem.create({ - data: { - batchTaskRunId: batch.id, - taskRunId: result.run.id, - status: batchTaskRunItemStatusForRunStatus(result.run.status), - }, + await this.runStore.createBatchTaskRunItem({ + batchTaskRunId: batch.id, + taskRunId: result.run.id, + status: batchTaskRunItemStatusForRunStatus(result.run.status), }); return true; @@ -953,18 +1000,12 @@ export class BatchTriggerV3Service extends BaseService { export async function tryCompleteBatchV3( batchId: string, tx: PrismaClientOrTransaction, - scheduleResumeOnComplete: boolean + scheduleResumeOnComplete: boolean, + // Threaded in so a ksuid (NEW-resident) batch + its items are read/written on the owning + // store, not the control-plane `tx`. Defaults to the singleton (single-DB = passthrough). 
+ runStore: RunStore = defaultRunStore ) { - const batch = await tx.batchTaskRun.findFirst({ - where: { id: batchId }, - select: { - id: true, - sealed: true, - status: true, - expectedCount: true, - dependentTaskAttemptId: true, - }, - }); + const batch = await runStore.findBatchTaskRunById(batchId); if (!batch) { logger.debug("tryCompleteBatchV3: Batch not found", { batchId }); @@ -981,9 +1022,9 @@ export async function tryCompleteBatchV3( return; } - // Count completed items (read-only, no contention) - const completedCount = await tx.batchTaskRunItem.count({ - where: { batchTaskRunId: batchId, status: "COMPLETED" }, + const completedCount = await runStore.countBatchTaskRunItems({ + batchTaskRunId: batchId, + status: "COMPLETED", }); if (completedCount < batch.expectedCount) { @@ -996,7 +1037,7 @@ export async function tryCompleteBatchV3( } // Mark batch COMPLETED (idempotent via status check) - const updated = await tx.batchTaskRun.updateMany({ + const updated = await runStore.updateManyBatchTaskRun({ where: { id: batchId, status: "PENDING" }, data: { status: "COMPLETED", completedAt: new Date(), completedCount }, }); @@ -1019,7 +1060,10 @@ export async function completeBatchTaskRunItemV3( tx: PrismaClientOrTransaction, scheduleResumeOnComplete = false, taskRunAttemptId?: string, - retryAttempt?: number + retryAttempt?: number, + // Threaded in so a ksuid (NEW-resident) batch's item lands on the owning store; route by + // batchTaskRunId (items co-reside with their batch). Defaults to the singleton. + runStore: RunStore = defaultRunStore ) { const isRetry = retryAttempt !== undefined; @@ -1033,9 +1077,10 @@ export async function completeBatchTaskRunItemV3( }); try { - // Update item to COMPLETED (no transaction needed, no contention) - const updated = await tx.batchTaskRunItem.updateMany({ - where: { id: itemId, status: "PENDING" }, + // Update item to COMPLETED (no transaction needed, no contention). 
Routed by + // batchTaskRunId so the item write lands on the batch's owning DB. + const updated = await runStore.updateManyBatchTaskRunItems({ + where: { id: itemId, batchTaskRunId, status: "PENDING" }, data: { status: "COMPLETED", taskRunAttemptId }, }); diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts new file mode 100644 index 00000000000..df6b74753ee --- /dev/null +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.test.ts @@ -0,0 +1,93 @@ +// Real PG14 (legacy replica) + PG17 (new) proof for the bulk batch read-through adapter. +// We NEVER mock the DB: each closure runs a real `$queryRaw` against the passed container +// (crossing the actual PG14↔PG17 boundary) then filters an in-memory seeded set by id — +// mirroring readThrough.server.test.ts's `realRead`. The only injected fakes are throwing +// spies asserting a store was NEVER touched. +import { heteroPostgresTest } from "@internal/testcontainers"; +import { describe, expect, vi } from "vitest"; +import type { PrismaReplicaClient } from "~/db.server"; +import { hydrateRunsAcrossSeam } from "./BulkActionV2.batchReadThrough.server"; + +vi.setConfig({ testTimeout: 60_000 }); + +// 25-char cuid body → LEGACY residency. 27-char body → NEW residency. +const LEGACY_RUN_ID = "run_" + "a".repeat(25); +const NEW_RUN_ID = "run_" + "b".repeat(27); + +type Row = { id: string }; + +// Real read against the given container, then return rows for the ids present in `present`. 
+async function realReadFiltered( + client: PrismaReplicaClient, + ids: string[], + present: Set +): Promise { + await client.$queryRaw<{ marker: number }[]>`SELECT 1 AS marker`; + return ids.filter((id) => present.has(id)).map((id) => ({ id })); +} + +describe("hydrateRunsAcrossSeam (PG14 legacy replica + PG17 new)", () => { + heteroPostgresTest( + "(a) mixed page: NEW id from new, LEGACY id from legacy replica; new id never hits legacy", + async ({ prisma14, prisma17 }) => { + const onNew = new Set([NEW_RUN_ID]); + const onLegacy = new Set([LEGACY_RUN_ID]); + + const readLegacyReplica = vi.fn( + async (replica: PrismaReplicaClient, ids: string[]): Promise => { + if (ids.includes(NEW_RUN_ID)) { + throw new Error("legacy replica must never be probed for a NEW-residency id"); + } + return realReadFiltered(replica, ids, onLegacy); + } + ); + + const rows = await hydrateRunsAcrossSeam({ + runIds: [NEW_RUN_ID, LEGACY_RUN_ID], + readNew: (client, ids) => realReadFiltered(client, ids, onNew), + readLegacyReplica, + deps: { + splitEnabled: true, + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + }, + }); + + const ids = rows.map((r) => r.id).sort(); + expect(ids).toEqual([LEGACY_RUN_ID, NEW_RUN_ID].sort()); + expect(readLegacyReplica).toHaveBeenCalledTimes(1); + // legacy was only probed for the legacy id + expect(readLegacyReplica.mock.calls[0][1]).toEqual([LEGACY_RUN_ID]); + } + ); + + heteroPostgresTest( + "(c) passthrough: splitEnabled false reads only the single client; legacy never touched", + async ({ prisma14, prisma17 }) => { + const onNew = new Set([NEW_RUN_ID, LEGACY_RUN_ID]); + const throwingLegacy = vi.fn(async (): Promise => { + throw new Error("readLegacyReplica must never run in single-DB mode"); + }); + const readNew = vi.fn((client: PrismaReplicaClient, ids: string[]) => + realReadFiltered(client, ids, onNew) + ); + + const rows = await hydrateRunsAcrossSeam({ + runIds: [NEW_RUN_ID, 
LEGACY_RUN_ID], + readNew, + readLegacyReplica: throwingLegacy, + deps: { + splitEnabled: false, + // single collapsed store (use prisma17 here as the "new"/primary analog) + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + }, + }); + + const ids = rows.map((r) => r.id).sort(); + expect(ids).toEqual([LEGACY_RUN_ID, NEW_RUN_ID].sort()); + expect(readNew).toHaveBeenCalledTimes(1); + expect(throwingLegacy).not.toHaveBeenCalled(); + } + ); +}); diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts new file mode 100644 index 00000000000..05e2965287e --- /dev/null +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.batchReadThrough.server.ts @@ -0,0 +1,108 @@ +/** + * Batch adapter over the per-id `readThroughRun` (see + * `~/v3/runOpsMigration/readThrough.server.ts`). A bulk action processes a PAGE of + * member run ids at once, so instead of N per-id round trips this reproduces the + * per-id read-through ordering as SET reads: + * + * 1. single-DB passthrough (splitEnabled === false): ONE read against the collapsed + * store, no residency classification, no legacy probe. + * 2. split on: classify each id's residency via `ownerEngine`, read NEW for every id + * that could be on new (residency NEW *and* legacy-candidates — read-through is + * new-FIRST for legacy too), then probe the LEGACY READ REPLICA ONLY for the + * legacy-candidates the new read missed. + * + * Like the per-id layer this NEVER touches a legacy primary/writer — there is no such + * handle. An id is read from new OR legacy, never both: legacy is only probed for ids + * new missed, so the returned set needs no dedupe. 
+ */ +import type { PrismaReplicaClient } from "~/db.server"; +import { + runOpsLegacyReplica as defaultLegacyReplica, + runOpsNewReplica as defaultNewClient, +} from "~/db.server"; +import { ownerEngine, UnclassifiableRunId } from "@trigger.dev/core/v3/isomorphic"; + +export type SeamReadDeps = { + /** + * Resolved boot constant. REQUIRED here — the caller resolves it once per + * request via `isSplitEnabled()`; this adapter never awaits it itself. + */ + splitEnabled: boolean; + newClient?: PrismaReplicaClient; + legacyReplica?: PrismaReplicaClient; + logger?: { warn: (m: string, meta?: unknown) => void }; +}; + +type HydrateRunsAcrossSeamInput = { + runIds: string[]; + readNew: (client: PrismaReplicaClient, ids: string[]) => Promise; + readLegacyReplica: (replica: PrismaReplicaClient, ids: string[]) => Promise; + deps: SeamReadDeps; +}; + +/** Every row shape we hydrate carries an `id` (CANCEL select includes it; REPLAY is a full row). */ +function getId(row: unknown): string { + return (row as { id: string }).id; +} + +export async function hydrateRunsAcrossSeam(input: HydrateRunsAcrossSeamInput): Promise { + const { runIds, deps } = input; + + if (runIds.length === 0) { + return []; + } + + const newClient = deps.newClient ?? defaultNewClient; + + // Passthrough: one plain read against the single collapsed store. No residency + // classification, no legacy probe, no second connection. When the caller passes its + // own `_replica` as `newClient`, this is byte-identical to the pre-migration single-DB read. + if (deps.splitEnabled === false) { + return input.readNew(newClient, runIds); + } + + // Split is on. Classify residency; unclassifiable → LEGACY (probe rather than drop). 
+ const newIds: string[] = []; + const legacyCandidateIds: string[] = []; + for (const runId of runIds) { + let residency: "LEGACY" | "NEW"; + try { + residency = ownerEngine(runId); + } catch (e) { + if (e instanceof UnclassifiableRunId) { + deps.logger?.warn("hydrateRunsAcrossSeam: UnclassifiableRunId, treating as LEGACY", { + runId, + valueLength: e.valueLength, + }); + residency = "LEGACY"; + } else { + throw e; + } + } + if (residency === "NEW") { + newIds.push(runId); + } else { + legacyCandidateIds.push(runId); + } + } + + // Read NEW for everything that could be on new — NEW-residency ids AND legacy-candidates + // (read-through is new-FIRST for legacy too) — in one read. + const legacyReplica = deps.legacyReplica ?? defaultLegacyReplica; + const newRows = await input.readNew(newClient, [...newIds, ...legacyCandidateIds]); + const foundOnNew = new Set(newRows.map(getId)); + + // Legacy-candidates the new read missed are probed on the legacy read replica. + const legacyToProbe = legacyCandidateIds.filter((id) => !foundOnNew.has(id)); + + // Legacy READ REPLICA only — never a legacy writer/primary (no such handle exists). + // A member absent from both DBs is simply not hydrated (matching today's `findMany`, + // where a missing id yields no row). + let legacyRows: T[] = []; + if (legacyToProbe.length > 0) { + legacyRows = await input.readLegacyReplica(legacyReplica, legacyToProbe); + } + + // Order within the page is irrelevant (downstream pMap does not depend on it). 
+ return [...newRows, ...legacyRows]; +} diff --git a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts index d03ab71796f..093c9408eb7 100644 --- a/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts +++ b/apps/webapp/app/v3/services/bulk/BulkActionV2.server.ts @@ -25,6 +25,9 @@ import parseDuration from "parse-duration"; import { v3BulkActionPath } from "~/utils/pathBuilder"; import { formatDateTime } from "~/components/primitives/DateTime"; import pMap from "p-map"; +import { type PrismaReplicaClient } from "~/db.server"; +import { isSplitEnabled } from "~/v3/runOpsMigration/splitMode.server"; +import { hydrateRunsAcrossSeam, type SeamReadDeps } from "./BulkActionV2.batchReadThrough.server"; export type ProcessToCompletionOptions = { /** Absolute timestamp (ms) after which processing stops and returns incomplete. */ @@ -36,6 +39,20 @@ export type ProcessToCompletionResult = { }; export class BulkActionService extends BaseService { + #splitEnabledPromise?: Promise; + + // Resolves split mode once per service instance and returns the read-through deps for + // bulk member hydration. Single-DB: read through the service replica (byte-identical to + // the pre-migration read). Split: adapter defaults to run-ops new + legacy read replica. + async #seamReadDeps(): Promise { + this.#splitEnabledPromise ??= isSplitEnabled(); + const splitEnabled = await this.#splitEnabledPromise; + return { + splitEnabled, + newClient: splitEnabled ? 
undefined : (this._replica as unknown as PrismaReplicaClient), + }; + } + public async create( organizationId: string, projectId: string, @@ -218,7 +235,6 @@ export class BulkActionService extends BaseService { prisma: this._replica as PrismaClient, }); - // In the future we can support multiple query names, when we make changes if (group.queryName !== "bulk_action_v1") { throw new Error(`Bulk action group has invalid query name: ${group.queryName}`); } @@ -246,25 +262,37 @@ export class BulkActionService extends BaseService { case BulkActionType.CANCEL: { const cancelService = new CancelTaskRunService(this._prisma); - const runs = await this.runStore.findRuns( - { - where: { - id: { - in: runIdsToProcess, + const seamDeps = await this.#seamReadDeps(); + const runs = await hydrateRunsAcrossSeam({ + runIds: runIdsToProcess, + readNew: (client, ids) => + client.taskRun.findMany({ + where: { id: { in: ids } }, + select: { + id: true, + engine: true, + friendlyId: true, + status: true, + createdAt: true, + completedAt: true, + taskEventStore: true, }, - }, - select: { - id: true, - engine: true, - friendlyId: true, - status: true, - createdAt: true, - completedAt: true, - taskEventStore: true, - }, - }, - this._replica - ); + }), + readLegacyReplica: (replica, ids) => + replica.taskRun.findMany({ + where: { id: { in: ids } }, + select: { + id: true, + engine: true, + friendlyId: true, + status: true, + createdAt: true, + completedAt: true, + taskEventStore: true, + }, + }), + deps: seamDeps, + }); await pMap( runs, @@ -300,16 +328,14 @@ export class BulkActionService extends BaseService { case BulkActionType.REPLAY: { const replayService = new ReplayTaskRunService(this._prisma); - const runs = await this.runStore.findRuns( - { - where: { - id: { - in: runIdsToProcess, - }, - }, - }, - this._replica - ); + const seamDeps = await this.#seamReadDeps(); + const runs = await hydrateRunsAcrossSeam({ + runIds: runIdsToProcess, + readNew: (client, ids) => 
client.taskRun.findMany({ where: { id: { in: ids } } }), + readLegacyReplica: (replica, ids) => + replica.taskRun.findMany({ where: { id: { in: ids } } }), + deps: seamDeps, + }); await pMap( runs, diff --git a/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts b/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts index c1562275e58..3575a750521 100644 --- a/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts +++ b/apps/webapp/app/v3/services/cancelDevSessionRuns.server.ts @@ -1,9 +1,12 @@ +import { type RunStore } from "@internal/run-store"; import { z } from "zod"; +import { type PrismaClientOrTransaction } from "~/db.server"; import { findLatestSession } from "~/models/runtimeEnvironment.server"; import { logger } from "~/services/logger.server"; import { commonWorker } from "../commonWorker.server"; +import { type ReadThroughDeps, readThroughRun } from "../runOpsMigration/readThrough.server"; import { BaseService } from "./baseService.server"; -import { CancelTaskRunService } from "./cancelTaskRun.server"; +import { type CancelableTaskRun, CancelTaskRunService } from "./cancelTaskRun.server"; export const CancelDevSessionRunsServiceOptions = z.object({ runIds: z.array(z.string()), @@ -15,6 +18,23 @@ export const CancelDevSessionRunsServiceOptions = z.object({ export type CancelDevSessionRunsServiceOptions = z.infer; export class CancelDevSessionRunsService extends BaseService { + // Injectable read-through deps for the run-ops TaskRun read. Undefined in production: + // readThroughRun then uses its ~/db.server singleton handles and the boot split flag, + // so single-DB is unchanged. Tests inject the hetero new/legacy handles + splitEnabled. 
+ readonly #readThroughDeps?: ReadThroughDeps; + + constructor( + opts: { + prisma?: PrismaClientOrTransaction; + replica?: PrismaClientOrTransaction; + runStore?: RunStore; + readThroughDeps?: ReadThroughDeps; + } = {} + ) { + super(opts.prisma, opts.replica, opts.runStore); + this.#readThroughDeps = opts.readThroughDeps; + } + public async call(options: CancelDevSessionRunsServiceOptions) { const cancelledSession = options.cancelledSessionId ? await this._prisma.runtimeEnvironmentSession.findFirst({ @@ -23,7 +43,7 @@ export class CancelDevSessionRunsService extends BaseService { : undefined; if (cancelledSession) { - const latestSession = await findLatestSession(cancelledSession.environmentId); + const latestSession = await findLatestSession(cancelledSession.environmentId, this._replica); if ( latestSession && @@ -49,12 +69,17 @@ export class CancelDevSessionRunsService extends BaseService { const cancelTaskRunService = new CancelTaskRunService(); + // readThroughRun resolves residency from the run id alone; an env scope is only + // available when a cancelled session was resolved. + const environmentId = cancelledSession?.environmentId ?? ""; + for (const runId of options.runIds) { await this.#cancelInProgressRun( runId, cancelTaskRunService, options.cancelledAt, - options.reason + options.reason, + environmentId ); } } @@ -63,18 +88,53 @@ export class CancelDevSessionRunsService extends BaseService { runId: string, service: CancelTaskRunService, cancelledAt: Date, - reason: string + reason: string, + environmentId: string ) { logger.debug("Cancelling in progress run", { runId }); - const taskRun = runId.startsWith("run_") - ? await this.runStore.findRun({ friendlyId: runId }, this._prisma) - : await this.runStore.findRun({ id: runId }, this._prisma); + // Read-through: new store first, legacy read replica for an old + // in-retention run; single plain read in single-DB passthrough. + const where = runId.startsWith("run_") ? 
{ friendlyId: runId } : { id: runId }; + + const result = await readThroughRun({ + runId, + environmentId, + readNew: (client) => + client.taskRun.findFirst({ + where, + select: { + id: true, + engine: true, + status: true, + friendlyId: true, + taskEventStore: true, + createdAt: true, + completedAt: true, + }, + }), + readLegacy: (replica) => + replica.taskRun.findFirst({ + where, + select: { + id: true, + engine: true, + status: true, + friendlyId: true, + taskEventStore: true, + createdAt: true, + completedAt: true, + }, + }), + deps: this.#readThroughDeps, + }); - if (!taskRun) { + if (result.source === "not-found" || result.source === "past-retention") { return; } + const taskRun = result.value; + try { await service.call(taskRun, { reason, cancelAttempts: true, cancelledAt }); } catch (e) { diff --git a/apps/webapp/app/v3/services/createCheckpoint.server.ts b/apps/webapp/app/v3/services/createCheckpoint.server.ts index e9e4e3d5560..43ff25f2053 100644 --- a/apps/webapp/app/v3/services/createCheckpoint.server.ts +++ b/apps/webapp/app/v3/services/createCheckpoint.server.ts @@ -146,14 +146,12 @@ export class CreateCheckpointService extends BaseService { break; } case "WAIT_FOR_BATCH": { - const batchRun = await this._prisma.batchTaskRun.findFirst({ - where: { - friendlyId: reason.batchFriendlyId, - }, - select: { - resumedAt: true, - }, - }); + // Routed by friendlyId so a ksuid (NEW-resident) batch is found on the owning DB; + // env-scoped to the dependent attempt's run (a batch shares its dependent's env). 
+ const batchRun = await this.runStore.findBatchTaskRunByFriendlyId( + reason.batchFriendlyId, + attempt.taskRun.runtimeEnvironmentId + ); if (!batchRun) { logger.error("CreateCheckpointService: Pre-check - Batch not found", { @@ -363,15 +361,12 @@ export class CreateCheckpointService extends BaseService { }); await marqs?.cancelHeartbeat(attempt.taskRunId); - const batchRun = await this._prisma.batchTaskRun.findFirst({ - select: { - id: true, - batchVersion: true, - }, - where: { - friendlyId: reason.batchFriendlyId, - }, - }); + // Routed by friendlyId so a ksuid (NEW-resident) batch is found on the owning DB; + // env-scoped to the dependent attempt's run (a batch shares its dependent's env). + const batchRun = await this.runStore.findBatchTaskRunByFriendlyId( + reason.batchFriendlyId, + attempt.taskRun.runtimeEnvironmentId + ); if (!batchRun) { logger.error("CreateCheckpointService: Batch not found", { diff --git a/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts b/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts index cb66166d104..094e75c9a11 100644 --- a/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts +++ b/apps/webapp/app/v3/services/createTaskRunAttempt.server.ts @@ -15,6 +15,7 @@ import { BaseService, ServiceValidationError } from "./baseService.server"; import { CrashTaskRunService } from "./crashTaskRun.server"; import { ExpireEnqueuedRunService } from "./expireEnqueuedRun.server"; import { runStore } from "../runStore.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; export class CreateTaskRunAttemptService extends BaseService { public async call({ @@ -62,19 +63,6 @@ export class CreateTaskRunAttemptService extends BaseService { number: "desc", }, }, - lockedBy: { - include: { - worker: { - select: { - id: true, - version: true, - sdkVersion: true, - cliVersion: true, - supportsLazyAttempts: true, - }, - }, - }, - }, batchItems: { include: { batchTaskRun: { @@ -108,18 +96,16 
@@ export class CreateTaskRunAttemptService extends BaseService { throw new ServiceValidationError("Task run is already finished", 400); } - const lockedBy = taskRun.lockedBy; + const lockedWorker = await controlPlaneResolver.resolveRunLockedWorker({ + lockedById: taskRun.lockedById, + }); + const lockedBy = lockedWorker?.lockedBy; if (!lockedBy) { throw new ServiceValidationError("Task run is not locked", 400); } - const queue = await findQueueInEnvironment( - taskRun.queue, - environment.id, - lockedBy.id, - lockedBy - ); + const queue = await findQueueInEnvironment(taskRun.queue, environment.id, lockedBy.id); if (!queue) { throw new ServiceValidationError("Queue not found", 404); @@ -275,13 +261,8 @@ async function getAuthenticatedEnvironmentFromRun( friendlyId: isFriendlyId ? friendlyId : undefined, }, { - include: { - runtimeEnvironment: { - include: { - organization: true, - project: true, - }, - }, + select: { + runtimeEnvironmentId: true, }, }, prismaClient ?? prisma @@ -291,5 +272,7 @@ async function getAuthenticatedEnvironmentFromRun( return; } - return taskRun?.runtimeEnvironment; + return ( + (await controlPlaneResolver.resolveAuthenticatedEnv(taskRun.runtimeEnvironmentId)) ?? 
undefined + ); } diff --git a/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts b/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts index 2e9d86916c0..9b78622a057 100644 --- a/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts +++ b/apps/webapp/app/v3/services/enqueueDelayedRun.server.ts @@ -5,6 +5,7 @@ import { commonWorker } from "../commonWorker.server"; import { BaseService } from "./baseService.server"; import { enqueueRun } from "./enqueueRun.server"; import { ExpireEnqueuedRunService } from "./expireEnqueuedRun.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; import { isV3Disabled } from "../engineDeprecation.server"; export class EnqueueDelayedRunService extends BaseService { @@ -39,12 +40,6 @@ export class EnqueueDelayedRunService extends BaseService { }, { include: { - runtimeEnvironment: { - include: { - organization: true, - project: true, - }, - }, dependency: { include: { dependentBatchRun: { @@ -82,6 +77,13 @@ export class EnqueueDelayedRunService extends BaseService { return; } + const env = await controlPlaneResolver.resolveAuthenticatedEnv(run.runtimeEnvironmentId); + + if (!env) { + logger.debug("EnqueueDelayedRunService: environment not found", { runId }); + return; + } + if (run.status !== "DELAYED") { logger.debug("Delayed run cannot be enqueued because it's not in DELAYED status", { run, @@ -109,7 +111,7 @@ export class EnqueueDelayedRunService extends BaseService { } await enqueueRun({ - env: run.runtimeEnvironment, + env, run: run, dependentRun: run.dependency?.dependentAttempt?.taskRun ?? 
diff --git a/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts b/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts index b6c25db43ed..f8d6dcc6555 100644 --- a/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts +++ b/apps/webapp/app/v3/services/executeTasksWaitingForDeploy.ts @@ -1,3 +1,4 @@ +import { isClassifiable, ownerEngine } from "@trigger.dev/core/v3/isomorphic"; import { env } from "~/env.server"; import { logger } from "~/services/logger.server"; import { marqs } from "~/v3/marqs/index.server"; @@ -69,11 +70,27 @@ export class ExecuteTasksWaitingForDeployService extends BaseService { return; } - // Clear any runs awaiting deployment for execution + // Defense-in-depth: the open-predicate findRuns fan-out can select runs from + // either DB, but the status flip below is a single control-plane updateMany. A + // ksuid (NEW-resident) run can only reach WAITING_FOR_DEPLOY via a misconfiguration + // (it is a V1/cuid-only status — V2 uses PENDING_VERSION). Surface it loudly rather + // than silently strand the run, and only mutate the LEGACY-resident runs the + // control-plane client can actually reach. 
+ const newResidentRuns = runsWaitingForDeploy.filter( + (run) => isClassifiable(run.id) && ownerEngine(run.id) === "NEW" + ); + if (newResidentRuns.length) { + logger.error( + "WAITING_FOR_DEPLOY selected NEW-resident runs; skipping their control-plane status flip", + { runIds: newResidentRuns.map((run) => run.id) } + ); + } + const legacyRuns = runsWaitingForDeploy.filter((run) => !newResidentRuns.includes(run)); + const pendingRuns = await this._prisma.taskRun.updateMany({ where: { id: { - in: runsWaitingForDeploy.map((run) => run.id), + in: legacyRuns.map((run) => run.id), }, }, data: { @@ -83,12 +100,14 @@ export class ExecuteTasksWaitingForDeployService extends BaseService { if (pendingRuns.count) { logger.debug("Task runs waiting for deploy are now ready for execution", { - tasks: runsWaitingForDeploy.map((run) => run.id), + tasks: legacyRuns.map((run) => run.id), total: pendingRuns.count, }); } - for (const run of runsWaitingForDeploy) { + // Only enqueue the runs whose status was actually flipped (the legacy set) — never + // marqs-enqueue a NEW-resident run we couldn't transition out of WAITING_FOR_DEPLOY. 
+ for (const run of legacyRuns) { await marqs?.enqueueMessage( backgroundWorker.runtimeEnvironment, run.queue, diff --git a/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts b/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts index 809c1bb49e3..06b4db57ec4 100644 --- a/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts +++ b/apps/webapp/app/v3/services/expireEnqueuedRun.server.ts @@ -5,6 +5,7 @@ import { BaseService } from "./baseService.server"; import { FinalizeTaskRunService } from "./finalizeTaskRun.server"; import { tryCatch } from "@trigger.dev/core/utils"; import { getEventRepositoryForStore } from "../eventRepository/index.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; import { isV3Disabled } from "../engineDeprecation.server"; export class ExpireEnqueuedRunService extends BaseService { @@ -29,13 +30,24 @@ export class ExpireEnqueuedRunService extends BaseService { id: runId, }, { - include: { - runtimeEnvironment: { - include: { - organization: true, - project: true, - }, - }, + select: { + id: true, + status: true, + engine: true, + lockedAt: true, + ttl: true, + taskEventStore: true, + runtimeEnvironmentId: true, + friendlyId: true, + traceId: true, + spanId: true, + parentSpanId: true, + createdAt: true, + completedAt: true, + taskIdentifier: true, + projectId: true, + organizationId: true, + isTest: true, }, }, this._prisma @@ -55,6 +67,13 @@ export class ExpireEnqueuedRunService extends BaseService { return; } + const env = await controlPlaneResolver.resolveEnv(run.runtimeEnvironmentId); + + if (!env) { + logger.debug("ExpireEnqueuedRunService: environment not found", { runId }); + return; + } + if (run.status !== "PENDING") { logger.debug("Run cannot be expired because it's not in PENDING status", { run, @@ -90,7 +109,7 @@ export class ExpireEnqueuedRunService extends BaseService { const eventRepository = await getEventRepositoryForStore( run.taskEventStore, - 
run.runtimeEnvironment.organization.id + env.organizationId ); if (run.ttl) { diff --git a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts index d6d35d0e4ca..1443a6b7a0d 100644 --- a/apps/webapp/app/v3/services/finalizeTaskRun.server.ts +++ b/apps/webapp/app/v3/services/finalizeTaskRun.server.ts @@ -19,6 +19,7 @@ import { completeBatchTaskRunItemV3 } from "./batchTriggerV3.server"; import { ExpireEnqueuedRunService } from "./expireEnqueuedRun.server"; import { ResumeBatchRunService } from "./resumeBatchRun.server"; import { ResumeDependentParentsService } from "./resumeDependentParents.server"; +import { controlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; type BaseInput = { id: string; @@ -92,10 +93,8 @@ export class FinalizeTaskRunService extends BaseService { } } - // I moved the error update here for two reasons: - // - A single update is more efficient than two - // - If the status updates to a final status, realtime will receive that status and then shut down the stream - // before the error is updated, which would cause the error to be lost + // Error is written in the same update as the status: a separate later write races realtime, + // which shuts the stream down on the final status before the error lands, losing it. const taskRunError = error ? 
sanitizeError(error) : undefined; const run = await this._prisma.taskRun.update({ @@ -131,7 +130,6 @@ export class FinalizeTaskRunService extends BaseService { }); } - //resume any dependencies const resumeService = new ResumeDependentParentsService(this._prisma); const result = await resumeService.call({ id: run.id }); @@ -144,7 +142,6 @@ export class FinalizeTaskRunService extends BaseService { }); } - //enqueue alert if (isFailedRunStatus(run.status)) { await PerformTaskRunAlertsService.enqueue(run.id); } @@ -157,22 +154,23 @@ export class FinalizeTaskRunService extends BaseService { { select: { id: true, - lockedToVersion: { - select: { - supportsLazyAttempts: true, - }, - }, - runtimeEnvironment: { - select: { - type: true, - }, - }, + runtimeEnvironmentId: true, + lockedToVersionId: true, }, }, this._prisma ); - if (extendedRun && extendedRun.runtimeEnvironment.type !== "DEVELOPMENT") { + const extendedEnv = extendedRun + ? await controlPlaneResolver.resolveEnv(extendedRun.runtimeEnvironmentId) + : null; + const extendedLockedWorker = extendedRun + ? await controlPlaneResolver.resolveRunLockedWorker({ + lockedToVersionId: extendedRun.lockedToVersionId, + }) + : null; + + if (extendedRun && extendedEnv && extendedEnv.type !== "DEVELOPMENT") { logger.warn("FinalizeTaskRunService: Fatal status, requesting worker exit", { runId: run.id, status: run.status, @@ -183,7 +181,9 @@ export class FinalizeTaskRunService extends BaseService { version: "v1", runId: run.id, // Give the run a few seconds to exit to complete any flushing etc - delayInMs: extendedRun.lockedToVersion?.supportsLazyAttempts ? 5_000 : undefined, + delayInMs: extendedLockedWorker?.lockedToVersion?.supportsLazyAttempts + ? 
5_000 + : undefined, }); } } @@ -247,7 +247,6 @@ export class FinalizeTaskRunService extends BaseService { await completeBatchTaskRunItemV3(item.id, item.batchTaskRunId, this._prisma); } else { // THIS IS DEPRECATED and only happens with batchVersion != v3 - // Update the item to complete await this._prisma.batchTaskRunItem.update({ where: { id: item.id, diff --git a/apps/webapp/app/v3/services/resumeBatchRun.server.ts b/apps/webapp/app/v3/services/resumeBatchRun.server.ts index fb3e24d1340..a7e42407d34 100644 --- a/apps/webapp/app/v3/services/resumeBatchRun.server.ts +++ b/apps/webapp/app/v3/services/resumeBatchRun.server.ts @@ -3,34 +3,31 @@ import { commonWorker } from "../commonWorker.server"; import { marqs } from "~/v3/marqs/index.server"; import { BaseService } from "./baseService.server"; import { logger } from "~/services/logger.server"; -import type { BatchTaskRun } from "@trigger.dev/database"; +import type { BatchTaskRun, Prisma } from "@trigger.dev/database"; +import { findEnvironmentById } from "~/models/runtimeEnvironment.server"; +import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; import { workerQueue } from "~/services/worker.server"; import { isV3Disabled } from "../engineDeprecation.server"; const finishedBatchRunStatuses = ["COMPLETED", "FAILED", "CANCELED"]; -type RetrieveBatchRunResult = NonNullable>>; +const BATCH_RUN_INCLUDE = { + items: { + select: { + status: true, + taskRunAttemptId: true, + }, + }, +} satisfies Prisma.BatchTaskRunInclude; + +type RetrieveBatchRunResult = Prisma.BatchTaskRunGetPayload<{ + include: typeof BATCH_RUN_INCLUDE; +}>; export class ResumeBatchRunService extends BaseService { public async call(batchRunId: string) { - const batchRun = await this._prisma.batchTaskRun.findFirst({ - where: { - id: batchRunId, - }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - }, - }, - items: { - select: { - status: true, - taskRunAttemptId: true, - }, - }, - }, + 
const batchRun = await this.runStore.findBatchTaskRunById(batchRunId, { + include: BATCH_RUN_INCLUDE, }); if (!batchRun) { @@ -44,8 +41,21 @@ export class ResumeBatchRunService extends BaseService { return "ERROR"; } + // BatchTaskRun -> RuntimeEnvironment FK is dropped; resolve the env from the scalar id. + const environment = await findEnvironmentById(batchRun.runtimeEnvironmentId); + if (!environment) { + logger.error("ResumeBatchRunService: Environment not found", { + batchRunId, + runtimeEnvironmentId: batchRun.runtimeEnvironmentId, + }); + + return "ERROR"; + } + // v3 (engine V1) shutdown: don't resume batches for abandoned V1 projects. v4 is unaffected. - if (isV3Disabled() && batchRun.runtimeEnvironment.project.engine === "V1") { + // The BatchTaskRun -> RuntimeEnvironment relation is dropped, so read the engine from the + // resolved environment's project rather than the unloaded batchRun.runtimeEnvironment relation. + if (isV3Disabled() && environment.project.engine === "V1") { logger.debug("[ResumeBatchRunService] Skipping resume for shut-down v3 batch", { batchRunId, }); @@ -53,13 +63,13 @@ export class ResumeBatchRunService extends BaseService { } if (batchRun.batchVersion === "v3") { - return await this.#handleV3BatchRun(batchRun); + return await this.#handleV3BatchRun(batchRun, environment); } else { - return await this.#handleLegacyBatchRun(batchRun); + return await this.#handleLegacyBatchRun(batchRun, environment); } } - async #handleV3BatchRun(batchRun: RetrieveBatchRunResult) { + async #handleV3BatchRun(batchRun: RetrieveBatchRunResult, environment: AuthenticatedEnvironment) { // V3 batch runs should already be complete by the time this is called if (batchRun.status !== "COMPLETED") { logger.debug("ResumeBatchRunService: Batch run is already completed", { @@ -82,10 +92,17 @@ export class ResumeBatchRunService extends BaseService { return "ERROR"; } - return await this.#handleDependentTaskAttempt(batchRun, batchRun.dependentTaskAttemptId); + 
return await this.#handleDependentTaskAttempt( + batchRun, + batchRun.dependentTaskAttemptId, + environment + ); } - async #handleLegacyBatchRun(batchRun: RetrieveBatchRunResult) { + async #handleLegacyBatchRun( + batchRun: RetrieveBatchRunResult, + environment: AuthenticatedEnvironment + ) { if (batchRun.status === "COMPLETED") { logger.debug("ResumeBatchRunService: Batch run is already completed", { batchRunId: batchRun.id, @@ -99,7 +116,6 @@ export class ResumeBatchRunService extends BaseService { } if (batchRun.batchVersion === "v2") { - // Make sure batchRun.items.length is equal to or greater than batchRun.runCount if (batchRun.items.length < batchRun.runCount) { logger.debug("ResumeBatchRunService: All items aren't yet completed [v2]", { batchRunId: batchRun.id, @@ -128,26 +144,32 @@ export class ResumeBatchRunService extends BaseService { } // If we are in development, or there is no dependent attempt, we can just mark the batch as completed and return - if (batchRun.runtimeEnvironment.type === "DEVELOPMENT" || !batchRun.dependentTaskAttemptId) { + if (environment.type === "DEVELOPMENT" || !batchRun.dependentTaskAttemptId) { // We need to update the batchRun status so we don't resume it again - await this._prisma.batchTaskRun.update({ + await this.runStore.updateBatchTaskRun({ where: { id: batchRun.id, }, data: { status: "COMPLETED", }, + select: { id: true }, }); return "COMPLETED"; } - return await this.#handleDependentTaskAttempt(batchRun, batchRun.dependentTaskAttemptId); + return await this.#handleDependentTaskAttempt( + batchRun, + batchRun.dependentTaskAttemptId, + environment + ); } async #handleDependentTaskAttempt( batchRun: RetrieveBatchRunResult, - dependentTaskAttemptId: string + dependentTaskAttemptId: string, + environment: AuthenticatedEnvironment ) { const dependentTaskAttempt = await this._prisma.taskRunAttempt.findFirst({ where: { @@ -179,7 +201,6 @@ export class ResumeBatchRunService extends BaseService { } // This batch has a dependent 
attempt and just finalized, we should resume that attempt - const environment = batchRun.runtimeEnvironment; const dependentRun = dependentTaskAttempt.taskRun; if (dependentTaskAttempt.status === "PAUSED" && batchRun.checkpointEventId) { @@ -298,7 +319,7 @@ export class ResumeBatchRunService extends BaseService { async #setBatchToResumedOnce(batchRun: BatchTaskRun) { // v3 batches don't use the status for deciding whether a batch has been resumed if (batchRun.batchVersion === "v3") { - const result = await this._prisma.batchTaskRun.updateMany({ + const result = await this.runStore.updateManyBatchTaskRun({ where: { id: batchRun.id, resumedAt: null, @@ -308,16 +329,14 @@ export class ResumeBatchRunService extends BaseService { }, }); - // Check if any records were updated if (result.count > 0) { - // The status was changed, so we return true return true; } else { return false; } } - const result = await this._prisma.batchTaskRun.updateMany({ + const result = await this.runStore.updateManyBatchTaskRun({ where: { id: batchRun.id, status: { @@ -329,9 +348,7 @@ export class ResumeBatchRunService extends BaseService { }, }); - // Check if any records were updated if (result.count > 0) { - // The status was changed, so we return true return true; } else { return false; @@ -380,25 +397,3 @@ export class ResumeBatchRunService extends BaseService { } } } - -async function retrieveBatchRun(id: string, prisma: PrismaClientOrTransaction) { - return await prisma.batchTaskRun.findFirst({ - where: { - id, - }, - include: { - runtimeEnvironment: { - include: { - project: true, - organization: true, - }, - }, - items: { - select: { - status: true, - taskRunAttemptId: true, - }, - }, - }, - }); -} diff --git a/apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts b/apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts new file mode 100644 index 00000000000..d5a4f0ccda9 --- /dev/null +++ b/apps/webapp/test/batchTriggerV3ResidencyInheritance.test.ts @@ -0,0 +1,109 @@ +import 
{ describe, expect, it, vi } from "vitest"; + +// Module-level db wiring is imported transitively by the service file. The mint +// helper under test never touches the DB (it is driven with injected deps), so +// these empty singletons only satisfy the import graph — same boundary pattern +// as triggerTask.server.test.ts and runEngineBatchTriggerStoreRouting.test.ts. +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, + runOpsNewReplica: {}, + runOpsLegacyReplica: {}, +})); + +import { BatchId, generateKsuidId, ownerEngine, RunId } from "@trigger.dev/core/v3/isomorphic"; +import type { AuthenticatedEnvironment } from "~/services/apiAuth.server"; +import { BatchTriggerV3Service } from "~/v3/services/batchTriggerV3.server"; + +vi.setConfig({ testTimeout: 60_000 }); + +const CUID_LEN = 25; +const KSUID_LEN = 27; + +// Minimal AuthenticatedEnvironment — only the fields the mint path reads +// (organizationId, id, organization.featureFlags) need to be real. A root batch +// (no parentRunId) with no ksuid override mints cuid, which is the env-default +// branch we assert on below. +function fakeEnv(): AuthenticatedEnvironment { + return { + id: "env_123", + organizationId: "org_123", + organization: { featureFlags: {} }, + } as unknown as AuthenticatedEnvironment; +} + +// Build the service with resolveMintKind forced to "cuid" (its production default +// when split is off / org not cut over), proving the CHILD branch overrides the env +// default purely from the parent's id-shape. 
+function buildService() { + return new BatchTriggerV3Service(undefined, undefined, {} as any, {} as any, async () => "cuid"); +} + +describe("BatchTriggerV3Service child-residency inheritance", () => { + it("a ksuid parent yields ksuid (NEW) child friendlyIds", async () => { + const service = buildService(); + const parentFriendlyId = RunId.toFriendlyId( + // 27-char ksuid internal id → NEW residency parent + "a".repeat(KSUID_LEN) + ); + expect(ownerEngine(RunId.fromFriendlyId(parentFriendlyId))).toBe("NEW"); + + const childFriendlyId = await (service as any).mintChildFriendlyId(fakeEnv(), parentFriendlyId); + + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(KSUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("NEW"); + }); + + it("a cuid parent yields cuid (LEGACY) child friendlyIds", async () => { + const service = buildService(); + const parentFriendlyId = RunId.generate().friendlyId; // cuid (25) → LEGACY parent + expect(ownerEngine(RunId.fromFriendlyId(parentFriendlyId))).toBe("LEGACY"); + + const childFriendlyId = await (service as any).mintChildFriendlyId(fakeEnv(), parentFriendlyId); + + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(CUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("LEGACY"); + }); + + it("a ROOT batch (no parentRunId) mints by the env setting (cuid default here)", async () => { + const service = buildService(); + const childFriendlyId = await (service as any).mintChildFriendlyId(fakeEnv(), undefined); + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(CUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("LEGACY"); + }); + + // A root batch's children are anchored to the batch's friendlyId, NOT to a + // re-resolution of the per-org flag. 
Even with the env flag forced to "cuid" (a flip + // away from the batch's residency), a ksuid batch anchor yields ksuid children — so + // batch + children stay co-resident and TaskRun.batchId never crosses the seam. + it("a ksuid batch anchor yields ksuid children even when the env flag resolves cuid", async () => { + const service = buildService(); // resolveMintKind forced to "cuid" + const batchFriendlyId = BatchId.toFriendlyId(generateKsuidId()); // ksuid (NEW) batch + expect(ownerEngine(batchFriendlyId)).toBe("NEW"); + + const childFriendlyId = await (service as any).mintChildFriendlyId(fakeEnv(), batchFriendlyId); + + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(KSUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("NEW"); + }); + + // The cuid mirror: a cuid batch anchor yields cuid children even if the flag flipped ON. + it("a cuid batch anchor yields cuid children even when the env flag resolves ksuid", async () => { + const service = new BatchTriggerV3Service( + undefined, + undefined, + {} as any, + {} as any, + async () => "ksuid" // env flag flipped ON mid-batch + ); + const batchFriendlyId = BatchId.generate().friendlyId; // cuid (LEGACY) batch + expect(ownerEngine(batchFriendlyId)).toBe("LEGACY"); + + const childFriendlyId = await (service as any).mintChildFriendlyId(fakeEnv(), batchFriendlyId); + + expect(RunId.fromFriendlyId(childFriendlyId).length).toBe(CUID_LEN); + expect(ownerEngine(RunId.fromFriendlyId(childFriendlyId))).toBe("LEGACY"); + }); +}); diff --git a/apps/webapp/test/batchTriggerV3StoreRouting.test.ts b/apps/webapp/test/batchTriggerV3StoreRouting.test.ts new file mode 100644 index 00000000000..5e1f60d4de6 --- /dev/null +++ b/apps/webapp/test/batchTriggerV3StoreRouting.test.ts @@ -0,0 +1,252 @@ +import { heteroPostgresTest } from "@internal/testcontainers"; +import { PostgresRunStore } from "@internal/run-store"; +import { isUniqueConstraintError, type PrismaClient } from "@trigger.dev/database"; 
+import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { describe, expect, vi } from "vitest"; + +vi.setConfig({ testTimeout: 60_000 }); + +// Proves BatchTriggerV3's three store seams (cached-run lookup, expired-key clear, +// membership write) route correctly against real PG14 (legacy) + PG17 (run-ops) +// containers, using the service's exact query shapes. The service methods are +// JS #-private, so the seam is driven directly — same approach as the sibling +// legacy-authority test. + +async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `test-${suffix}`, + pkApiKey: `test-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, runtimeEnvironment }; +} + +async function seedRun( + prisma: PrismaClient, + args: { + runtimeEnvironmentId: string; + projectId: string; + organizationId: string; + taskIdentifier: string; + idempotencyKey?: string; + status?: "PENDING" | "EXECUTING" | "COMPLETED_SUCCESSFULLY" | "COMPLETED_WITH_ERRORS"; + idempotencyKeyExpiresAt?: Date; + } +) { + const runId = generateKsuidId(); + return prisma.taskRun.create({ + data: { + id: runId, + friendlyId: `run_${runId}`, + taskIdentifier: args.taskIdentifier, + idempotencyKey: args.idempotencyKey ?? null, + idempotencyKeyExpiresAt: args.idempotencyKeyExpiresAt ?? null, + status: args.status ?? 
"EXECUTING", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: args.runtimeEnvironmentId, + projectId: args.projectId, + organizationId: args.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); +} + +async function seedBatch(prisma: PrismaClient, runtimeEnvironmentId: string, suffix: string) { + const batchId = generateKsuidId(); + return prisma.batchTaskRun.create({ + data: { + id: batchId, + friendlyId: `batch_${suffix}_${batchId}`, + runtimeEnvironmentId, + }, + }); +} + +describe("BatchTriggerV3 · store-seam routing (cross-DB)", () => { + heteroPostgresTest( + "(A) cached-run reuse resolves via the legacy (PG14) authority; a PG17-only key is invisible", + async ({ prisma14, prisma17 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "batch-cached" + ); + const newSide = await seedOrgProjectEnv(prisma17, "batch-cached-new"); + + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + + const key1 = "idem-batch-1"; + const key2 = "idem-batch-2"; + const freshKey = "idem-batch-fresh"; + + const run1 = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: key1, + }); + const run2 = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: key2, + }); + + // A row with one of the SAME keys lives only on PG17 (run-ops). The + // legacy-pinned read must NOT see it. 
+ await seedRun(prisma17, { + runtimeEnvironmentId: newSide.runtimeEnvironment.id, + projectId: newSide.project.id, + organizationId: newSide.organization.id, + taskIdentifier: "my-task", + idempotencyKey: key1, + }); + + // The service's exact cached-run query shape, pinned to PG14. + const cachedRuns = await legacyStore.findRuns( + { + where: { + runtimeEnvironmentId: runtimeEnvironment.id, + taskIdentifier: "my-task", + idempotencyKey: { in: [key1, key2, freshKey] }, + }, + select: { + friendlyId: true, + idempotencyKey: true, + idempotencyKeyExpiresAt: true, + }, + }, + prisma14 + ); + + // Exactly the 2 seeded rows; the fresh key matches nothing. + expect(cachedRuns).toHaveLength(2); + const friendlyIds = cachedRuns.map((r) => r.friendlyId).sort(); + expect(friendlyIds).toEqual([run1.friendlyId, run2.friendlyId].sort()); + // Each friendlyId distinct, exactly one row per seeded key. + expect(new Set(friendlyIds).size).toBe(2); + expect(cachedRuns.filter((r) => r.idempotencyKey === key1)).toHaveLength(1); + expect(cachedRuns.filter((r) => r.idempotencyKey === key2)).toHaveLength(1); + } + ); + + heteroPostgresTest( + "(B) expired-key clear is routed to the legacy (PG14) authority and does not touch PG17", + async ({ prisma14, prisma17 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "batch-expired" + ); + const newSide = await seedOrgProjectEnv(prisma17, "batch-expired-new"); + + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + + const expiredKey = "idem-batch-expired"; + + const legacyRun = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: expiredKey, + idempotencyKeyExpiresAt: new Date(Date.now() - 60_000), + }); + + // A PG17 row with the same key, to prove the clear does not reach it. 
+ const newRun = await seedRun(prisma17, { + runtimeEnvironmentId: newSide.runtimeEnvironment.id, + projectId: newSide.project.id, + organizationId: newSide.organization.id, + taskIdentifier: "my-task", + idempotencyKey: expiredKey, + }); + + // The service's exact expired-key clear shape, pinned to PG14. + await legacyStore.clearIdempotencyKey({ byFriendlyIds: [legacyRun.friendlyId] }, prisma14); + + const cleared = await prisma14.taskRun.findFirst({ where: { id: legacyRun.id } }); + expect(cleared?.idempotencyKey).toBeNull(); + + // The PG17 row is untouched. + const untouched = await prisma17.taskRun.findFirst({ where: { id: newRun.id } }); + expect(untouched?.idempotencyKey).toBe(expiredKey); + } + ); + + heteroPostgresTest( + "(C) membership write lands on the run-ops (PG17) store; duplicate raises a unique-constraint error", + async ({ prisma17 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma17, + "batch-membership" + ); + + const runOpsStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + + const batch = await seedBatch(prisma17, runtimeEnvironment.id, "membership"); + const run = await seedRun(prisma17, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + }); + + await runOpsStore.createBatchTaskRunItem({ + batchTaskRunId: batch.id, + taskRunId: run.id, + status: "PENDING", + }); + + const item = await prisma17.batchTaskRunItem.findFirst({ + where: { batchTaskRunId: batch.id, taskRunId: run.id }, + }); + expect(item).not.toBeNull(); + expect(item?.status).toBe("PENDING"); + + // Re-calling with the SAME pair raises a unique-constraint error at the + // store layer (the service's try/catch is what swallows it). 
+ let caught: unknown; + try { + await runOpsStore.createBatchTaskRunItem({ + batchTaskRunId: batch.id, + taskRunId: run.id, + status: "PENDING", + }); + } catch (error) { + caught = error; + } + + expect(caught).toBeDefined(); + expect(isUniqueConstraintError(caught, ["batchTaskRunId", "taskRunId"])).toBe(true); + + // Still exactly one row. + const count = await prisma17.batchTaskRunItem.count({ + where: { batchTaskRunId: batch.id, taskRunId: run.id }, + }); + expect(count).toBe(1); + } + ); +}); diff --git a/apps/webapp/test/bulkActionV2ReadRouting.test.ts b/apps/webapp/test/bulkActionV2ReadRouting.test.ts new file mode 100644 index 00000000000..314a6df6ca2 --- /dev/null +++ b/apps/webapp/test/bulkActionV2ReadRouting.test.ts @@ -0,0 +1,205 @@ +// Service-level proof for bulk CANCEL/REPLAY member hydration across the run-ops seam. +// +// `BulkActionService.process()` builds its ClickHouse-backed RunsRepository internally and +// has no test seam to inject the member-id page, and driving it end-to-end would require a +// full ClickHouse replication stack just to make `listRunIds` return the seeded ids. The +// cross-DB hydration semantics — the DoD's core — are proven exhaustively at the adapter +// unit level (BulkActionV2.batchReadThrough.server.test.ts). Here we prove the SERVICE-level +// wiring by driving the exact closures `process()` passes to `hydrateRunsAcrossSeam` against +// REAL rows seeded on the two containers (PG14 legacy + PG17 new), so the PG14↔PG17 boundary +// is genuinely crossed and the full REPLAY row shape is exercised. We NEVER mock the DB. 
+import { heteroPostgresTest } from "@internal/testcontainers"; +import { describe, expect, vi } from "vitest"; +import type { PrismaClient } from "@trigger.dev/database"; +import type { PrismaReplicaClient } from "~/db.server"; +import { hydrateRunsAcrossSeam } from "~/v3/services/bulk/BulkActionV2.batchReadThrough.server"; + +vi.setConfig({ testTimeout: 60_000 }); + +// 27-char body → NEW residency (ksuid analog). 25-char body → LEGACY residency (cuid analog). +function newId(c: string) { + return "run_" + c.repeat(27); +} +function legacyId(c: string) { + return "run_" + c.repeat(25); +} + +// The exact closures BulkActionService.process() uses for each branch. +const cancelSelect = { + id: true, + engine: true, + friendlyId: true, + status: true, + createdAt: true, + completedAt: true, + taskEventStore: true, +} as const; + +function cancelReadNew(client: PrismaReplicaClient, ids: string[]) { + return client.taskRun.findMany({ where: { id: { in: ids } }, select: cancelSelect }); +} +function cancelReadLegacy(replica: PrismaReplicaClient, ids: string[]) { + return replica.taskRun.findMany({ where: { id: { in: ids } }, select: cancelSelect }); +} +function replayReadNew(client: PrismaReplicaClient, ids: string[]) { + return client.taskRun.findMany({ where: { id: { in: ids } } }); +} +function replayReadLegacy(replica: PrismaReplicaClient, ids: string[]) { + return replica.taskRun.findMany({ where: { id: { in: ids } } }); +} + +async function seedEnv(prisma: PrismaClient, slug: string) { + const user = await prisma.user.create({ + data: { email: `${slug}@test.com`, name: "t", authenticationMethod: "MAGIC_LINK" }, + }); + const organization = await prisma.organization.create({ + data: { + title: "Org", + slug: `org-${slug}`, + members: { create: { userId: user.id, role: "ADMIN" } }, + }, + }); + const project = await prisma.project.create({ + data: { + name: "Proj", + slug: `proj-${slug}`, + organizationId: organization.id, + externalRef: `ext-${slug}`, + }, + }); 
+ const environment = await prisma.runtimeEnvironment.create({ + data: { + slug: `env-${slug}`, + type: "PRODUCTION", + projectId: project.id, + organizationId: organization.id, + apiKey: `api-${slug}`, + pkApiKey: `pk-${slug}`, + shortcode: `sc-${slug}`, + }, + }); + return { organization, project, environment }; +} + +async function seedRun( + prisma: PrismaClient, + ctx: { organization: { id: string }; project: { id: string }; environment: { id: string } }, + id: string +) { + await prisma.taskRun.create({ + data: { + id, + friendlyId: id, + taskIdentifier: "t", + status: "EXECUTING", + payload: JSON.stringify({}), + payloadType: "application/json", + traceId: id, + spanId: id, + queue: "main", + runtimeEnvironmentId: ctx.environment.id, + projectId: ctx.project.id, + organizationId: ctx.organization.id, + environmentType: "PRODUCTION", + engine: "V2", + }, + }); +} + +describe("BulkActionService member hydration across the seam (PG14 legacy + PG17 new)", () => { + heteroPostgresTest( + "CANCEL across both DBs hydrates every member; the NEW id never hits the legacy replica", + async ({ prisma14, prisma17 }) => { + const newRunId = newId("a"); + const legacyRunId = legacyId("b"); + + const newCtx = await seedEnv(prisma17 as unknown as PrismaClient, "cancel-new"); + const legacyCtx = await seedEnv(prisma14 as unknown as PrismaClient, "cancel-legacy"); + await seedRun(prisma17 as unknown as PrismaClient, newCtx, newRunId); + await seedRun(prisma14 as unknown as PrismaClient, legacyCtx, legacyRunId); + + const legacySpy = vi.fn((replica: PrismaReplicaClient, ids: string[]) => { + if (ids.includes(newRunId)) { + throw new Error("legacy replica must never be probed for a NEW-residency id"); + } + return cancelReadLegacy(replica, ids); + }); + + const runs = await hydrateRunsAcrossSeam({ + runIds: [newRunId, legacyRunId], + readNew: cancelReadNew, + readLegacyReplica: legacySpy, + deps: { + splitEnabled: true, + newClient: prisma17 as unknown as PrismaReplicaClient, + 
legacyReplica: prisma14 as unknown as PrismaReplicaClient, + }, + }); + + // Every member hydrated → every member reaches cancel (none dropped). + expect(runs.map((r) => r.id).sort()).toEqual([newRunId, legacyRunId].sort()); + expect(legacySpy.mock.calls[0][1]).toEqual([legacyRunId]); + } + ); + + heteroPostgresTest( + "REPLAY across both DBs hydrates every member as a FULL row", + async ({ prisma14, prisma17 }) => { + const newRunId = newId("c"); + const legacyRunId = legacyId("d"); + + const newCtx = await seedEnv(prisma17 as unknown as PrismaClient, "replay-new"); + const legacyCtx = await seedEnv(prisma14 as unknown as PrismaClient, "replay-legacy"); + await seedRun(prisma17 as unknown as PrismaClient, newCtx, newRunId); + await seedRun(prisma14 as unknown as PrismaClient, legacyCtx, legacyRunId); + + const runs = await hydrateRunsAcrossSeam({ + runIds: [newRunId, legacyRunId], + readNew: replayReadNew, + readLegacyReplica: replayReadLegacy, + deps: { + splitEnabled: true, + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + }, + }); + + expect(runs.map((r) => r.id).sort()).toEqual([newRunId, legacyRunId].sort()); + // Full row, not a select projection: a non-selected column is populated. + const newRow = runs.find((r) => r.id === newRunId)!; + const legacyRow = runs.find((r) => r.id === legacyRunId)!; + expect(newRow.runtimeEnvironmentId).toBe(newCtx.environment.id); + expect(legacyRow.runtimeEnvironmentId).toBe(legacyCtx.environment.id); + } + ); + + heteroPostgresTest( + "single-DB passthrough hydrates all members from one client; legacy never invoked", + async ({ prisma14, prisma17 }) => { + // In single-DB mode the service passes its _replica as newClient. Seed everything there. 
+ const idA = newId("f"); + const idB = legacyId("g"); + const ctx = await seedEnv(prisma17 as unknown as PrismaClient, "passthrough"); + await seedRun(prisma17 as unknown as PrismaClient, ctx, idA); + await seedRun(prisma17 as unknown as PrismaClient, ctx, idB); + + const throwingLegacy = vi.fn(() => { + throw new Error("legacy replica must never run in single-DB mode"); + }); + + const runs = await hydrateRunsAcrossSeam({ + runIds: [idA, idB], + readNew: cancelReadNew, + readLegacyReplica: throwingLegacy as never, + deps: { + splitEnabled: false, + newClient: prisma17 as unknown as PrismaReplicaClient, + legacyReplica: prisma14 as unknown as PrismaReplicaClient, + }, + }); + + expect(runs.map((r) => r.id).sort()).toEqual([idA, idB].sort()); + expect(throwingLegacy).not.toHaveBeenCalled(); + } + ); +}); diff --git a/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts b/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts new file mode 100644 index 00000000000..ea29821fd19 --- /dev/null +++ b/apps/webapp/test/cancelDevSessionRunsStoreRouting.test.ts @@ -0,0 +1,249 @@ +// Real PG14 (legacy) + PG17 (new) proof for the dev-session-cancel TaskRun read. +// The DB is never mocked: reads hit the two real containers. Only the pure +// splitEnabled boundary and recording client wrappers are injected. +import { heteroPostgresTest, postgresTest } from "@internal/testcontainers"; +import type { PrismaClient } from "@trigger.dev/database"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { describe, expect, vi } from "vitest"; +import type { PrismaReplicaClient } from "~/db.server"; +import { CancelDevSessionRunsService } from "~/v3/services/cancelDevSessionRuns.server"; + +vi.setConfig({ testTimeout: 60_000 }); + +// 25-char cuid body (length-disjoint from the 27-char KSUID) → LEGACY residency. 
+function generateLegacyCuid() { + const suffix = Array.from( + { length: 24 }, + () => "0123456789abcdefghijklmnopqrstuvwxyz"[Math.floor(Math.random() * 36)] + ).join(""); + return `c${suffix}`; +} + +async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `test-${suffix}`, + pkApiKey: `test-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, runtimeEnvironment }; +} + +async function seedRun( + prisma: PrismaClient, + ids: { id: string; friendlyId: string }, + env: { runtimeEnvironmentId: string; projectId: string; organizationId: string } +) { + return prisma.taskRun.create({ + data: { + id: ids.id, + friendlyId: ids.friendlyId, + taskIdentifier: "my-task", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: env.runtimeEnvironmentId, + projectId: env.projectId, + organizationId: env.organizationId, + environmentType: "DEVELOPMENT", + // V1 so the (best-effort, error-swallowed) cancel does not require the V2 engine; + // the unit under test is the READ resolution, not the cancel side effect. + engine: "V1", + status: "EXECUTING", + }, + }); +} + +// A read client whose taskRun.findFirst is recorded; throws if used after being marked +// forbidden, so we can prove a store was NEVER read. 
+function recording(client: PrismaClient, opts: { forbidden?: boolean } = {}) { + const calls: unknown[] = []; + const taskRun = { + findFirst: (args: unknown) => { + calls.push(args); + if (opts.forbidden) { + throw new Error("this store must never be read"); + } + return (client as unknown as PrismaReplicaClient).taskRun.findFirst(args as never); + }, + }; + return { handle: { ...client, taskRun } as unknown as PrismaReplicaClient, calls }; +} + +describe("CancelDevSessionRunsService store routing (hetero)", () => { + heteroPostgresTest( + "a NEW run (ksuid) resolves on the new store via read-through, by friendlyId and by id", + async ({ prisma17, prisma14 }) => { + const id = generateKsuidId(); + expect(id.length).toBe(27); + const friendlyId = `run_${id}`; + + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma17, + "new" + ); + await seedRun( + prisma17, + { id, friendlyId }, + { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + } + ); + + // by friendlyId + { + const newClient = recording(prisma17); + const legacy = recording(prisma14, { forbidden: true }); + const service = new CancelDevSessionRunsService({ + prisma: prisma17, + readThroughDeps: { + splitEnabled: true, + newClient: newClient.handle, + legacyReplica: legacy.handle, + }, + }); + await service.call({ + runIds: [friendlyId], + cancelledAt: new Date(), + reason: "test", + }); + // ksuid → NEW: new store served the read, legacy never touched. 
+ expect(newClient.calls.length).toBe(1); + expect(legacy.calls.length).toBe(0); + } + + // by internal id + { + const newClient = recording(prisma17); + const legacy = recording(prisma14, { forbidden: true }); + const service = new CancelDevSessionRunsService({ + prisma: prisma17, + readThroughDeps: { + splitEnabled: true, + newClient: newClient.handle, + legacyReplica: legacy.handle, + }, + }); + await service.call({ + runIds: [id], + cancelledAt: new Date(), + reason: "test", + }); + expect(newClient.calls.length).toBe(1); + expect(legacy.calls.length).toBe(0); + } + } + ); + + heteroPostgresTest( + "an OLD in-retention run (cuid) resolves off the LEGACY replica, never a legacy primary", + async ({ prisma17, prisma14 }) => { + const id = generateLegacyCuid(); + expect(id.length).toBe(25); + const friendlyId = `run_${id}`; + + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "legacy" + ); + await seedRun( + prisma14, + { id, friendlyId }, + { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + } + ); + + const newClient = recording(prisma17); + const legacy = recording(prisma14); + const service = new CancelDevSessionRunsService({ + prisma: prisma14, + readThroughDeps: { + splitEnabled: true, + newClient: newClient.handle, + legacyReplica: legacy.handle, + }, + }); + + await service.call({ + runIds: [id], + cancelledAt: new Date(), + reason: "test", + }); + + // NEW first (miss) → resolved off the LEGACY REPLICA handle (no primary handle exists). 
+ expect(newClient.calls.length).toBe(1); + expect(legacy.calls.length).toBe(1); + } + ); +}); + +describe("CancelDevSessionRunsService passthrough (single-DB)", () => { + postgresTest( + "with no read-through deps, the run is read from the single DB and session reads stay on it", + async ({ prisma }) => { + const id = generateKsuidId(); + const friendlyId = `run_${id}`; + + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv(prisma, "pt"); + await seedRun( + prisma, + { id, friendlyId }, + { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + } + ); + + const session = await prisma.runtimeEnvironmentSession.create({ + data: { environmentId: runtimeEnvironment.id, ipAddress: "127.0.0.1" }, + }); + + // splitEnabled=false → single plain read against the one client; the session + // control-plane read runs on the same prisma. + const service = new CancelDevSessionRunsService({ + prisma, + replica: prisma, + readThroughDeps: { + splitEnabled: false, + newClient: prisma as unknown as PrismaReplicaClient, + }, + }); + + await service.call({ + runIds: [id], + cancelledAt: new Date(), + reason: "test", + cancelledSessionId: session.id, + }); + + // Run found + handed to cancel against the single DB; confirm the row is present. 
+ const row = await prisma.taskRun.findFirst({ where: { id } }); + expect(row).not.toBeNull(); + expect(row?.friendlyId).toBe(friendlyId); + } + ); +}); diff --git a/apps/webapp/test/engine/streamBatchItems.test.ts b/apps/webapp/test/engine/streamBatchItems.test.ts index 48b0c3ccca1..038820ecbb8 100644 --- a/apps/webapp/test/engine/streamBatchItems.test.ts +++ b/apps/webapp/test/engine/streamBatchItems.test.ts @@ -15,6 +15,7 @@ vi.mock("~/services/platform.v3.server", async (importOriginal) => { }); import { RunEngine } from "@internal/run-engine"; +import { PostgresRunStore } from "@internal/run-store"; import { setupAuthenticatedEnvironment } from "@internal/run-engine/tests"; // Per-test redis isolation: each test runs its own RunEngine whose background work outlives the test // body. NoClickhouse because this suite never touches ClickHouse - skips the worker-scoped boot+migrate. @@ -41,9 +42,6 @@ import { setTimeout as sleep } from "node:timers/promises"; vi.setConfig({ testTimeout: 120_000 }); describe("StreamBatchItemsService", () => { - /** - * Helper to create a batch directly in the database - */ async function createBatch( prisma: PrismaClient, environmentId: string, @@ -74,9 +72,6 @@ describe("StreamBatchItemsService", () => { return batch; } - /** - * Helper to create an async iterable from items - */ async function* itemsToAsyncIterable( items: Array<{ task: string; payload: string; index: number }> ) { @@ -85,9 +80,6 @@ describe("StreamBatchItemsService", () => { } } - /** - * Build N valid batch items. - */ function makeItems(count: number, taskId = "test-task") { return Array.from({ length: count }, (_, index) => ({ task: taskId, @@ -660,6 +652,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. 
+ engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -784,6 +784,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. + engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -908,6 +916,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. + engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -1033,6 +1049,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. + engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -1245,6 +1269,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. 
+ engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -1376,6 +1408,14 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + // The batch find + seal updateMany now route through the engine's run-store + // (route-by-batch-id under the run-ops split), so the racing client must back the + // store the service reads through. + engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -1529,12 +1569,11 @@ describe("StreamBatchItemsService", () => { processingConcurrency: 10, }); - // Force the count-mismatch branch by leaving Redis at 0 items vs - // runCount=4. The pre-loop must see "initial" state (so it passes - // through to the loop), and the count-mismatch re-query must see - // "post-callback" state. Use a findFirst counter to flip the DB - // between those two reads, exactly matching the production timing - // where the callback fires while our loop is running. + // The pre-loop validate-find must see "initial" state (so it passes through to the + // loop), and the count-mismatch re-query must see "post-callback" state. Use a findFirst + // counter to flip the DB between those two reads, matching production timing where the + // callback fires while our loop runs. Both reads route through the engine's run-store + // (route-by-batch-id under the split), so the racing client backs the store. let findFirstCallCount = 0; const racingPrisma = { ...prisma, @@ -1543,10 +1582,6 @@ describe("StreamBatchItemsService", () => { findFirst: async (args: Parameters[0]) => { findFirstCallCount++; if (findFirstCallCount === 2) { - // The post-loop count-mismatch re-query: BatchQueue completed - // all items and the callback fired in the window before this - // read. 
Status stays PENDING (all runs created OK) but - // processingCompletedAt is now set. await prisma.batchTaskRun.update({ where: { id: batch.id }, data: { @@ -1562,6 +1597,11 @@ describe("StreamBatchItemsService", () => { }, } as unknown as PrismaClient; + engine.runStore = new PostgresRunStore({ + prisma: racingPrisma, + readOnlyPrisma: racingPrisma, + }); + const service = new StreamBatchItemsService({ prisma: racingPrisma, engine, @@ -1925,9 +1965,6 @@ describe("StreamBatchItemsService", () => { }); describe("createNdjsonParserStream", () => { - /** - * Helper to collect all items from a ReadableStream - */ async function collectStream(stream: ReadableStream): Promise { const results: T[] = []; for await (const item of streamToAsyncIterable(stream)) { @@ -1936,9 +1973,6 @@ describe("createNdjsonParserStream", () => { return results; } - /** - * Helper to create a ReadableStream from an array of Uint8Array chunks - */ function chunksToStream(chunks: Uint8Array[]): ReadableStream { let index = 0; return new ReadableStream({ diff --git a/apps/webapp/test/engine/triggerFailedTask.test.ts b/apps/webapp/test/engine/triggerFailedTask.test.ts new file mode 100644 index 00000000000..ab6951a5701 --- /dev/null +++ b/apps/webapp/test/engine/triggerFailedTask.test.ts @@ -0,0 +1,264 @@ +import { describe, expect } from "vitest"; + +import { RunEngine } from "@internal/run-engine"; +import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "@internal/run-engine/tests"; +import { containerTest } from "@internal/testcontainers"; +import { trace } from "@opentelemetry/api"; +import { RunId, classifyKind, generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { TriggerFailedTaskService } from "../../app/runEngine/services/triggerFailedTask.server"; +import { EventRepository } from "../../app/v3/eventRepository/eventRepository.server"; + +vi.setConfig?.({ testTimeout: 60_000 }); + +// Bind the service's trace-event writes to the testcontainer DB. 
Without this, +// call() resolves the repository via getEventRepository → global prisma, which +// points at a database that doesn't exist in CI. +function makeService(prisma: any, engine: RunEngine) { + return new TriggerFailedTaskService({ + prisma, + engine, + // Read the parent through the same store the engine wrote it to. + runStore: engine.runStore, + eventRepository: { + repository: new EventRepository(prisma, prisma, { + batchSize: 100, + batchInterval: 1000, + retentionInDays: 30, + partitioningEnabled: false, + }), + store: "taskEvent", + }, + }); +} + +function makeEngine(prisma: any, redisOptions: any) { + return new RunEngine({ + prisma, + worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10, pollIntervalMs: 100 }, + queue: { redis: redisOptions }, + runLock: { redis: redisOptions }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); +} + +describe("TriggerFailedTaskService — failed run residency", () => { + containerTest( + "root failed run mints cuid when split is off (call)", + async ({ prisma, redisOptions }) => { + const engine = makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const service = makeService(prisma, engine); + + const friendlyId = await service.call({ + taskId: taskIdentifier, + environment, + payload: { test: "root" }, + errorMessage: "boom", + }); + + expect(friendlyId).toBeTruthy(); + expect(classifyKind(friendlyId!)).toBe("cuid"); + + // The failed run write must land (persistence) with no parent linkage. + const persisted = await prisma.taskRun.findFirst({ where: { friendlyId: friendlyId! 
} }); + expect(persisted).not.toBeNull(); + expect(persisted!.status).toBe("SYSTEM_FAILURE"); + expect(persisted!.depth).toBe(0); + expect(persisted!.parentTaskRunId).toBeNull(); + + await engine.quit(); + } + ); + + containerTest( + "failed child of a NEW (ksuid) parent mints ksuid (call)", + async ({ prisma, redisOptions }) => { + const engine = makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const parentFriendlyId = RunId.toFriendlyId(generateKsuidId()); + expect(classifyKind(parentFriendlyId)).toBe("ksuid"); + await engine.trigger( + { + friendlyId: parentFriendlyId, + environment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + traceId: "00000000000000000000000000000000", + spanId: "0000000000000000", + workerQueue: "main", + queue: `task/${taskIdentifier}`, + isTest: false, + tags: [], + } as any, + prisma + ); + + const service = makeService(prisma, engine); + + const friendlyId = await service.call({ + taskId: taskIdentifier, + environment, + payload: { test: "child" }, + errorMessage: "boom", + parentRunId: parentFriendlyId, + }); + + expect(classifyKind(friendlyId!)).toBe("ksuid"); + + // The failed run write must land (persistence) and link to the resolved parent. + const persisted = await prisma.taskRun.findFirst({ where: { friendlyId: friendlyId! } }); + expect(persisted).not.toBeNull(); + expect(persisted!.status).toBe("SYSTEM_FAILURE"); + + const parent = await prisma.taskRun.findFirst({ where: { friendlyId: parentFriendlyId } }); + expect(persisted!.parentTaskRunId).toBe(parent!.id); + expect(persisted!.depth).toBe(parent!.depth + 1); + expect(persisted!.rootTaskRunId).toBe(parent!.rootTaskRunId ?? 
parent!.id); + + await engine.quit(); + } + ); + + containerTest( + "failed child of a LEGACY (cuid) parent mints cuid (call)", + async ({ prisma, redisOptions }) => { + const engine = makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const parentFriendlyId = RunId.generate().friendlyId; // cuid → LEGACY + expect(classifyKind(parentFriendlyId)).toBe("cuid"); + await engine.trigger( + { + friendlyId: parentFriendlyId, + environment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + traceId: "00000000000000000000000000000000", + spanId: "0000000000000000", + workerQueue: "main", + queue: `task/${taskIdentifier}`, + isTest: false, + tags: [], + } as any, + prisma + ); + + const service = makeService(prisma, engine); + + const friendlyId = await service.call({ + taskId: taskIdentifier, + environment, + payload: { test: "child" }, + errorMessage: "boom", + parentRunId: parentFriendlyId, + }); + + expect(classifyKind(friendlyId!)).toBe("cuid"); + + await engine.quit(); + } + ); + + containerTest( + "failed child of a NEW parent mints ksuid (callWithoutTraceEvents)", + async ({ prisma, redisOptions }) => { + const engine = makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const parentFriendlyId = RunId.toFriendlyId(generateKsuidId()); + await engine.trigger( + { + friendlyId: parentFriendlyId, + environment, + taskIdentifier, + payload: "{}", + payloadType: "application/json", + traceId: "00000000000000000000000000000000", + spanId: "0000000000000000", + workerQueue: "main", + queue: `task/${taskIdentifier}`, + isTest: false, + tags: [], + } as any, + prisma + ); + + const service = 
makeService(prisma, engine); + + const friendlyId = await service.callWithoutTraceEvents({ + environmentId: environment.id, + environmentType: environment.type, + projectId: environment.projectId, + organizationId: environment.organizationId, + taskId: taskIdentifier, + payload: { test: "child" }, + errorMessage: "boom", + parentRunId: parentFriendlyId, + }); + + expect(classifyKind(friendlyId!)).toBe("ksuid"); + + await engine.quit(); + } + ); + + containerTest( + "callWithoutTraceEvents returns null (best-effort) when the derived parent row is absent", + async ({ prisma, redisOptions }) => { + const engine = makeEngine(prisma, redisOptions); + const environment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "failed-residency-task"; + await setupBackgroundWorker(engine, environment, taskIdentifier); + + const service = makeService(prisma, engine); + + // A well-formed ksuid parent friendlyId that was NEVER triggered → no row. + // Exercises the missing-parent fallback in callWithoutTraceEvents. + const absentParentFriendlyId = RunId.toFriendlyId(generateKsuidId()); + + const friendlyId = await service.callWithoutTraceEvents({ + environmentId: environment.id, + environmentType: environment.type, + projectId: environment.projectId, + organizationId: environment.organizationId, + taskId: taskIdentifier, + payload: { test: "absent-parent" }, + errorMessage: "boom", + parentRunId: absentParentFriendlyId, + }); + + // Fallback derives parentTaskRunId from an id with no row; the parentTaskRunId FK rejects the create, so the method returns null instead of throwing. 
+ expect(friendlyId).toBeNull(); + const orphan = await prisma.taskRun.findFirst({ + where: { parentTaskRunId: RunId.fromFriendlyId(absentParentFriendlyId) }, + }); + expect(orphan).toBeNull(); + + await engine.quit(); + } + ); +}); diff --git a/apps/webapp/test/engine/triggerTask.test.ts b/apps/webapp/test/engine/triggerTask.test.ts index 235c67637b3..ba1452abceb 100644 --- a/apps/webapp/test/engine/triggerTask.test.ts +++ b/apps/webapp/test/engine/triggerTask.test.ts @@ -1,11 +1,18 @@ import { describe, expect, vi } from "vitest"; -// Mock the db prisma client +// Mock the db prisma client. The run-ops handles are stubbed so the idempotency +// dedup import resolves; with split off (below) they are never used — the concern's +// constructor prisma is passed through to every store call. vi.mock("~/db.server", () => ({ prisma: {}, $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, })); +// Keep split off so resolveIdempotencyDedupClient returns the passed container client. +vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ isSplitEnabled: async () => false })); + vi.mock("~/services/platform.v3.server", async (importOriginal) => { const actual = (await importOriginal()) as Record; return { @@ -19,6 +26,12 @@ import { setupAuthenticatedEnvironment, setupBackgroundWorker } from "@internal/ import { assertNonNullable, containerTest } from "@internal/testcontainers"; import { trace } from "@opentelemetry/api"; import type { IOPacket } from "@trigger.dev/core/v3"; +import { + RunId, + classifyKind, + generateInternalId, + generateKsuidId, +} from "@trigger.dev/core/v3/isomorphic"; import type { TaskRun } from "@trigger.dev/database"; import { Redis } from "ioredis"; import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; @@ -42,7 +55,7 @@ import { RunEngineTriggerTaskService } from "../../app/runEngine/services/trigge import { promiseWithResolvers } from "@trigger.dev/core"; import { setTimeout } from 
"node:timers/promises"; -vi.setConfig({ testTimeout: 60_000 }); // 60 seconds timeout +vi.setConfig({ testTimeout: 60_000 }); class MockPayloadProcessor implements PayloadProcessor { async process(request: TriggerTaskRequest): Promise { @@ -211,7 +224,6 @@ describe("RunEngineTriggerTaskService", () => { const taskIdentifier = "test-task"; - //create background worker await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); const queuesManager = new DefaultQueueManager(prisma, engine); @@ -489,7 +501,6 @@ describe("RunEngineTriggerTaskService", () => { const taskIdentifier = "test-task"; - //create background worker await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); const queuesManager = new DefaultQueueManager(prisma, engine); @@ -605,13 +616,12 @@ describe("RunEngineTriggerTaskService", () => { const taskIdentifier = "test-task"; - //create background worker await setupBackgroundWorker(engine, authenticatedEnvironment, [parentTask, taskIdentifier]); const parentRun1 = await engine.trigger( { number: 1, - friendlyId: "run_p1", + friendlyId: "run_cmqxvncxq0000kaulzpafkicv", environment: authenticatedEnvironment, taskIdentifier: parentTask, payload: "{}", @@ -642,7 +652,7 @@ describe("RunEngineTriggerTaskService", () => { const parentRun2 = await engine.trigger( { number: 2, - friendlyId: "run_p2", + friendlyId: "run_cmqxvncxr0001kauldv9mqa9z", environment: authenticatedEnvironment, taskIdentifier: parentTask, payload: "{}", @@ -1116,7 +1126,7 @@ describe("RunEngineTriggerTaskService", () => { const parentRun1 = await engine.trigger( { number: 1, - friendlyId: "run_p1", + friendlyId: "run_cmqxvncxq0000kaulzpafkicv", environment: authenticatedEnvironment, taskIdentifier: parentTask, payload: "{}", @@ -1146,7 +1156,7 @@ describe("RunEngineTriggerTaskService", () => { const parentRun2 = await engine.trigger( { number: 2, - friendlyId: "run_p2", + friendlyId: "run_cmqxvncxr0001kauldv9mqa9z", environment: 
authenticatedEnvironment, taskIdentifier: parentTask, payload: "{}", @@ -2276,3 +2286,164 @@ describe("DefaultQueueManager task metadata cache", () => { } ); }); + +describe("RunEngineTriggerTaskService — child run residency inheritance", () => { + // Helper: stand up an engine + service wired for a single (real) Postgres/Redis + // pair. Returns the service plus the authenticated environment and a registered + // task identifier. + async function setupResidencyService(prisma: any, redisOptions: any) { + const engine = new RunEngine({ + prisma, + worker: { redis: redisOptions, workers: 1, tasksPerWorker: 10, pollIntervalMs: 100 }, + queue: { redis: redisOptions }, + runLock: { redis: redisOptions }, + machines: { + defaultMachine: "small-1x", + machines: { + "small-1x": { + name: "small-1x" as const, + cpu: 0.5, + memory: 0.5, + centsPerMs: 0.0001, + }, + }, + baseCostInCents: 0.0005, + }, + tracer: trace.getTracer("test", "0.0.0"), + }); + + const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION"); + const taskIdentifier = "residency-task"; + await setupBackgroundWorker(engine, authenticatedEnvironment, taskIdentifier); + + const queuesManager = new DefaultQueueManager(prisma, engine); + const idempotencyKeyConcern = new IdempotencyKeyConcern( + prisma, + engine, + new MockTraceEventConcern() + ); + + const triggerTaskService = new RunEngineTriggerTaskService({ + engine, + prisma, + payloadProcessor: new MockPayloadProcessor(), + queueConcern: queuesManager, + idempotencyKeyConcern, + validator: new MockTriggerTaskValidator(), + traceEventConcern: new MockTraceEventConcern(), + tracer: trace.getTracer("test", "0.0.0"), + metadataMaximumSize: 1024 * 1024 * 1, + }); + + return { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService }; + } + + containerTest( + "root run mints by the env flag (cuid when split is off)", + async ({ prisma, redisOptions }) => { + const { engine, authenticatedEnvironment, taskIdentifier, 
triggerTaskService } = + await setupResidencyService(prisma, redisOptions); + + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "root" } }, + }); + + expect(result?.run.friendlyId).toBeDefined(); + // Split disabled in CI ⇒ flag resolves "cuid". + expect(classifyKind(result!.run.friendlyId)).toBe("cuid"); + + await engine.quit(); + } + ); + + containerTest( + "child of a LEGACY (cuid) parent is minted cuid (born LEGACY)", + async ({ prisma, redisOptions }) => { + const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = + await setupResidencyService(prisma, redisOptions); + + // Root parent — cuid in CI (split off). + const parent = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "parent" } }, + }); + expect(classifyKind(parent!.run.friendlyId)).toBe("cuid"); + + const child = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "child" }, options: { parentRunId: parent!.run.friendlyId } }, + }); + + expect(classifyKind(child!.run.friendlyId)).toBe("cuid"); + + await engine.quit(); + } + ); + + containerTest( + "child of a NEW (ksuid) parent is minted ksuid (born NEW)", + async ({ prisma, redisOptions }) => { + const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = + await setupResidencyService(prisma, redisOptions); + + // Construct a NEW-resident parent directly by minting a ksuid friendlyId + // and creating its run row, so the child inherits NEW by id-shape alone + // (no marker needed). We trigger the parent with an explicit ksuid id via + // the runFriendlyId option so the row physically exists for the parent + // lookup the child path performs. 
+ const parentFriendlyId = RunId.toFriendlyId( + // 27-char ksuid → classifies NEW + generateKsuidId() + ); + expect(classifyKind(parentFriendlyId)).toBe("ksuid"); + + const parent = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "parent" } }, + options: { runFriendlyId: parentFriendlyId }, + }); + expect(parent!.run.friendlyId).toBe(parentFriendlyId); + + const child = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "child" }, options: { parentRunId: parentFriendlyId } }, + }); + + expect(classifyKind(child!.run.friendlyId)).toBe("ksuid"); + + await engine.quit(); + } + ); + + containerTest( + "caller-supplied runFriendlyId wins verbatim and skips residency inheritance", + async ({ prisma, redisOptions }) => { + const { engine, authenticatedEnvironment, taskIdentifier, triggerTaskService } = + await setupResidencyService(prisma, redisOptions); + + // Explicit cuid id for the run, and a ksuid/NEW parent id. + const explicitFriendlyId = RunId.toFriendlyId(generateInternalId()); + const parentFriendlyId = RunId.toFriendlyId(generateKsuidId()); + expect(classifyKind(explicitFriendlyId)).toBe("cuid"); + expect(classifyKind(parentFriendlyId)).toBe("ksuid"); + + const result = await triggerTaskService.call({ + taskId: taskIdentifier, + environment: authenticatedEnvironment, + body: { payload: { test: "explicit" }, options: { parentRunId: parentFriendlyId } }, + options: { runFriendlyId: explicitFriendlyId }, + }); + + // Caller-supplied id wins verbatim — NOT re-minted to ksuid despite the NEW parent.
+ expect(result!.run.friendlyId).toBe(explicitFriendlyId); + + await engine.quit(); + } + ); +}); diff --git a/apps/webapp/test/idempotencyDedupResidency.test.ts b/apps/webapp/test/idempotencyDedupResidency.test.ts new file mode 100644 index 00000000000..d8ab8d934cb --- /dev/null +++ b/apps/webapp/test/idempotencyDedupResidency.test.ts @@ -0,0 +1,184 @@ +import { heteroPostgresTest } from "@internal/testcontainers"; +import type { PrismaClient } from "@trigger.dev/database"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { describe, expect, vi } from "vitest"; + +// Stub so the runStore singleton doesn't eagerly connect at import. +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, +})); +// Keep split off so resolveIdempotencyDedupClient returns this.prisma (the hetero fixture client). +vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ isSplitEnabled: async () => false })); + +import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; +import type { TriggerTaskRequest } from "~/runEngine/types"; + +vi.setConfig({ testTimeout: 60_000 }); + +function makeConcern(client: PrismaClient) { + return new IdempotencyKeyConcern(client as never, {} as never, {} as never); +} + +function makeRequest(opts: { + environmentId: string; + organizationId: string; + projectId: string; + taskId: string; + idempotencyKey: string; +}): TriggerTaskRequest { + return { + taskId: opts.taskId, + environment: { + id: opts.environmentId, + organizationId: opts.organizationId, + projectId: opts.projectId, + organization: { featureFlags: {} }, + }, + options: {}, + body: { options: { idempotencyKey: opts.idempotencyKey } }, + } as unknown as TriggerTaskRequest; +} + +async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const 
project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `test-${suffix}`, + pkApiKey: `test-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, runtimeEnvironment }; +} + +async function seedRun( + prisma: PrismaClient, + args: { + runtimeEnvironmentId: string; + projectId: string; + organizationId: string; + taskIdentifier: string; + idempotencyKey: string; + status?: "PENDING" | "EXECUTING" | "COMPLETED_SUCCESSFULLY" | "COMPLETED_WITH_ERRORS"; + } +) { + const runId = generateKsuidId(); + return prisma.taskRun.create({ + data: { + id: runId, + friendlyId: `run_${runId}`, + taskIdentifier: args.taskIdentifier, + idempotencyKey: args.idempotencyKey, + idempotencyKeyExpiresAt: null, + status: args.status ?? "EXECUTING", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: args.runtimeEnvironmentId, + projectId: args.projectId, + organizationId: args.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); +} + +describe("IdempotencyKeyConcern · residency-routed dedup (cross-DB)", () => { + heteroPostgresTest( + "a would-be-new run resolves its key against the new (PG17) DB, not the legacy (PG14) DB", + async ({ prisma14, prisma17 }) => { + // Same env shape on both DBs. 
+ const legacy = await seedOrgProjectEnv(prisma14, "resid-legacy"); + const next = await seedOrgProjectEnv(prisma17, "resid-new"); + + const key = "idem-resid-1"; + + const newRun = await seedRun(prisma17, { + runtimeEnvironmentId: next.runtimeEnvironment.id, + projectId: next.project.id, + organizationId: next.organization.id, + taskIdentifier: "my-task", + idempotencyKey: key, + status: "EXECUTING", + }); + + const concernOnNew = makeConcern(prisma17); + const hit = await concernOnNew.handleTriggerRequest( + makeRequest({ + environmentId: next.runtimeEnvironment.id, + organizationId: next.organization.id, + projectId: next.project.id, + taskId: "my-task", + idempotencyKey: key, + }), + undefined + ); + expect(hit.isCached).toBe(true); + if (hit.isCached === true) { + expect(hit.run.id).toBe(newRun.id); + } + + // The legacy DB holds no row for this key — a legacy-pinned probe would miss it. + const legacyMatches = await prisma14.taskRun.count({ + where: { + runtimeEnvironmentId: legacy.runtimeEnvironment.id, + taskIdentifier: "my-task", + idempotencyKey: key, + }, + }); + expect(legacyMatches).toBe(0); + } + ); + + heteroPostgresTest( + "a would-be-legacy run still resolves its key against the legacy (PG14) DB", + async ({ prisma14 }) => { + const legacy = await seedOrgProjectEnv(prisma14, "resid-legacy-only"); + const key = "idem-resid-legacy"; + + const legacyRun = await seedRun(prisma14, { + runtimeEnvironmentId: legacy.runtimeEnvironment.id, + projectId: legacy.project.id, + organizationId: legacy.organization.id, + taskIdentifier: "my-task", + idempotencyKey: key, + status: "EXECUTING", + }); + + const concernOnLegacy = makeConcern(prisma14); + const hit = await concernOnLegacy.handleTriggerRequest( + makeRequest({ + environmentId: legacy.runtimeEnvironment.id, + organizationId: legacy.organization.id, + projectId: legacy.project.id, + taskId: "my-task", + idempotencyKey: key, + }), + undefined + ); + expect(hit.isCached).toBe(true); + if (hit.isCached === 
true) { + expect(hit.run.id).toBe(legacyRun.id); + } + } + ); +}); diff --git a/apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts b/apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts new file mode 100644 index 00000000000..5434567d42e --- /dev/null +++ b/apps/webapp/test/idempotencyKeyConcernLegacyAuthority.test.ts @@ -0,0 +1,306 @@ +import { heteroPostgresTest } from "@internal/testcontainers"; +import type { PrismaClient } from "@trigger.dev/database"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { describe, expect, vi } from "vitest"; + +// Stub `~/db.server` so the `runStore` singleton doesn't eagerly connect at +// import. The concern passes its constructor `prisma` arg as the explicit +// client/tx to every store call, so the singleton's bound handles are never +// exercised — the passed client runs the query. Mirrors the shipped +// `mollifierClaimResolution` test: env-wiring mock only; the DB under test is +// the real PG14 + PG17 hetero-fixture containers. +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, +})); +// Keep split off so resolveIdempotencyDedupClient returns this.prisma (the hetero fixture client). +vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ isSplitEnabled: async () => false })); + +import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; +import type { TriggerTaskRequest } from "~/runEngine/types"; + +vi.setConfig({ testTimeout: 60_000 }); + +// The constructor `prisma` arg is the client the four store sites execute +// against. With the run-ops split off (mocked above) the dedup resolver is a +// pass-through that returns this same client, so constructing with the PG14 or +// PG17 fixture client decides which DB the residency-routed dedup probe reads. 
+function makeConcern(client: PrismaClient) { + return new IdempotencyKeyConcern( + client as never, + {} as never, // engine — unused on the reuse / clear paths + {} as never // traceEventConcern — unused on the reuse / clear paths + ); +} + +function makeRequest(opts: { + environmentId: string; + organizationId: string; + projectId: string; + taskId: string; + idempotencyKey: string; +}): TriggerTaskRequest { + return { + taskId: opts.taskId, + environment: { + id: opts.environmentId, + organizationId: opts.organizationId, + projectId: opts.projectId, + // Leave the org mollifier flag unset so the pre-gate claim path is + // skipped — this test exercises the PG existing-run lookup + clear, + // not the Redis claim. (resolveOrgMollifierFlag returns falsy for an + // org with no mollifier flag, so claimEligible is false.) + organization: { featureFlags: {} }, + }, + options: {}, + body: { options: { idempotencyKey: opts.idempotencyKey } }, + } as unknown as TriggerTaskRequest; +} + +async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `test-${suffix}`, + pkApiKey: `test-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, runtimeEnvironment }; +} + +async function seedRun( + prisma: PrismaClient, + args: { + runtimeEnvironmentId: string; + projectId: string; + organizationId: string; + taskIdentifier: string; + idempotencyKey: string; + status?: "PENDING" | "EXECUTING" | "COMPLETED_SUCCESSFULLY" | 
"COMPLETED_WITH_ERRORS"; + idempotencyKeyExpiresAt?: Date; + } +) { + const runId = generateKsuidId(); + return prisma.taskRun.create({ + data: { + id: runId, + friendlyId: `run_${runId}`, + taskIdentifier: args.taskIdentifier, + idempotencyKey: args.idempotencyKey, + idempotencyKeyExpiresAt: args.idempotencyKeyExpiresAt ?? null, + status: args.status ?? "EXECUTING", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: args.runtimeEnvironmentId, + projectId: args.projectId, + organizationId: args.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); +} + +describe("IdempotencyKeyConcern · residency-routed dedup (cross-DB)", () => { + heteroPostgresTest( + "resolves a legacy-resident key against the legacy DB; a key whose run lives on the new DB is resolved against the new DB", + async ({ prisma14, prisma17 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "legacy-auth" + ); + + // Seed the same org/project/env shape on the NEW (PG17) DB so we can + // place a row there for a *different* key — proving the legacy-pinned + // read does not see it. + const newSide = await seedOrgProjectEnv(prisma17, "new-side"); + + const reusedKey = "idem-reuse-1"; + + // The authoritative existing run lives on the LEGACY (PG14) DB. + const legacyRun = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: reusedKey, + status: "EXECUTING", + }); + + // A row for a DIFFERENT key lives only on the NEW (PG17) DB. 
+ const newOnlyKey = "idem-new-only"; + await seedRun(prisma17, { + runtimeEnvironmentId: newSide.runtimeEnvironment.id, + projectId: newSide.project.id, + organizationId: newSide.organization.id, + taskIdentifier: "my-task", + idempotencyKey: newOnlyKey, + status: "EXECUTING", + }); + + const concern = makeConcern(prisma14); + + // (1) Reuse with the legacy key resolves the legacy-seeded run. + const reuse = await concern.handleTriggerRequest( + makeRequest({ + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + projectId: project.id, + taskId: "my-task", + idempotencyKey: reusedKey, + }), + undefined + ); + expect(reuse.isCached).toBe(true); + if (reuse.isCached === true) { + expect(reuse.run.id).toBe(legacyRun.id); + } + + // Exactly one run matches the key on the legacy DB — no duplicate. + const legacyMatches = await prisma14.taskRun.count({ + where: { + runtimeEnvironmentId: runtimeEnvironment.id, + taskIdentifier: "my-task", + idempotencyKey: reusedKey, + }, + }); + expect(legacyMatches).toBe(1); + + // (2) A key whose run lives on the new DB is resolved against the new DB. + const concernOnNew = makeConcern(prisma17); + const newSideHit = await concernOnNew.handleTriggerRequest( + makeRequest({ + environmentId: newSide.runtimeEnvironment.id, + organizationId: newSide.organization.id, + projectId: newSide.project.id, + taskId: "my-task", + idempotencyKey: newOnlyKey, + }), + undefined + ); + expect(newSideHit.isCached).toBe(true); + if (newSideHit.isCached === true) { + expect(newSideHit.run.idempotencyKey).toBe(newOnlyKey); + } + + // (3) An unknown key on the legacy env does not wrongly return the + // stale legacy hit for a different key. 
+ const unknown = await concern.handleTriggerRequest( + makeRequest({ + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + projectId: project.id, + taskId: "my-task", + idempotencyKey: "idem-never-seen", + }), + undefined + ); + expect(unknown.isCached).toBe(false); + } + ); + + heteroPostgresTest( + "cleared-status reuse clears the key on the legacy (PG14) DB and proceeds with a fresh trigger", + async ({ prisma14 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "cleared-status" + ); + + const key = "idem-cleared-1"; + + // Existing run is in a failed (cleared) status — the concern must + // clear its key against the legacy authority and return isCached:false. + const legacyRun = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: key, + status: "COMPLETED_WITH_ERRORS", + }); + + const concern = makeConcern(prisma14); + + const result = await concern.handleTriggerRequest( + makeRequest({ + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + projectId: project.id, + taskId: "my-task", + idempotencyKey: key, + }), + undefined + ); + + // A fresh trigger proceeds (not cached). + expect(result.isCached).toBe(false); + + // The clear executed against the legacy (PG14) DB: re-query PG14 and + // assert the key + its expiry are now null on the seeded run. 
+ const cleared = await prisma14.taskRun.findFirst({ where: { id: legacyRun.id } }); + expect(cleared?.idempotencyKey).toBeNull(); + expect(cleared?.idempotencyKeyExpiresAt).toBeNull(); + } + ); + + heteroPostgresTest( + "expired idempotency key is cleared on the legacy (PG14) DB and a fresh trigger proceeds", + async ({ prisma14 }) => { + const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv( + prisma14, + "expired-key" + ); + + const key = "idem-expired-1"; + + const legacyRun = await seedRun(prisma14, { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + taskIdentifier: "my-task", + idempotencyKey: key, + status: "EXECUTING", + idempotencyKeyExpiresAt: new Date(Date.now() - 60_000), // already expired + }); + + const concern = makeConcern(prisma14); + + const result = await concern.handleTriggerRequest( + makeRequest({ + environmentId: runtimeEnvironment.id, + organizationId: organization.id, + projectId: project.id, + taskId: "my-task", + idempotencyKey: key, + }), + undefined + ); + + expect(result.isCached).toBe(false); + + const cleared = await prisma14.taskRun.findFirst({ where: { id: legacyRun.id } }); + expect(cleared?.idempotencyKey).toBeNull(); + expect(cleared?.idempotencyKeyExpiresAt).toBeNull(); + } + ); +}); diff --git a/apps/webapp/test/mollifierClaimResolution.test.ts b/apps/webapp/test/mollifierClaimResolution.test.ts index 7a2a0c1e546..c35c24c1c84 100644 --- a/apps/webapp/test/mollifierClaimResolution.test.ts +++ b/apps/webapp/test/mollifierClaimResolution.test.ts @@ -3,8 +3,17 @@ import { describe, expect, it, vi } from "vitest"; // Stub `~/db.server` before importing the concern — the real module // eagerly calls `prisma.$connect()` at singleton construction, which // would fail without a database. The concern under test receives its -// prisma via the constructor, so the stub is never used by the code path. 
-vi.mock("~/db.server", () => ({ prisma: {}, $replica: {} })); +// prisma via the constructor, so these empty stubs are never used by the +// tested path; the run-ops singletons only satisfy the concern's static +// imports (vitest validates every named import against the mock). +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, + runOpsNewPrisma: {}, + runOpsLegacyPrisma: {}, + runOpsNewReplica: {}, + runOpsLegacyReplica: {}, +})); // The IdempotencyKeyConcern resolves the pre-gate claim through the // global mollifier buffer (`getMollifierBuffer`), shared by both @@ -22,6 +31,13 @@ vi.mock("~/v3/mollifier/mollifierBuffer.server", () => ({ vi.mock("~/v3/mollifier/mollifierGate.server", () => ({ makeResolveMollifierFlag: () => async () => h.orgFlag, })); +// Pin the idempotency dedup routing to the injected fake prisma: split OFF +// makes resolveIdempotencyDedupClient return the concern's constructor client, +// so these tests exercise claim resolution deterministically regardless of the +// ambient RUN_OPS_SPLIT_ENABLED (the split path routes to the empty runOps mocks). +vi.mock("~/v3/runOpsMigration/splitMode.server", () => ({ + isSplitEnabled: async () => false, +})); import type { MollifierBuffer } from "@trigger.dev/redis-worker"; import { IdempotencyKeyConcern } from "~/runEngine/concerns/idempotencyKeys.server"; diff --git a/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts new file mode 100644 index 00000000000..707a3546cf8 --- /dev/null +++ b/apps/webapp/test/performTaskRunAlertsStoreRouting.test.ts @@ -0,0 +1,355 @@ +// Real heterogeneous legacy + new Postgres proof for the alert-hydration TaskRun read. +// The DB is never mocked. A test-only RunStore wraps two real PostgresRunStore +// instances and routes findRun by id residency (ksuid → NEW, cuid → LEGACY), +// mirroring the sibling routing suite. The ProjectAlertChannel read must stay control-plane. 
+// +// The alert env-type read (parentEnvironment?.type ?? type) is resolved via the app +// ControlPlaneResolver over a control-plane client DISTINCT from the run-ops store, proving the +// cross-provider inversion. The prior version co-located env + run and masked it. +import { heteroPostgresTest, postgresTest } from "@internal/testcontainers"; +import { PostgresRunStore } from "@internal/run-store"; +import type { ReadClient, RunStore } from "@internal/run-store"; +import type { Prisma, PrismaClient } from "@trigger.dev/database"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import { describe, expect } from "vitest"; +import { ControlPlaneCache } from "~/v3/runOpsMigration/controlPlaneCache.server"; +import { ControlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server"; +import { PerformTaskRunAlertsService } from "~/v3/services/alerts/performTaskRunAlerts.server"; + +function buildControlPlaneResolver(controlPlane: PrismaClient) { + return new ControlPlaneResolver({ + controlPlanePrimary: controlPlane, + controlPlaneReplica: controlPlane, + cache: new ControlPlaneCache({ ttlMs: 60_000, maxEntries: 100 }), + // Split OFF: plain control-plane query every call, byte-identical to the inline join. + splitEnabled: () => false, + }); +} + +vi.setConfig({ testTimeout: 60_000 }); + +// Test-only routing store: resolve findRun by id length (27 → NEW, else LEGACY), +// dropping any forwarded client so each inner store uses its OWN prisma. NOT a mock — +// real DB I/O against two PostgresRunStore instances. +class RoutingRunStore implements RunStore { + readonly #newStore: PostgresRunStore; + readonly #legacyStore: PostgresRunStore; + + constructor(newStore: PostgresRunStore, legacyStore: PostgresRunStore) { + this.#newStore = newStore; + this.#legacyStore = legacyStore; + } + + #resolveById(runId: string): PostgresRunStore { + return runId.length === 27 ? 
this.#newStore : this.#legacyStore; + } + + #idFromWhere(where: Prisma.TaskRunWhereInput): string | undefined { + const id = (where as { id?: unknown }).id; + return typeof id === "string" ? id : undefined; + } + + async findRun( + where: Prisma.TaskRunWhereInput, + argsOrClient?: { select?: Prisma.TaskRunSelect; include?: Prisma.TaskRunInclude } | ReadClient, + _client?: ReadClient + ): Promise { + const id = this.#idFromWhere(where); + if (id !== undefined) { + return (this.#resolveById(id).findRun as any)(where, argsOrClient); + } + const fromNew = await (this.#newStore.findRun as any)(where, argsOrClient); + return fromNew ?? (this.#legacyStore.findRun as any)(where, argsOrClient); + } + + // The remaining RunStore methods are not exercised here; they exist only to satisfy the + // interface (id-less reads delegate to NEW; id-keyed methods route by id length). + findRunOrThrow(...a: any[]): any { + return (this.#newStore.findRunOrThrow as any)(...a); + } + findRuns(...a: any[]): any { + return (this.#newStore.findRuns as any)(...a); + } + createRun(p: any, tx?: any): any { + return this.#resolveById(p.data.id).createRun(p, tx); + } + createCancelledRun(p: any, tx?: any): any { + return this.#resolveById(p.data.id).createCancelledRun(p, tx); + } + createFailedRun(p: any, tx?: any): any { + return this.#resolveById(p.data.id).createFailedRun(p, tx); + } + updateMetadata(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).updateMetadata as any)(...[runId, ...a]); + } + startAttempt(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).startAttempt as any)(runId, ...a); + } + completeAttemptSuccess(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).completeAttemptSuccess as any)(runId, ...a); + } + recordRetryOutcome(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).recordRetryOutcome as any)(runId, ...a); + } + requeueRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).requeueRun as any)(runId, ...a); + } + 
recordBulkActionMembership(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).recordBulkActionMembership as any)(runId, ...a); + } + cancelRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).cancelRun as any)(runId, ...a); + } + failRunPermanently(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).failRunPermanently as any)(runId, ...a); + } + expireRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).expireRun as any)(runId, ...a); + } + expireRunsBatch(runIds: string[], ...a: any[]): any { + return (this.#resolveById(runIds[0] ?? "").expireRunsBatch as any)(runIds, ...a); + } + lockRunToWorker(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).lockRunToWorker as any)(runId, ...a); + } + parkPendingVersion(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).parkPendingVersion as any)(runId, ...a); + } + promotePendingVersionRuns(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).promotePendingVersionRuns as any)(runId, ...a); + } + suspendForCheckpoint(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).suspendForCheckpoint as any)(runId, ...a); + } + resumeFromCheckpoint(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).resumeFromCheckpoint as any)(runId, ...a); + } + rescheduleRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).rescheduleRun as any)(runId, ...a); + } + enqueueDelayedRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).enqueueDelayedRun as any)(runId, ...a); + } + rewriteDebouncedRun(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).rewriteDebouncedRun as any)(runId, ...a); + } + clearIdempotencyKey(params: any, tx?: any): any { + const runId = params?.byId?.runId ?? 
""; + return this.#resolveById(runId).clearIdempotencyKey(params, tx); + } + pushTags(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).pushTags as any)(runId, ...a); + } + pushRealtimeStream(runId: string, ...a: any[]): any { + return (this.#resolveById(runId).pushRealtimeStream as any)(runId, ...a); + } +} + +function buildRoutingStore(prisma17: PrismaClient, prisma14: PrismaClient) { + const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + return new RoutingRunStore(newStore, legacyStore); +} + +async function seedProject(prisma: PrismaClient, suffix: string) { + const organization = await prisma.organization.create({ + data: { title: `test-${suffix}`, slug: `test-${suffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `test-${suffix}`, + slug: `test-${suffix}`, + organizationId: organization.id, + externalRef: `test-${suffix}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `test-${suffix}`, + type: "PRODUCTION", + projectId: project.id, + organizationId: organization.id, + apiKey: `test-${suffix}`, + pkApiKey: `test-${suffix}`, + shortcode: `test-${suffix}`, + }, + }); + return { organization, project, runtimeEnvironment }; +} + +async function seedRun( + prisma: PrismaClient, + ids: { id: string; friendlyId: string }, + env: { runtimeEnvironmentId: string; projectId: string; organizationId: string } +) { + return prisma.taskRun.create({ + data: { + id: ids.id, + friendlyId: ids.friendlyId, + taskIdentifier: "my-task", + payload: JSON.stringify({ foo: "bar" }), + payloadType: "application/json", + traceId: "1234", + spanId: "1234", + queue: "test", + runtimeEnvironmentId: env.runtimeEnvironmentId, + projectId: env.projectId, + organizationId: env.organizationId, + environmentType: "PRODUCTION", + engine: "V2", + status: 
"COMPLETED_SUCCESSFULLY", + }, + }); +} + +const TASK_RUN_CROSS_SEAM_FKS = [ + "TaskRun_runtimeEnvironmentId_fkey", + "TaskRun_projectId_fkey", + "TaskRun_organizationId_fkey", +] as const; + +async function dropTaskRunCrossSeamFks(prisma: PrismaClient) { + for (const constraint of TASK_RUN_CROSS_SEAM_FKS) { + await prisma.$executeRawUnsafe( + `ALTER TABLE "TaskRun" DROP CONSTRAINT IF EXISTS "${constraint}"` + ); + } +} + +describe("PerformTaskRunAlertsService store routing (hetero)", () => { + heteroPostgresTest( + "env type resolves via the control-plane resolver (distinct DB) while the run resolves on the run-ops store", + async ({ prisma17, prisma14 }) => { + const id = generateKsuidId(); + const friendlyId = `run_${id}`; + + // Cloud shape: run-ops = the new DB (cross-seam FKs dropped), control-plane = the legacy DB. + // The control-plane ProjectAlert -> run-ops TaskRun FK is also dropped on the control-plane DB. + await dropTaskRunCrossSeamFks(prisma17); + await prisma14.$executeRawUnsafe( + `ALTER TABLE "ProjectAlert" DROP CONSTRAINT IF EXISTS "ProjectAlert_taskRunId_fkey"` + ); + + // Org/project/env + a PARENT env + the alert channel are control-plane → the control-plane DB. + const { project, organization } = await seedProject(prisma14, "cp"); + // A branch env whose parent type drives the channel filter (parentEnvironmentType ?? type). 
+ const parentEnv = await prisma14.runtimeEnvironment.create({ + data: { + slug: "cp-parent", + type: "PRODUCTION", + projectId: project.id, + organizationId: organization.id, + apiKey: "cp-parent", + pkApiKey: "cp-parent", + shortcode: "cp-parent", + }, + }); + const childEnv = await prisma14.runtimeEnvironment.create({ + data: { + slug: "cp-child", + type: "PREVIEW", + projectId: project.id, + organizationId: organization.id, + apiKey: "cp-child", + pkApiKey: "cp-child", + shortcode: "cp-child", + parentEnvironmentId: parentEnv.id, + }, + }); + + // The run-ops scalar row lives on the run-ops DB, referencing the child (preview) env on the control-plane DB. + await seedRun( + prisma17, + { id, friendlyId }, + { + runtimeEnvironmentId: childEnv.id, + projectId: project.id, + organizationId: organization.id, + } + ); + + // A channel scoped to the PARENT env's type (PRODUCTION). It matches only if the service + // computes parentEnvironmentType ?? type — i.e. the parent's PRODUCTION, not the run env's + // PREVIEW. This proves the resolver's parentEnvironmentType is honoured. + await prisma14.projectAlertChannel.create({ + data: { + friendlyId: `alert_${id}`, + name: "test-channel", + projectId: project.id, + alertTypes: ["TASK_RUN"], + environmentTypes: ["PRODUCTION"], + type: "EMAIL", + properties: { type: "EMAIL", email: "test@example.com" }, + enabled: true, + }, + }); + + // prisma (control-plane channel read) = the control-plane DB; the run-ops read is routed to + // the run-ops DB; the env type is resolved via the resolver over the control-plane client. + const service = new PerformTaskRunAlertsService({ + prisma: prisma14, + runStore: buildRoutingStore(prisma17, prisma14), + controlPlaneResolver: buildControlPlaneResolver(prisma14), + }); + + // The downstream DeliverAlertService.enqueue hits redis (absent here); the projectAlert row + // is created before that, so tolerate the enqueue rejection. 
+ await service.call(id).catch(() => {}); + + // The channel matched on the PARENT env type → a ProjectAlert row was created on the control-plane DB. + const delivered = await prisma14.projectAlert.findMany({ where: { projectId: project.id } }); + expect(delivered.length).toBe(1); + + // Inversion: the run-ops DB holds NO env rows; a co-located join would resolve null. + expect(await prisma17.runtimeEnvironment.count()).toBe(0); + // The run-ops store has the run; the control-plane DB never received it. + expect(await prisma14.taskRun.findFirst({ where: { id } })).toBeNull(); + } + ); +}); + +describe("PerformTaskRunAlertsService passthrough (single-DB)", () => { + postgresTest( + "with the default store, run read + alert-channel read both resolve on the single DB", + async ({ prisma }) => { + const id = generateKsuidId(); + const friendlyId = `run_${id}`; + + const { project, organization, runtimeEnvironment } = await seedProject(prisma, "pt"); + await seedRun( + prisma, + { id, friendlyId }, + { + runtimeEnvironmentId: runtimeEnvironment.id, + projectId: project.id, + organizationId: organization.id, + } + ); + await prisma.projectAlertChannel.create({ + data: { + friendlyId: `alert_${id}`, + name: "test-channel", + projectId: project.id, + alertTypes: ["TASK_RUN"], + environmentTypes: ["PRODUCTION"], + type: "EMAIL", + properties: { type: "EMAIL", email: "test@example.com" }, + enabled: true, + }, + }); + + const service = new PerformTaskRunAlertsService({ + prisma, + // The single-DB default store: a passthrough PostgresRunStore over the one + // container. Injected explicitly so the read resolves on the container the run + // was seeded into, not the ambient module singleton. 
+ runStore: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }), + controlPlaneResolver: buildControlPlaneResolver(prisma), + }); + await service.call(id).catch(() => {}); + + const delivered = await prisma.projectAlert.findMany({ where: { projectId: project.id } }); + expect(delivered.length).toBe(1); + } + ); +}); diff --git a/apps/webapp/test/realtime/runReaderReadThrough.test.ts b/apps/webapp/test/realtime/runReaderReadThrough.test.ts new file mode 100644 index 00000000000..88c4e11e27c --- /dev/null +++ b/apps/webapp/test/realtime/runReaderReadThrough.test.ts @@ -0,0 +1,532 @@ +import { heteroPostgresTest, postgresTest } from "@internal/testcontainers"; +import { PostgresRunStore } from "@internal/run-store"; +import type { ReadClient, RunStore } from "@internal/run-store"; +import { ownerEngine, type Residency } from "@trigger.dev/core/v3/isomorphic"; +import type { Prisma, PrismaClient } from "@trigger.dev/database"; +import { describe, expect, vi } from "vitest"; +import { RunHydrator } from "~/services/realtime/runReader.server"; + +// Realtime read-route proof for the RunHydrator. +// +// On origin/main the realtime RunHydrator's two run-ops reads already flow through the runStore +// seam: `hydrateByIds` -> `runStore.findRuns(..., replica)` and `#fetch` -> `runStore.findRun(..., +// replica)`. The split-aware routing (new-DB-first, legacy READ REPLICA only for ids not +// known-migrated) is the store's job below the seam, so this file proves the hydrator *inherits* +// that routing — plus that the single-flight + short-TTL cache and the skipColumns projection +// (which live in the hydrator, not the store) are unaffected by the seam. +// +// The heterogeneous fixture gives real legacy + new Postgres containers; NO DB is mocked. The ONLY +// non-DB fake is the residency selector that the routing-shaped store uses (`ownerEngine`: ksuid -> +// NEW, cuid -> LEGACY), exactly the substrate the RoutingRunStore ships. 
Run ids are 25 chars (cuid +// -> LEGACY) or 27 chars (ksuid -> NEW) so the classifier routes them deterministically. + +// 25-char internal id -> cuid -> LEGACY; 27-char internal id -> ksuid -> NEW. The +// classifier strips a leading `_`, so these ids must carry NO underscore (a bare +// alphanumeric body of the exact length). +function newId(label: string): string { + return ("k" + label.replace(/[^a-z0-9]/gi, "")).padEnd(27, "0").slice(0, 27); +} +function legacyId(label: string): string { + return ("c" + label.replace(/[^a-z0-9]/gi, "")).padEnd(25, "0").slice(0, 25); +} + +async function seedEnvironment(prisma: PrismaClient, slugSuffix: string) { + const organization = await prisma.organization.create({ + data: { title: `Org ${slugSuffix}`, slug: `org-${slugSuffix}` }, + }); + const project = await prisma.project.create({ + data: { + name: `Project ${slugSuffix}`, + slug: `project-${slugSuffix}`, + externalRef: `proj_${slugSuffix}`, + organizationId: organization.id, + }, + }); + const environment = await prisma.runtimeEnvironment.create({ + data: { + type: "DEVELOPMENT", + slug: "dev", + projectId: project.id, + organizationId: organization.id, + apiKey: `tr_dev_${slugSuffix}`, + pkApiKey: `pk_dev_${slugSuffix}`, + shortcode: `short_${slugSuffix}`, + }, + }); + return { organization, project, environment }; +} + +async function seedRun( + prisma: PrismaClient, + params: { + runId: string; + organizationId: string; + projectId: string; + runtimeEnvironmentId: string; + payload?: string; + output?: string | null; + metadata?: string | null; + runTags?: string[]; + error?: Prisma.InputJsonValue; + } +) { + await prisma.taskRun.create({ + data: { + id: params.runId, + engine: "V2", + status: "PENDING", + friendlyId: `run_friendly_${params.runId.slice(0, 8)}`, + runtimeEnvironmentId: params.runtimeEnvironmentId, + environmentType: "DEVELOPMENT", + organizationId: params.organizationId, + projectId: params.projectId, + taskIdentifier: "my-task", + payload: 
params.payload ?? '{"hello":"world"}', + payloadType: "application/json", + ...(params.output !== undefined && { output: params.output }), + outputType: "application/json", + ...(params.metadata !== undefined && { metadata: params.metadata }), + ...(params.error !== undefined && { error: params.error }), + traceContext: {}, + traceId: `trace_${params.runId}`, + spanId: `span_${params.runId}`, + runTags: params.runTags ?? ["alpha", "beta"], + queue: "task/my-task", + isTest: false, + taskEventStore: "taskEvent", + depth: 0, + }, + }); +} + +/** + * A routing-shaped RunStore: routes the single-run `findRun` by residency (the exact substrate + * the RoutingRunStore ships) and fans `findRuns` out across NEW + LEGACY, merging by id + * (the union/dedup the routing store owns; this hydrator inherits it). For not-known-migrated ids + * the read falls back to the LEGACY slot — which is wired over a READ REPLICA handle, never a + * writer. Only `findRun`/`findRuns` (the two reads this unit exercises) are implemented; the rest + * throw so any accidental call surfaces. The only non-DB fake here is the residency selector. + * + * By design the router ignores the explicit read `client` and reads off the selected slot's OWN + * configured replica, so the hydrator's `replica` arg is dropped here. + */ +function makeRoutingShapedStore(options: { + newStore: PostgresRunStore; + legacyStore: PostgresRunStore; + classify?: (id: string) => Residency; +}): RunStore { + const classify = options.classify ?? ownerEngine; + const route = (id: string | undefined): PostgresRunStore => { + if (typeof id !== "string") return options.legacyStore; + try { + return classify(id) === "NEW" ? options.newStore : options.legacyStore; + } catch { + // Not known-migrated / unclassifiable -> fall back to the LEGACY read replica only. 
+ return options.legacyStore; + } + }; + + const idFromWhere = (where: Prisma.TaskRunWhereInput): string | undefined => { + const id = where.id; + if (typeof id === "string") return id; + if (id && typeof id === "object" && "equals" in id && typeof id.equals === "string") { + return id.equals; + } + return undefined; + }; + + const handler: ProxyHandler = { + get(_target, prop) { + if (prop === "findRun") { + // Drop the explicit `client`: the selected slot reads off its OWN replica. + return (where: Prisma.TaskRunWhereInput, args: unknown, _client?: ReadClient) => + (route(idFromWhere(where)).findRun as (...rest: unknown[]) => Promise)( + where, + args + ); + } + if (prop === "findRuns") { + return async ( + args: { where: Prisma.TaskRunWhereInput; select: Prisma.TaskRunSelect }, + _client?: ReadClient + ) => { + // Fan out across both slots (each on its OWN replica) and merge by id (the routing + // store's union/dedup contract). + const [fromNew, fromLegacy] = await Promise.all([ + options.newStore.findRuns(args as never), + options.legacyStore.findRuns(args as never), + ]); + const byId = new Map>(); + for (const row of [...fromLegacy, ...fromNew] as Record[]) { + byId.set(row.id as string, row); + } + return [...byId.values()]; + }; + } + throw new Error(`routing-shaped store: ${String(prop)} not implemented in test`); + }, + }; + + return new Proxy({} as RunStore, handler); +} + +describe("RunHydrator read-route through the runStore seam (legacy + new)", () => { + // Realtime hydrate pulls run-ops rows from the run-ops replica. A split hydrate returns the + // union of NEW + LEGACY-replica rows, byte-identical to source, via both + // getRunById and hydrateByIds. 
+ heteroPostgresTest( + "split hydrate returns the NEW + legacy-replica union, byte-identical", + { timeout: 60_000 }, + async ({ prisma14, prisma17 }) => { + const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + + const seed14 = await seedEnvironment(prisma14, "u14"); + const seed17 = await seedEnvironment(prisma17, "u17"); + // Both seed envs use the SAME runtimeEnvironmentId so the env-scoped `where` matches across + // the two physical DBs (each env row is local to its DB but carries the same id). + const envId = seed17.environment.id; + await prisma14.runtimeEnvironment.update({ + where: { id: seed14.environment.id }, + data: { id: envId }, + }); + + const newRunId = newId("union_new"); + const legacyRunId = legacyId("union_old"); + + await seedRun(prisma17, { + runId: newRunId, + organizationId: seed17.organization.id, + projectId: seed17.project.id, + runtimeEnvironmentId: envId, + payload: '{"side":"new"}', + output: '{"result":42}', + metadata: '{"m":1}', + runTags: ["new", "z"], + error: { type: "BUILT_IN_ERROR", name: "Boom", message: "new-side" }, + }); + await seedRun(prisma14, { + runId: legacyRunId, + organizationId: seed14.organization.id, + projectId: seed14.project.id, + runtimeEnvironmentId: envId, + payload: '{"side":"legacy"}', + output: null, + metadata: null, + runTags: ["legacy", "a"], + error: { type: "STRING_ERROR", raw: "legacy-side" }, + }); + + const runStore = makeRoutingShapedStore({ newStore, legacyStore }); + const hydrator = new RunHydrator({ replica: prisma14, runStore }); + + const rows = await hydrator.hydrateByIds(envId, [newRunId, legacyRunId]); + expect(rows.map((r) => r.id).sort()).toEqual([legacyRunId, newRunId].sort()); + + const newRow = rows.find((r) => r.id === newRunId)!; + const legacyRow = rows.find((r) => r.id === legacyRunId)!; + + // Byte-identical to source incl. 
JSON columns, runTags, error JSON. + expect(newRow.payload).toBe('{"side":"new"}'); + expect(newRow.output).toBe('{"result":42}'); + expect(newRow.metadata).toBe('{"m":1}'); + expect(newRow.runTags).toEqual(["new", "z"]); + expect(newRow.error).toEqual({ type: "BUILT_IN_ERROR", name: "Boom", message: "new-side" }); + + expect(legacyRow.payload).toBe('{"side":"legacy"}'); + expect(legacyRow.output).toBeNull(); + expect(legacyRow.metadata).toBeNull(); + expect(legacyRow.runTags).toEqual(["legacy", "a"]); + expect(legacyRow.error).toEqual({ type: "STRING_ERROR", raw: "legacy-side" }); + + // getRunById resolves each individual run from its correct source through the seam. + const newById = await hydrator.getRunById(envId, newRunId); + const legacyById = await hydrator.getRunById(envId, legacyRunId); + expect(newById?.payload).toBe('{"side":"new"}'); + expect(legacyById?.payload).toBe('{"side":"legacy"}'); + } + ); + + // A known-migrated (NEW-residency) run is NOT re-probed on the legacy replica. 
+ heteroPostgresTest( + "known-migrated run is never probed on the legacy slot", + { timeout: 60_000 }, + async ({ prisma14, prisma17 }) => { + const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + const legacyFindRunSpy = vi.spyOn(legacyStore, "findRun"); + + const seed17 = await seedEnvironment(prisma17, "k17"); + const envId = seed17.environment.id; + const migratedRunId = newId("known_mig"); + await seedRun(prisma17, { + runId: migratedRunId, + organizationId: seed17.organization.id, + projectId: seed17.project.id, + runtimeEnvironmentId: envId, + }); + + const runStore = makeRoutingShapedStore({ newStore, legacyStore }); + const hydrator = new RunHydrator({ replica: prisma14, runStore }); + + const row = await hydrator.getRunById(envId, migratedRunId); + expect(row?.id).toBe(migratedRunId); + // The NEW-residency id resolved against the NEW slot only — the legacy probe never ran. + expect(legacyFindRunSpy).not.toHaveBeenCalled(); + } + ); + + // An old in-retention run is served from the LEGACY read replica (never a writer/primary path). + heteroPostgresTest( + "old in-retention run served from the legacy replica slot", + { timeout: 60_000 }, + async ({ prisma14, prisma17 }) => { + const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + // The LEGACY slot exposes only a read/replica handle: `prisma14` is wired as BOTH prisma and + // readOnlyPrisma, and the hydrator passes it as the explicit read client — there is no + // legacy-writer read path on the read route (the replica-only invariant is structural in the + // store; asserted here as inheritance). 
+ const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + + const seed14 = await seedEnvironment(prisma14, "o14"); + const envId = seed14.environment.id; + const oldRunId = legacyId("old_run"); + await seedRun(prisma14, { + runId: oldRunId, + organizationId: seed14.organization.id, + projectId: seed14.project.id, + runtimeEnvironmentId: envId, + payload: '{"era":"old"}', + }); + + const runStore = makeRoutingShapedStore({ newStore, legacyStore }); + const hydrator = new RunHydrator({ replica: prisma14, runStore }); + + const byId = await hydrator.getRunById(envId, oldRunId); + expect(byId?.payload).toBe('{"era":"old"}'); + + const [hydrated] = await hydrator.hydrateByIds(envId, [oldRunId]); + expect(hydrated.payload).toBe('{"era":"old"}'); + } + ); + + // Terminal-metadata read-seam: a NEW-resident (ksuid) run's final metadata is hydrated through + // the owning (NEW) store, not off a generic legacy replica. Asserts read-seam ROUTING for the + // terminal read; it is not a hard ordering/consistency guarantee about when the terminal marker + // and the row's terminal columns converge. + heteroPostgresTest( + "terminal hydrate reads a NEW-resident run's final metadata through the owning store", + { timeout: 60_000 }, + async ({ prisma14, prisma17 }) => { + const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + const legacyFindRunSpy = vi.spyOn(legacyStore, "findRun"); + + const seed17 = await seedEnvironment(prisma17, "term17"); + const envId = seed17.environment.id; + const terminalRunId = newId("terminal_run"); + + // A terminal run with its final metadata persisted on the NEW store only. 
+ await seedRun(prisma17, { + runId: terminalRunId, + organizationId: seed17.organization.id, + projectId: seed17.project.id, + runtimeEnvironmentId: envId, + output: '{"result":"final"}', + metadata: '{"done":true}', + }); + + // A generic legacy replica would miss the NEW row entirely — the metadata must come off NEW. + const runStore = makeRoutingShapedStore({ newStore, legacyStore }); + const hydrator = new RunHydrator({ replica: prisma14, runStore, cacheTtlMs: 0 }); + + const snapshot = await hydrator.getRunById(envId, terminalRunId); + expect(snapshot?.id).toBe(terminalRunId); + expect(snapshot?.metadata).toBe('{"done":true}'); + expect(snapshot?.output).toBe('{"result":"final"}'); + // The NEW-residency terminal read never touched the legacy slot. + expect(legacyFindRunSpy).not.toHaveBeenCalled(); + } + ); + + // A live-migrated run continues streaming across the seam crossing with no gap. + heteroPostgresTest( + "live-migrated run continues streaming across the seam crossing", + { timeout: 60_000 }, + async ({ prisma14, prisma17 }) => { + const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + + const seed14 = await seedEnvironment(prisma14, "m14"); + const seed17 = await seedEnvironment(prisma17, "m17"); + const envId = seed17.environment.id; + await prisma14.runtimeEnvironment.update({ + where: { id: seed14.environment.id }, + data: { id: envId }, + }); + + // The run starts life on LEGACY; the residency selector classifies it NEW once it migrates. + // We model the migration by seeding the same run id on LEGACY first, then on NEW, while + // flipping the classifier from LEGACY to NEW for that id at the seam crossing. 
+ const runId = legacyId("migrating"); + await seedRun(prisma14, { + runId, + organizationId: seed14.organization.id, + projectId: seed14.project.id, + runtimeEnvironmentId: envId, + payload: '{"home":"legacy"}', + }); + + let migrated = false; + const classify = (id: string): Residency => + id === runId && migrated ? "NEW" : ownerEngine(id); + const legacyFindRunSpy = vi.spyOn(legacyStore, "findRun"); + + // Use a 0ms TTL so each getRunById re-reads through the seam (no cached stale row across the + // crossing). Single-flight/TTL are proven separately below. + const runStore = makeRoutingShapedStore({ newStore, legacyStore, classify }); + const hydrator = new RunHydrator({ replica: prisma14, runStore, cacheTtlMs: 0 }); + + // Before migration: served from LEGACY. + const before = await hydrator.getRunById(envId, runId); + expect(before?.payload).toBe('{"home":"legacy"}'); + expect(legacyFindRunSpy).toHaveBeenCalled(); + + // Migrate: the run now lives on NEW and the classifier routes it NEW. + await seedRun(prisma17, { + runId, + organizationId: seed17.organization.id, + projectId: seed17.project.id, + runtimeEnvironmentId: envId, + payload: '{"home":"new"}', + }); + migrated = true; + legacyFindRunSpy.mockClear(); + + // After migration: served from NEW, with no gap and no legacy re-probe. + const after = await hydrator.getRunById(envId, runId); + expect(after?.payload).toBe('{"home":"new"}'); + expect(after?.id).toBe(runId); + expect(legacyFindRunSpy).not.toHaveBeenCalled(); + } + ); +}); + +describe("RunHydrator single-flight + TTL cache intact across the seam", () => { + // The cache/single-flight live in the hydrator, independent of the storage seam. Proven in + // SPLIT mode here (a counting wrapper over the selected underlying store's read). 
+ heteroPostgresTest( + "split mode: two concurrent getRunById -> one underlying read; repeat within TTL is cached", + { timeout: 60_000 }, + async ({ prisma14, prisma17 }) => { + const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + const newFindRunSpy = vi.spyOn(newStore, "findRun"); + + const seed17 = await seedEnvironment(prisma17, "s17"); + const envId = seed17.environment.id; + const runId = newId("cached_run"); + await seedRun(prisma17, { + runId, + organizationId: seed17.organization.id, + projectId: seed17.project.id, + runtimeEnvironmentId: envId, + }); + + const runStore = makeRoutingShapedStore({ newStore, legacyStore }); + const hydrator = new RunHydrator({ replica: prisma14, runStore, cacheTtlMs: 60_000 }); + + // Two concurrent calls -> single-flight collapses to ONE underlying read. + const [a, b] = await Promise.all([ + hydrator.getRunById(envId, runId), + hydrator.getRunById(envId, runId), + ]); + expect(a?.id).toBe(runId); + expect(b?.id).toBe(runId); + expect(newFindRunSpy).toHaveBeenCalledTimes(1); + + // A third call within the TTL returns the cached value with no new read. + const c = await hydrator.getRunById(envId, runId); + expect(c?.id).toBe(runId); + expect(newFindRunSpy).toHaveBeenCalledTimes(1); + } + ); + + // A cached `null` (missing run) is a valid not-found hit and is not re-read within the TTL. 
+ heteroPostgresTest( + "split mode: a cached null (missing run) is not re-read within the TTL", + { timeout: 60_000 }, + async ({ prisma14, prisma17 }) => { + const newStore = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 }); + const legacyStore = new PostgresRunStore({ prisma: prisma14, readOnlyPrisma: prisma14 }); + const newFindRunSpy = vi.spyOn(newStore, "findRun"); + + const seed17 = await seedEnvironment(prisma17, "n17"); + const envId = seed17.environment.id; + const missingRunId = newId("missing_run"); + + const runStore = makeRoutingShapedStore({ newStore, legacyStore }); + const hydrator = new RunHydrator({ replica: prisma14, runStore, cacheTtlMs: 60_000 }); + + const first = await hydrator.getRunById(envId, missingRunId); + expect(first).toBeNull(); + expect(newFindRunSpy).toHaveBeenCalledTimes(1); + + const second = await hydrator.getRunById(envId, missingRunId); + expect(second).toBeNull(); + // Still one read — the null was cached as a valid "not found" hit. + expect(newFindRunSpy).toHaveBeenCalledTimes(1); + } + ); +}); + +describe("RunHydrator single-DB passthrough (one PostgresRunStore over one client)", () => { + // Passthrough: in single-DB the store is one PostgresRunStore over one client; the hydrator + // behaves byte-for-byte as today. No split branch, no legacy slot, no second connection. 
+  postgresTest(
+    "single store: getRunById + hydrateByIds read from the one client, cache intact",
+    { timeout: 60_000 },
+    async ({ prisma }) => {
+      const store = new PostgresRunStore({ prisma, readOnlyPrisma: prisma });
+      const findRunSpy = vi.spyOn(store, "findRun");
+
+      const { environment, organization, project } = await seedEnvironment(prisma, "sd1");
+      const envId = environment.id;
+      const runIdA = newId("single_a");
+      const runIdB = legacyId("single_b");
+      const seededIds = [runIdA, runIdB];
+      // Each run's payload embeds its own id so a hydrated row can be matched to its seed.
+      const payloadFor = (runId: string) => `{"id":"${runId}"}`;
+
+      for (const runId of seededIds) {
+        await seedRun(prisma, {
+          runId,
+          organizationId: organization.id,
+          projectId: project.id,
+          runtimeEnvironmentId: envId,
+          payload: payloadFor(runId),
+        });
+      }
+
+      const hydrator = new RunHydrator({ replica: prisma, runStore: store, cacheTtlMs: 60_000 });
+
+      // hydrateByIds returns both rows from the single client.
+      const rows = await hydrator.hydrateByIds(envId, seededIds);
+      const hydratedIds = rows.map((row) => row.id);
+      expect(hydratedIds.sort()).toEqual([...seededIds].sort());
+
+      // getRunById hydrates from the single store; the cache short-circuits a repeat read.
+      const firstRead = await hydrator.getRunById(envId, runIdA);
+      const repeatRead = await hydrator.getRunById(envId, runIdA);
+      for (const run of [firstRead, repeatRead]) {
+        expect(run?.payload).toBe(payloadFor(runIdA));
+      }
+      expect(findRunSpy).toHaveBeenCalledTimes(1);
+    }
+  );
+
+  // Empty id-set short-circuits with no store call.
+  postgresTest("empty id-set returns [] without touching the store", async ({ prisma }) => {
+    const singleStore = new PostgresRunStore({ prisma, readOnlyPrisma: prisma });
+    const findRunsSpy = vi.spyOn(singleStore, "findRuns");
+
+    // The environment id is a placeholder: with an empty id list the spy must see no call.
+    const hydrated = await new RunHydrator({
+      replica: prisma,
+      runStore: singleStore,
+    }).hydrateByIds("env_none", []);
+
+    expect(hydrated).toEqual([]);
+    expect(findRunsSpy).not.toHaveBeenCalled();
+  });
+});
diff --git a/apps/webapp/test/realtime/streamRegistrationRouting.test.ts b/apps/webapp/test/realtime/streamRegistrationRouting.test.ts
new file mode 100644
index 00000000000..6a4f5960f80
--- /dev/null
+++ b/apps/webapp/test/realtime/streamRegistrationRouting.test.ts
@@ -0,0 +1,240 @@
+import { heteroPostgresTest, redisTest } from "@internal/testcontainers";
+import { PostgresRunStore } from "@internal/run-store";
+import type { PrismaClient } from "@trigger.dev/database";
+import Redis from "ioredis";
+import { describe, expect } from "vitest";
+import { RedisRealtimeStreams } from "~/services/realtime/redisRealtimeStreams.server.js";
+
+// Seeds organization -> project -> runtimeEnvironment -> taskRun on the given prisma client.
+// Mirrors the route's target run: a V2 run with an (optionally completed) lifecycle and an
+// initially-empty realtimeStreams array.
+/**
+ * Inserts one organization, project, DEVELOPMENT environment and PENDING V2 task run,
+ * in that order, on `prisma`.
+ *
+ * @param prisma - Client the four rows are created on.
+ * @param params - `runId` becomes the task run id, `slugSuffix` is appended to every
+ *   slug/key so seeds do not collide, and `completedAt` is written only when provided.
+ * @returns The created organization, project and environment rows (the run is not returned).
+ */
+async function seedRun(
+  prisma: PrismaClient,
+  params: {
+    runId: string;
+    slugSuffix: string;
+    completedAt?: Date;
+  }
+) {
+  const { runId, slugSuffix, completedAt } = params;
+
+  const organization = await prisma.organization.create({
+    data: {
+      title: "Test Organization",
+      slug: `test-organization-${slugSuffix}`,
+    },
+  });
+
+  const project = await prisma.project.create({
+    data: {
+      name: "Test Project",
+      slug: `test-project-${slugSuffix}`,
+      externalRef: `proj_${slugSuffix}`,
+      organizationId: organization.id,
+    },
+  });
+
+  const environment = await prisma.runtimeEnvironment.create({
+    data: {
+      type: "DEVELOPMENT",
+      slug: "dev",
+      projectId: project.id,
+      organizationId: organization.id,
+      apiKey: `tr_dev_apikey_${slugSuffix}`,
+      pkApiKey: `pk_dev_apikey_${slugSuffix}`,
+      shortcode: `short_code_${slugSuffix}`,
+    },
+  });
+
+  // Leave `completedAt` out of the insert entirely unless the caller supplied one.
+  const completion = completedAt === undefined ? {} : { completedAt };
+
+  await prisma.taskRun.create({
+    data: {
+      id: runId,
+      engine: "V2",
+      status: "PENDING",
+      friendlyId: `run_friendly_${slugSuffix}`,
+      runtimeEnvironmentId: environment.id,
+      environmentType: "DEVELOPMENT",
+      organizationId: organization.id,
+      projectId: project.id,
+      taskIdentifier: "my-task",
+      payload: "{}",
+      payloadType: "application/json",
+      traceContext: {},
+      traceId: `trace_${runId}`,
+      spanId: `span_${runId}`,
+      queue: "task/my-task",
+      isTest: false,
+      taskEventStore: "taskEvent",
+      depth: 0,
+      ...completion,
+    },
+  });
+
+  return { organization, project, environment };
+}
+
+// The exact routed sequence performed by realtime.v1.streams.$runId.$target.$streamId(.append) PUT:
+// read the target via the store, then push the streamId iff it is not already present and the run
+// is not completed. Driving this against the store is the routed seam (no engine instance required).
+/**
+ * Reads the run through `store` and registers `streamId` on it when allowed.
+ *
+ * @param store - Run store used for both the read and the push.
+ * @param client - Prisma client handed to the store for both calls.
+ * @param runId - Id of the run to look up.
+ * @param streamId - Stream id to register.
+ * @returns `{ pushed: true }` when the stream id was written; `{ pushed: false }` when the
+ *   run has a `completedAt` or already lists the stream id.
+ * @throws Error("Run not found") when the store returns no run.
+ */
+async function routedRegisterStream(
+  store: PostgresRunStore,
+  client: PrismaClient,
+  runId: string,
+  streamId: string
+): Promise<{ pushed: boolean }> {
+  const target = await store.findRun(
+    { id: runId },
+    {
+      select: {
+        id: true,
+        realtimeStreams: true,
+        realtimeStreamsVersion: true,
+        completedAt: true,
+      },
+    },
+    client
+  );
+
+  if (!target) {
+    throw new Error("Run not found");
+  }
+
+  // Completed-run guard (route returns 400 here) is checked first; the membership check
+  // only runs for a run that is still open.
+  if (target.completedAt || target.realtimeStreams.includes(streamId)) {
+    return { pushed: false };
+  }
+
+  await store.pushRealtimeStream(target.id, streamId, client);
+  return { pushed: true };
+}
+
+describe("realtime stream registration — run-ops store routed writes", () => {
+  heteroPostgresTest(
+    "push routes to run-ops store for a run on the new DB",
+    { timeout: 60_000 },
+    async ({ prisma17, prisma14 }) => {
+      // The run-ops store owns the PG17 (new) DB.
+      const store = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+
+      const runId = "run_routed_push_new_db";
+      await seedRun(prisma17, { runId, slugSuffix: "push17" });
+
+      const streamId = "stream-abc";
+      const result = await routedRegisterStream(store, prisma17, runId, streamId);
+
+      expect(result.pushed).toBe(true);
+
+      // Write landed on the new (PG17) DB.
+      const onNewDb = await prisma17.taskRun.findFirst({
+        where: { id: runId },
+        select: { realtimeStreams: true },
+      });
+      expect(onNewDb?.realtimeStreams).toContain(streamId);
+
+      // Write is isolated to the new DB — the legacy (PG14) DB carries no run with that streamId.
+      // Search by array membership rather than by run id: no run was seeded on prisma14,
+      // so any row carrying this streamId there would be a misrouted write.
+      const onLegacyDb = await prisma14.taskRun.findFirst({
+        where: { realtimeStreams: { has: streamId } },
+        select: { id: true },
+      });
+      expect(onLegacyDb).toBeNull();
+    }
+  );
+
+  // Runs the routed sequence twice with the same streamId against the same run.
+  heteroPostgresTest(
+    "idempotent — already-registered streamId issues no second write",
+    { timeout: 60_000 },
+    async ({ prisma17 }) => {
+      const store = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+
+      const runId = "run_routed_push_idempotent";
+      await seedRun(prisma17, { runId, slugSuffix: "idem17" });
+
+      const streamId = "stream-once";
+
+      const first = await routedRegisterStream(store, prisma17, runId, streamId);
+      expect(first.pushed).toBe(true);
+
+      const second = await routedRegisterStream(store, prisma17, runId, streamId);
+      // The includes() guard skipped the second push.
+      expect(second.pushed).toBe(false);
+
+      const row = await prisma17.taskRun.findFirst({
+        where: { id: runId },
+        select: { realtimeStreams: true },
+      });
+      // Exactly one entry — no duplicate appended.
+      expect(row?.realtimeStreams).toEqual([streamId]);
+      expect(row?.realtimeStreams).toHaveLength(1);
+    }
+  );
+
+  // Seeds a run that already has `completedAt` set, then attempts a registration.
+  heteroPostgresTest(
+    "completed run guard issues no push",
+    { timeout: 60_000 },
+    async ({ prisma17 }) => {
+      const store = new PostgresRunStore({ prisma: prisma17, readOnlyPrisma: prisma17 });
+
+      const runId = "run_routed_push_completed";
+      await seedRun(prisma17, {
+        runId,
+        slugSuffix: "completed17",
+        completedAt: new Date("2026-06-01T00:00:00.000Z"),
+      });
+
+      const streamId = "stream-late";
+      const result = await routedRegisterStream(store, prisma17, runId, streamId);
+
+      // The completedAt guard blocks the push (route returns 400).
+      expect(result.pushed).toBe(false);
+
+      const row = await prisma17.taskRun.findFirst({
+        where: { id: runId },
+        select: { realtimeStreams: true },
+      });
+      expect(row?.realtimeStreams).toEqual([]);
+    }
+  );
+
+  // Pushes three newline-delimited JSON chunks through `RedisRealtimeStreams.ingestData`
+  // and checks the Redis stream entries and the reported last chunk index.
+  redisTest(
+    "chunks flow — stream attaches and chunks are ingested",
+    { timeout: 30_000 },
+    async ({ redisOptions }) => {
+      const redis = new Redis(redisOptions);
+      const streams = new RedisRealtimeStreams({ redis: redisOptions });
+
+      const runId = "run_chunks_flow";
+      const streamId = "registered-stream";
+      const streamKey = `stream:${runId}:${streamId}`;
+
+      // Cleanup lives in `finally` so a failed assertion neither leaves the stream key
+      // behind nor leaks the open Redis connection.
+      try {
+        const chunks = [
+          JSON.stringify({ chunk: 0, data: "chunk 0" }),
+          JSON.stringify({ chunk: 1, data: "chunk 1" }),
+          JSON.stringify({ chunk: 2, data: "chunk 2" }),
+        ];
+
+        const encoder = new TextEncoder();
+        const stream = new ReadableStream({
+          start(controller) {
+            for (const chunk of chunks) {
+              controller.enqueue(encoder.encode(chunk + "\n"));
+            }
+            controller.close();
+          },
+        });
+
+        const response = await streams.ingestData(stream, runId, streamId, "default");
+        expect(response.status).toBe(200);
+
+        const entries = await redis.xrange(streamKey, "-", "+");
+        expect(entries.length).toBe(3);
+
+        const lastChunkIndex = await streams.getLastChunkIndex(runId, streamId, "default");
+        expect(lastChunkIndex).toBe(2);
+      } finally {
+        try {
+          await redis.del(streamKey);
+        } finally {
+          // Close the connection even if deleting the key fails.
+          await redis.quit();
+        }
+      }
+    }
+  );
+});
diff --git a/apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts b/apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts
new file mode 100644
index 00000000000..04c442ca187
--- /dev/null
+++ b/apps/webapp/test/resetIdempotencyKeyLegacyAuthority.test.ts
@@ -0,0 +1,223 @@
+import { heteroPostgresTest } from "@internal/testcontainers";
+import type { PrismaClient } from "@trigger.dev/database";
+import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic";
+import { describe, expect, vi } from "vitest";
+
+// Stub these so the default singletons don't eagerly connect at import.
The
+// reset service passes its `_prisma` arg as the explicit tx to every store
+// call, so the singleton handles are never exercised — the passed PG14 client
+// runs the query. The DB under test is the real PG14 + PG17 hetero fixture.
+vi.mock("~/db.server", () => ({ prisma: {}, $replica: {} }));
+vi.mock("~/v3/runEngine.server", () => ({ engine: {} }));
+
+// With `getMollifierBuffer()` returning null the PG clear path runs cleanly
+// (no Redis surface). The buffer path is out of scope for this unit.
+const bufferMock: { current: unknown } = { current: null };
+vi.mock("~/v3/mollifier/mollifierBuffer.server", () => ({
+  getMollifierBuffer: () => bufferMock.current,
+}));
+
+import { PostgresRunStore } from "@internal/run-store";
+import { ResetIdempotencyKeyService } from "~/v3/services/resetIdempotencyKey.server";
+import type { AuthenticatedEnvironment } from "~/services/apiAuth.server";
+
+vi.setConfig({ testTimeout: 60_000 });
+
+/**
+ * Builds the service under test with `legacyPrisma` passed as both client arguments and
+ * as the client behind its run store.
+ */
+function makeService(legacyPrisma: PrismaClient) {
+  return new ResetIdempotencyKeyService(
+    legacyPrisma as never,
+    legacyPrisma as never,
+    new PostgresRunStore({ prisma: legacyPrisma, readOnlyPrisma: legacyPrisma })
+  );
+}
+
+/** Minimal environment stub: only `id` and `organizationId` are populated. */
+function makeEnv(opts: { id: string; organizationId: string }): AuthenticatedEnvironment {
+  const { id, organizationId } = opts;
+  return { id, organizationId } as unknown as AuthenticatedEnvironment;
+}
+
+/**
+ * Creates an organization, a project and a DEVELOPMENT environment on `prisma`, all
+ * labelled `test-<suffix>`.
+ *
+ * @returns The three created rows.
+ */
+async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) {
+  // Every slug, name and key below shares this one label.
+  const label = `test-${suffix}`;
+
+  const organization = await prisma.organization.create({
+    data: { title: label, slug: label },
+  });
+  const project = await prisma.project.create({
+    data: {
+      name: label,
+      slug: label,
+      organizationId: organization.id,
+      externalRef: label,
+    },
+  });
+  const runtimeEnvironment = await prisma.runtimeEnvironment.create({
+    data: {
+      slug: label,
+      type: "DEVELOPMENT",
+      projectId: project.id,
+      organizationId: organization.id,
+      apiKey: label,
+      pkApiKey: label,
+      shortcode: label,
+    },
+  });
+
+  return { organization, project, runtimeEnvironment };
+}
+
+/**
+ * Inserts a V2 task run carrying the given idempotency key, under a freshly generated id.
+ *
+ * @param prisma - Client the run is created on.
+ * @param args - Parent ids, task identifier and idempotency key. `status` falls back to
+ *   "EXECUTING" and `idempotencyKeyExpiresAt` to null when omitted.
+ * @returns The created task run row.
+ */
+async function seedRun(
+  prisma: PrismaClient,
+  args: {
+    runtimeEnvironmentId: string;
+    projectId: string;
+    organizationId: string;
+    taskIdentifier: string;
+    idempotencyKey: string;
+    status?: "PENDING" | "EXECUTING" | "COMPLETED_SUCCESSFULLY" | "COMPLETED_WITH_ERRORS";
+    idempotencyKeyExpiresAt?: Date;
+  }
+) {
+  const {
+    runtimeEnvironmentId,
+    projectId,
+    organizationId,
+    taskIdentifier,
+    idempotencyKey,
+    status,
+    idempotencyKeyExpiresAt,
+  } = args;
+  const runId = generateKsuidId();
+
+  const created = await prisma.taskRun.create({
+    data: {
+      id: runId,
+      friendlyId: `run_${runId}`,
+      taskIdentifier,
+      idempotencyKey,
+      idempotencyKeyExpiresAt: idempotencyKeyExpiresAt ?? null,
+      status: status ?? "EXECUTING",
+      payload: JSON.stringify({ foo: "bar" }),
+      payloadType: "application/json",
+      traceId: "1234",
+      spanId: "1234",
+      queue: "test",
+      runtimeEnvironmentId,
+      projectId,
+      organizationId,
+      environmentType: "DEVELOPMENT",
+      engine: "V2",
+    },
+  });
+  return created;
+}
+
+describe("ResetIdempotencyKeyService · legacy-authority pin (cross-DB)", () => {
+  heteroPostgresTest(
+    "clears the key on the legacy (PG14) authority only; a PG17-only same-key row is untouched, and reuse-after-reset finds no row",
+    async ({ prisma14, prisma17 }) => {
+      const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv(
+        prisma14,
+        "reset-legacy"
+      );
+      const newSide = await seedOrgProjectEnv(prisma17, "reset-new-side");
+
+      const key = "idem-reset-1";
+      const taskIdentifier = "my-task";
+
+      const legacyRun = await seedRun(prisma14, {
+        runtimeEnvironmentId: runtimeEnvironment.id,
+        projectId: project.id,
+        organizationId: organization.id,
+        taskIdentifier,
+        idempotencyKey: key,
+        status: "EXECUTING",
+        idempotencyKeyExpiresAt: new Date(Date.now() + 60_000),
+      });
+
+      // Same (task, key) tuple planted ONLY on PG17 — a legacy-pinned reset
+      // must not leak to it.
+      const newRun = await seedRun(prisma17, {
+        runtimeEnvironmentId: newSide.runtimeEnvironment.id,
+        projectId: newSide.project.id,
+        organizationId: newSide.organization.id,
+        taskIdentifier,
+        idempotencyKey: key,
+        status: "EXECUTING",
+        idempotencyKeyExpiresAt: new Date(Date.now() + 60_000),
+      });
+
+      const service = makeService(prisma14);
+
+      const result = await service.call(
+        key,
+        taskIdentifier,
+        makeEnv({ id: runtimeEnvironment.id, organizationId: organization.id })
+      );
+
+      expect(result).toEqual({ id: key });
+
+      // Cleared on legacy; run otherwise intact (not deleted).
+      const clearedLegacy = await prisma14.taskRun.findFirst({ where: { id: legacyRun.id } });
+      expect(clearedLegacy).not.toBeNull();
+      expect(clearedLegacy?.idempotencyKey).toBeNull();
+      expect(clearedLegacy?.idempotencyKeyExpiresAt).toBeNull();
+      expect(clearedLegacy?.id).toBe(legacyRun.id);
+      expect(clearedLegacy?.status).toBe("EXECUTING");
+
+      // PG17-only row untouched — no leak to the wrong DB.
+      const untouchedNew = await prisma17.taskRun.findFirst({ where: { id: newRun.id } });
+      expect(untouchedNew?.idempotencyKey).toBe(key);
+      expect(untouchedNew?.idempotencyKeyExpiresAt).not.toBeNull();
+
+      // Reuse-after-reset: no row resolves on legacy → a fresh run would mint.
+      const reusable = await prisma14.taskRun.findFirst({
+        where: {
+          runtimeEnvironmentId: runtimeEnvironment.id,
+          taskIdentifier,
+          idempotencyKey: key,
+        },
+      });
+      expect(reusable).toBeNull();
+    }
+  );
+
+  heteroPostgresTest(
+    "handoff re-check (totalCount === 0 branch) clears a row that materialises on the legacy (PG14) authority after the initial clear",
+    async ({ prisma14 }) => {
+      const { project, organization, runtimeEnvironment } = await seedOrgProjectEnv(
+        prisma14,
+        "reset-handoff"
+      );
+
+      const key = "idem-handoff-1";
+      const taskIdentifier = "my-task";
+
+      // Model the PG↔buffer race: initial clear sees no row (count 0), buffer
+      // reports no cleared run (totalCount 0), then the run materialises on
+      // legacy mid-call (drainer's engine.trigger) before the handoff re-check.
+      bufferMock.current = {
+        resetIdempotency: vi.fn(async () => {
+          await seedRun(prisma14, {
+            runtimeEnvironmentId: runtimeEnvironment.id,
+            projectId: project.id,
+            organizationId: organization.id,
+            taskIdentifier,
+            idempotencyKey: key,
+            status: "EXECUTING",
+          });
+          return { clearedRunId: null as string | null };
+        }),
+      };
+
+      // `bufferMock` is module-level state shared by every test in this file. Restore it in
+      // `finally` so a failing assertion cannot leave the fake buffer installed for others.
+      try {
+        const service = makeService(prisma14);
+
+        const result = await service.call(
+          key,
+          taskIdentifier,
+          makeEnv({ id: runtimeEnvironment.id, organizationId: organization.id })
+        );
+
+        // Handoff re-check cleared the materialised row on legacy → success.
+        expect(result).toEqual({ id: key });
+
+        const reusable = await prisma14.taskRun.findFirst({
+          where: {
+            runtimeEnvironmentId: runtimeEnvironment.id,
+            taskIdentifier,
+            idempotencyKey: key,
+          },
+        });
+        expect(reusable).toBeNull();
+      } finally {
+        bufferMock.current = null;
+      }
+    }
+  );
+});
diff --git a/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts
new file mode 100644
index 00000000000..3b3ec417895
--- /dev/null
+++ b/apps/webapp/test/resolveWaitpointThroughReadThrough.readthrough.test.ts
@@ -0,0 +1,245 @@
+import { heteroRunOpsPostgresTest, postgresTest } from "@internal/testcontainers";
+import type { RunOpsPrismaClient } from "@internal/run-ops-database";
+import type { PrismaClient } from "@trigger.dev/database";
+import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic";
+import { describe, expect, vi } from "vitest";
+import type { PrismaReplicaClient } from "~/db.server";
+import { resolveWaitpointThroughReadThrough } from "~/runEngine/concerns/resolveWaitpointThroughReadThrough.server";
+
+vi.setConfig({ testTimeout: 60_000 });
+
+// 25-char cuid (length-disjoint from the 27-char KSUID) -> LEGACY residency.
+/** Returns "c" followed by 24 random characters drawn from [0-9a-z] (25 characters total). */
+function generateLegacyCuid() {
+  const alphabet = "0123456789abcdefghijklmnopqrstuvwxyz";
+  let id = "c";
+  for (let position = 0; position < 24; position++) {
+    id += alphabet[Math.floor(Math.random() * 36)];
+  }
+  return id;
+}
+
+/**
+ * Wraps `client` so every `waitpoint.findFirst` call is recorded before being forwarded.
+ *
+ * @param client - Real client that serves the forwarded reads.
+ * @param opts - With `forbidden: true` the wrapper records the call and then throws
+ *   instead of forwarding it.
+ * @returns `handle` (the wrapped client) and `calls` (the argument of each recorded read).
+ */
+function recording(client: PrismaClient | RunOpsPrismaClient, opts: { forbidden?: boolean } = {}) {
+  const calls: unknown[] = [];
+  const underlying = client as unknown as PrismaReplicaClient;
+
+  function findFirst(args: unknown) {
+    // Record first, so a forbidden read still shows up in `calls`.
+    calls.push(args);
+    if (opts.forbidden) {
+      throw new Error("this store must never be read");
+    }
+    return underlying.waitpoint.findFirst(args as never);
+  }
+
+  const handle = { ...client, waitpoint: { findFirst } } as unknown as PrismaReplicaClient;
+  return { handle, calls };
+}
+
+/**
+ * Creates an organization, a project and a PRODUCTION environment on `prisma`, using
+ * `suffix` to keep slugs and keys distinct.
+ *
+ * @returns The three created rows.
+ */
+async function seedOrgProjectEnv(prisma: PrismaClient, suffix: string) {
+  const label = `test-${suffix}`;
+
+  const organization = await prisma.organization.create({
+    data: { title: label, slug: label },
+  });
+  const project = await prisma.project.create({
+    data: {
+      name: label,
+      slug: label,
+      organizationId: organization.id,
+      externalRef: label,
+    },
+  });
+  const environment = await prisma.runtimeEnvironment.create({
+    data: {
+      slug: label,
+      type: "PRODUCTION",
+      projectId: project.id,
+      organizationId: organization.id,
+      apiKey: `apikey-${suffix}`,
+      pkApiKey: `pk-${suffix}`,
+      shortcode: label,
+    },
+  });
+
+  return { organization, project, environment };
+}
+
+/**
+ * Inserts a PENDING MANUAL waitpoint with the given id on `prisma`.
+ *
+ * @param env - Supplies the waitpoint's `environmentId` (`env.id`) and `projectId`.
+ * @returns The created waitpoint row.
+ */
+async function seedWaitpoint(
+  prisma: PrismaClient | RunOpsPrismaClient,
+  id: string,
+  env: { id: string; projectId: string }
+) {
+  const { id: environmentId, projectId } = env;
+  const waitpoint = await prisma.waitpoint.create({
+    data: {
+      id,
+      friendlyId: `waitpoint_${id}`,
+      type: "MANUAL",
+      status: "PENDING",
+      idempotencyKey: `idem-${id}`,
+      userProvidedIdempotencyKey: false,
+      projectId,
+      environmentId,
+    },
+  });
+  return waitpoint;
+}
+
+/**
+ * Builds the reader passed to the resolver: given a client, it looks the waitpoint up by
+ * id within the environment and selects id, status, projectId and environmentId.
+ */
+function read(waitpointId: string, environmentId: string) {
+  return (client: PrismaReplicaClient) =>
+    client.waitpoint.findFirst({
+      where: { id: waitpointId, environmentId },
+      select: { id: true, status: true, projectId: true, environmentId: true },
+    });
+}
+
+describe("resolveWaitpointThroughReadThrough (hetero PG14 legacy + dedicated run-ops PG17)", () => {
+  heteroRunOpsPostgresTest(
+    "ksuid waitpoint resolves on the dedicated run-ops client; legacy replica never touched",
+    async ({ prisma17, prisma14 }) => {
+      const id = generateKsuidId();
+      expect(id.length).toBe(27);
+
+      // The dedicated run-ops DB has no control-plane tables; the waitpoint's
+      // environment/project FKs are synthetic scalar ids.
+      const environmentId = generateKsuidId();
+      const projectId = generateKsuidId();
+      const seeded = await seedWaitpoint(prisma17, id, { id: environmentId, projectId });
+
+      const newClient = recording(prisma17);
+      const legacy = recording(prisma14, { forbidden: true });
+
+      const result = await resolveWaitpointThroughReadThrough({
+        waitpointId: id,
+        environmentId,
+        read: read(id, environmentId),
+        deps: {
+          splitEnabled: true,
+          newClient: newClient.handle,
+          legacyReplica: legacy.handle,
+        },
+      });
+
+      expect(result).not.toBeNull();
+      expect(result!.id).toBe(seeded.id);
+      expect(result!.projectId).toBe(projectId);
+      expect(result!.environmentId).toBe(environmentId);
+      expect(newClient.calls.length).toBe(1);
+      expect(legacy.calls.length).toBe(0);
+    }
+  );
+
+  heteroRunOpsPostgresTest(
+    "cuid waitpoint resolves off the LEGACY replica (new probed first, miss)",
+    async ({ prisma17, prisma14 }) => {
+      const id = generateLegacyCuid();
+      expect(id.length).toBe(25);
+
+      const { project, environment } = await seedOrgProjectEnv(prisma14, "legacy");
+      const seeded = await seedWaitpoint(prisma14, id, {
+        id: environment.id,
+        projectId: project.id,
+      });
+
+      const newClient = recording(prisma17);
+      const legacy = recording(prisma14);
+
+      const result = await resolveWaitpointThroughReadThrough({
+        waitpointId: id,
+        environmentId: environment.id,
+        read: read(id, environment.id),
+        deps: {
+          splitEnabled: true,
+          newClient: newClient.handle,
+          legacyReplica: legacy.handle,
+        },
+      });
+
+      expect(result).not.toBeNull();
+      expect(result!.id).toBe(seeded.id);
+      expect(newClient.calls.length).toBe(1);
+      expect(legacy.calls.length).toBe(1);
+    }
+  );
+
+  heteroRunOpsPostgresTest(
+    "bare caller (no deps) resolves a NEW-resident waitpoint via the safe run-ops defaults",
+    async ({ prisma17, prisma14 }) => {
+      // The bare wait route passes NO `deps`; the `defaults` DI seam models old vs new
+      // fallback against containers, avoiding the real db.server topology.
+      const id = generateKsuidId();
+      expect(id.length).toBe(27);
+      const environmentId = generateKsuidId();
+      const projectId = generateKsuidId();
+      const seeded = await seedWaitpoint(prisma17, id, { id: environmentId, projectId });
+
+      // FAIL-BEFORE: old default pinned newClient to control-plane ($replica ≈ prisma14) → miss.
+      const oldDefaultResult = await resolveWaitpointThroughReadThrough({
+        waitpointId: id,
+        environmentId,
+        read: read(id, environmentId),
+        defaults: {
+          newClient: prisma14 as unknown as PrismaReplicaClient,
+          legacyReplica: prisma14 as unknown as PrismaReplicaClient,
+          splitEnabled: true,
+        },
+      });
+      expect(oldDefaultResult).toBeNull();
+
+      // PASS-AFTER: safe default routes newClient to the run-ops replica (runOpsNewReplica ≈ prisma17).
+      // Same call as above except `newClient` now points at prisma17, where the waitpoint
+      // was seeded.
+      const safeDefaultResult = await resolveWaitpointThroughReadThrough({
+        waitpointId: id,
+        environmentId,
+        read: read(id, environmentId),
+        defaults: {
+          newClient: prisma17 as unknown as PrismaReplicaClient,
+          legacyReplica: prisma14 as unknown as PrismaReplicaClient,
+          splitEnabled: true,
+        },
+      });
+
+      expect(safeDefaultResult).not.toBeNull();
+      expect(safeDefaultResult!.id).toBe(seeded.id);
+      expect(safeDefaultResult!.projectId).toBe(projectId);
+      expect(safeDefaultResult!.environmentId).toBe(environmentId);
+    }
+  );
+
+  // No waitpoint is seeded on either client, so the resolver has nothing to find.
+  heteroRunOpsPostgresTest("not-found maps to null (no throw)", async ({ prisma17, prisma14 }) => {
+    const id = generateLegacyCuid();
+    const { environment } = await seedOrgProjectEnv(prisma14, "nf");
+
+    const result = await resolveWaitpointThroughReadThrough({
+      waitpointId: id,
+      environmentId: environment.id,
+      read: read(id, environment.id),
+      deps: {
+        splitEnabled: true,
+        newClient: recording(prisma17).handle,
+        legacyReplica: recording(prisma14).handle,
+      },
+    });
+
+    expect(result).toBeNull();
+  });
+
+  postgresTest(
+    "passthrough (single-DB): one plain read; legacy never invoked",
+    async ({ prisma }) => {
+      const id = generateKsuidId();
+      const { project, environment } = await seedOrgProjectEnv(prisma, "pt");
+      const seeded = await seedWaitpoint(prisma, id, {
+        id: environment.id,
+        projectId: project.id,
+      });
+
+      // Both wrappers sit over the same client; the `forbidden` one throws if it is read,
+      // which would fail the test.
+      const single = recording(prisma);
+      const legacy = recording(prisma, { forbidden: true });
+
+      const result = await resolveWaitpointThroughReadThrough({
+        waitpointId: id,
+        environmentId: environment.id,
+        read: read(id, environment.id),
+        deps: {
+          splitEnabled: false,
+          newClient: single.handle,
+          legacyReplica: legacy.handle,
+        },
+      });
+
+      expect(result).not.toBeNull();
+      expect(result!.id).toBe(seeded.id);
+      expect(single.calls.length).toBe(1);
+      expect(legacy.calls.length).toBe(0);
+    }
+  );
+});
diff --git a/apps/webapp/test/runEngineBatchTriggerStoreRouting.test.ts
b/apps/webapp/test/runEngineBatchTriggerStoreRouting.test.ts
new file mode 100644
index 00000000000..0e0f0841f20
--- /dev/null
+++ b/apps/webapp/test/runEngineBatchTriggerStoreRouting.test.ts
@@ -0,0 +1,172 @@
+import { describe, expect, vi } from "vitest";
+
+// Redirect the module-level db client to the per-test container prisma so the worker-path
+// env resolution (`findEnvironmentById`/`controlPlaneResolver`, which read `~/db.server`)
+// hits the real container DB. The DB itself is never mocked — only the module binding is
+// pointed at the container client created by the fixture.
+const dbHolder = vi.hoisted(() => ({ prisma: undefined as any }));
+vi.mock("~/db.server", () => ({
+  get prisma() {
+    return dbHolder.prisma;
+  },
+  get $replica() {
+    return dbHolder.prisma;
+  },
+}));
+
+import { RunEngine } from "@internal/run-engine";
+import { setupAuthenticatedEnvironment } from "@internal/run-engine/tests";
+import { PostgresRunStore, RoutingRunStore } from "@internal/run-store";
+import { containerTestWithIsolatedRedisNoClickhouse as containerTest } from "@internal/testcontainers";
+import { trace } from "@opentelemetry/api";
+import { BatchId } from "@trigger.dev/core/v3/isomorphic";
+import type { PrismaClient } from "@trigger.dev/database";
+import { RunEngineBatchTriggerService } from "../app/runEngine/services/batchTrigger.server";
+
+vi.setConfig({ testTimeout: 120_000 });
+
+/**
+ * Builds a RunEngine over `prisma` and the fixture's Redis, with the worker disabled.
+ *
+ * @param prisma - Client handed to the engine.
+ * @param redisOptions - Redis connection options reused for worker, queue, lock and batch queue.
+ * @param store - Optional routing store; when omitted no `store` key is passed at all, so the
+ *   engine falls back to its own default.
+ * @returns The constructed engine. The caller must `quit()` it.
+ */
+function buildEngine(prisma: PrismaClient, redisOptions: any, store?: RoutingRunStore) {
+  return new RunEngine({
+    prisma,
+    ...(store ? { store } : {}),
+    worker: {
+      redis: redisOptions,
+      workers: 1,
+      tasksPerWorker: 10,
+      pollIntervalMs: 100,
+      disabled: true,
+    },
+    queue: { redis: redisOptions },
+    runLock: { redis: redisOptions },
+    machines: {
+      defaultMachine: "small-1x",
+      machines: {
+        "small-1x": { name: "small-1x" as const, cpu: 0.5, memory: 0.5, centsPerMs: 0.0001 },
+      },
+      baseCostInCents: 0.0005,
+    },
+    batchQueue: { redis: redisOptions },
+    tracer: trace.getTracer("test", "0.0.0"),
+  });
+}
+
+/**
+ * Expands the given fields into the create payload for a batch task run, adding the fixed
+ * values used by these tests (empty `runIds`, JSON payload type, `runengine:v1` batch version).
+ */
+function batchCreateData(params: {
+  id: string;
+  friendlyId: string;
+  runtimeEnvironmentId: string;
+  runCount: number;
+  payload: string;
+}) {
+  return {
+    id: params.id,
+    friendlyId: params.friendlyId,
+    runtimeEnvironmentId: params.runtimeEnvironmentId,
+    runCount: params.runCount,
+    runIds: [] as string[],
+    payload: params.payload,
+    payloadType: "application/json",
+    options: {},
+    batchVersion: "runengine:v1",
+  };
+}
+
+describe("RunEngineBatchTriggerService store routing", () => {
+  // The service issues BatchTaskRun create/find/update through `this._engine.runStore`.
+  // With an injected RoutingRunStore whose NEW slot is a PostgresRunStore, those calls
+  // land on the run-ops store (born on NEW), not on a separate `this._prisma` path.
+  containerTest(
+    "create/find/update route through the injected run-ops store",
+    async ({ prisma, redisOptions }) => {
+      dbHolder.prisma = prisma;
+      const runStore = new RoutingRunStore({
+        new: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }),
+        legacy: new PostgresRunStore({ prisma, readOnlyPrisma: prisma }),
+      });
+      const engine = buildEngine(prisma, redisOptions, runStore);
+
+      // Shut the engine down in `finally` so a failed assertion does not leave it running.
+      try {
+        const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+
+        const service = new RunEngineBatchTriggerService("sequential", prisma, engine);
+
+        // The service holds the injected routing store.
+        expect(service["_engine"].runStore).toBe(runStore);
+
+        // (create) Born on the run-ops store and present in the DB.
+        const { id, friendlyId } = BatchId.generate();
+        const created = await service["_engine"].runStore.createBatchTaskRun(
+          batchCreateData({
+            id,
+            friendlyId,
+            runtimeEnvironmentId: authenticatedEnvironment.id,
+            runCount: 1,
+            payload: "[]",
+          })
+        );
+        expect(created.id).toBe(id);
+        expect(await prisma.batchTaskRun.findUnique({ where: { id } })).not.toBeNull();
+
+        // (find + update) Drive the worker entrypoint with an empty payload so no child runs
+        // are triggered: the path exercises findBatchTaskRunById -> findEnvironmentById ->
+        // inline-payload parse -> updateBatchTaskRun, all through the store.
+        await service.processBatchTaskRun({
+          batchId: id,
+          processingId: "0",
+          range: { start: 0, count: 50 },
+          attemptCount: 0,
+          strategy: "sequential",
+        });
+
+        // The update routed through the store ran (processingJobsCount incremented by the 0
+        // processed items; runIds untouched). The row is the one written to the run-ops DB.
+        const after = await prisma.batchTaskRun.findUnique({ where: { id } });
+        expect(after).not.toBeNull();
+        expect(after!.processingJobsCount).toBe(0);
+        expect(after!.runIds).toEqual([]);
+      } finally {
+        await engine.quit();
+      }
+    }
+  );
+
+  // Single-DB passthrough (self-host collapse): with no `store` injected, the engine
+  // defaults to a PostgresRunStore over the one client, byte-identical to pre-routing.
+  containerTest(
+    "single-DB passthrough uses the default PostgresRunStore",
+    async ({ prisma, redisOptions }) => {
+      dbHolder.prisma = prisma;
+      const engine = buildEngine(prisma, redisOptions);
+
+      // Shut the engine down in `finally` so a failed assertion does not leave it running.
+      try {
+        const authenticatedEnvironment = await setupAuthenticatedEnvironment(prisma, "PRODUCTION");
+
+        const service = new RunEngineBatchTriggerService("sequential", prisma, engine);
+
+        // The default store is a plain PostgresRunStore (no RoutingRunStore, no second client).
+        expect(service["_engine"].runStore).toBeInstanceOf(PostgresRunStore);
+        expect(service["_engine"].runStore).not.toBeInstanceOf(RoutingRunStore);
+
+        const { id, friendlyId } = BatchId.generate();
+        await service["_engine"].runStore.createBatchTaskRun(
+          batchCreateData({
+            id,
+            friendlyId,
+            runtimeEnvironmentId: authenticatedEnvironment.id,
+            runCount: 1,
+            payload: "[]",
+          })
+        );
+
+        await service.processBatchTaskRun({
+          batchId: id,
+          processingId: "0",
+          range: { start: 0, count: 50 },
+          attemptCount: 0,
+          strategy: "sequential",
+        });
+
+        const after = await prisma.batchTaskRun.findUnique({ where: { id } });
+        expect(after).not.toBeNull();
+        expect(after!.processingJobsCount).toBe(0);
+      } finally {
+        await engine.quit();
+      }
+    }
+  );
+});
diff --git a/apps/webapp/test/runsRepository.readthrough.test.ts b/apps/webapp/test/runsRepository.readthrough.test.ts
new file mode 100644
index 00000000000..fd3f342f9c6
--- /dev/null
+++ b/apps/webapp/test/runsRepository.readthrough.test.ts
@@ -0,0 +1,436 @@
+import { describe, expect, vi } from "vitest";
+
+// The runsRepository module graph imports `~/v3/runStore.server`, which imports `~/db.server`
+// at load. Stub it (the existing runsRepository.part*.test.ts do the same) — the repo under test
+// is driven entirely through injected real containers, never the stubbed module singletons.
+vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +import { PostgresRunStore } from "@internal/run-store"; +import { createPostgresContainer, replicationContainerTest } from "@internal/testcontainers"; +import { PrismaClient } from "@trigger.dev/database"; +import { setTimeout } from "node:timers/promises"; +import { RunsRepository } from "~/services/runsRepository/runsRepository.server"; +import { setupClickhouseReplication } from "./utils/replicationUtils"; + +vi.setConfig({ testTimeout: 90_000 }); + +type SeedContext = { + organizationId: string; + projectId: string; + environmentId: string; +}; + +/** + * Creates the org/project/env parents on a single prisma client. TaskRun FKs require + * these to exist on every DB a run is hydrated from, so we seed identical parents + * (same ids) on both the legacy (PG14) and new (PG17) databases. + */ +async function seedParents(prisma: PrismaClient, slug: string): Promise { + const organization = await prisma.organization.create({ + data: { title: `org-${slug}`, slug: `org-${slug}` }, + }); + const project = await prisma.project.create({ + data: { + name: `proj-${slug}`, + slug: `proj-${slug}`, + organizationId: organization.id, + externalRef: `proj-${slug}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `env-${slug}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `tr_dev_${slug}`, + pkApiKey: `pk_dev_${slug}`, + shortcode: `sc-${slug}`, + }, + }); + + return { + organizationId: organization.id, + projectId: project.id, + environmentId: runtimeEnvironment.id, + }; +} + +/** Mirrors the org/project/env parents onto a second DB with the SAME ids. 
*/ +async function mirrorParents(prisma: PrismaClient, ctx: SeedContext, slug: string): Promise<void> { + await prisma.organization.create({ + data: { id: ctx.organizationId, title: `org-${slug}`, slug: `org-${slug}` }, + }); + await prisma.project.create({ + data: { + id: ctx.projectId, + name: `proj-${slug}`, + slug: `proj-${slug}`, + organizationId: ctx.organizationId, + externalRef: `proj-${slug}`, + }, + }); + await prisma.runtimeEnvironment.create({ + data: { + id: ctx.environmentId, + slug: `env-${slug}`, + type: "DEVELOPMENT", + projectId: ctx.projectId, + organizationId: ctx.organizationId, + apiKey: `tr_dev_${slug}_b`, + pkApiKey: `pk_dev_${slug}_b`, + shortcode: `sc-${slug}-b`, + }, + }); +} + +async function createRun( + prisma: PrismaClient, + ctx: SeedContext, + run: { + id?: string; + friendlyId: string; + taskIdentifier?: string; + status?: any; + runTags?: string[]; + createdAt?: Date; + } +) { + return prisma.taskRun.create({ + data: { + ...(run.id ? { id: run.id } : {}), + friendlyId: run.friendlyId, + taskIdentifier: run.taskIdentifier ?? "my-task", + status: run.status ?? "PENDING", + payload: JSON.stringify({ foo: run.friendlyId }), + traceId: run.friendlyId, + spanId: run.friendlyId, + queue: "test", + runTags: run.runTags ?? [], + runtimeEnvironmentId: ctx.environmentId, + projectId: ctx.projectId, + organizationId: ctx.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + ...(run.createdAt ? 
{ createdAt: run.createdAt } : {}), + }, + }); +} + +describe("RunsRepository read-through id-set hydrate (PG14 legacy + PG17 new)", () => { + // --- DoD line + e2e #6: split fan-out across new + legacy-replica with known-migrated filter --- + replicationContainerTest( + "split mode hydrates the CH id-set as the union of NEW + legacy-replica rows, byte-identical and id-desc ordered", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma, network }) => { + // The fixture's PG14 container is the LEGACY read replica AND the replication source that + // feeds the ClickHouse id-set. The dedicated PG17 container is the NEW run-ops DB. + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const { url: newUrl } = await createPostgresContainer(network, { + imageTag: "docker.io/postgres:17", + }); + const prismaNew = new PrismaClient({ datasources: { db: { url: newUrl } } }); + + try { + const ctx = await seedParents(prisma, "split1"); + await mirrorParents(prismaNew, ctx, "split1"); + + // Seed all four runs on PG14 (legacy + replication source -> CH gets the full id-set). + const legacyOnlyA = await createRun(prisma, ctx, { friendlyId: "run_legacyA" }); + const legacyOnlyB = await createRun(prisma, ctx, { friendlyId: "run_legacyB" }); + const migratedA = await createRun(prisma, ctx, { friendlyId: "run_newA" }); + const migratedB = await createRun(prisma, ctx, { friendlyId: "run_newB" }); + + // The two "migrated" runs ALSO live on the NEW DB (authoritative during retention). + // Same ids so set-membership and ordering line up with the CH id-set. + await createRun(prismaNew, { ...ctx }, { friendlyId: "run_newA" }); + await createRun(prismaNew, { ...ctx }, { friendlyId: "run_newB" }); + // Force the NEW rows to share the legacy ids exactly. 
+ await prismaNew.taskRun.update({ + where: { friendlyId: "run_newA" }, + data: { id: migratedA.id }, + }); + await prismaNew.taskRun.update({ + where: { friendlyId: "run_newB" }, + data: { id: migratedB.id }, + }); + + await setTimeout(1500); + + const runsRepository = new RunsRepository({ + prisma, // single-DB default handle (unused on the split path here) + clickhouse, + runStore: new PostgresRunStore({ prisma: prismaNew, readOnlyPrisma: prismaNew }), + readThrough: { + splitEnabled: true, + newClient: prismaNew, + legacyReplica: prisma, + }, + }); + + const { runs } = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + + // Union of all four, id-desc ordered. + const expectedIds = [migratedA.id, migratedB.id, legacyOnlyA.id, legacyOnlyB.id].sort( + (a, b) => (a < b ? 1 : a > b ? -1 : 0) + ); + expect(runs.map((r) => r.id)).toEqual(expectedIds); + + // Byte-identity for a NEW-served row (from PG17) and a legacy-served row (from PG14). + const newRow = runs.find((r) => r.id === migratedA.id)!; + expect(newRow.friendlyId).toBe("run_newA"); + expect(newRow.taskIdentifier).toBe("my-task"); + const legacyRow = runs.find((r) => r.id === legacyOnlyA.id)!; + expect(legacyRow.friendlyId).toBe("run_legacyA"); + + // Order parity with single-DB: a pure id-desc sort of the same ids. + expect(runs.map((r) => r.id)).toEqual( + [...runs.map((r) => r.id)].sort((a, b) => (a < b ? 1 : a > b ? 
-1 : 0)) + ); + } finally { + await prismaNew.$disconnect(); + } + } + ); + + // --- Passthrough (single-DB): one plain store read, legacy never touched --- + replicationContainerTest( + "single-DB passthrough hydrates from one store read and never touches the legacy boundary", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const ctx = await seedParents(prisma, "passthrough"); + const run = await createRun(prisma, ctx, { friendlyId: "run_passthrough" }); + + await setTimeout(1500); + + // splitEnabled false → the split branch is never entered (one plain store read). + const runsRepository = new RunsRepository({ + prisma, + clickhouse, + readThrough: { + splitEnabled: false, + legacyReplica: prisma, + }, + }); + + const { runs } = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + + expect(runs).toHaveLength(1); + expect(runs[0].id).toBe(run.id); + + const friendlyIds = await runsRepository.listFriendlyRunIds({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + expect(friendlyIds).toEqual(["run_passthrough"]); + } + ); + + // --- Ordering: the hydrated page follows the ClickHouse keyset (created_at desc), NOT raw id --- + replicationContainerTest( + "listRuns orders by the ClickHouse created_at keyset, not by raw id", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const ctx = await seedParents(prisma, 
"ordering"); + // Make chronological order the OPPOSITE of id order: the run created FIRST (smaller + // time-prefixed cuid id) is given the MOST-RECENT created_at. A correct list returns + // [mostRecent, oldest] (created_at desc); the old id-desc hydrate would invert it. + // created_at is set at insert time (not via update) so ClickHouse never holds a second + // ReplacingMergeTree version that could surface as a duplicate. + const now = Date.now(); + const mostRecent = await createRun(prisma, ctx, { + friendlyId: "run_orderA", + createdAt: new Date(now), + }); + const oldest = await createRun(prisma, ctx, { + friendlyId: "run_orderB", + createdAt: new Date(now - 3_600_000), + }); + expect(mostRecent.id < oldest.id).toBe(true); // raw id-desc would yield [oldest, mostRecent] + + await setTimeout(1500); + + const runsRepository = new RunsRepository({ prisma, clickhouse }); + const { runs } = await runsRepository.listRuns({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + + expect(runs.map((r) => r.id)).toEqual([mostRecent.id, oldest.id]); + } + ); + + // --- listFriendlyRunIds parity: split union, id projected away to a plain string[] --- + replicationContainerTest( + "listFriendlyRunIds returns the union of friendly ids across new + legacy, projecting id away", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma, network }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const { url: newUrl } = await createPostgresContainer(network, { + imageTag: "docker.io/postgres:17", + }); + const prismaNew = new PrismaClient({ datasources: { db: { url: newUrl } } }); + + try { + const ctx = await seedParents(prisma, "friendly"); + await mirrorParents(prismaNew, ctx, "friendly"); + + const legacy = await 
createRun(prisma, ctx, { friendlyId: "run_fLegacy" }); + const migrated = await createRun(prisma, ctx, { friendlyId: "run_fNew" }); + await createRun(prismaNew, ctx, { friendlyId: "run_fNew" }); + await prismaNew.taskRun.update({ + where: { friendlyId: "run_fNew" }, + data: { id: migrated.id }, + }); + + await setTimeout(1500); + + const runsRepository = new RunsRepository({ + prisma, + clickhouse, + runStore: new PostgresRunStore({ prisma: prismaNew, readOnlyPrisma: prismaNew }), + readThrough: { + splitEnabled: true, + newClient: prismaNew, + legacyReplica: prisma, + }, + }); + + const friendlyIds = await runsRepository.listFriendlyRunIds({ + page: { size: 10 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + + expect(friendlyIds.every((f) => typeof f === "string")).toBe(true); + expect([...friendlyIds].sort()).toEqual(["run_fLegacy", "run_fNew"]); + // id projected away: a friendlyId is never a run internal id. + expect(friendlyIds).not.toContain(legacy.id); + } finally { + await prismaNew.$disconnect(); + } + } + ); + + // Full-keyset walk over interleaved cuid + ksuid ids: hydration must preserve the ClickHouse + // (created_at DESC, run_id DESC) order across the id-space seam. A hydrate that reverts to lexical + // `id desc` splits the two id-spaces into separate blocks, so it would fail this walk. + replicationContainerTest( + "paginating the full keyset enumerates every interleaved cuid/ksuid id once, in CH keyset order, with no empty page", + async ({ clickhouseContainer, redisOptions, postgresContainer, prisma }) => { + const { clickhouse } = await setupClickhouseReplication({ + prisma, + databaseUrl: postgresContainer.getConnectionUri(), + clickhouseUrl: clickhouseContainer.getConnectionUrl(), + redisOptions, + }); + + const ctx = await seedParents(prisma, "keysetwalk"); + + // cuid-shaped ids (25 chars, "c" prefix) and ksuid-shaped ids (27 chars, "2" prefix). 
Lexical + // `id desc` groups all "c" ids ahead of all "2" ids; the created_at order below interleaves + // them, so the two orders genuinely differ across the seam. + const cuid = (n: number) => `c${String(n).padStart(24, "0")}`; + const ksuid = (n: number) => `2${String(n).padStart(26, "0")}`; + + // created_at DESC order (index 0 = most recent) interleaves the id-spaces: ksuid, cuid, + // ksuid, cuid, ksuid, cuid. + const now = Date.now(); + const seeds = [ + { id: ksuid(6), friendlyId: "run_k6", createdAt: new Date(now - 0 * 60_000) }, + { id: cuid(5), friendlyId: "run_c5", createdAt: new Date(now - 1 * 60_000) }, + { id: ksuid(4), friendlyId: "run_k4", createdAt: new Date(now - 2 * 60_000) }, + { id: cuid(3), friendlyId: "run_c3", createdAt: new Date(now - 3 * 60_000) }, + { id: ksuid(2), friendlyId: "run_k2", createdAt: new Date(now - 4 * 60_000) }, + { id: cuid(1), friendlyId: "run_c1", createdAt: new Date(now - 5 * 60_000) }, + ]; + for (const s of seeds) { + await createRun(prisma, ctx, s); + } + + await setTimeout(1500); + + const runsRepository = new RunsRepository({ prisma, clickhouse }); + + // The authoritative order the hydrate must reproduce: exactly the CH keyset the id-list scan + // returns (created_at DESC, run_id DESC). Lexical id-desc of the same ids differs from this. + const chOrder = await runsRepository.listRunIds({ + page: { size: 100 }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + const expectedOrder = chOrder.runIds; + const lexicalIdDesc = [...expectedOrder].sort((a, b) => (a < b ? 1 : a > b ? -1 : 0)); + expect(expectedOrder).not.toEqual(lexicalIdDesc); // the seam actually separates the two orders + + // Walk the whole keyset a page at a time. 
+ const walked: string[] = []; + let cursor: string | undefined; + let pages = 0; + while (true) { + const { runs, pagination } = await runsRepository.listRuns({ + page: { size: 2, cursor }, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + organizationId: ctx.organizationId, + }); + pages++; + expect(pages).toBeLessThan(20); // guard against a non-terminating walk + + for (const r of runs) walked.push(r.id); + + if (!pagination.nextCursor) break; + // No empty page may be returned while more pages exist. + expect(runs.length).toBeGreaterThan(0); + cursor = pagination.nextCursor; + } + + // Every seeded id enumerated exactly once. + expect(walked.slice().sort()).toEqual(seeds.map((s) => s.id).sort()); + expect(new Set(walked).size).toBe(seeds.length); + // The emitted order equals the CH keyset order across the id-space seam. + expect(walked).toEqual(expectedOrder); + } + ); +}); diff --git a/apps/webapp/test/runsRepositoryCpres.test.ts b/apps/webapp/test/runsRepositoryCpres.test.ts new file mode 100644 index 00000000000..29627f79ccd --- /dev/null +++ b/apps/webapp/test/runsRepositoryCpres.test.ts @@ -0,0 +1,260 @@ +import { describe, expect, vi } from "vitest"; + +// The runsRepository module graph imports `~/v3/runStore.server`, which imports `~/db.server` +// at load. Stub it (the existing runsRepository.*.test.ts do the same) — the function under +// test is driven entirely through a RunStore built from the injected real containers, never +// the stubbed module singletons. 
+vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +import { heteroRunOpsPostgresTest } from "@internal/testcontainers"; +import { buildRunStore } from "~/v3/runStore.server"; +import type { RunOpsPrismaClient } from "@internal/run-ops-database"; +import type { PrismaClient } from "@trigger.dev/database"; +import { BulkActionId, RunId } from "@trigger.dev/core/v3/isomorphic"; +import { convertRunListInputOptionsToFilterRunsOptions } from "~/services/runsRepository/runsRepository.server"; + +vi.setConfig({ testTimeout: 90_000 }); + +type SeedContext = { + organizationId: string; + projectId: string; + environmentId: string; +}; + +/** Seeds org/project/env parents on the control-plane client and resolves to their ids. */ +async function seedParents(prisma: PrismaClient, slug: string): Promise<SeedContext> { + const organization = await prisma.organization.create({ + data: { title: `org-${slug}`, slug: `org-${slug}` }, + }); + const project = await prisma.project.create({ + data: { + name: `proj-${slug}`, + slug: `proj-${slug}`, + organizationId: organization.id, + externalRef: `proj-${slug}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `env-${slug}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `tr_dev_${slug}`, + pkApiKey: `pk_dev_${slug}`, + shortcode: `sc-${slug}`, + }, + }); + + return { + organizationId: organization.id, + projectId: project.id, + environmentId: runtimeEnvironment.id, + }; +} + +/** A batch on the NEW (dedicated run-ops) DB — the residency the single control-plane client silently missed. */ +async function seedNewBatch( + prisma: RunOpsPrismaClient, + friendlyId: string, + runtimeEnvironmentId: string +) { + return prisma.batchTaskRun.create({ data: { friendlyId, runtimeEnvironmentId } }); +} + +/** A batch on the LEGACY (control-plane) DB. 
*/ +async function seedLegacyBatch( + prisma: PrismaClient, + friendlyId: string, + runtimeEnvironmentId: string +) { + return prisma.batchTaskRun.create({ data: { friendlyId, runtimeEnvironmentId } }); +} + +async function seedSchedule(prisma: PrismaClient, friendlyId: string, projectId: string) { + return prisma.taskSchedule.create({ + data: { friendlyId, projectId, taskIdentifier: "my-task", generatorExpression: "* * * * *" }, + }); +} + +describe("convertRunListInputOptionsToFilterRunsOptions cross-DB filter resolution (control-plane + run-ops)", () => { + // --- A NEW-resident batch must resolve via the store's NEW->LEGACY probe --- + // Previously the single control-plane client missed it, leaving the friendlyId in the + // ClickHouse `batch_id` filter -> zero runs. Schedule (control-plane) resolves off prisma14. + heteroRunOpsPostgresTest( + "split: a NEW-resident batch resolves via the run-ops store; schedule resolves on control-plane", + async ({ prisma14, prisma17 }) => { + const ctx = await seedParents(prisma14, "test1"); + + const batch = await seedNewBatch(prisma17, "batch_test1", ctx.environmentId); + const schedule = await seedSchedule(prisma14, "sched_test1", ctx.projectId); + + const store = buildRunStore({ + splitEnabled: true, + newWriter: prisma17, + newReplica: prisma17, + legacyWriter: prisma14, + legacyReplica: prisma14, + singleWriter: prisma14, + singleReplica: prisma14, + }); + + const result = await convertRunListInputOptionsToFilterRunsOptions( + { + organizationId: ctx.organizationId, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + batchId: "batch_test1", + scheduleId: "sched_test1", + }, + prisma14, // control-plane client (used for the schedule lookup) + store + ); + + expect(result.batchId).toBe(batch.id); + expect(result.scheduleId).toBe(schedule.id); + } + ); + + // --- A LEGACY-resident batch still resolves via the NEW->LEGACY fallback --- + heteroRunOpsPostgresTest( + "split: a LEGACY-resident batch resolves via 
the store's legacy fallback", + async ({ prisma14, prisma17 }) => { + const ctx = await seedParents(prisma14, "test2"); + + const batch = await seedLegacyBatch(prisma14, "batch_test2", ctx.environmentId); + + const store = buildRunStore({ + splitEnabled: true, + newWriter: prisma17, + newReplica: prisma17, + legacyWriter: prisma14, + legacyReplica: prisma14, + singleWriter: prisma14, + singleReplica: prisma14, + }); + + const result = await convertRunListInputOptionsToFilterRunsOptions( + { + organizationId: ctx.organizationId, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + batchId: "batch_test2", + }, + prisma14, + store + ); + + expect(result.batchId).toBe(batch.id); + } + ); + + // --- An unknown batch friendlyId is retained unchanged (no spurious match) --- + heteroRunOpsPostgresTest( + "split: an unknown batch friendlyId is retained (resolves on neither DB)", + async ({ prisma14, prisma17 }) => { + const ctx = await seedParents(prisma14, "test2b"); + + const store = buildRunStore({ + splitEnabled: true, + newWriter: prisma17, + newReplica: prisma17, + legacyWriter: prisma14, + legacyReplica: prisma14, + singleWriter: prisma14, + singleReplica: prisma14, + }); + + const result = await convertRunListInputOptionsToFilterRunsOptions( + { + organizationId: ctx.organizationId, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + batchId: "batch_missing", + }, + prisma14, + store + ); + + expect(result.batchId).toBe("batch_missing"); + } + ); + + // --- Single-DB passthrough: a passthrough store resolves the batch off the one client --- + heteroRunOpsPostgresTest( + "single-DB passthrough: the batch + schedule resolve off the one client", + async ({ prisma14 }) => { + const ctx = await seedParents(prisma14, "test3"); + const batch = await seedLegacyBatch(prisma14, "batch_test3", ctx.environmentId); + const schedule = await seedSchedule(prisma14, "sched_test3", ctx.projectId); + + const store = buildRunStore({ + splitEnabled: false, + 
singleWriter: prisma14, + singleReplica: prisma14, + }); + + const result = await convertRunListInputOptionsToFilterRunsOptions( + { + organizationId: ctx.organizationId, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + batchId: "batch_test3", + scheduleId: "sched_test3", + }, + prisma14, + store + ); + + expect(result.batchId).toBe(batch.id); + expect(result.scheduleId).toBe(schedule.id); + } + ); + + // --- Pure-conversion non-regression (period, bulkId, runId, rootOnly) --- + heteroRunOpsPostgresTest( + "pure conversions unchanged: period, bulkId, runId, rootOnly in a single-DB call", + async ({ prisma14 }) => { + const ctx = await seedParents(prisma14, "test4"); + const batch = await seedLegacyBatch(prisma14, "batch_test4", ctx.environmentId); + + const bulkFriendly = BulkActionId.generate().friendlyId; // real "bulk_..." friendlyId + const internalRunId = RunId.generate().id; // internal id to be converted to a friendlyId + + const store = buildRunStore({ + splitEnabled: false, + singleWriter: prisma14, + singleReplica: prisma14, + }); + + const result = await convertRunListInputOptionsToFilterRunsOptions( + { + organizationId: ctx.organizationId, + projectId: ctx.projectId, + environmentId: ctx.environmentId, + period: "1h", + bulkId: bulkFriendly, + runId: [internalRunId], + batchId: "batch_test4", + rootOnly: true, + }, + prisma14, + store + ); + + // period: "1h" -> 3600000 ms via parseDuration. + expect(result.period).toBe(3600000); + // bulkId: round-tripped through BulkActionId.toId. + expect(result.bulkId).toBe(BulkActionId.toId(bulkFriendly)); + // runId: each element mapped via RunId.toFriendlyId. + expect(result.runId).toEqual([RunId.toFriendlyId(internalRunId)]); + // batchId still resolved off the single client. + expect(result.batchId).toBe(batch.id); + // rootOnly forced false because batchId/runId are present (even though caller passed true). 
+ expect(result.rootOnly).toBe(false); + } + ); +}); diff --git a/apps/webapp/test/sessions.readthrough.test.ts b/apps/webapp/test/sessions.readthrough.test.ts new file mode 100644 index 00000000000..6496baeb16f --- /dev/null +++ b/apps/webapp/test/sessions.readthrough.test.ts @@ -0,0 +1,347 @@ +import { describe, expect, vi } from "vitest"; + +// The sessions.server module graph imports `~/db.server` (and the run-store +// singleton) at load. Stub `~/db.server` so importing the module under test does +// not construct the real boot clients — the serializer is driven entirely through +// an explicitly injected RunStore built from the real test containers. +vi.mock("~/db.server", () => ({ + prisma: {}, + $replica: {}, +})); + +import { heteroRunOpsPostgresTest, postgresTest } from "@internal/testcontainers"; +import { buildRunStore } from "~/v3/runStore.server"; +import { generateKsuidId } from "@trigger.dev/core/v3/isomorphic"; +import type { RunOpsPrismaClient } from "@internal/run-ops-database"; +import type { PrismaClient } from "@trigger.dev/database"; +import { + resolveSessionByIdOrExternalId, + serializeSessionsWithFriendlyRunIds, + serializeSessionWithFriendlyRunId, +} from "~/services/realtime/sessions.server"; + +vi.setConfig({ testTimeout: 90_000 }); + +type SeedContext = { + organizationId: string; + projectId: string; + environmentId: string; +}; + +/** + * Creates the org/project/env parents on the control-plane client. `Session` + * and the legacy `TaskRun` both need these FK parents; the dedicated run-ops + * schema (`prisma17`) is FK-free, so NEW runs only need the scalar tenant ids. 
+ */ +async function seedParents(prisma: PrismaClient, slug: string): Promise<SeedContext> { + const organization = await prisma.organization.create({ + data: { title: `org-${slug}`, slug: `org-${slug}` }, + }); + const project = await prisma.project.create({ + data: { + name: `proj-${slug}`, + slug: `proj-${slug}`, + organizationId: organization.id, + externalRef: `proj-${slug}`, + }, + }); + const runtimeEnvironment = await prisma.runtimeEnvironment.create({ + data: { + slug: `env-${slug}`, + type: "DEVELOPMENT", + projectId: project.id, + organizationId: organization.id, + apiKey: `tr_dev_${slug}`, + pkApiKey: `pk_dev_${slug}`, + shortcode: `sc-${slug}`, + }, + }); + + return { + organizationId: organization.id, + projectId: project.id, + environmentId: runtimeEnvironment.id, + }; +} + +/** + * Create a legacy (control-plane) TaskRun. A default cuid id classifies LEGACY. + */ +async function createLegacyRun( + prisma: PrismaClient, + ctx: SeedContext, + run: { friendlyId: string } +) { + return prisma.taskRun.create({ + data: { + friendlyId: run.friendlyId, + taskIdentifier: "my-task", + status: "PENDING", + payload: JSON.stringify({ foo: run.friendlyId }), + traceId: run.friendlyId, + spanId: run.friendlyId, + queue: "test", + runtimeEnvironmentId: ctx.environmentId, + projectId: ctx.projectId, + organizationId: ctx.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); +} + +/** + * Create a NEW (dedicated run-ops) TaskRun with a ksuid id — classifies NEW and + * lives only on the run-ops DB. Scalar tenant columns only (the subset schema is + * FK-free, so no org/project/env rows are required here). 
+ */ +async function createNewRun( + prisma: RunOpsPrismaClient, + ctx: SeedContext, + run: { friendlyId: string; id: string } +) { + return prisma.taskRun.create({ + data: { + id: run.id, + friendlyId: run.friendlyId, + taskIdentifier: "my-task", + status: "PENDING", + payload: JSON.stringify({ foo: run.friendlyId }), + traceId: run.friendlyId, + spanId: run.friendlyId, + queue: "test", + runtimeEnvironmentId: ctx.environmentId, + projectId: ctx.projectId, + organizationId: ctx.organizationId, + environmentType: "DEVELOPMENT", + engine: "V2", + }, + }); +} + +async function createSession( + prisma: PrismaClient, + ctx: SeedContext, + session: { friendlyId: string; externalId?: string; currentRunId?: string | null } +) { + return prisma.session.create({ + data: { + friendlyId: session.friendlyId, + externalId: session.externalId, + type: "chat", + projectId: ctx.projectId, + runtimeEnvironmentId: ctx.environmentId, + environmentType: "DEVELOPMENT", + organizationId: ctx.organizationId, + taskIdentifier: "my-task", + triggerConfig: {}, + currentRunId: session.currentRunId ?? 
null, + }, + }); +} + +describe("sessions serializer currentRunId resolution", () => { + // --- Passthrough single-run (single-DB) --- + postgresTest( + "single-run passthrough resolves currentRunId -> friendlyId; null stays null", + async ({ prisma }) => { + const ctx = await seedParents(prisma as PrismaClient, "single-pass"); + const run = await createLegacyRun(prisma as PrismaClient, ctx, { friendlyId: "run_single" }); + const session = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_single", + currentRunId: run.id, + }); + const nullSession = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_null", + currentRunId: null, + }); + + const store = buildRunStore({ + splitEnabled: false, + singleWriter: prisma as PrismaClient, + singleReplica: prisma as PrismaClient, + }); + + const item = await serializeSessionWithFriendlyRunId(session, store); + expect(item.currentRunId).toBe("run_single"); + + const nullItem = await serializeSessionWithFriendlyRunId(nullSession, store); + expect(nullItem.currentRunId).toBeNull(); + } + ); + + // --- Passthrough batched (single-DB) + tenant scope --- + postgresTest( + "batched passthrough resolves each currentRunId; null stays null; cross-env is dropped", + async ({ prisma }) => { + const ctx = await seedParents(prisma as PrismaClient, "batch-pass"); + const otherCtx = await seedParents(prisma as PrismaClient, "batch-pass-other"); + + const runA = await createLegacyRun(prisma as PrismaClient, ctx, { friendlyId: "run_A" }); + const runB = await createLegacyRun(prisma as PrismaClient, ctx, { friendlyId: "run_B" }); + // A run in a DIFFERENT env — pointer must not resolve under our scope. 
+ const crossEnvRun = await createLegacyRun(prisma as PrismaClient, otherCtx, { + friendlyId: "run_cross", + }); + + const sessionA = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_A", + currentRunId: runA.id, + }); + const sessionB = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_B", + currentRunId: runB.id, + }); + const sessionNull = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_n", + currentRunId: null, + }); + const sessionCross = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_x", + currentRunId: crossEnvRun.id, + }); + + const store = buildRunStore({ + splitEnabled: false, + singleWriter: prisma as PrismaClient, + singleReplica: prisma as PrismaClient, + }); + + const items = await serializeSessionsWithFriendlyRunIds( + [sessionA, sessionB, sessionNull, sessionCross], + { projectId: ctx.projectId, runtimeEnvironmentId: ctx.environmentId }, + store + ); + + const byFriendly = new Map(items.map((i) => [i.id, i.currentRunId])); + expect(byFriendly.get("session_A")).toBe("run_A"); + expect(byFriendly.get("session_B")).toBe("run_B"); + expect(byFriendly.get("session_n")).toBeNull(); + // cross-env run exists, but the tenant-scoped find drops it -> null. 
+ expect(byFriendly.get("session_x")).toBeNull(); + } + ); + + // --- Control-plane Session resolve is not routed --- + postgresTest( + "resolveSessionByIdOrExternalId resolves the Session row by friendlyId and by externalId", + async ({ prisma }) => { + const ctx = await seedParents(prisma as PrismaClient, "controlplane"); + const session = await createSession(prisma as PrismaClient, ctx, { + friendlyId: "session_cp", + externalId: "ext-cp-1", + currentRunId: null, + }); + + const byFriendly = await resolveSessionByIdOrExternalId( + prisma as PrismaClient, + ctx.environmentId, + session.friendlyId + ); + expect(byFriendly?.id).toBe(session.id); + + const byExternal = await resolveSessionByIdOrExternalId( + prisma as PrismaClient, + ctx.environmentId, + "ext-cp-1" + ); + expect(byExternal?.id).toBe(session.id); + } + ); + + // --- Split single-run across two physical DBs (the production-shaped break) --- + // ksuid (NEW-DB) session run must serialize a non-null friendlyId, and a cuid + // (LEGACY) run must still resolve — proving the asymmetry is gone. 
+  heteroRunOpsPostgresTest(
+    "split single-run resolves a NEW-ksuid run from the run-ops DB and a LEGACY-cuid run from control-plane",
+    async ({ prisma14, prisma17 }) => {
+      // Parents, Sessions and the legacy run are written through prisma14; only
+      // the ksuid run is written through prisma17.
+      const parents = await seedParents(prisma14, "split-single");
+
+      const ksuidRun = await createNewRun(prisma17, parents, {
+        friendlyId: "run_new",
+        id: generateKsuidId(),
+      });
+      const cuidRun = await createLegacyRun(prisma14, parents, { friendlyId: "run_legacy" });
+
+      const ksuidSession = await createSession(prisma14, parents, {
+        friendlyId: "session_new",
+        currentRunId: ksuidRun.id,
+      });
+      const cuidSession = await createSession(prisma14, parents, {
+        friendlyId: "session_legacy",
+        currentRunId: cuidRun.id,
+      });
+
+      const splitStore = buildRunStore({
+        splitEnabled: true,
+        newWriter: prisma17,
+        newReplica: prisma17,
+        legacyWriter: prisma14,
+        legacyReplica: prisma14,
+        singleWriter: prisma14,
+        singleReplica: prisma14,
+      });
+
+      // Serialize and assert one session at a time: NEW first, then LEGACY.
+      const expectations = [
+        [ksuidSession, "run_new"],
+        [cuidSession, "run_legacy"],
+      ] as const;
+      for (const [session, expectedRunFriendlyId] of expectations) {
+        const serialized = await serializeSessionWithFriendlyRunId(session, splitStore);
+        expect(serialized.currentRunId).toBe(expectedRunFriendlyId);
+      }
+    }
+  );
+
+  // --- Split batched — NEW + legacy union; null + cross-env dropped ---
+  heteroRunOpsPostgresTest(
+    "split batched resolves runs across NEW + legacy; null stays null; cross-env dropped",
+    async ({ prisma14, prisma17 }) => {
+      const parents = await seedParents(prisma14, "split-batch");
+      const foreignParents = await seedParents(prisma14, "split-batch-other");
+
+      const ksuidRun = await createNewRun(prisma17, parents, {
+        friendlyId: "run_bnew",
+        id: generateKsuidId(),
+      });
+      const cuidRun = await createLegacyRun(prisma14, parents, { friendlyId: "run_blegacy" });
+      // This run belongs to the other environment; the session pointing at it does not.
+      const foreignRun = await createLegacyRun(prisma14, foreignParents, {
+        friendlyId: "run_bcross",
+      });
+
+      const ksuidSession = await createSession(prisma14, parents, {
+        friendlyId: "session_bnew",
+        currentRunId: ksuidRun.id,
+      });
+      const cuidSession = await createSession(prisma14, parents, {
+        friendlyId: "session_blegacy",
+        currentRunId: cuidRun.id,
+      });
+      const runlessSession = await createSession(prisma14, parents, {
+        friendlyId: "session_bnull",
+        currentRunId: null,
+      });
+      const foreignRunSession = await createSession(prisma14, parents, {
+        friendlyId: "session_bcross",
+        currentRunId: foreignRun.id,
+      });
+
+      const splitStore = buildRunStore({
+        splitEnabled: true,
+        newWriter: prisma17,
+        newReplica: prisma17,
+        legacyWriter: prisma14,
+        legacyReplica: prisma14,
+        singleWriter: prisma14,
+        singleReplica: prisma14,
+      });
+
+      const sessions = [ksuidSession, cuidSession, runlessSession, foreignRunSession];
+      const scope = { projectId: parents.projectId, runtimeEnvironmentId: parents.environmentId };
+      const serialized = await serializeSessionsWithFriendlyRunIds(sessions, scope, splitStore);
+
+      const currentRunIdBySession = new Map(
+        serialized.map((item) => [item.id, item.currentRunId])
+      );
+      // Runs found in either database serialize to their friendlyId.
+      expect(currentRunIdBySession.get("session_bnew")).toBe("run_bnew");
+      expect(currentRunIdBySession.get("session_blegacy")).toBe("run_blegacy");
+      // A missing run pointer and a run from another environment both serialize as null.
+      expect(currentRunIdBySession.get("session_bnull")).toBeNull();
+      expect(currentRunIdBySession.get("session_bcross")).toBeNull();
+    }
+  );
+});
diff --git a/apps/webapp/test/setup.ts b/apps/webapp/test/setup.ts
index 607ad78f3a9..5824c15b900 100644
--- a/apps/webapp/test/setup.ts
+++ b/apps/webapp/test/setup.ts
@@ -2,5 +2,148 @@
 // EnvironmentSchema.parse(process.env) succeeds in vitest workers.
 import { config } from "dotenv";
 import path from "node:path";
+import { vi } from "vitest";
+import type * as IORedisModule from "ioredis";
+import type * as TaskMetadataCacheModule from "~/services/taskMetadataCache.server";
 
 config({ path: path.resolve(__dirname, "../.env") });
+
+// CI has no .env and no REDIS_HOST/REDIS_PORT, so import-time guards like
+// autoIncrementCounter.server.ts throw and their suites fail to collect. Default
+// the pair — the ioredis mock below forces lazyConnect, so nothing ever dials.
+process.env.REDIS_HOST ??= "localhost";
+process.env.REDIS_PORT ??= "6379";
+
+// Worker singletons construct a RedisWorker at import time whose ioredis client
+// connects eagerly, so any test importing the service graph opens real Redis
+// connections on import — which floods and fails in CI (no Redis). Mock them to
+// no-op stubs. Only the worker modules are mocked, never the run store
+// (~/v3/runStore.server, ~/db.server), which store-routing tests need real.
+/** Builds a fresh set of `vi.fn()` stubs; each mocked worker module gets its own instance. */
+function createWorkerStub() {
+  return {
+    start: vi.fn(),
+    stop: vi.fn(),
+    enqueue: vi.fn().mockResolvedValue(undefined),
+    enqueueOnce: vi.fn().mockResolvedValue(undefined),
+    reschedule: vi.fn().mockResolvedValue(undefined),
+    cancel: vi.fn().mockResolvedValue(undefined),
+    ack: vi.fn().mockResolvedValue(undefined),
+  };
+}
+
+vi.mock("~/v3/commonWorker.server", () => ({ commonWorker: createWorkerStub() }));
+vi.mock("~/v3/batchTriggerWorker.server", () => ({ batchTriggerWorker: createWorkerStub() }));
+vi.mock("~/v3/legacyRunEngineWorker.server", () => ({
+  legacyRunEngineWorker: createWorkerStub(),
+}));
+vi.mock("~/v3/alertsWorker.server", () => ({ alertsWorker: createWorkerStub() }));
+
+// RunEngine, MarQS, devPubSub and the socket.io server are further singletons
+// that open eager ioredis connections at import via the same pattern. No test
+// uses these app-level singletons directly (store-routing tests build their own
+// engine and run store), so stub them to no-op proxies.
+// Recursive no-op proxy: property access at any depth returns another callable
+// no-op proxy, so real service tests reaching nested singleton methods (e.g.
+// engine.runQueue.updateEnvConcurrencyLimits) don't break on an intermediate stub.
+// Type arguments are spelled out so the alias compiles: calling the stub resolves
+// to `undefined`, and any string key may be read from it.
+type NoopProxyFn = ((...args: unknown[]) => Promise<undefined>) & Record<string, unknown>;
+
+/** Returns a callable proxy whose every call resolves to `undefined` and whose every property is another such proxy. */
+const noopProxy = (): NoopProxyFn => {
+  const fn = () => Promise.resolve(undefined);
+  return new Proxy(fn, {
+    // `then` must read as undefined, otherwise awaiting (or returning from an async
+    // function) a proxy would treat it as a thenable and never settle.
+    get: (_target, prop) => (prop === "then" ? undefined : noopProxy()),
+    apply: () => Promise.resolve(undefined),
+  }) as unknown as NoopProxyFn;
+};
+
+// Beyond the modules mocked above, dozens more app modules construct an
+// ioredis client at import time pointed at env-configured Redis, and ioredis
+// dials on construction — in CI (no Redis service) that floods ECONNREFUSED at
+// shard scale. Force `lazyConnect: true` on every client instead: import-time
+// singletons construct but never dial, while anything that actually issues a
+// command (tests against live testcontainers) connects on first command
+// exactly as before.
+vi.mock("ioredis", async (importOriginal) => {
+  // The type argument gives `actual` the real module's shape; without it the
+  // result is `unknown` and `actual.Redis` / `actual.Cluster` do not type-check.
+  const actual = await importOriginal<typeof IORedisModule>();
+
+  // Normalize ioredis's overloaded ctor args — (), (port), (path),
+  // (port, host), (opts), (port, opts), (port, host, opts), (path, opts) —
+  // so lazyConnect lands in the options object in every form.
+  function withLazyConnect(args: unknown[]): unknown[] {
+    if (args.length === 0) {
+      return [{ lazyConnect: true }];
+    }
+    const last = args[args.length - 1];
+    if (typeof last === "object" && last !== null) {
+      // Trailing options object: copy it and override lazyConnect.
+      return [...args.slice(0, -1), { ...last, lazyConnect: true }];
+    }
+    // Only positional port/host/path args: append an options object.
+    return [...args, { lazyConnect: true }];
+  }
+
+  class LazyRedis extends actual.Redis {
+    constructor(...args: unknown[]) {
+      // @ts-expect-error – forwarding ioredis's overloaded ctor args
+      super(...withLazyConnect(args));
+    }
+  }
+
+  class LazyCluster extends actual.Cluster {
+    constructor(startupNodes: unknown, options?: Record<string, unknown>) {
+      // @ts-expect-error – forwarding ioredis's ctor args
+      super(startupNodes, { ...options, lazyConnect: true });
+    }
+  }
+
+  // Keep the `Redis.Cluster` static alias (`new Redis.Cluster(...)`) working.
+  // The base class exposes `Cluster` as a getter-only static, so define our
+  // own property rather than assigning through the inherited getter.
+  Object.defineProperty(LazyRedis, "Cluster", { value: LazyCluster });
+
+  return {
+    ...actual,
+    default: LazyRedis,
+    Redis: LazyRedis,
+    Cluster: LazyCluster,
+  };
+});
+
+// alertsRateLimiter.check() is invoked at runtime by deliverAlert; against
+// env-configured Redis each check burns ~20 reconnect cycles before its
+// caught error, stalling alert-path tests into timeouts. Allow everything.
+vi.mock("~/v3/alertsRateLimiter.server", () => ({
+  alertsRateLimiter: { check: vi.fn().mockResolvedValue({ allowed: true }) },
+}));
+
+// tracePubSub.publish() runs inside eventRepository writes; each publish to
+// env-configured Redis stalls ~20 reconnect cycles (errors are allSettled-
+// swallowed but awaited), timing out any test that records trace events.
+vi.mock("~/v3/services/tracePubSub.server", async () => {
+  const { EventEmitter } = await import("node:events");
+  return {
+    tracePubSub: {
+      publish: vi.fn().mockResolvedValue(undefined),
+      subscribeToTrace: vi.fn().mockResolvedValue({
+        unsubscribe: vi.fn().mockResolvedValue(undefined),
+        eventEmitter: new EventEmitter(),
+      }),
+    },
+    TracePubSub: class {},
+  };
+});
+
+// Same runtime-stall shape for the task metadata cache (queues concern). CI
+// leaves TASK_META_CACHE_REDIS_HOST unset and gets the Noop implementation;
+// pin the Noop cache here so env-configured local runs behave identically.
+vi.mock("~/services/taskMetadataCacheInstance.server", async () => {
+  // Typed via the real module so `NoopTaskMetadataCache` is a known constructor.
+  const { NoopTaskMetadataCache } = await vi.importActual<typeof TaskMetadataCacheModule>(
+    "~/services/taskMetadataCache.server"
+  );
+  return { taskMetadataCacheInstance: new NoopTaskMetadataCache() };
+});
+
+vi.mock("~/v3/runEngine.server", () => ({ engine: noopProxy() }));
+vi.mock("~/v3/marqs/index.server", () => ({ marqs: noopProxy(), MarQS: class {} }));
+vi.mock("~/v3/marqs/devPubSub.server", () => ({ devPubSub: noopProxy() }));
+vi.mock("~/v3/handleSocketIo.server", () => ({
+  socketIo: noopProxy(),
+  roomFromFriendlyRunId: (id: string) => `room:${id}`,
+}));
diff --git a/apps/webapp/test/streamLoader.controlPlane.test.ts b/apps/webapp/test/streamLoader.controlPlane.test.ts
new file mode 100644
index 00000000000..372ecda31c7
--- /dev/null
+++ b/apps/webapp/test/streamLoader.controlPlane.test.ts
@@ -0,0 +1,119 @@
+// Dedicated run-ops proof for the run-detail realtime stream loader after dropping its cross-DB
+// control-plane include. The TaskRun scalar row lives on the dedicated run-ops client (PG17, subset
+// schema, no control-plane tables); env lives on PG14. The DB is never mocked; the .count() proof
+// shows the run does not exist on the control-plane side.
+import { heteroRunOpsPostgresTest } from "@internal/testcontainers";
+import type { RunOpsPrismaClient } from "@internal/run-ops-database";
+import type { PrismaClient } from "@trigger.dev/database";
+import { describe, expect, vi } from "vitest";
+import { ControlPlaneCache } from "~/v3/runOpsMigration/controlPlaneCache.server";
+import { ControlPlaneResolver } from "~/v3/runOpsMigration/controlPlaneResolver.server";
+import { resolveStreamBasin } from "~/services/realtime/v1StreamsGlobal.server";
+
+vi.setConfig({ testTimeout: 60_000, hookTimeout: 60_000 });
+
+// Counter shared by the seed helpers in this file; each call takes the next
+// value so generated slugs, keys and ids do not collide.
+let n = 0;
+
+/**
+ * Creates an organization, a project under it, and a PRODUCTION runtime
+ * environment under that project, all through the given client.
+ *
+ * @param prisma Client the three rows are written through.
+ * @returns The created `organization`, `project` and `environment` rows.
+ */
+async function seedControlPlane(prisma: PrismaClient) {
+  const s = n++;
+  const organization = await prisma.organization.create({
+    data: { title: `Org ${s}`, slug: `org-${s}`, streamBasinName: `basin-${s}` },
+  });
+  const project = await prisma.project.create({
+    data: {
+      name: `P ${s}`,
+      slug: `p-${s}`,
+      externalRef: `proj_${s}`,
+      organizationId: organization.id,
+    },
+  });
+  const environment = await prisma.runtimeEnvironment.create({
+    data: {
+      type: "PRODUCTION",
+      slug: `prod-${s}`,
+      projectId: project.id,
+      organizationId: organization.id,
+      apiKey: `tr_${s}`,
+      pkApiKey: `pk_${s}`,
+      shortcode: `sc_${s}`,
+    },
+  });
+  return { organization, project, environment };
+}
+
+// The run lives on the dedicated run-ops client; control-plane FKs are synthetic
+// scalar ids pointing at PG14 rows (the dedicated DB has no control-plane tables).
+/** Inserts one PENDING v1-streams TaskRun through the given client, with no per-run stream basin. */
+async function seedRunOpsRun(
+  prisma: RunOpsPrismaClient,
+  ctx: { organizationId: string; projectId: string; environmentId: string }
+) {
+  const seq = n++;
+  const { organizationId, projectId, environmentId } = ctx;
+
+  return prisma.taskRun.create({
+    data: {
+      friendlyId: `run_2abc${seq}defghijklmnopqrst`,
+      taskIdentifier: "my-task",
+      status: "PENDING",
+      payload: "{}",
+      payloadType: "application/json",
+      traceId: `trace_${seq}`,
+      spanId: `span_${seq}`,
+      queue: "task/my-task",
+      runtimeEnvironmentId: environmentId,
+      projectId,
+      organizationId,
+      environmentType: "PRODUCTION",
+      engine: "V2",
+      realtimeStreamsVersion: "v1",
+      streamBasinName: null,
+    },
+  });
+}
+
+describe("run-detail stream loader cross-DB read-through (dedicated run-ops client)", () => {
+  heteroRunOpsPostgresTest(
+    "run-ops scalars resolve from the dedicated run-ops DB; env (slug/org/basin) resolves from control-plane with no cross-join",
+    async ({ prisma14, prisma17 }) => {
+      // Cast once; every control-plane access below goes through this handle.
+      const controlPlane = prisma14 as unknown as PrismaClient;
+
+      const seeded = await seedControlPlane(controlPlane);
+      const seededRun = await seedRunOpsRun(prisma17, {
+        organizationId: seeded.organization.id,
+        projectId: seeded.project.id,
+        environmentId: seeded.environment.id,
+      });
+
+      // Step 1: the run's scalar columns come from the run-ops client alone.
+      const runRow = await prisma17.taskRun.findFirst({
+        where: { friendlyId: seededRun.friendlyId, projectId: seeded.project.id },
+        select: {
+          id: true,
+          friendlyId: true,
+          realtimeStreamsVersion: true,
+          streamBasinName: true,
+          runtimeEnvironmentId: true,
+          projectId: true,
+        },
+      });
+      expect(runRow).not.toBeNull();
+      expect(runRow!.friendlyId).toBe(seededRun.friendlyId);
+      expect(runRow!.runtimeEnvironmentId).toBe(seeded.environment.id);
+
+      // Step 2: the environment is resolved separately from the run's scalar id.
+      const resolver = new ControlPlaneResolver({
+        controlPlanePrimary: controlPlane,
+        controlPlaneReplica: controlPlane,
+        cache: new ControlPlaneCache(),
+        splitEnabled: () => false,
+      });
+      const resolvedEnv = await resolver.resolveAuthenticatedEnv(runRow!.runtimeEnvironmentId);
+      expect(resolvedEnv).not.toBeNull();
+      expect(resolvedEnv!.slug).toBe(seeded.environment.slug);
+      expect(resolvedEnv!.organization.id).toBe(seeded.organization.id);
+      expect(resolvedEnv!.organization.streamBasinName).toBe(seeded.organization.streamBasinName);
+
+      // Step 3: the run was seeded with a null basin, and the organization's basin is expected back.
+      const resolvedBasin = resolveStreamBasin({
+        run: { streamBasinName: runRow!.streamBasinName },
+        organization: { streamBasinName: resolvedEnv!.organization.streamBasinName },
+      });
+      expect(resolvedBasin).toBe(seeded.organization.streamBasinName);
+
+      // Inversion proof: no run on PG14 (control-plane).
+      const runsOnControlPlane = await controlPlane.taskRun.count();
+      expect(runsOnControlPlane).toBe(0);
+    }
+  );
+});