diff --git a/apps/typegpu-docs/src/content/docs/ecosystem/typegpu-radiance-cascades.mdx b/apps/typegpu-docs/src/content/docs/ecosystem/typegpu-radiance-cascades.mdx index 3390c92bf3..68732319ce 100644 --- a/apps/typegpu-docs/src/content/docs/ecosystem/typegpu-radiance-cascades.mdx +++ b/apps/typegpu-docs/src/content/docs/ecosystem/typegpu-radiance-cascades.mdx @@ -260,6 +260,6 @@ Most users only need `createRadianceCascades`. The package also exports lower-le | `defaultRayMarch` | The built-in ray marcher used by the runner | | `RayMarchResult` | Return struct for custom ray marchers | | `getCascadeDim` | Computes internal cascade texture dimensions | -| `sdfSlot`, `colorSlot`, `rayMarchSlot`, `sdfResolutionSlot` | Slots used by the internal cascade compute pass | +| `sdfSlot`, `colorSlot`, `rayMarchSlot`, `traceSegmentSlot`, `maxRayStepsAccess`, `rayMarchStepSafetyAccess` | Injection points used by the internal cascade compute pass | See the [package source](https://github.com/software-mansion/TypeGPU/tree/main/packages/typegpu-radiance-cascades/src) for details that are not yet covered here. diff --git a/apps/typegpu-docs/src/examples/rendering/radiance-cascades-drawing/index.ts b/apps/typegpu-docs/src/examples/rendering/radiance-cascades-drawing/index.ts index 9d317a16a5..f5e9b3340f 100644 --- a/apps/typegpu-docs/src/examples/rendering/radiance-cascades-drawing/index.ts +++ b/apps/typegpu-docs/src/examples/rendering/radiance-cascades-drawing/index.ts @@ -16,6 +16,7 @@ context.configure({ }); const [width, height] = [canvas.width, canvas.height]; +const initialBrushRadius = 0.015; // Scene texture + views. const sceneTexture = root @@ -47,7 +48,7 @@ const paramsUniform = root.createUniform(DrawParams, { isDrawing: 0, lastMousePos: d.vec2f(0.5), mousePos: d.vec2f(0.5), - brushRadius: 0.05, + brushRadius: initialBrushRadius, lightColor: d.vec3f(1, 0.9, 0.7), }); @@ -182,8 +183,10 @@ function drawScene() { function updateScene() { if (sceneDirty) { - floodRunner.run(); - radianceRunner.run(); + const encoder = root.device.createCommandEncoder(); + floodRunner.run(encoder); + radianceRunner.run(encoder); + root.device.queue.submit([encoder.finish()]); sceneDirty = false; } } @@ -220,8 +223,8 @@ function frame(timestamp: number) { export const controls = defineControls({ ...drawInteraction.controls, 'Brush Size': { - initial: 0.015, - min: 0.015, + initial: initialBrushRadius, + min: initialBrushRadius, max: 0.15, step: 0.015, onSliderChange(value: number) { diff --git a/apps/typegpu-docs/src/examples/rendering/radiance-cascades/index.ts b/apps/typegpu-docs/src/examples/rendering/radiance-cascades/index.ts index 2895fbc41e..b301d9e67b 100644 --- a/apps/typegpu-docs/src/examples/rendering/radiance-cascades/index.ts +++ b/apps/typegpu-docs/src/examples/rendering/radiance-cascades/index.ts @@ -106,6 +106,46 @@ const cascadeSampler = root.createSampler({ minFilter: 'linear', }); +const part1By1 = tgpu.fn( + [d.u32], + d.u32, +)((v) => { + 'use gpu'; + const x0 = v & 0x0000ffff; + const x1 = (x0 | (x0 << 8)) & 0x00ff00ff; + const x2 = (x1 | (x1 << 4)) & 0x0f0f0f0f; + const x3 = (x2 | (x2 << 2)) & 0x33333333; + return (x3 | (x3 << 1)) & 0x55555555; +}); + +const compact1By1 = tgpu.fn( + [d.u32], + d.u32, +)((v) => { + 'use gpu'; + const x0 = v & 0x55555555; + const x1 = (x0 ^ (x0 >> 1)) & 0x33333333; + const x2 = (x1 ^ (x1 >> 2)) & 0x0f0f0f0f; + const x3 = (x2 ^ (x2 >> 4)) & 0x00ff00ff; + return (x3 ^ (x3 >> 8)) & 0x0000ffff; +}); + +const morton2D = tgpu.fn( + [d.u32, d.u32], + d.u32, +)((x, y) => { + 'use gpu'; + return part1By1(x) | (part1By1(y) << 1); +}); + +const unmorton2D = tgpu.fn( + [d.u32], + d.vec2u, +)((index) => { + 'use gpu'; + return d.vec2u(compact1By1(index), compact1By1(index >> 1)); +}); + const cascadePassPipeline = root .with(sceneDataAccess, sceneDataUniform) .createGuardedComputePipeline((x, y) => { @@ -136,7 +176,7 @@ const cascadePassPipeline = root for (const i of tgpu.unroll(std.range(4))) { const dirActual = dirStored * 2 + d.vec2u(i & 1, i >> 1); - const rayIndex = d.f32(dirActual.y * raysDimActual + dirActual.x) + 0.5; + const rayIndex = d.f32(morton2D(dirActual.x, dirActual.y)) + 0.5; const angle = (rayIndex / d.f32(rayCountActual)) * (Math.PI * 2) - Math.PI; const rayDir = d.vec2f(std.cos(angle), -std.sin(angle)); @@ -304,7 +344,8 @@ const overlayFrag = tgpu.fragmentFn({ const rayDist = sdf.sdLine(uv, probePos, probePos + rayDir * std.max(rayEndDistance, 0.01)); if (rayDist < minRayDist) { - const dirStored = d.vec2u((ri % raysDimActual) >> 1, d.u32(ri / raysDimActual) >> 1); + const dirActual = unmorton2D(ri); + const dirStored = d.vec2u(dirActual.x >> 1, dirActual.y >> 1); const sample = std.textureLoad( overlayDebugBGL.$.cascadeTex, d.vec2i(dirStored * probes + probe), diff --git a/packages/typegpu-radiance-cascades/README.md b/packages/typegpu-radiance-cascades/README.md index 12cf297cd2..7cd06be0eb 100644 --- a/packages/typegpu-radiance-cascades/README.md +++ b/packages/typegpu-radiance-cascades/README.md @@ -26,12 +26,65 @@ const runner = createRadianceCascades({ runner.run(); ``` -## TypeGPU is created by Software Mansion +`run()` batches all cascade passes and the final radiance-field build into one +command buffer. Pass your own encoder to batch it with surrounding work: -[![swm](https://logo.swmansion.com/logo?color=white&variant=desktop&width=150&tag=typegpu-github 'Software Mansion')](https://swmansion.com) +```ts +const encoder = root.device.createCommandEncoder(); +runner.run(encoder); +root.device.queue.submit([encoder.finish()]); +``` + +## Scene contract + +The default marcher expects `sdf(uv)` to return a signed distance in +short-axis-normalized UV units: + +- positive outside geometry +- negative inside geometry +- zero on the blocking or emitting surface + +`color(uv)` should return linear radiance/emission. If the source texture is +sRGB-like, linearize it before returning from `color`. + +## Quality and memory options + +```ts +const runner = createRadianceCascades({ + root, + size: { width, height }, + sdfResolution, + sdf, + color, + + // Direction density in the base cascade. 2 matches the classic default. + baseStoredRayDim: 2, + + // hardware keeps the single filtered upper sample. bilinear-fix forks toward + // the four upper probes before merging. + mergeMode: 'bilinear-fix', + + // Two active 2D ping-pong cascade textures are used by default. Enable this + // only when you need to inspect every cascade layer after a run. + keepCascadeLayers: false, + + erodeBiasPx: 1, + epsPx: 0.25, + minStepPx: 0.125, + maxRaySteps: 64, + stepSafety: 1, + intervalOverlapPx: 0, +}); +``` + +Available merge modes are: + +- `'hardware'` +- `'bilinear-fix'` + +`intervalOverlapPx` may also be set to `'upperProbeSpacing'` to use each +layer's upper-probe spacing as the overlap amount. -Since 2012 [Software Mansion](https://swmansion.com) is a software agency with -experience in building web and mobile apps. We are Core React Native -Contributors and experts in dealing with all kinds of React Native issues. We -can help you build your next dream product – -[Hire us](https://swmansion.com/contact/projects?utm_source=typegpu&utm_medium=readme). +If you provide an output texture view, `runner.output` is that view. When the +runner owns or can prove it has a sampled texture, `runner.outputTexture` is +also available for creating sampled views. diff --git a/packages/typegpu-radiance-cascades/src/cascades.ts b/packages/typegpu-radiance-cascades/src/cascades.ts index d63217faaf..d08d6f1a1f 100644 --- a/packages/typegpu-radiance-cascades/src/cascades.ts +++ b/packages/typegpu-radiance-cascades/src/cascades.ts @@ -1,44 +1,141 @@ -import tgpu, { std, d } from 'typegpu'; +import tgpu, { d, std } from 'typegpu'; + +export const PREAVERAGE_RAY_DIM = 2; +export const PREAVERAGE_RAY_COUNT = PREAVERAGE_RAY_DIM ** 2; + +export const MERGE_MODE_HARDWARE = 0; +export const MERGE_MODE_BILINEAR_FIX = 1; + +const F32_MAX = 3.40282346e38; + +export type MergeMode = 'hardware' | 'bilinear-fix'; + +export type BaseStoredRayDim = 1 | 2 | 4; + +export type CascadeInfoOptions = { + baseStoredRayDim?: BaseStoredRayDim; + minCascades?: number; +}; + +export type CascadeLayerInfo = { + layer: number; + probes: [number, number]; + probesU: [number, number]; + validDim: [number, number]; + raysDimStored: number; + raysDimActual: number; + startUv: number; + endUv: number; +}; + +export type CascadeInfo = { + baseProbes: [number, number]; + cascadeDim: [number, number]; + cascadeCount: number; + baseStoredRayDim: BaseStoredRayDim; + interval0: number; + maxRange: number; + layers: CascadeLayerInfo[]; +}; + +const MIN_BASE_PROBES = 16; +const DEFAULT_MIN_CASCADES = 5; + +function assertPositiveSize(width: number, height: number) { + if (!(width > 0) || !(height > 0)) { + throw new Error('Radiance cascade size must be positive.'); + } +} + +function assertBaseStoredRayDim(value: number): asserts value is BaseStoredRayDim { + if (value !== 1 && value !== 2 && value !== 4) { + throw new Error('baseStoredRayDim must be 1, 2, or 4.'); + } +} -const ERODE_BIAS = 2; +function shrByPow2(value: number, shift: number) { + return Math.max(Math.floor(value / 2 ** shift), 1); +} + +export function getCascadeInfo( + width: number, + height: number, + options: CascadeInfoOptions = {}, +): CascadeInfo { + assertPositiveSize(width, height); + + const baseStoredRayDim = options.baseStoredRayDim ?? 2; + assertBaseStoredRayDim(baseStoredRayDim); -export function getCascadeDim(width: number, height: number) { const aspect = width / height; - const diagonal = Math.sqrt(width ** 2 + height ** 2); - - const minPow2 = 16; - const closestPowerOfTwo = Math.max(minPow2, 2 ** Math.floor(Math.log2(diagonal))); - - let cascadeWidth: number; - let cascadeHeight: number; - if (aspect >= 1) { - cascadeWidth = closestPowerOfTwo; - cascadeHeight = Math.max(minPow2, Math.round(closestPowerOfTwo / aspect)); - } else { - cascadeWidth = Math.max(minPow2, Math.round(closestPowerOfTwo * aspect)); - cascadeHeight = closestPowerOfTwo; - } + const diagonal = Math.hypot(width, height); + + const closestPowerOfTwo = Math.max(MIN_BASE_PROBES, 2 ** Math.floor(Math.log2(diagonal))); - const cascadeDimX = cascadeWidth * 2; - const cascadeDimY = cascadeHeight * 2; + const [baseProbesX, baseProbesY] = + aspect >= 1 + ? [closestPowerOfTwo, Math.max(MIN_BASE_PROBES, Math.round(closestPowerOfTwo / aspect))] + : [Math.max(MIN_BASE_PROBES, Math.round(closestPowerOfTwo * aspect)), closestPowerOfTwo]; - const interval = 1 / closestPowerOfTwo; - const maxIntervalStart = 2.0; + const baseProbesMin = Math.min(baseProbesX, baseProbesY); + const interval0 = 1 / baseProbesMin; + const maxRange = diagonal / Math.min(width, height); - const minCascades = 5; - const cascadeAmount = Math.max( - minCascades, - Math.ceil(Math.log2((maxIntervalStart * 3) / interval + 1) / 2), + const cascadeCount = Math.max( + options.minCascades ?? DEFAULT_MIN_CASCADES, + Math.ceil(Math.log2((maxRange * 3) / interval0 + 1) / 2), ); - return [cascadeDimX, cascadeDimY, cascadeAmount] as const; + const maxStoredRayDim = baseStoredRayDim * 2 ** (cascadeCount - 1); + const cascadeDimX = Math.max(baseProbesX * baseStoredRayDim, maxStoredRayDim); + const cascadeDimY = Math.max(baseProbesY * baseStoredRayDim, maxStoredRayDim); + + const layers = Array.from({ length: cascadeCount }, (_, layer): CascadeLayerInfo => { + const probesX = shrByPow2(baseProbesX, layer); + const probesY = shrByPow2(baseProbesY, layer); + const probesUX = shrByPow2(baseProbesX, layer + 1); + const probesUY = shrByPow2(baseProbesY, layer + 1); + const raysDimStored = baseStoredRayDim * 2 ** layer; + const raysDimActual = raysDimStored * PREAVERAGE_RAY_DIM; + const pow4 = 4 ** layer; + const startUv = (interval0 * (pow4 - 1)) / 3; + const endUv = startUv + interval0 * pow4; + + return { + layer, + probes: [probesX, probesY], + probesU: [probesUX, probesUY], + validDim: [ + Math.min(cascadeDimX, probesX * raysDimStored), + Math.min(cascadeDimY, probesY * raysDimStored), + ], + raysDimStored, + raysDimActual, + startUv, + endUv, + }; + }); + + return { + baseProbes: [baseProbesX, baseProbesY], + cascadeDim: [cascadeDimX, cascadeDimY], + cascadeCount, + baseStoredRayDim, + interval0, + maxRange, + layers, + }; +} + +export function getCascadeDim(width: number, height: number, options: CascadeInfoOptions = {}) { + const info = getCascadeInfo(width, height, options); + return [...info.cascadeDim, info.cascadeCount] as const; } export const sdfSlot = tgpu.slot<(uv: d.v2f) => number>(); export const colorSlot = tgpu.slot<(uv: d.v2f) => d.v3f>(); - -// Slot for SDF resolution to calculate proper texel-based eps/minStep (so we don't do redundant sub-texel steps) -export const sdfResolutionSlot = tgpu.slot(); +export const maxRayStepsAccess = tgpu.accessor(d.u32, 64); +export const rayMarchStepSafetyAccess = tgpu.accessor(d.f32, 1); export const RayMarchResult = d.struct({ color: d.vec3f, @@ -50,13 +147,9 @@ export const defaultRayMarch = tgpu.fn( RayMarchResult, )((probePos, rayDir, startT, endT, eps, minStep, bias) => { 'use gpu'; - let rgb = d.vec3f(); - let T = d.f32(1); let t = startT; - let hitPos = d.vec2f(); - let didHit = false; - for (let step = 0; step < 64; step++) { + for (let step = d.u32(); step < maxRayStepsAccess.$; step++) { if (t > endT) { break; } @@ -65,179 +158,416 @@ export const defaultRayMarch = tgpu.fn( break; } - const dist = std.max(sdfSlot.$(pos) + bias, 0); - if (dist <= eps) { - hitPos = d.vec2f(pos); - didHit = true; - T = 0; - break; + const signedDist = sdfSlot.$(pos); + const hitDist = signedDist + bias; + if (hitDist <= eps) { + return RayMarchResult({ color: colorSlot.$(pos), transmittance: 0 }); } - t += std.max(dist, minStep); + t += std.max(std.max(signedDist, 0) * rayMarchStepSafetyAccess.$, minStep); } - if (didHit) { - rgb = colorSlot.$(hitPos); - } - - return RayMarchResult({ color: rgb, transmittance: T }); + return RayMarchResult({ color: d.vec3f(), transmittance: 1 }); }); export const rayMarchSlot = tgpu.slot(defaultRayMarch); -export const CascadeStaticParams = d.struct({ - baseProbes: d.vec2u, - cascadeDim: d.vec2u, - cascadeCount: d.u32, +export const defaultTraceSegment = tgpu.fn( + [d.vec2f, d.vec2f, d.f32, d.f32, d.f32, d.f32], + RayMarchResult, +)((p0, p1, aspect, eps, minStep, bias) => { + 'use gpu'; + const delta = p1 - p0; + const metricDelta = std.select( + d.vec2f(delta.x, delta.y / aspect), + d.vec2f(delta.x * aspect, delta.y), + aspect >= 1, + ); + const endT = std.length(metricDelta); + + if (endT <= 0) { + return RayMarchResult({ color: d.vec3f(), transmittance: 1 }); + } + + return rayMarchSlot.$(p0, delta / endT, 0, endT, eps, minStep, bias); +}); + +export const traceSegmentSlot = tgpu.slot(defaultTraceSegment); + +export const CascadeLayerParams = d.struct({ + layer: d.u32, + probes: d.vec2u, + probesU: d.vec2u, + validDim: d.vec2u, + raysDimStored: d.u32, + raysDimActual: d.u32, + startUv: d.f32, + endUv: d.f32, + intervalOverlapUv: d.f32, }); export const cascadePassBGL = tgpu.bindGroupLayout({ - staticParams: { uniform: CascadeStaticParams }, - layer: { uniform: d.u32 }, + layerParams: { uniform: CascadeLayerParams }, upper: { texture: d.texture2d() }, upperSampler: { sampler: 'filtering' }, dst: { storageTexture: d.textureStorage2d('rgba16float') }, }); -export const cascadePassCompute = tgpu.computeFn({ - workgroupSize: [8, 8], - in: { gid: d.builtin.globalInvocationId }, -})(({ gid }) => { +export type CascadePassSpecialization = { + hasUpperCascade: boolean; + mergeModeId: number; + renderAspect: number; + epsUv: number; + minStepUv: number; + hitBiasUv: number; +}; + +export type BuildRadianceFieldSpecialization = { + baseStoredRayDim: BaseStoredRayDim; + cascadeProbes: [number, number]; +}; + +const rayBoxExitUv = tgpu.fn( + [d.vec2f, d.vec2f], + d.f32, +)((p, dir) => { 'use gpu'; - const dim2 = std.textureDimensions(cascadePassBGL.$.dst); - if (gid.x >= dim2.x || gid.y >= dim2.y) { - return; + let tx = d.f32(F32_MAX); + let ty = d.f32(F32_MAX); + + if (std.abs(dir.x) > 1e-6) { + tx = std.select(-p.x / dir.x, (1 - p.x) / dir.x, dir.x > 0); + } + + if (std.abs(dir.y) > 1e-6) { + ty = std.select(-p.y / dir.y, (1 - p.y) / dir.y, dir.y > 0); } - const params = cascadePassBGL.$.staticParams; - const layer = cascadePassBGL.$.layer; - const probes = std.max( - d.vec2u(params.baseProbes.x >> layer, params.baseProbes.y >> layer), - d.vec2u(1, 1), + return std.max(0, std.min(tx, ty)); +}); + +const part1By1 = tgpu.fn( + [d.u32], + d.u32, +)((v) => { + 'use gpu'; + const x0 = v & 0x0000ffff; + const x1 = (x0 | (x0 << 8)) & 0x00ff00ff; + const x2 = (x1 | (x1 << 4)) & 0x0f0f0f0f; + const x3 = (x2 | (x2 << 2)) & 0x33333333; + return (x3 | (x3 << 1)) & 0x55555555; +}); + +const morton2D = tgpu.fn( + [d.u32, d.u32], + d.u32, +)((x, y) => { + 'use gpu'; + return part1By1(x) | (part1By1(y) << 1); +}); + +const traceHardwareMergeRay = ( + dim2: d.v2u, + probePos: d.v2f, + rayDir: d.v2f, + dirActual: d.v2u, + probesU: d.v2u, + startUv: number, + clippedMarchEndUv: number, + marchEndUv: number, + exitUv: number, + eps: number, + minStep: number, + biasUv: number, +) => { + 'use gpu'; + const marchResult = rayMarchSlot.$( + probePos, + rayDir, + startUv, + clippedMarchEndUv, + eps, + minStep, + biasUv, ); - const dirStored = gid.xy / probes; - const probe = gid.xy % probes; - const raysDimStored = d.u32(2) << layer; - const raysDimActual = raysDimStored * 2; - const rayCountActual = d.f32(raysDimActual) ** 2; + if (marchResult.transmittance > 0.01 && exitUv > marchEndUv) { + const tileOrigin = d.vec2f(dirActual * probesU); + const probePixel = std.clamp(probePos * d.vec2f(probesU), d.vec2f(0.5), d.vec2f(probesU) - 0.5); + const uvU = (tileOrigin + probePixel) / d.vec2f(dim2); - if (dirStored.x >= raysDimStored || dirStored.y >= raysDimStored) { - std.textureStore(cascadePassBGL.$.dst, gid.xy, d.vec4f(0, 0, 0, 1)); - return; + const upper = std.textureSampleLevel( + cascadePassBGL.$.upper, + cascadePassBGL.$.upperSampler, + uvU, + 0, + ); + return d.vec4f( + marchResult.color + upper.xyz * marchResult.transmittance, + marchResult.transmittance * upper.w, + ); + } + + return d.vec4f(marchResult.color, marchResult.transmittance); +}; + +const bilinearWeight = (forkOffset: d.v2u, bilinear: d.v2f) => { + 'use gpu'; + const weightX = std.select(bilinear.x, 1 - bilinear.x, forkOffset.x === 0); + const weightY = std.select(bilinear.y, 1 - bilinear.y, forkOffset.y === 0); + return weightX * weightY; +}; + +const traceBilinearFork = ( + tileOriginU: d.v2u, + upperProbe: d.v2u, + probesU: d.v2u, + probePos: d.v2f, + rayDir: d.v2f, + startUv: number, + clippedMarchEndUv: number, + marchEndUv: number, + exitUv: number, + aspect: number, + eps: number, + minStep: number, + biasUv: number, +) => { + 'use gpu'; + const upperProbePos = (d.vec2f(upperProbe) + 0.5) / d.vec2f(probesU); + const near = traceSegmentSlot.$( + probePos + rayDir * startUv, + upperProbePos + rayDir * clippedMarchEndUv, + aspect, + eps, + minStep, + biasUv, + ); + + if (near.transmittance > 0.01 && exitUv > marchEndUv) { + const upper = std.textureLoad(cascadePassBGL.$.upper, d.vec2i(tileOriginU + upperProbe), 0); + return d.vec4f(near.color + upper.xyz * near.transmittance, near.transmittance * upper.w); } - // const probePos = d.vec2f(probe).add(0.5).div(d.vec2f(probes)); - const probePos = (d.vec2f(probe) + 0.5) / d.vec2f(probes); - const aspect = params.baseProbes.x / params.baseProbes.y; - const cascadeProbesMinVal = d.f32(std.min(params.baseProbes.x, params.baseProbes.y)); - const interval0 = 1 / cascadeProbesMinVal; - const pow4 = d.f32(d.u32(1) << (layer * 2)); - const startUv = (interval0 * (pow4 - 1)) / 3; - const endUv = startUv + interval0 * pow4; - - const sdfDim = sdfResolutionSlot.$; - const texelSizeMin = 1.0 / d.f32(std.max(std.min(sdfDim.x, sdfDim.y), 1)); - // Use texel size as minimum threshold to avoid sub-texel stepping - const eps = std.max(texelSizeMin, 0.25 / cascadeProbesMinVal); - const minStep = std.max(texelSizeMin * 0.5, 0.125 / cascadeProbesMinVal); - const biasUv = d.f32(ERODE_BIAS) / cascadeProbesMinVal; - - let accum = d.vec4f(); - - for (let i = 0; i < 4; i++) { - const dirActual = dirStored * 2 + d.vec2u(i & 1, i >> 1); - const rayIndex = d.f32(dirActual.y * raysDimActual + dirActual.x) + 0.5; + return d.vec4f(near.color, near.transmittance); +}; + +const traceBilinearFixMergeRay = ( + probePos: d.v2f, + rayDir: d.v2f, + dirActual: d.v2u, + probesU: d.v2u, + startUv: number, + clippedMarchEndUv: number, + marchEndUv: number, + exitUv: number, + aspect: number, + eps: number, + minStep: number, + biasUv: number, +) => { + 'use gpu'; + const tileOriginU = dirActual * probesU; + const samplePos = std.clamp(probePos * d.vec2f(probesU) - 0.5, d.vec2f(0), d.vec2f(probesU) - 1); + const upperBaseProbe = d.vec2u(std.floor(samplePos)); + const bilinear = samplePos - d.vec2f(upperBaseProbe); + + let forkAccum = d.vec4f(); + + for (const fork of tgpu.unroll(std.range(PREAVERAGE_RAY_COUNT))) { + const forkOffset = d.vec2u(fork & 1, fork >> 1); + const upperProbe = std.min(upperBaseProbe + forkOffset, probesU - 1); + const weight = bilinearWeight(forkOffset, bilinear); + + if (weight > 0) { + forkAccum += + traceBilinearFork( + tileOriginU, + upperProbe, + probesU, + probePos, + rayDir, + startUv, + clippedMarchEndUv, + marchEndUv, + exitUv, + aspect, + eps, + minStep, + biasUv, + ) * weight; + } + } + + return forkAccum; +}; + +export function makeCascadePassCompute({ + hasUpperCascade, + mergeModeId, + renderAspect, + epsUv, + minStepUv, + hitBiasUv, +}: CascadePassSpecialization) { + const rayDirection = (rayIndex: number, rayCountActual: number) => { + 'use gpu'; const angle = (rayIndex / rayCountActual) * (Math.PI * 2) - Math.PI; const cosA = std.cos(angle); const sinA = -std.sin(angle); - let rayDir = d.vec2f(cosA, sinA); - if (aspect >= 1) { - rayDir = d.vec2f(cosA / aspect, sinA); + let dir = d.vec2f(); + + if (renderAspect >= 1) { + dir = d.vec2f(cosA / renderAspect, sinA); } else { - rayDir = d.vec2f(cosA, sinA * aspect); + dir = d.vec2f(cosA, sinA * renderAspect); } - const marchResult = rayMarchSlot.$(probePos, rayDir, startUv, endUv, eps, minStep, biasUv); - let rgb = d.vec3f(marchResult.color); - let T = d.f32(marchResult.transmittance); - - if (layer < params.cascadeCount - 1 && T > 0.01) { - const probesU = std.max(d.vec2u(probes.x >> 1, probes.y >> 1), d.vec2u(1)); - const tileOrigin = d.vec2f(dirActual) * d.vec2f(probesU); - const probePixel = std.clamp( - probePos * d.vec2f(probesU), - d.vec2f(0.5), - d.vec2f(probesU) - 0.5, - ); - const uvU = (tileOrigin + probePixel) / d.vec2f(dim2); - - const upper = std.textureSampleLevel( - cascadePassBGL.$.upper, - cascadePassBGL.$.upperSampler, - uvU, - 0, - ); - rgb = rgb + upper.xyz * T; - T *= upper.w; + return dir; + }; + + return tgpu.computeFn({ + workgroupSize: [8, 8], + in: { gid: d.builtin.globalInvocationId }, + })(({ gid }) => { + 'use gpu'; + const dim2 = std.textureDimensions(cascadePassBGL.$.dst); + if (gid.x >= dim2.x || gid.y >= dim2.y) { + return; } - accum += d.vec4f(rgb, T); - } + const layerParams = cascadePassBGL.$.layerParams; + const probes = layerParams.probes; + const raysDimActual = layerParams.raysDimActual; - std.textureStore(cascadePassBGL.$.dst, gid.xy, accum * 0.25); -}); + if (gid.x >= layerParams.validDim.x || gid.y >= layerParams.validDim.y) { + std.textureStore(cascadePassBGL.$.dst, gid.xy, d.vec4f(0, 0, 0, 1)); + return; + } -export const BuildRadianceFieldParams = d.struct({ - outputProbes: d.vec2u, - cascadeProbes: d.vec2u, -}); + const dirStored = std.div(gid.xy, probes); + const probe = gid.xy % probes; + const rayCountActual = d.f32(raysDimActual) ** 2; + const probePos = (d.vec2f(probe) + 0.5) / d.vec2f(probes); + const aspect = d.f32(renderAspect); + const eps = d.f32(epsUv); + const minStep = d.f32(minStepUv); + const biasUv = d.f32(hitBiasUv); + const startUv = layerParams.startUv; + const endUv = layerParams.endUv; + const marchEndUv = endUv + layerParams.intervalOverlapUv; + + let accum = d.vec4f(); + + for (const i of tgpu.unroll(std.range(PREAVERAGE_RAY_COUNT))) { + const dirActual = dirStored * PREAVERAGE_RAY_DIM + d.vec2u(i & 1, i >> 1); + const rayIndexU = morton2D(dirActual.x, dirActual.y); + const rayIndex = d.f32(rayIndexU) + 0.5; + const rayDir = rayDirection(rayIndex, rayCountActual); + const exitUv = rayBoxExitUv(probePos, rayDir); + const clippedMarchEndUv = std.min(marchEndUv, exitUv); + + if (exitUv <= startUv) { + accum += d.vec4f(0, 0, 0, 1); + } else if (hasUpperCascade) { + const probesU = layerParams.probesU; + + if (mergeModeId === MERGE_MODE_HARDWARE) { + accum += traceHardwareMergeRay( + dim2, + probePos, + rayDir, + dirActual, + probesU, + startUv, + clippedMarchEndUv, + marchEndUv, + exitUv, + eps, + minStep, + biasUv, + ); + } else { + accum += traceBilinearFixMergeRay( + probePos, + rayDir, + dirActual, + probesU, + startUv, + clippedMarchEndUv, + marchEndUv, + exitUv, + aspect, + eps, + minStep, + biasUv, + ); + } + } else { + const ray = rayMarchSlot.$( + probePos, + rayDir, + startUv, + clippedMarchEndUv, + eps, + minStep, + biasUv, + ); + accum += d.vec4f(ray.color, ray.transmittance); + } + } + + std.textureStore(cascadePassBGL.$.dst, gid.xy, accum / d.f32(PREAVERAGE_RAY_COUNT)); + }); +} export const buildRadianceFieldBGL = tgpu.bindGroupLayout({ - params: { uniform: BuildRadianceFieldParams }, src: { texture: d.texture2d() }, srcSampler: { sampler: 'filtering' }, dst: { storageTexture: d.textureStorage2d('rgba16float') }, }); -export const buildRadianceFieldCompute = tgpu.computeFn({ - workgroupSize: [8, 8], - in: { gid: d.builtin.globalInvocationId }, -})(({ gid }) => { - 'use gpu'; - const dim2 = std.textureDimensions(buildRadianceFieldBGL.$.dst); - if (gid.x >= dim2.x || gid.y >= dim2.y) { - return; - } - - const params = buildRadianceFieldBGL.$.params; - const cascadeDim = params.cascadeProbes * 2; +export function makeBuildRadianceFieldCompute({ + baseStoredRayDim, + cascadeProbes, +}: BuildRadianceFieldSpecialization) { + const [cascadeProbesX, cascadeProbesY] = cascadeProbes; + const storedRayCount = baseStoredRayDim * baseStoredRayDim; + const rayMask = baseStoredRayDim - 1; + const rayShift = baseStoredRayDim >> 1; + + return tgpu.computeFn({ + workgroupSize: [8, 8], + in: { gid: d.builtin.globalInvocationId }, + })(({ gid }) => { + 'use gpu'; + const dstDim = std.textureDimensions(buildRadianceFieldBGL.$.dst); + if (gid.x >= dstDim.x || gid.y >= dstDim.y) { + return; + } - const invCascadeDim = 1 / d.vec2f(cascadeDim); - const uv = (d.vec2f(gid.xy) + 0.5) / d.vec2f(params.outputProbes); + const srcDim = std.textureDimensions(buildRadianceFieldBGL.$.src); + const cascadeProbeDim = d.vec2f(cascadeProbesX, cascadeProbesY); + const invSrcDim = 1 / d.vec2f(srcDim); + const uv = (d.vec2f(gid.xy) + 0.5) / d.vec2f(dstDim); - const probePixel = std.clamp( - uv * d.vec2f(params.cascadeProbes), - d.vec2f(0.5), - d.vec2f(params.cascadeProbes) - 0.5, - ); + const probePixel = std.clamp(uv * cascadeProbeDim, d.vec2f(0.5), cascadeProbeDim - 0.5); - const uvStride = d.vec2f(params.cascadeProbes) * invCascadeDim; - const baseSampleUV = probePixel * invCascadeDim; + const uvStride = cascadeProbeDim * invSrcDim; + const baseSampleUV = probePixel * invSrcDim; - let sum = d.vec3f(); - for (let i = d.u32(0); i < 4; i++) { - const offset = d.vec2f(i & 1, i >> 1) * uvStride; - const sample = std.textureSampleLevel( - buildRadianceFieldBGL.$.src, - buildRadianceFieldBGL.$.srcSampler, - baseSampleUV + offset, - 0, - ); - sum = sum + sample.xyz; - } + let sum = d.vec3f(); - const avg = sum * 0.25; - const res = d.vec3f(avg); + for (const i of tgpu.unroll(std.range(storedRayCount))) { + const offset = d.vec2f(i & rayMask, i >> rayShift) * uvStride; + const sample = std.textureSampleLevel( + buildRadianceFieldBGL.$.src, + buildRadianceFieldBGL.$.srcSampler, + baseSampleUV + offset, + 0, + ); + sum += sample.xyz; + } - std.textureStore(buildRadianceFieldBGL.$.dst, gid.xy, d.vec4f(res, 1)); -}); + std.textureStore(buildRadianceFieldBGL.$.dst, gid.xy, d.vec4f(sum / d.f32(storedRayCount), 1)); + }); +} diff --git a/packages/typegpu-radiance-cascades/src/index.ts b/packages/typegpu-radiance-cascades/src/index.ts index 63100d7437..ea78cc2395 100644 --- a/packages/typegpu-radiance-cascades/src/index.ts +++ b/packages/typegpu-radiance-cascades/src/index.ts @@ -1,11 +1,22 @@ export { createRadianceCascades } from './runner.ts'; -export type { RadianceCascadesExecutor } from './runner.ts'; +export type { OwnedRadianceCascadesExecutor, RadianceCascadesExecutor } from './runner.ts'; export { colorSlot, + defaultTraceSegment, defaultRayMarch, getCascadeDim, + getCascadeInfo, + maxRayStepsAccess, + rayMarchStepSafetyAccess, RayMarchResult, rayMarchSlot, - sdfResolutionSlot, sdfSlot, + traceSegmentSlot, +} from './cascades.ts'; +export type { + BaseStoredRayDim, + CascadeInfo, + CascadeInfoOptions, + CascadeLayerInfo, + MergeMode, } from './cascades.ts'; diff --git a/packages/typegpu-radiance-cascades/src/runner.ts b/packages/typegpu-radiance-cascades/src/runner.ts index 466f3575e1..40ec7374f4 100644 --- a/packages/typegpu-radiance-cascades/src/runner.ts +++ b/packages/typegpu-radiance-cascades/src/runner.ts @@ -1,6 +1,7 @@ import { d, isTexture, + isTextureView, type SampledFlag, type StorageFlag, type TgpuBindGroup, @@ -9,36 +10,47 @@ import { type TgpuTextureView, } from 'typegpu'; import { + type BaseStoredRayDim, buildRadianceFieldBGL, - buildRadianceFieldCompute, - BuildRadianceFieldParams, cascadePassBGL, - cascadePassCompute, - CascadeStaticParams, + CascadeLayerParams, colorSlot, defaultRayMarch, - getCascadeDim, + defaultTraceSegment, + getCascadeInfo, + makeBuildRadianceFieldCompute, + makeCascadePassCompute, + MERGE_MODE_BILINEAR_FIX, + MERGE_MODE_HARDWARE, + maxRayStepsAccess, + type MergeMode, + rayMarchStepSafetyAccess, type RayMarchResult, rayMarchSlot, - sdfResolutionSlot, sdfSlot, + traceSegmentSlot, } from './cascades.ts'; -type OutputTexture = TgpuTexture<{ - size: [number, number]; +type RadianceTexture2D = TgpuTexture<{ size: [number, number]; format: 'rgba16float' }>; +type RadianceTextureArray = TgpuTexture<{ + size: [number, number, number]; format: 'rgba16float'; -}> & - StorageFlag & - SampledFlag; -type OutputTextureView = TgpuTextureView>; -type OutputResource = OutputTexture | OutputTextureView; -type Size = { width: number; height: number }; - -type CascadesOptions = { +}>; +type RadianceStorageView = TgpuTextureView>; + +type OutputTexture = (RadianceTexture2D & StorageFlag) | RadianceStorageView; + +type CascadeTexture2D = RadianceTexture2D & StorageFlag & SampledFlag; +type CascadeTextureArray = RadianceTextureArray & StorageFlag & SampledFlag; + +type CascadeTexture = CascadeTexture2D | CascadeTextureArray; +type OutputSize = { width: number; height: number }; + +type CascadesOptions = { root: TgpuRoot; sdf: (uv: d.v2f) => number; color: (uv: d.v2f) => d.v3f; - sdfResolution: Size; + sdfResolution: { width: number; height: number }; rayMarch?: ( probePos: d.v2f, rayDir: d.v2f, @@ -48,40 +60,169 @@ type CascadesOptions d.InferGPU; - output?: TOutput; - size?: Size; + traceSegment?: ( + p0: d.v2f, + p1: d.v2f, + aspect: number, + eps: number, + minStep: number, + bias: number, + ) => d.InferGPU; + output?: OutputTexture; + size?: OutputSize; + renderAspect?: number; + erodeBiasPx?: number; + epsPx?: number; + minStepPx?: number; + maxRaySteps?: number; + stepSafety?: number; + intervalOverlapPx?: number | 'upperProbeSpacing'; + baseStoredRayDim?: BaseStoredRayDim; + mergeMode?: MergeMode; + keepCascadeLayers?: boolean; }; -export type RadianceCascadesExecutor = { - run(): void; +export type RadianceCascadesExecutor = { + run(commandEncoder?: GPUCommandEncoder): void; with(bindGroup: TgpuBindGroup): RadianceCascadesExecutor; destroy(): void; readonly output: TOutput; + readonly outputTexture: CascadeTexture2D | undefined; + readonly ownsOutput: boolean; +}; + +export type OwnedRadianceCascadesExecutor = RadianceCascadesExecutor & { + readonly outputTexture: CascadeTexture2D; + readonly ownsOutput: true; }; +function assertPositiveOption(name: string, value: number) { + if (!(value > 0)) { + throw new Error(`${name} must be positive.`); + } +} + +function assertNonNegativeOption(name: string, value: number) { + if (value < 0) { + throw new Error(`${name} must be non-negative.`); + } +} + +function getOutputSize(output: OutputTexture | undefined, size: OutputSize | undefined) { + if (output === undefined) { + if (!size) { + throw new Error('Size is required when output texture is not provided.'); + } + return [size.width, size.height] as const; + } + + if (isTexture(output)) { + return output.props.size; + } + + const [width, height] = output.size ?? [size?.width, size?.height]; + if (!width || !height) { + throw new Error('Size could not be inferred from texture view, pass explicit size in options.'); + } + return [width, height] as const; +} + +function createCascadeTexture( + root: TgpuRoot, + cascadeDimX: number, + cascadeDimY: number, + cascadeCount: number, + keepCascadeLayers: boolean, +): CascadeTexture { + if (keepCascadeLayers) { + return root + .createTexture({ + size: [cascadeDimX, cascadeDimY, cascadeCount], + format: 'rgba16float', + }) + .$usage('storage', 'sampled'); + } + + return root + .createTexture({ + size: [cascadeDimX, cascadeDimY], + format: 'rgba16float', + }) + .$usage('storage', 'sampled'); +} + +function createCascadeSampleView( + texture: CascadeTexture, + layer: number, + keepCascadeLayers: boolean, +) { + if (keepCascadeLayers) { + return (texture as CascadeTextureArray).createView(d.texture2d(d.f32), { + baseArrayLayer: layer, + arrayLayerCount: 1, + }); + } + + return (texture as CascadeTexture2D).createView(d.texture2d(d.f32)); +} + +function createCascadeStorageView( + texture: CascadeTexture, + layer: number, + keepCascadeLayers: boolean, +) { + if (keepCascadeLayers) { + return (texture as CascadeTextureArray).createView(d.textureStorage2d('rgba16float'), { + baseArrayLayer: layer, + arrayLayerCount: 1, + }); + } + + return (texture as CascadeTexture2D).createView(d.textureStorage2d('rgba16float')); +} + export function createRadianceCascades( - options: CascadesOptions & { size: Size }, -): RadianceCascadesExecutor; -export function createRadianceCascades( - options: CascadesOptions & { output: TOutput }, + options: CascadesOptions & { output?: undefined; size: OutputSize }, +): OwnedRadianceCascadesExecutor; +export function createRadianceCascades( + options: CascadesOptions & { output: TOutput }, ): RadianceCascadesExecutor; -export function createRadianceCascades( - options: CascadesOptions, -): RadianceCascadesExecutor { - const { root, sdf, color, sdfResolution, output, size, rayMarch } = options; - - const outputSize = output - ? isTexture(output) - ? output.props.size - : (output.size ?? (size && [size.width, size.height])) - : size && [size.width, size.height]; - const outputWidth = outputSize?.[0]; - const outputHeight = outputSize?.[1]; - if (!outputWidth || !outputHeight) { - throw new Error('Size could not be inferred from output, pass explicit size in options.'); +export function createRadianceCascades(options: CascadesOptions): RadianceCascadesExecutor { + const { root, sdf, color, sdfResolution, output, size, rayMarch, traceSegment } = options; + + if (output !== undefined && !isTexture(output) && !isTextureView(output)) { + throw new Error('output must be a TypeGPU texture or texture view.'); + } + + const [outputWidth, outputHeight] = getOutputSize(output, size); + + if (!(sdfResolution.width > 0) || !(sdfResolution.height > 0)) { + throw new Error('sdfResolution must be positive.'); + } + + const mergeModeId = + options.mergeMode === 'bilinear-fix' ? MERGE_MODE_BILINEAR_FIX : MERGE_MODE_HARDWARE; + const keepCascadeLayers = options.keepCascadeLayers ?? false; + const baseStoredRayDim = options.baseStoredRayDim ?? 2; + const renderAspect = options.renderAspect ?? outputWidth / outputHeight; + const erodeBiasPx = options.erodeBiasPx ?? 1; + const epsPx = options.epsPx ?? 0.25; + const minStepPx = options.minStepPx ?? 0.125; + const maxRaySteps = Math.floor(options.maxRaySteps ?? 64); + const stepSafety = options.stepSafety ?? 1; + const intervalOverlapPx = options.intervalOverlapPx ?? 0; + + assertPositiveOption('renderAspect', renderAspect); + assertNonNegativeOption('erodeBiasPx', erodeBiasPx); + assertNonNegativeOption('epsPx', epsPx); + assertNonNegativeOption('minStepPx', minStepPx); + assertPositiveOption('maxRaySteps', maxRaySteps); + assertPositiveOption('stepSafety', stepSafety); + if (typeof intervalOverlapPx === 'number') { + assertNonNegativeOption('intervalOverlapPx', intervalOverlapPx); } - const dst: OutputResource = + const dst = output ?? root .createTexture({ @@ -90,127 +231,183 @@ export function createRadianceCascades( }) .$usage('storage', 'sampled'); - const [cascadeDimX, cascadeDimY, cascadeAmount] = getCascadeDim(outputWidth, outputHeight); + const ownsOutput = output === undefined; - const cascadeProbesX = cascadeDimX / 2; - const cascadeProbesY = cascadeDimY / 2; + const { + baseProbes: [cascadeProbesX, cascadeProbesY], + cascadeDim: [cascadeDimX, cascadeDimY], + cascadeCount, + layers, + } = getCascadeInfo(outputWidth, outputHeight, { baseStoredRayDim }); + const cascadeProbesMin = Math.min(cascadeProbesX, cascadeProbesY); + const sdfTexelSizeMin = 1 / Math.max(Math.min(sdfResolution.width, sdfResolution.height), 1); + const epsUv = Math.max(sdfTexelSizeMin, epsPx / cascadeProbesMin); + const minStepUv = Math.max(sdfTexelSizeMin * 0.5, minStepPx / cascadeProbesMin); + const hitBiasUv = erodeBiasPx / cascadeProbesMin; - const cascadeTextureA = root - .createTexture({ - size: [cascadeDimX, cascadeDimY, cascadeAmount], - format: 'rgba16float', - }) - .$usage('storage', 'sampled'); + const cascadeTextureA = createCascadeTexture( + root, + cascadeDimX, + cascadeDimY, + cascadeCount, + keepCascadeLayers, + ); - const cascadeTextureB = root - .createTexture({ - size: [cascadeDimX, cascadeDimY, cascadeAmount], - format: 'rgba16float', - }) - .$usage('storage', 'sampled'); + const cascadeTextureB = createCascadeTexture( + root, + cascadeDimX, + cascadeDimY, + cascadeCount, + keepCascadeLayers, + ); const cascadeSampler = root.createSampler({ magFilter: 'linear', minFilter: 'linear', }); - const staticParamsBuffer = root - .createBuffer(CascadeStaticParams, { - baseProbes: [cascadeProbesX, cascadeProbesY], - cascadeDim: [cascadeDimX, cascadeDimY], - cascadeCount: cascadeAmount, - }) - .$usage('uniform'); + const cascadePasses = layers.map((layerInfo) => { + const { layer, validDim } = layerInfo; + const isTopCascade = layer === cascadeCount - 1; + const intervalOverlapUv = isTopCascade + ? 0 + : typeof intervalOverlapPx === 'number' + ? intervalOverlapPx / cascadeProbesMin + : 1 / Math.min(layerInfo.probesU[0], layerInfo.probesU[1]); + const writeToA = (cascadeCount - 1 - layer) % 2 === 0; + const dstTexture = writeToA ? cascadeTextureA : cascadeTextureB; + const srcTexture = writeToA ? cascadeTextureB : cascadeTextureA; + const layerParams = root + .createBuffer(CascadeLayerParams, { + layer: layerInfo.layer, + probes: layerInfo.probes, + probesU: layerInfo.probesU, + validDim: layerInfo.validDim, + raysDimStored: layerInfo.raysDimStored, + raysDimActual: layerInfo.raysDimActual, + startUv: layerInfo.startUv, + endUv: layerInfo.endUv, + intervalOverlapUv, + }) + .$usage('uniform'); - const layerBuffer = root.createBuffer(d.u32).$usage('uniform'); + return { + layerParams, + bindGroup: root.createBindGroup(cascadePassBGL, { + layerParams, + upper: createCascadeSampleView( + srcTexture, + Math.min(layer + 1, cascadeCount - 1), + keepCascadeLayers, + ), + upperSampler: cascadeSampler, + dst: createCascadeStorageView(dstTexture, layer, keepCascadeLayers), + }), + workgroups: [Math.ceil(validDim[0] / 8), Math.ceil(validDim[1] / 8)] as const, + isTopCascade, + }; + }); - const cascadePassPipeline = root - .with(sdfResolutionSlot, d.vec2u(sdfResolution.width, sdfResolution.height)) + const cascadePipelineBase = root .with(sdfSlot, sdf) .with(colorSlot, color) + .with(maxRayStepsAccess, maxRaySteps) + .with(rayMarchStepSafetyAccess, stepSafety) .with(rayMarchSlot, rayMarch ?? defaultRayMarch) - .createComputePipeline({ compute: cascadePassCompute }); + .with(traceSegmentSlot, traceSegment ?? defaultTraceSegment); - const cascadePassBindGroups = Array.from({ length: cascadeAmount }, (_, layer) => { - const writeToA = (cascadeAmount - 1 - layer) % 2 === 0; - const dstTexture = writeToA ? cascadeTextureA : cascadeTextureB; - const srcTexture = writeToA ? cascadeTextureB : cascadeTextureA; + const cascadePassSpecialization = { + mergeModeId, + renderAspect, + epsUv, + minStepUv, + hitBiasUv, + }; - return root.createBindGroup(cascadePassBGL, { - staticParams: staticParamsBuffer, - layer: layerBuffer, - upper: srcTexture.createView(d.texture2d(d.f32), { - baseArrayLayer: Math.min(layer + 1, cascadeAmount - 1), - arrayLayerCount: 1, - }), - upperSampler: cascadeSampler, - dst: dstTexture.createView(d.textureStorage2d('rgba16float'), { - baseArrayLayer: layer, - arrayLayerCount: 1, - }), - }); + const topCascadePipeline = cascadePipelineBase.createComputePipeline({ + compute: makeCascadePassCompute({ + ...cascadePassSpecialization, + hasUpperCascade: false, + }), }); - const buildRadianceFieldPipeline = root.createComputePipeline({ - compute: buildRadianceFieldCompute, + const mergeCascadePipeline = cascadePipelineBase.createComputePipeline({ + compute: makeCascadePassCompute({ + ...cascadePassSpecialization, + hasUpperCascade: true, + }), }); - const radianceFieldParamsBuffer = root - .createBuffer(BuildRadianceFieldParams, { - outputProbes: [outputWidth, outputHeight], + const buildRadianceFieldPipeline = root.createComputePipeline({ + compute: makeBuildRadianceFieldCompute({ + baseStoredRayDim, cascadeProbes: [cascadeProbesX, cascadeProbesY], - }) - .$usage('uniform'); + }), + }); - const cascade0InA = (cascadeAmount - 1) % 2 === 0; + const cascade0InA = (cascadeCount - 1) % 2 === 0; const srcCascadeTexture = cascade0InA ? cascadeTextureA : cascadeTextureB; const buildRadianceFieldBG = root.createBindGroup(buildRadianceFieldBGL, { - params: radianceFieldParamsBuffer, - src: srcCascadeTexture.createView(d.texture2d(d.f32), { - baseArrayLayer: 0, - arrayLayerCount: 1, - }), + src: createCascadeSampleView(srcCascadeTexture, 0, keepCascadeLayers), srcSampler: cascadeSampler, dst, }); - const cascadeWorkgroupsX = Math.ceil(cascadeDimX / 8); - const cascadeWorkgroupsY = Math.ceil(cascadeDimY / 8); const outputWorkgroupsX = Math.ceil(outputWidth / 8); const outputWorkgroupsY = Math.ceil(outputHeight / 8); + const outputTexture = + isTexture(dst) && dst.usableAsSampled ? (dst as CascadeTexture2D) : undefined; + + let destroyed = false; function destroy() { + if (destroyed) { + return; + } + destroyed = true; + cascadeTextureA.destroy(); cascadeTextureB.destroy(); - if (!output && isTexture(dst)) { + for (const { layerParams } of cascadePasses) { + layerParams.destroy(); + } + if (ownsOutput && isTexture(dst)) { dst.destroy(); } } - function createExecutor( - additionalBindGroups: TgpuBindGroup[] = [], - ): RadianceCascadesExecutor { - const prebuiltCascadePipelines = cascadePassBindGroups.map((bg) => { - let p = cascadePassPipeline.with(bg); - for (const addBg of additionalBindGroups) { - p = p.with(addBg); - } - return p; - }); + function createExecutor(additionalBindGroups: TgpuBindGroup[] = []): RadianceCascadesExecutor { + const prebuiltCascadePasses = cascadePasses + .map(({ bindGroup, workgroups, isTopCascade }) => { + const cascadePassPipeline = isTopCascade ? topCascadePipeline : mergeCascadePipeline; + let pipeline = cascadePassPipeline.with(bindGroup); + for (const addBg of additionalBindGroups) { + pipeline = pipeline.with(addBg); + } + return { pipeline, workgroups }; + }) + .toReversed(); let prebuiltRadiancePipeline = buildRadianceFieldPipeline.with(buildRadianceFieldBG); for (const bg of additionalBindGroups) { prebuiltRadiancePipeline = prebuiltRadiancePipeline.with(bg); } - function run() { - for (let layer = cascadeAmount - 1; layer >= 0; layer--) { - layerBuffer.write(layer); - prebuiltCascadePipelines[layer]?.dispatchWorkgroups(cascadeWorkgroupsX, cascadeWorkgroupsY); + function run(commandEncoder?: GPUCommandEncoder) { + const encoder = commandEncoder ?? root.device.createCommandEncoder(); + + for (const { pipeline, workgroups } of prebuiltCascadePasses) { + pipeline.with(encoder).dispatchWorkgroups(...workgroups); } - prebuiltRadiancePipeline.dispatchWorkgroups(outputWorkgroupsX, outputWorkgroupsY); + prebuiltRadiancePipeline + .with(encoder) + .dispatchWorkgroups(outputWorkgroupsX, outputWorkgroupsY); + + if (!commandEncoder) { + root.device.queue.submit([encoder.finish()]); + } } return { @@ -218,6 +415,8 @@ export function createRadianceCascades( with: (bg) => createExecutor([...additionalBindGroups, bg]), destroy, output: dst, + outputTexture, + ownsOutput, }; } diff --git a/packages/typegpu-sdf/src/jumpFlood.ts b/packages/typegpu-sdf/src/jumpFlood.ts index 7ce91a6b4e..09cd006945 100644 --- a/packages/typegpu-sdf/src/jumpFlood.ts +++ b/packages/typegpu-sdf/src/jumpFlood.ts @@ -243,8 +243,8 @@ export type ColorTexture = TgpuTexture<{ export type Executor = { /** Run the jump flood algorithm. */ - run(): void; - /** The SDF output texture (rgba16float). */ + run(commandEncoder?: GPUCommandEncoder): void; + /** The SDF output texture (r32float). */ readonly sdfOutput: SdfTexture; /** The color output texture (rgba8unorm). */ readonly colorOutput: ColorTexture; @@ -324,16 +324,10 @@ export function createJumpFlood(options: JumpFloodOptions): Executor { }) .$usage('storage'); - const offsetUniform = root.createUniform(d.i32); - const initFromSeedPipeline = root .with(classifySlot, classify) .createComputePipeline({ compute: initFromSeedCompute }); - const jumpFloodPipeline = root - .with(offsetAccessor, offsetUniform) - .createComputePipeline({ compute: jumpFloodCompute }); - const finalizePipeline = root .with(sdfSlot, getSdf) .with(colorSlot, getColor) @@ -353,7 +347,7 @@ export function createJumpFlood(options: JumpFloodOptions): Executor { readView: floodTextureB.createView(d.textureStorage2d('rgba16uint', 'read-only')), writeView: floodTextureA.createView(d.textureStorage2d('rgba16uint', 'write-only')), }), - ]; + ] as const; const distWriteBG = root.createBindGroup(distWriteLayout, { sdfTexture: sdfTexture.createView(d.textureStorage2d('rgba16float', 'write-only')), @@ -367,7 +361,7 @@ export function createJumpFlood(options: JumpFloodOptions): Executor { root.createBindGroup(finalizeReadLayout, { readView: floodTextureB.createView(d.textureStorage2d('rgba16uint', 'read-only')), }), - ]; + ] as const; const workgroupsX = Math.ceil(width / 8); const workgroupsY = Math.ceil(height / 8); @@ -375,12 +369,24 @@ export function createJumpFlood(options: JumpFloodOptions): Executor { // Largest power-of-two strictly less than maxDim. const maxRange = 2 ** Math.floor(Math.log2(Math.max(maxDim - 1, 1))); + const offsets: number[] = []; + for (let offset = maxRange; offset >= 1; offset = Math.floor(offset / 2)) { + offsets.push(offset); + } + const offsetUniforms = offsets.map((offset) => root.createUniform(d.i32, offset)); + const jumpFloodPipelines = offsetUniforms.map((offsetUniform) => + root.with(offsetAccessor, offsetUniform).createComputePipeline({ compute: jumpFloodCompute }), + ); + const finalizeSourceIdx = offsets.length % 2; function destroy() { floodTextureA.destroy(); floodTextureB.destroy(); sdfTexture.destroy(); colorTexture.destroy(); + for (const offsetUniform of offsetUniforms) { + offsetUniform.buffer.destroy(); + } } function createExecutor(additionalBindGroups: TgpuBindGroup[] = []): Executor { @@ -390,37 +396,44 @@ export function createJumpFlood(options: JumpFloodOptions): Executor { prebuiltInitPipeline = prebuiltInitPipeline.with(bg); } - const prebuiltFloodPipelines = pingPongBGs.map((bg) => { - let p = jumpFloodPipeline.with(bg); + const prebuiltFloodPipelines = jumpFloodPipelines.map((pipeline, passIndex) => { + const bg = passIndex % 2 === 0 ? pingPongBGs[0] : pingPongBGs[1]; + let p = pipeline.with(bg); for (const addBg of additionalBindGroups) { p = p.with(addBg); } return p; }); - const prebuiltFinalizePipelines = finalizeReadBGs.map((bg) => { + const prebuildFinalizePipeline = (bg: (typeof finalizeReadBGs)[number]) => { let p = finalizePipeline.with(bg).with(distWriteBG); for (const addBg of additionalBindGroups) { p = p.with(addBg); } return p; - }); + }; + const prebuiltFinalizePipelines = [ + prebuildFinalizePipeline(finalizeReadBGs[0]), + prebuildFinalizePipeline(finalizeReadBGs[1]), + ] as const; + const prebuiltFinalizePipeline = + finalizeSourceIdx === 0 ? prebuiltFinalizePipelines[0] : prebuiltFinalizePipelines[1]; - function run() { - prebuiltInitPipeline.dispatchWorkgroups(workgroupsX, workgroupsY); + function run(commandEncoder?: GPUCommandEncoder) { + const encoder = commandEncoder ?? root.device.createCommandEncoder(); - let sourceIdx = 0; - let offset = maxRange; + prebuiltInitPipeline.with(encoder).dispatchWorkgroups(workgroupsX, workgroupsY); - while (offset >= 1) { - offsetUniform.write(offset); - prebuiltFloodPipelines[sourceIdx]?.dispatchWorkgroups(workgroupsX, workgroupsY); - sourceIdx ^= 1; - offset = Math.floor(offset / 2); + for (const floodPipeline of prebuiltFloodPipelines) { + floodPipeline.with(encoder).dispatchWorkgroups(workgroupsX, workgroupsY); } // Finalize: JFA+1 at offset=1 fused with distance field output - prebuiltFinalizePipelines[sourceIdx]?.dispatchWorkgroups(workgroupsX, workgroupsY); + prebuiltFinalizePipeline.with(encoder).dispatchWorkgroups(workgroupsX, workgroupsY); + + if (!commandEncoder) { + root.device.queue.submit([encoder.finish()]); + } } return { diff --git a/packages/typegpu/src/indexNamedExports.ts b/packages/typegpu/src/indexNamedExports.ts index 8a4ba4d469..bfef77057d 100644 --- a/packages/typegpu/src/indexNamedExports.ts +++ b/packages/typegpu/src/indexNamedExports.ts @@ -14,7 +14,7 @@ export { export { isBuffer, isUsableAsVertex } from './core/buffer/buffer.ts'; export { isAccessor, isLazy, isMutableAccessor, isSlot } from './core/slot/slotTypes.ts'; export { isComparisonSampler, isSampler } from './core/sampler/sampler.ts'; -export { isTexture } from './core/texture/texture.ts'; +export { isTexture, isTextureView } from './core/texture/texture.ts'; export { isUsableAsRender, isUsableAsSampled } from './core/texture/usageExtension.ts'; export { isUsableAsStorage } from './extension.ts'; export { isUsableAsUniform } from './core/buffer/bufferUsage.ts';