blob: 27d68d887c327c9990fe50089cee714c4601b53f [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
import { assert } from "./support";
import { Pointer } from "./ctypes";
import { Memory } from "./memory";
import { Disposable } from "./types";
/** A pointer to points to the raw address space. */
export type GPUPointer = number;
export interface GPUDeviceDetectOutput {
adapter: GPUAdapter;
adapterInfo: GPUAdapterInfo;
device: GPUDevice;
}
/**
* DetectGPU device in the environment.
*/
export async function detectGPUDevice(powerPreference: "low-power" | "high-performance" = "high-performance"): Promise<GPUDeviceDetectOutput | undefined> {
if (typeof navigator !== "undefined" && navigator.gpu !== undefined) {
const adapter = await navigator.gpu.requestAdapter({ powerPreference });
if (adapter == null) {
throw Error(
"Unable to find a compatible GPU. This issue might be because your computer doesn't have a GPU, or your system settings are not configured properly. " +
"Please check if your device has a GPU properly set up and if your your browser supports WebGPU. " +
"You can also consult your browser's compatibility chart to see if it supports WebGPU. " +
"For more information about WebGPU support in your browser, visit https://webgpureport.org/"
);
}
const computeMB = (value: number) => {
return Math.ceil(value / (1 << 20)) + "MB";
}
// more detailed error message
let requiredMaxBufferSize = 1 << 30; // 1GB
if (requiredMaxBufferSize > adapter.limits.maxBufferSize) {
// If 1GB is too large, try 256MB (default size stated in WebGPU doc)
const backupRequiredMaxBufferSize = 1 << 28; // 256MB
console.log(
`Requested maxBufferSize exceeds limit. \n` +
`requested=${computeMB(requiredMaxBufferSize)}, \n` +
`limit=${computeMB(adapter.limits.maxBufferSize)}. \n` +
`WARNING: Falling back to ${computeMB(backupRequiredMaxBufferSize)}...`
);
requiredMaxBufferSize = backupRequiredMaxBufferSize;
if (backupRequiredMaxBufferSize > adapter.limits.maxBufferSize) {
// Fail if 256MB is still too big
throw Error(
`Cannot initialize runtime because of requested maxBufferSize ` +
`exceeds limit. requested=${computeMB(backupRequiredMaxBufferSize)}, ` +
`limit=${computeMB(adapter.limits.maxBufferSize)}. ` +
`Consider upgrading your browser.`
);
}
}
let requiredMaxStorageBufferBindingSize = 1 << 30; // 1GB
if (requiredMaxStorageBufferBindingSize > adapter.limits.maxStorageBufferBindingSize) {
// If 1GB is too large, try 128MB (default size stated in WebGPU doc)
const backupRequiredMaxStorageBufferBindingSize = 1 << 27; // 128MB
console.log(
`Requested maxStorageBufferBindingSize exceeds limit. \n` +
`requested=${computeMB(requiredMaxStorageBufferBindingSize)}, \n` +
`limit=${computeMB(adapter.limits.maxStorageBufferBindingSize)}. \n` +
`WARNING: Falling back to ${computeMB(backupRequiredMaxStorageBufferBindingSize)}...`
);
requiredMaxStorageBufferBindingSize = backupRequiredMaxStorageBufferBindingSize;
if (backupRequiredMaxStorageBufferBindingSize > adapter.limits.maxStorageBufferBindingSize) {
// Fail if 128MB is still too big
throw Error(
`Cannot initialize runtime because of requested maxStorageBufferBindingSize ` +
`exceeds limit. requested=${computeMB(backupRequiredMaxStorageBufferBindingSize)}, ` +
`limit=${computeMB(adapter.limits.maxStorageBufferBindingSize)}. `
);
}
}
const requiredMaxComputeWorkgroupStorageSize = 32 << 10;
if (requiredMaxComputeWorkgroupStorageSize > adapter.limits.maxComputeWorkgroupStorageSize) {
throw Error(
`Cannot initialize runtime because of requested maxComputeWorkgroupStorageSize ` +
`exceeds limit. requested=${requiredMaxComputeWorkgroupStorageSize}, ` +
`limit=${adapter.limits.maxComputeWorkgroupStorageSize}. `
);
}
const requiredMaxStorageBuffersPerShaderStage = 10; // default is 8
if (requiredMaxStorageBuffersPerShaderStage > adapter.limits.maxStorageBuffersPerShaderStage) {
throw Error(
`Cannot initialize runtime because of requested maxStorageBuffersPerShaderStage ` +
`exceeds limit. requested=${requiredMaxStorageBuffersPerShaderStage}, ` +
`limit=${adapter.limits.maxStorageBuffersPerShaderStage}. `
);
}
const requiredFeatures: GPUFeatureName[] = [];
// Always require f16 if available
if (adapter.features.has("shader-f16")) {
requiredFeatures.push("shader-f16");
}
// requestAdapterInfo() is deprecated, causing requestAdapterInfo to raise
// issue when building. However, it is still needed for older browsers, hence `as any`.
const adapterInfo = adapter.info || await (adapter as any).requestAdapterInfo();
const device = await adapter.requestDevice({
requiredLimits: {
maxBufferSize: requiredMaxBufferSize,
maxStorageBufferBindingSize: requiredMaxStorageBufferBindingSize,
maxComputeWorkgroupStorageSize: requiredMaxComputeWorkgroupStorageSize,
maxStorageBuffersPerShaderStage: requiredMaxStorageBuffersPerShaderStage,
},
requiredFeatures
});
return {
adapter: adapter,
adapterInfo: adapterInfo,
device: device
};
} else {
return undefined;
}
}
/**
* Create GPU buffer with `createBuffer()` but with error catching; destroy if error caught.
* @param device The GPUDevice used to create a buffer.
* @param descriptor The GPUBufferDescriptor passed to `createBuffer()`.
* @returns The buffer created by `createBuffer()`.
*
* @note We treat any error occurred at `createBuffer()` fatal and expect the user to handle
* `device.destroy()` with `device.lost.then()`.
*/
function tryCreateBuffer(device: GPUDevice, descriptor: GPUBufferDescriptor) {
device.pushErrorScope("out-of-memory");
device.pushErrorScope("validation");
device.pushErrorScope("internal");
const buffer = device.createBuffer(descriptor);
device.popErrorScope().then((error) => {if (error) {device.destroy(); console.error(error);}});
device.popErrorScope().then((error) => {if (error) {device.destroy(); console.error(error);}});
device.popErrorScope().then((error) => {if (error) {device.destroy(); console.error(error);}});
return buffer;
}
const canvasRenderWGSL = `
@group(0) @binding(0) var my_sampler : sampler;
@group(0) @binding(1) var my_texture : texture_2d<f32>;
struct VertexOutput {
@builtin(position) position : vec4<f32>,
@location(0) uv : vec2<f32>,
}
@vertex
fn vertex_main(@builtin(vertex_index) vidx : u32) -> VertexOutput {
const pos = array(
vec2( 1.0, 1.0),
vec2( 1.0, -1.0),
vec2(-1.0, -1.0),
vec2( 1.0, 1.0),
vec2(-1.0, -1.0),
vec2(-1.0, 1.0),
);
const uv = array(
vec2(1.0, 0.0),
vec2(1.0, 1.0),
vec2(0.0, 1.0),
vec2(1.0, 0.0),
vec2(0.0, 1.0),
vec2(0.0, 0.0),
);
var output : VertexOutput;
output.position = vec4(pos[vidx], 0.0, 1.0);
output.uv = uv[vidx];
return output;
}
@fragment
fn fragment_main(@location(0) uv : vec2<f32>) -> @location(0) vec4<f32> {
return textureSample(my_texture, my_sampler, uv);
}
@fragment
fn fragment_clear(@location(0) uv : vec2<f32>) -> @location(0) vec4<f32> {
return vec4(1.0, 1.0, 1.0, 1.0);
}
`
class CanvasRenderManager implements Disposable {
private device: GPUDevice;
private canvasContext: GPUCanvasContext;
private stagingTexture: GPUTexture;
private renderSampler: GPUSampler;
private renderPipeline: GPURenderPipeline;
private clearPipeline: GPURenderPipeline;
private canvasTextureFormat: GPUTextureFormat;
constructor(device: GPUDevice, canvas: HTMLCanvasElement) {
this.device = device;
const ctx = canvas.getContext("webgpu");
if (ctx == null) {
throw Error("Cannot bind WebGPU context");
}
// avoid possible ts complain
this.canvasContext = ctx as any;
this.canvasTextureFormat = navigator.gpu.getPreferredCanvasFormat();
this.canvasContext.configure({
device: this.device,
format: this.canvasTextureFormat,
alphaMode: "opaque",
});
this.renderPipeline = device.createRenderPipeline({
layout: "auto",
vertex: {
module: device.createShaderModule({
code: canvasRenderWGSL,
}),
entryPoint: "vertex_main",
},
fragment: {
module: device.createShaderModule({
code: canvasRenderWGSL,
}),
entryPoint: "fragment_main",
targets: [{
format: this.canvasTextureFormat,
}],
},
primitive: {
topology: "triangle-list",
},
});
this.clearPipeline = device.createRenderPipeline({
layout: "auto",
vertex: {
module: device.createShaderModule({
code: canvasRenderWGSL,
}),
entryPoint: "vertex_main",
},
fragment: {
module: device.createShaderModule({
code: canvasRenderWGSL,
}),
entryPoint: "fragment_clear",
targets: [{
format: this.canvasTextureFormat,
}],
},
primitive: {
topology: "triangle-list",
},
});
this.renderSampler = device.createSampler({
magFilter: "linear",
minFilter: "linear",
});
// staging texture always be in RGBA
this.stagingTexture = device.createTexture({
size: [canvas.height, canvas.width, 1],
format: "rgba8unorm",
usage:
GPUTextureUsage.TEXTURE_BINDING |
GPUTextureUsage.COPY_DST |
GPUTextureUsage.RENDER_ATTACHMENT,
});
}
clear() {
const commandEncoder = this.device.createCommandEncoder();
const passEncoder = commandEncoder.beginRenderPass({
colorAttachments: [
{
view: this.canvasContext.getCurrentTexture().createView(),
clearValue: { r: 0.0, g: 0.0, b: 0.0, a: 1.0 },
loadOp: "clear",
storeOp: "store",
},
],
});
passEncoder.setPipeline(this.clearPipeline);
const renderBindingGroup = this.device.createBindGroup({
layout: this.renderPipeline.getBindGroupLayout(0),
entries: [
{ binding: 0, resource: this.renderSampler },
{ binding: 1, resource: this.stagingTexture.createView() },
],
});
passEncoder.setBindGroup(0, renderBindingGroup);
passEncoder.draw(6, 1, 0, 0);
passEncoder.end();
this.device.queue.submit([commandEncoder.finish()]);
}
draw(buffer: GPUBuffer, height: number, width: number) {
// resize the staging texture
if (height != this.stagingTexture.height || width != this.stagingTexture.width) {
this.stagingTexture.destroy();
this.stagingTexture = this.device.createTexture({
size: [height, width, 1],
format: "rgba8unorm",
usage:
GPUTextureUsage.TEXTURE_BINDING |
GPUTextureUsage.COPY_DST |
GPUTextureUsage.RENDER_ATTACHMENT,
});
}
const commandEncoder = this.device.createCommandEncoder();
commandEncoder.copyBufferToTexture({
buffer: buffer,
offset: 0,
bytesPerRow: this.stagingTexture.width * 4
}, {
texture: this.stagingTexture
}, {
width: this.stagingTexture.width,
height: this.stagingTexture.height
});
const passEncoder = commandEncoder.beginRenderPass({
colorAttachments: [
{
view: this.canvasContext.getCurrentTexture().createView(),
clearValue: { r: 0.0, g: 0.0, b: 0.0, a: 1.0 },
loadOp: "clear",
storeOp: "store",
},
],
});
passEncoder.setPipeline(this.renderPipeline);
const renderBindingGroup = this.device.createBindGroup({
layout: this.renderPipeline.getBindGroupLayout(0),
entries: [
{ binding: 0, resource: this.renderSampler },
{ binding: 1, resource: this.stagingTexture.createView() },
],
});
passEncoder.setBindGroup(0, renderBindingGroup);
passEncoder.draw(6, 1, 0, 0);
passEncoder.end();
this.device.queue.submit([commandEncoder.finish()]);
}
dispose(): void {
this.stagingTexture.destroy();
}
}
/**
* Function info from the API
*/
export interface FunctionInfo {
name: string;
arg_types: Array<string>;
launch_param_tags: Array<string>;
}
/**
* WebGPU context
* Manages all the webgpu resources here.
*/
export class WebGPUContext {
device: GPUDevice;
memory: Memory;
// internal data
private bufferTable: Array<GPUBuffer | undefined> = [undefined];
private bufferTableFreeId: Array<number> = [];
private podArgStagingBuffers: Array<GPUBuffer> = [];
private canvasRenderManager?: CanvasRenderManager = undefined;
// number of pod arg staging buffers
private maxNumPodArgsStagingBuffers = 2;
// flags for debugging
// stats of the runtime.
// peak allocation
private peakAllocatedBytes = 0;
// current allocation
private currAllocatedBytes = 0;
// all allocation(ignoring free)
private allAllocatedBytes = 0;
// shader submit counter
private shaderSubmitCounter = 0;
// limite number of shaders to be submitted, useful for debugging, default to -1
protected debugShaderSubmitLimit = -1;
// log and sync each step
protected debugLogFinish = false;
constructor(memory: Memory, device: GPUDevice) {
this.memory = memory;
this.device = device;
}
/**
* Dispose context.
*/
dispose() {
this.canvasRenderManager?.dispose();
this.bufferTableFreeId = [];
while (this.bufferTable.length != 0) {
this.bufferTable.pop()?.destroy();
}
while (this.podArgStagingBuffers.length != 0) {
this.podArgStagingBuffers.pop()?.destroy();
}
this.device.destroy();
}
/**
* Wait for all pending GPU tasks to complete
*/
async sync(): Promise<void> {
await this.device.queue.onSubmittedWorkDone();
}
/**
* Obtain the runtime information in readable format.
*/
runtimeStatsText(): string {
let info = "peak-memory=" + Math.ceil(this.peakAllocatedBytes / (1 << 20)) + " MB";
info += ", all-memory=" + Math.ceil(this.allAllocatedBytes / (1 << 20)) + " MB";
info += ", shader-submissions=" + this.shaderSubmitCounter;
return info;
}
/**
* Draw image from data in storage buffer.
* @param ptr The GPU ptr
* @param height The height of the image.
* @param width The width of the image.
*/
drawImageFromBuffer(ptr: GPUPointer, height: number, width: number) {
if (this.canvasRenderManager == undefined) {
throw Error("Do not have a canvas context, call bindCanvas first");
}
this.canvasRenderManager.draw(this.gpuBufferFromPtr(ptr), height, width);
}
/**
* Copy raw bytes into buffer ptr.
*
* @param rawBytes The raw bytes
* @param toPtr The target gpu buffer ptr
* @param toOffset The beginning offset
* @param nbytes Number of bytes
*/
copyRawBytesToBuffer(
rawBytes: Uint8Array,
toPtr: GPUPointer,
toOffset: number,
nbytes: number
): void {
// Perhaps it would be more useful to use a staging buffer?
this.device.queue.writeBuffer(
this.gpuBufferFromPtr(toPtr),
toOffset,
rawBytes,
0,
nbytes
);
}
/**
* Clear canvas
*/
clearCanvas() {
this.canvasRenderManager?.clear();
}
/**
* Bind a canvas element to the runtime.
* @param canvas The HTML canvas/
*/
bindCanvas(canvas: HTMLCanvasElement) {
this.canvasRenderManager = new CanvasRenderManager(this.device, canvas);
}
/**
* Create a PackedFunc that runs the given shader
* via createComputePipeline
*
* @param info The function information already parsed as a record.
* @param code The shader data(in WGSL)
* @returns The shader
*/
createShader(finfo: FunctionInfo, code: string): Function {
return this.createShadeInternal(finfo, code, false) as Function;
}
/**
* Create a PackedFunc that runs the given shader asynchronously
* via createComputePipelineAsync
*
* @param info The function information already parsed as a record.
* @param code The shader data(in WGSL)
* @returns The shader
*/
async createShaderAsync(finfo: FunctionInfo, code: string): Promise<Function> {
return await (this.createShadeInternal(finfo, code, true) as Promise<Function>);
}
/**
* Get the pod arg staging buffer
* \param nbytes The minimum size.
* \return The allocated buffer
*/
private getPodArgsBuffer(nbytes: number): GPUBuffer {
let buffer: GPUBuffer | undefined = undefined;
if (this.podArgStagingBuffers.length >= this.maxNumPodArgsStagingBuffers) {
buffer = this.podArgStagingBuffers.shift();
}
// minimum of 16 bytes
let allocSize = 16;
if (buffer !== undefined) {
allocSize = buffer.size;
if (buffer.size < nbytes) {
buffer.destroy();
buffer = undefined;
}
}
while (allocSize < nbytes) {
allocSize *= 2;
}
if (buffer == undefined) {
// create uniform buffer
buffer = tryCreateBuffer(this.device, {
size: allocSize,
usage: GPUBufferUsage.UNIFORM | GPUBufferUsage.COPY_DST,
});
}
assert(nbytes <= buffer.size);
return buffer;
}
/**
* Internal impl of createShader for both async and sync mode.
*
* @param info The function information already parsed as a record.
* @param code The shader data(in WGSL)
* @param asyncMode Whether use async mode.
* @returns The shader function or promise of shader func.
*/
private createShadeInternal(
finfo: FunctionInfo,
code: string,
asyncMode: boolean
): Function | Promise<Function> {
const dispatchToDim: Array<number> = [];
let paramWriteAccess: Array<number> = [];
for (let i = 0; i < finfo.launch_param_tags.length; ++i) {
const tag: string = finfo.launch_param_tags[i];
if (tag.startsWith("blockIdx.")) {
const target: number = tag.charCodeAt(tag.length - 1) - ("x".charCodeAt(0));
assert(target >= 0 && target < 3);
dispatchToDim.push(target);
} else if (tag.startsWith("threadIdx.")) {
const target: number = tag.charCodeAt(tag.length - 1) - ("x".charCodeAt(0));
assert(target >= 0 && target < 3);
dispatchToDim.push(target + 3);
} else if (tag.startsWith("paramWriteAccess:")) {
paramWriteAccess = JSON.parse(tag.substring(17));
} else {
throw new Error("Cannot handle thread_axis " + tag);
}
}
const layoutEntries: Array<GPUBindGroupLayoutEntry> = [];
const bufferArgIndices: Array<number> = [];
const podArgIndices: Array<number> = [];
for (let i = 0; i < finfo.arg_types.length; ++i) {
const dtype = finfo.arg_types[i];
if (dtype == "handle") {
layoutEntries.push({
binding: bufferArgIndices.length,
visibility: GPUShaderStage.COMPUTE,
buffer: {
type: paramWriteAccess[bufferArgIndices.length] ? "storage" : "read-only-storage"
}
});
bufferArgIndices.push(i);
} else if (dtype.startsWith("int") || dtype.startsWith("uint") || dtype.startsWith("float")) {
podArgIndices.push(i);
} else {
throw new Error("Cannot handle argument type " + dtype + " in WebGPU shader");
}
}
assert(paramWriteAccess.length == bufferArgIndices.length);
// POD arguments are pass in the end
layoutEntries.push({
binding: bufferArgIndices.length,
visibility: GPUShaderStage.COMPUTE,
buffer: {
type: "uniform"
}
});
const bindGroupLayout = this.device.createBindGroupLayout({
entries: layoutEntries
});
const pipelineLayout = this.device.createPipelineLayout({
bindGroupLayouts: [bindGroupLayout]
});
// Function to create the pipeline.
const createShaderFunc = (pipeline: GPUComputePipeline): Function => {
const submitShader = (...args: Array<GPUPointer | number>): void => {
if (this.debugShaderSubmitLimit != -1 &&
this.shaderSubmitCounter >= this.debugShaderSubmitLimit) {
this.shaderSubmitCounter += 1;
return;
}
const commandEncoder = this.device.createCommandEncoder();
const compute = commandEncoder.beginComputePass();
compute.setPipeline(pipeline);
const bindGroupEntries: Array<GPUBindGroupEntry> = [];
const numBufferOrPodArgs = bufferArgIndices.length + podArgIndices.length;
assert(args.length == numBufferOrPodArgs + dispatchToDim.length);
const workDim: Array<number> = [1, 1, 1, 1, 1, 1];
for (let i = 0; i < dispatchToDim.length; ++i) {
workDim[dispatchToDim[i]] = args[numBufferOrPodArgs + i];
}
// get around 65535 restriction of blockIdx.x
if (workDim[2] != 1) {
throw Error("WebGPU: blockIdx.z is reserved for internal use");
}
const packDimX = workDim[0];
// spread thinsg out into blockIdx.z
if (workDim[0] >= (1 << 16)) {
let wl_x = workDim[0];
let wl_z = workDim[2];
while (wl_x >= (1 << 16)) {
if (wl_x % 2 == 0) {
wl_x = wl_x / 2;
} else {
// pad up
wl_x = (wl_x + 1) / 2;
}
wl_z *= 2;
}
workDim[0] = wl_x;
workDim[2] = wl_z;
assert(wl_x * wl_z >= packDimX);
}
for (let i = 0; i < bufferArgIndices.length; ++i) {
bindGroupEntries.push({
binding: i,
resource: {
buffer: this.gpuBufferFromPtr(args[bufferArgIndices[i]])
}
});
}
// push pod buffer
const sizeOfI32 = 4;
const podArgBuffer = this.getPodArgsBuffer((podArgIndices.length + 1) * sizeOfI32);
const i32View = new Int32Array(podArgIndices.length + 1);
const u32View = new Uint32Array(i32View.buffer);
const f32View = new Float32Array(i32View.buffer);
for (let i = 0; i < podArgIndices.length; ++i) {
const value = args[podArgIndices[i]];
const dtype = finfo.arg_types[podArgIndices[i]];
if (dtype.startsWith("int")) {
i32View[i] = value;
} else if (dtype.startsWith("uint")) {
u32View[i] = value;
} else if (dtype.startsWith("float")) {
f32View[i] = value;
} else {
throw Error("Unknown pod dtype " + dtype);
}
}
// always pass in dim z launching grid size in
u32View[podArgIndices.length] = packDimX;
this.device.queue.writeBuffer(podArgBuffer, 0, i32View.buffer);
bindGroupEntries.push({
binding: bufferArgIndices.length,
resource: {
buffer: podArgBuffer,
size: i32View.buffer.byteLength
}
});
compute.setBindGroup(0, this.device.createBindGroup({
layout: bindGroupLayout,
entries: bindGroupEntries
}));
compute.dispatchWorkgroups(workDim[0], workDim[1], workDim[2])
compute.end()
const command = commandEncoder.finish();
this.device.queue.submit([command]);
if (this.debugLogFinish) {
const currCounter = this.shaderSubmitCounter;
this.device.queue.onSubmittedWorkDone().then(() => {
console.log("[" + currCounter + "][Debug] finish shader" + finfo.name);
});
}
this.shaderSubmitCounter += 1;
};
return submitShader;
};
const shaderModule = this.device.createShaderModule({
code: code,
compilationHints: [
{
entryPoint: "main",
layout: pipelineLayout
}
]
});
if (asyncMode) {
return this.device.createComputePipelineAsync({
layout: pipelineLayout,
compute: {
module: shaderModule,
entryPoint: finfo.name
}
}).then((pipeline: GPUComputePipeline) => {
return createShaderFunc(pipeline);
});
} else {
const pipeline = this.device.createComputePipeline({
layout: pipelineLayout,
compute: {
module: shaderModule,
entryPoint: finfo.name
}
});
return createShaderFunc(pipeline);
}
}
/**
* Get the device API according to its name
* @param The name of the API.
* @returns The corresponding device api.
*/
getDeviceAPI(name: string): Function {
if (name == "deviceAllocDataSpace") {
return (nbytes: number): GPUPointer => {
return this.deviceAllocDataSpace(nbytes);
};
} else if (name == "deviceFreeDataSpace") {
return (ptr: GPUPointer): void => {
return this.deviceFreeDataSpace(ptr);
};
} else if (name == "deviceCopyToGPU") {
return (
from: Pointer,
to: GPUPointer,
toOffset: number,
nbytes: number
): void => {
this.deviceCopyToGPU(from, to, toOffset, nbytes);
};
} else if (name == "deviceCopyFromGPU") {
return (
from: GPUPointer,
fromOffset: number,
to: Pointer,
nbytes: number
): void => {
this.deviceCopyFromGPU(from, fromOffset, to, nbytes);
};
} else if (name == "deviceCopyWithinGPU") {
return (
from: GPUPointer,
fromOffset: number,
to: Pointer,
toOffset: number,
nbytes: number
): void => {
this.deviceCopyWithinGPU(from, fromOffset, to, toOffset, nbytes);
};
} else {
throw new Error("Unknown DeviceAPI function " + name);
}
}
// DeviceAPI
private deviceAllocDataSpace(nbytes: number): GPUPointer {
// allocate 0 bytes buffer as 1 bytes buffer.
if (nbytes == 0) {
nbytes = 1;
}
const buffer = tryCreateBuffer(this.device, {
size: nbytes,
usage: GPUBufferUsage.STORAGE | GPUBufferUsage.COPY_SRC | GPUBufferUsage.COPY_DST,
});
this.currAllocatedBytes += nbytes;
this.allAllocatedBytes += nbytes;
if (this.currAllocatedBytes > this.peakAllocatedBytes) {
this.peakAllocatedBytes = this.currAllocatedBytes;
}
const ptr = this.attachToBufferTable(buffer);
return ptr;
}
private deviceFreeDataSpace(ptr: GPUPointer): void {
const idx = ptr;
const buffer = this.bufferTable[idx];
this.bufferTable[idx] = undefined;
assert(buffer !== undefined);
this.bufferTableFreeId.push(idx);
this.currAllocatedBytes -= buffer.size;
buffer.destroy();
}
private deviceCopyToGPU(
from: Pointer,
to: GPUPointer,
toOffset: number,
nbytes: number
): void {
// Perhaps it would be more useful to use a staging buffer?
let rawBytes = this.memory.loadRawBytes(from, nbytes);
if (rawBytes.length % 4 !== 0) {
// writeBuffer requires length to be multiples of 4, so we pad here
const toPad = 4 - rawBytes.length % 4;
rawBytes = new Uint8Array(rawBytes.length + toPad);
rawBytes.set(rawBytes);
nbytes = nbytes + toPad;
}
this.device.queue.writeBuffer(
this.gpuBufferFromPtr(to),
toOffset,
rawBytes,
0,
nbytes
);
}
private deviceCopyFromGPU(
from: GPUPointer,
fromOffset: number,
to: Pointer,
nbytes: number
): void {
// Perhaps it would be more useful to resuse a staging buffer?
const gpuTemp = tryCreateBuffer(this.device, {
size: nbytes,
usage: GPUBufferUsage.MAP_READ | GPUBufferUsage.COPY_DST,
});
const copyEncoder = this.device.createCommandEncoder();
copyEncoder.copyBufferToBuffer(
this.gpuBufferFromPtr(from),
fromOffset,
gpuTemp,
0,
nbytes
);
const copyCommands = copyEncoder.finish();
this.device.queue.submit([copyCommands]);
gpuTemp.mapAsync(GPUMapMode.READ).then(() => {
const data = gpuTemp.getMappedRange();
this.memory.storeRawBytes(to, new Uint8Array(data));
gpuTemp.destroy();
});
}
private deviceCopyWithinGPU(
from: GPUPointer,
fromOffset: number,
to: Pointer,
toOffset: number,
nbytes: number
): void {
const copyEncoder = this.device.createCommandEncoder();
copyEncoder.copyBufferToBuffer(
this.gpuBufferFromPtr(from),
fromOffset,
this.gpuBufferFromPtr(to),
toOffset,
nbytes
);
const copyCommands = copyEncoder.finish();
this.device.queue.submit([copyCommands]);
}
private gpuBufferFromPtr(ptr: GPUPointer): GPUBuffer {
const buffer = this.bufferTable[ptr];
assert(buffer !== undefined);
return buffer;
}
private attachToBufferTable(buffer: GPUBuffer): GPUPointer {
if (this.bufferTableFreeId.length != 0) {
const idx = this.bufferTableFreeId.pop() as number;
this.bufferTable[idx] = buffer;
return idx;
} else {
const idx = this.bufferTable.length;
this.bufferTable.push(buffer);
return idx;
}
}
}