Last active
April 30, 2018 18:24
-
-
Save trxcllnt/a37bca8dd3ddd8ff99a0e39068271ad8 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// const memwatch = require('memwatch-next'); | |
// require('segfault-handler').registerHandler("async.log"); | |
// memwatch.on('leak', ({ growth, reason }) => console.log(`Leak: ${round(growth >> 10, 2)}mb ${reason}`)); | |
// Sequence number for the optional per-event timing log in eventAsPromise (that log line is currently commented out).
let eventID = 0; | |
const assert = require('assert'); | |
const cl = require('node-opencl'); | |
const { promisify } = require('util'); | |
// Promise-returning wrapper around setTimeout.
const sleep = promisify(setTimeout);

// Host-side buffer pool class, produced by the factory function defined further down in this file.
const MemoryPool = createMemoryPoolClass();

/**
 * Rounds `x` to `d` digits after the decimal point.
 * @param {number} x - value to round
 * @param {number} d - number of decimal digits to keep
 * @returns {number} the rounded value
 */
const roundToSig = (x, d) => {
    const scale = Math.pow(10, d);
    return Math.round(x * scale) / scale;
};
// OpenCL C program text. Each work-item squares one element of `input` into `output`;
// the `count` guard skips work-items whose global id is not below `count`.
const source = `
__kernel void square(
    __global uint* input,
    __global uint* output,
    unsigned int count)
{
    unsigned int i = get_global_id(0);
    if (i < count)
        output[i] = input[i] * input[i];
}
`;
// Device class the test runs on. Use cl.DEVICE_TYPE_CPU here to target the CPU instead.
const DEVICE_TYPE = cl.DEVICE_TYPE_GPU;

// Bindings that lack enqueueMarkerWithWaitList fall back to a plain marker on the queue.
// NOTE(review): the fallback ignores the wait list it is handed — confirm that is acceptable
// for the queue type in use.
const enqueueMarkerWithWaitList = cl.enqueueMarkerWithWaitList || ((queue) => cl.enqueueMarker(queue, true));

/**
 * Finds the first platform that exposes at least one device of the requested type.
 * @param {number} deviceType - value compared against each device's cl.DEVICE_TYPE info
 * @returns {[object, object[]]} the platform id and all of its matching device ids
 * @throws {Error} when no platform has a matching device
 */
function selectPlatformAndDevices(deviceType) {
    for (const candidatePlatform of cl.getPlatformIDs()) {
        const matching = cl.getDeviceIDs(candidatePlatform, cl.DEVICE_TYPE_ALL)
            .filter((device) => cl.getDeviceInfo(device, cl.DEVICE_TYPE) === deviceType);
        if (matching.length > 0) {
            return [candidatePlatform, matching];
        }
    }
    // Fail here with a clear message instead of a TypeError on `deviceIDs[0]` later on.
    throw new Error(`No OpenCL device with DEVICE_TYPE === ${deviceType} was found on any platform`);
}

const [platformID, deviceIDs] = selectPlatformAndDevices(DEVICE_TYPE);

// Report which platform/device pair the run will use.
const platformVersion = cl.getPlatformInfo(platformID, cl.PLATFORM_VERSION);
const deviceName = cl.getDeviceInfo(deviceIDs[0], cl.DEVICE_NAME);
console.log(`${platformVersion} - ${deviceName}`);
// Entry point: builds the `square` kernel, allocates two device buffers and a command queue,
// then runs the bound test 9975 times, logging progress every `logInterval` iterations.
// The outcome (or failure) is printed once the loop settles.
((async () => {
    const context = cl.createContext ?
        cl.createContext([cl.CONTEXT_PLATFORM, platformID], [deviceIDs[0]]) :
        cl.createContextFromType([cl.CONTEXT_PLATFORM, platformID], DEVICE_TYPE, null, null);
    const program = cl.createProgramWithSource(context, source);
    cl.buildProgram(program);
    const kernel = cl.createKernel(program, `square`);
    const memFlags = cl.MEM_READ_WRITE | cl.MEM_ALLOC_HOST_PTR;
    // 32 * 1024 uint32 elements = 131072 bytes (128KB) per buffer
    const byteLength = 32 * 1024 * Uint32Array.BYTES_PER_ELEMENT;
    const inputs = cl.createBuffer(context, memFlags, byteLength);
    const outputs = cl.createBuffer(context, memFlags, byteLength);
    // Use whichever queue constructor this binding exposes.
    const createQueue = cl.createCommandQueueWithProperties || cl.createCommandQueue;
    const queue = createQueue(context, deviceIDs[0], null);
    const logInterval = 525;
    const { startTime, getTime } = createTimer();
    const runTest = bindTest({ context, queue, kernel, inputs, outputs, byteLength });
    // `range` is a synchronous generator, so a plain for...of is sufficient here.
    for (const i of range(9975)) {
        const async = true;
        // Or alternate sync/async on each iteration
        // const async = i % 2 !== 0;
        await runTest(async, i);
        if (i % logInterval === 0) {
            // getTime() with no argument reports the time since the previous getTime() call
            console.log(`iteration: ${i} (async=${async}, dTime=${getTime()})`);
        }
    }
    return `success (total=${getTime(startTime)})`;
})()
.then(
    console.log.bind(console, `result:`),
    console.error.bind(console, `error:`)
).catch(console.error.bind(console, `caught:`)));
/**
 * Binds the OpenCL objects for one test configuration and returns the per-iteration runner.
 *
 * At bind time this sets kernel argument 2 (`count`) and creates a pool of three host input
 * arrays plus one host output array. Each call of the returned function writes a pooled
 * input into one device buffer, enqueues the kernel, reads the other device buffer back
 * into the host output array and waits for the resulting events.
 *
 * @param {object} options
 * @param {object} options.context - OpenCL context (not used inside this function)
 * @param {object} options.queue - command queue every operation is enqueued on
 * @param {object} options.kernel - the `square` kernel
 * @param {object} options.inputs - first device buffer
 * @param {object} options.outputs - second device buffer
 * @param {number} options.byteLength - size of each device buffer in bytes
 * @returns {function(boolean, number=): Promise<void>} runTest(async, i)
 */
function bindTest({ context, queue, kernel, inputs, outputs, byteLength }) {
    const setKernelArg = cl.setKernelArg.bind(cl, kernel);
    const length = byteLength / Uint32Array.BYTES_PER_ELEMENT;
    const bufferPool = new MemoryPool(Uint32Array, byteLength, 3);
    const enqueueNDRangeKernel = (async, gws, lws, events) => [
        cl.enqueueNDRangeKernel(queue, kernel, 1, null, gws, lws, events, async)
    ].filter(eventIsActive);

    // Runs `copy` once the mapping is usable: immediately when the mapped pointer carries no
    // event, otherwise after that event completes. Always yields the mapped pointer so it can
    // be handed to the unmap call.
    const afterMapped = (hostPtr, copy) => {
        if (!hostPtr.event) {
            copy();
            return hostPtr;
        }
        return eventAsPromise(hostPtr.event).then(() => {
            copy();
            return hostPtr;
        });
    };
    const writeInputs = bindMapBuffer(queue, cl.MAP_WRITE, byteLength,
        (hostPtr, data) => afterMapped(hostPtr, () => memcpy(hostPtr, data)));
    const readOutputs = bindMapBuffer(queue, cl.MAP_READ, byteLength,
        (hostPtr, data) => afterMapped(hostPtr, () => memcpy(data, hostPtr)));

    const out = new Uint32Array(length);
    // The kernel's `count` is a number of elements, not a number of bytes.
    setKernelArg(2, `uint`, length);

    return async function runTest(async, i = 0) {
        const in_ = bufferPool.get();
        // alternate input/output buffers on each iteration
        const [inMem, outMem] = (i % 2 === 0 ? [inputs, outputs] : [outputs, inputs]);
        setKernelArg(0, `uint*`, inMem);
        setKernelArg(1, `uint*`, outMem);

        // The compact form...
        // await eventAsPromise(
        //     await readOutputs(async, outMem,
        //         enqueueNDRangeKernel(async, [length], null,
        //             await writeInputs(async, inMem, [], in_)), out), queue);
        // ...or unrolled to measure timings

        // Flip this flag to see the write/read/map/unmap timings
        const debug = false;
        const timer = debug ? createTimer() : null;
        const logStep = (label) => {
            if (debug) {
                console.log(`${label}: ${timer.getTime()}`);
            }
        };

        let events = await writeInputs(async, inMem, [], in_);
        logStep(`map write`);
        events = enqueueNDRangeKernel(async, [length], null, events);
        logStep(`enqueue K`);
        events = await readOutputs(async, outMem, events, out);
        logStep(` map read`);
        await eventAsPromise(events, queue);
        logStep(`enqueue M`);

        // Verify kernel results (slow)
        // (() => {
        //     assert.equal(out.length, in_.length,
        //         `expected out.length to equal ${in_.length}`);
        //     for (let i = -1, n = in_.length; ++i < n;) {
        //         assert.equal(isNaN(out[i]), false, `expected out[${i}] not NaN`);
        //         assert.equal(out[i], in_[i] ** 2,
        //             `expected out[${i}] to equal ${in_[i] ** 2}`);
        //     }
        // })();
    };
}
/**
 * Builds a "map, touch, unmap" helper for buffers on queue `q`.
 *
 * The returned function maps `size` bytes of a memory object with `mapFlags`, hands the
 * mapped pointer (plus any extra arguments) to `sel`, then enqueues the unmap using whatever
 * `sel` produced. It yields the unmap event wrapped in an array, filtered by eventIsActive.
 *
 * @param {object} q - command queue
 * @param {number} mapFlags - flags passed to cl.enqueueMapBuffer
 * @param {number} size - number of bytes to map, starting at offset 0
 * @param {function} sel - called as sel(hostPtr, ...rest); its result is passed to the unmap call
 * @returns {function(boolean, object, Array=, ...*)} (async, memObj, events, ...rest);
 *          returns an array when `async` is falsy, a Promise of an array otherwise
 */
function bindMapBuffer(q, mapFlags, size, sel) {
    const { enqueueMapBuffer, enqueueUnmapMemObject } = cl;
    const keepActive = (events) => events.filter(eventIsActive);

    // Blocking map, synchronous `sel`, unmap enqueued without requesting an event.
    function mapBlocking(mem, waitList, ...extra) {
        const hostPtr = enqueueMapBuffer(q, mem, true, mapFlags, 0, size, waitList, false);
        const mapped = sel(hostPtr, ...extra);
        return keepActive([enqueueUnmapMemObject(q, mem, mapped, [], false)]);
    }

    // Non-blocking map; `sel` may return a promise, which is awaited before the unmap.
    async function mapNonBlocking(mem, waitList, ...extra) {
        const hostPtr = enqueueMapBuffer(q, mem, false, mapFlags, 0, size, waitList, true);
        const mapped = await sel(hostPtr, ...extra);
        return keepActive([enqueueUnmapMemObject(q, mem, mapped, [], true)]);
    }

    return (async, memObj, events, ...rest) => {
        const waitList = keepActive(events || []);
        return async
            ? mapNonBlocking(memObj, waitList, ...rest)
            : mapBlocking(memObj, waitList, ...rest);
    };
}
/**
 * Tells whether `e` is an event object whose reference count is still positive.
 *
 * Non-objects are never active. When `e` already owns a `refCount` property that cached value
 * decides; otherwise the count is queried through cl.getEventInfo and cached on `e.refCount`.
 * Any error while querying is treated as "not active".
 *
 * @param {*} e - candidate event
 * @returns {boolean}
 */
function eventIsActive(e) {
    const isObject = Object(e) === e;
    if (!isObject) {
        return false;
    }
    if (e.hasOwnProperty('refCount')) {
        return e.refCount > 0;
    }
    try {
        const count = cl.getEventInfo(e, cl.EVENT_REFERENCE_COUNT);
        e.refCount = count;
        return count > 0;
    } catch (_) {
        return false;
    }
}
/**
 * Turns an OpenCL event, or an array of events, into a Promise that resolves on completion.
 *
 * - Array with no active events: resolves immediately with the array itself.
 * - Array with exactly one active event: behaves as if that event had been passed alone.
 * - Array with several active events: with a `queue`, a marker is enqueued on them and
 *   awaited; without one, each event is awaited and the results are combined with Promise.all.
 * - Single inactive (or non-event) value: resolves immediately with that value.
 * - Single active event: resolves with the event from its cl.COMPLETE callback.
 *
 * @param {object|object[]} e - event or list of events
 * @param {object} [queue] - command queue used to enqueue a marker over several events
 * @returns {Promise<*>}
 */
function eventAsPromise(e, queue) {
    if (Array.isArray(e)) {
        const active = e.filter(eventIsActive);
        if (active.length === 0) {
            // Nothing to wait for. Resolve here so the array itself is never probed as an event.
            return Promise.resolve(e);
        }
        if (active.length === 1) {
            return eventAsPromise(active[0]);
        }
        return queue
            ? eventAsPromise(enqueueMarkerWithWaitList(queue, active, true))
            : Promise.all(active.map((event) => eventAsPromise(event)));
    }
    if (!eventIsActive(e)) {
        return Promise.resolve(e);
    }
    return new Promise((resolve) => cl.setEventCallback(e, cl.COMPLETE, () => {
        resolve(e);
        // Release the event's references a little later rather than inside the callback.
        // Original author's note: without this delay, node exits after the first event's
        // refCount drops to 0.
        setTimeout(() => {
            try {
                let refCount = !e ? 0 : cl.getEventInfo(e, cl.EVENT_REFERENCE_COUNT);
                // Keep the cached count in step so eventIsActive(e) turns false once released.
                while (refCount-- > 0) {
                    e.refCount = refCount;
                    cl.releaseEvent(e);
                }
            } catch (_) {
                console.error(`Error releasing event\n\t${_ && _.stack || _}`);
            }
        }, 25);
    }));
}
/**
 * Copies up to `byteLength` bytes from `source` into `target`, both starting at their own
 * first byte.
 *
 * Operands may be ArrayBuffers or any ArrayBuffer view. The copy goes through byte-wide views,
 * so it works for any byteOffset and any length, and never covers more than `byteLength`
 * bytes of either operand. Other operands are handled on a best-effort basis: a falsy value
 * stands for a zero-filled scratch buffer, anything else is converted with `new Uint8Array(data)`.
 *
 * Returns nothing. Callers in this file rely on that by writing `memcpy(...) || hostPtr`.
 *
 * @param {ArrayBuffer|ArrayBufferView} target - destination memory
 * @param {ArrayBuffer|ArrayBufferView} source - origin memory
 * @param {number} [byteLength=source.byteLength] - maximum number of bytes to copy
 * @throws {RangeError} when the bytes taken from `source` do not fit into `target`
 */
function memcpy(target, source, byteLength = source.byteLength) {
    // A missing or non-finite byte count means "use each operand in full".
    const limit = Number.isFinite(byteLength) ? byteLength : Infinity;
    const asBytes = (data) => {
        if (data instanceof ArrayBuffer) {
            return new Uint8Array(data, 0, Math.min(limit, data.byteLength));
        }
        if (ArrayBuffer.isView(data)) {
            return new Uint8Array(data.buffer, data.byteOffset, Math.min(limit, data.byteLength));
        }
        if (!data) {
            return new Uint8Array(Number.isFinite(limit) ? limit : 0);
        }
        return new Uint8Array(data);
    };
    asBytes(target).set(asBytes(source));
}
/**
 * Presents `data` as an instance of the given typed-array constructor.
 *
 * - falsy `data`: a new zero-filled array of `length` elements
 * - already an instance of `TypedArray`: returned unchanged (`length` is ignored)
 * - ArrayBuffer: a view over its first `length` elements
 * - another ArrayBuffer view: a view over the same bytes (`length` is ignored)
 * - anything else: passed to the constructor, which builds a new array from it
 *
 * @param {Function} TypedArray - typed-array constructor, e.g. Uint32Array
 * @param {*} data - value to present as a typed array
 * @param {number} length - element count used for the ArrayBuffer and falsy cases
 * @returns {object} an instance of `TypedArray`
 */
function asTypedArray(TypedArray, data, length) {
    if (!data) {
        return new TypedArray(length);
    }
    if (data instanceof TypedArray) {
        return data;
    }
    if (data instanceof ArrayBuffer) {
        return new TypedArray(data, 0, length);
    }
    if (ArrayBuffer.isView(data)) {
        const elementCount = data.byteLength / TypedArray.BYTES_PER_ELEMENT;
        return new TypedArray(data.buffer, data.byteOffset, elementCount);
    }
    return new TypedArray(data, 0, length);
}
/**
 * Generator yielding the integers 0, 1, ..., n - 1 in order.
 * Yields nothing when `n` is not greater than zero.
 * @param {number} n - exclusive upper bound
 */
function* range(n) {
    let next = 0;
    while (next < n) {
        yield next;
        next += 1;
    }
}
/**
 * Factory for the MemoryPool class: a fixed set of typed arrays, pre-filled with random
 * integers, that are handed out in rotation.
 * @returns {Function} the MemoryPool class
 */
function createMemoryPoolClass() {
    // Overwrites every element with a random integer from 0 to 99 and returns the same array.
    const randomData = (tArray) => {
        const count = tArray.length;
        for (let slot = 0; slot < count; slot += 1) {
            tArray[slot] = Math.trunc(100 * Math.random());
        }
        return tArray;
    };

    return class MemoryPool {
        /**
         * @param {Function} TypedArray - typed-array constructor for the pooled buffers
         * @param {number} bytesTotal - size of each pooled buffer in bytes
         * @param {number} [poolSize=16] - number of buffers to create
         */
        constructor(TypedArray, bytesTotal, poolSize = 16) {
            this.buffers = [];
            this.buffersIndex = 0;
            this.bytesTotal = bytesTotal;
            this.TypedArray = TypedArray;
            const elementsPerBuffer = bytesTotal / TypedArray.BYTES_PER_ELEMENT;
            for (let made = 0; made < poolSize; made += 1) {
                this.buffers.push(randomData(new TypedArray(elementsPerBuffer)));
            }
        }

        /**
         * Advances the rotation by one and returns the buffer at the new position, so the
         * first call on a fresh pool returns the buffer at index 1 when there is one.
         */
        get() {
            const nextIndex = (this.buffersIndex + 1) % this.buffers.length;
            this.buffersIndex = nextIndex;
            return this.buffers[nextIndex];
        }
    };
}
/**
 * Creates a stopwatch based on process.hrtime().
 *
 * `getTime(since)` returns the time elapsed since `since` as a string in milliseconds,
 * rounded to two decimals. Without an argument it measures from the previous `getTime`
 * call (or from creation on the first call). Every call moves that reference point to "now".
 *
 * @returns {{ startTime: number[], getTime: function(number[]=): string }}
 */
function createTimer() {
    const startTime = process.hrtime();
    let lastMark = startTime;
    // The default for `since` is read before `lastMark` is moved forward in the body.
    const getTime = (since = lastMark) => {
        lastMark = process.hrtime();
        const [seconds, nanoseconds] = process.hrtime(since);
        const elapsedMs = (seconds * 1000) + (nanoseconds / 1000000);
        return `${roundToSig(elapsedMs, 2)}ms`;
    };
    return { startTime, getTime };
}
// Printed when the module's synchronous top-level code has finished; the asynchronous
// test loop started above is still in flight at this point.
console.log('== Initial loop terminated ==');
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment