Skip to content

Instantly share code, notes, and snippets.

View athas's full-sized avatar
🐈
how did this get here i am not good with compute

Troels Henriksen athas

🐈
how did this get here i am not good with compute
View GitHub Profile
/*
 * segmap: each work-item increments, in place, the single element of `mem`
 * selected by its global id along dimension 0.
 *
 * mem - buffer in the global address space; one element is touched per
 *       work-item.
 *
 * No bounds check is performed here.
 * NOTE(review): `int8_t` is not a built-in OpenCL C type; presumably a typedef
 * is supplied by a prelude outside this snippet - confirm.
 */
__kernel void segmap(__global int8_t *mem)
{
    const size_t gid = get_global_id(0);

    mem[gid] += 1;
}
-- | Two-way histogram over `k` bins.
--
-- Every input position carries a pair of bin indices (`is`) and a pair of
-- values (`vs`).  The first components are accumulated into one histogram and
-- the second components into another, both starting from `k` copies of `ne`
-- and combining with `op`.  The two histograms are then merged bin by bin
-- with `op`.
--
-- `op` and `ne` are handed directly to `reduce_by_index`, so they must satisfy
-- whatever that function requires of its operator and neutral element.
let hist2 [n] 'a (k: i32) (op: a -> a -> a) (ne: a) (is: [n](i32, i32)) (vs: [n](a,a)) : [k]a =
  let (is_fst, is_snd) = unzip is
  let (vs_fst, vs_snd) = unzip vs
  -- Each call gets its own freshly replicated destination array.
  let bins_fst = reduce_by_index (replicate k ne) op ne is_fst vs_fst
  let bins_snd = reduce_by_index (replicate k ne) op ne is_snd vs_snd
  in map2 op bins_fst bins_snd
-- | Build one key per pixel of a `rows` x `cols` image.
--
-- For the pixel at row `r`, column `c`, the key is
-- `f (norm_to_u8 r rows, norm_to_u8 c cols) pixel`.  Keys are returned as a
-- flat array of `rows*cols` elements, in the order produced by flattening the
-- image.
--
-- NOTE(review): `norm_to_u8` is defined outside this snippet; presumably it
-- scales an index into the u8 range relative to the given extent - confirm.
let get_keys_from_image [rows][cols] 'a 'b (f: (u8,u8) -> a -> b) (img: [rows][cols]a): []b =
  let total = rows * cols
  let coords =
    flatten_to total
               (tabulate_2d rows cols
                            (\r c -> (norm_to_u8 r rows, norm_to_u8 c cols)))
  let pixels = flatten_to total img
  in map2 f coords pixels
let get_keys_from_lum_image =
-- $ futhark bench imgrep.fut --backend=opencl
-- Compiling imgrep.fut...
-- Results for imgrep.fut:test_a:
-- dataset [2000][2000][3]u8: 240.00μs (avg. of 10 runs; RSD: 0.08)
-- Results for imgrep.fut:test_b:
-- dataset [2000][2000]u8 [2000][2000]u8 [2000][2000]u8: 172.90μs (avg. of 10 runs; RSD: 0.01)
-- Results for imgrep.fut:test_c:
-- dataset [2000][2000]u32: 216.70μs (avg. of 10 runs; RSD: 0.01)
-- ==
-- $ futhark bench imgrep.fut
-- Compiling imgrep.fut...
-- Results for imgrep.fut:test_a:
-- dataset [1000][1000][3]u8: 7739.90μs (avg. of 10 runs; RSD: 0.03)
-- Results for imgrep.fut:test_b:
-- dataset [1000][1000]u8 [1000][1000]u8 [1000][1000]u8: 6137.70μs (avg. of 10 runs; RSD: 0.10)
-- Results for imgrep.fut:test_c:
-- dataset [1000][1000]u32: 1073.40μs (avg. of 10 runs; RSD: 0.24)
-- ==
cabal-version: >=1.10
-- Package metadata for futhark-ad.  Only the leading fields are visible in
-- this fragment; any library/executable stanzas would follow below.
name: futhark-ad
version: 0.1.0.0
-- The licence text is expected in a file named LICENSE next to this file.
license-file: LICENSE
author: Troels Henriksen
maintainer: athas@sigkill.dk
-- Simple: no custom Setup.hs logic is declared for the build.
build-type: Simple
-- Additional files to include in the source distribution.
extra-source-files: CHANGELOG.md
module Main (main) where
import Control.Monad.State
import System.Environment (getArgs)
import qualified Futhark.Compiler as Compiler
import qualified Futhark.Internalise.Defunctorise as Defunctorise
import qualified Futhark.Internalise.Defunctionalise as Defunctionalise
import qualified Futhark.Internalise.Monomorphise as Monomorphise
import Futhark.Util.Pretty (pretty)
int futhark_mc_parloop_44110(void *args, int start, int end, int ntask_44065)
{
struct futhark_mc_parloop_struct_44109 *futhark_mc_parloop_struct_44109 =
(struct futhark_mc_parloop_struct_44109 *) args;
struct futhark_context *ctx = futhark_mc_parloop_struct_44109->ctx;
int64_t futhark_mc_parloop_44110_start, futhark_mc_parloop_44110_end;
if (ctx->profiling)
futhark_mc_parloop_44110_start = get_wall_time();
aabbHit Raytracing Raytracing.hs:(74,1)-(98,41) 67.2 0.0
objsHit Raytracing Raytracing.hs:(101,1)-(110,63) 25.8 61.5
sphereHit Raytracing Raytracing.hs:(50,1)-(69,54) 4.2 13.3
rayColour Raytracing Raytracing.hs:(159,1)-(170,64) 0.7 3.6
sched Control.Monad.Par.Scheds.TraceInternal Control/Monad/Par/Scheds/TraceInternal.hs:(60,1)-(106,18) 0.3 6.1
mkImage Image Image.hs:(33,1)-(36,64) 0.1 2.2
spawn Control.Monad.Par.Scheds.Trace Control/Monad/Par/Scheds/Trace.hs:43:1-55 0.1 1.7
get Control.Monad.Par.Scheds.TraceInternal Control/Monad/Par/Scheds/TraceInternal.hs:336:1-27 0.0 1.2
parMap Control.Monad.Par.Combinato
Sun Apr 12 12:40 2020 Time and Allocation Profiling Report (Final)
bench-ray +RTS -p -RTS rendering/rgbbox/200x200
total time = 15.28 secs (15281 ticks @ 1000 us, 1 processor)
total alloc = 8,437,413,448 bytes (excludes profiling overheads)
COST CENTRE MODULE SRC %time %alloc
aabbHit Raytracing Raytracing.hs:(73,1)-(97,41) 46.1 0.0