Skip to content

Instantly share code, notes, and snippets.

View syoyo's full-sized avatar
💗
ray tracing

Syoyo Fujita syoyo

💗
ray tracing
View GitHub Profile
@syoyo
syoyo / gist:1414870
Created December 1, 2011 08:09
Mount Solaris SRP target with Linux SRP initiator!
scsi host6: ib_srp: new target: id_ext 0002c90200201e28 ioc_guid 0002c90200201e28 pkey ffff service_id 0002c90200201e28 dgid fe80:0000:0000:0000:0002:c902:0020:1e29
scsi6 : SRP.T10:0002C90200201E28
scsi 6:0:0:2: Direct-Access OI COMSTAR 1.0 PQ: 0 ANSI: 5
sd 6:0:0:2: Attached scsi generic sg2 type 0
sd 6:0:0:2: [sdc] 754974720 512-byte logical blocks: (386 GB/360 GiB)
sd 6:0:0:2: [sdc] Write Protect is off
sd 6:0:0:2: [sdc] Mode Sense: 53 00 00 00
sd 6:0:0:2: [sdc] Write cache: enabled, read cache: enabled, doesn't support DPO or FUA
sdc: sdc1 sdc2
sd 6:0:0:2: [sdc] Attached SCSI disk
#ifdef __arm__
typedef float32x4_t float4;
typedef int32x4_t int4;
typedef uint32x4_t uint4;
#else
typedef __m128 float4;
typedef __m128i int4;
#if defined(__AVX__)
typedef __m256 float8;
typedef __m256d double4;
@syoyo
syoyo / gist:4665840
Created January 29, 2013 17:05
valgrind --tool=memcheck --leak-check=full ../../ispc --emit-c++ --target=generic-16 -h ao_ispc.h -o bora.cc ao.ispc
This file has been truncated, but you can view the full file.
==20402== Memcheck, a memory error detector
==20402== Copyright (C) 2002-2009, and GNU GPL'd, by Julian Seward et al.
==20402== Using Valgrind-3.5.0 and LibVEX; rerun with -h for copyright info
==20402== Command: ../../ispc --emit-c++ --target=generic-16 -h ao_ispc.h -o bora.cc ao.ispc
==20402==
==20402== Conditional jump or move depends on uninitialised value(s)
==20402== at 0xBC0085: clang::SourceManager::getColumnNumber(clang::FileID, unsigned int, bool*) const (in /home/syoyo/work/ispc/ispc)
==20402== by 0xBC3838: clang::SourceManager::getPresumedLoc(clang::SourceLocation) const (in /home/syoyo/work/ispc/ispc)
==20402== by 0x50734D: (anonymous namespace)::PrintPPOutputPPCallbacks::FileChanged(clang::SourceLocation, clang::PPCallbacks::FileChangeReason, clang::SrcMgr::CharacteristicKind, clang::FileID) (in /home/syoyo/work/ispc/ispc)
==20402== by 0xC17F3B: clang::Preprocessor::HandleDigitDirective(clang::Token&) (in /home/syoyo/work/ispc/ispc)
@syoyo
syoyo / gist:4945410
Created February 13, 2013 15:30
clang's vector extension -> ARM NEON codegen test.
typedef float float4 __attribute__((ext_vector_type(4)));
void
clang_test(float* out, float* in)
{
float4 a, b;
b = *((float4*)in);
a = b * b;
@syoyo
syoyo / gist:4945456
Created February 13, 2013 15:35
clang's ARM NEON codegen test.
// Element-wise square of a 4-float vector: out[k] = in[k] * in[k] for k in 0..3.
// Written as plain scalar code with a fixed trip count of four.
void
clang_test(float* out, float* in)
{
  int lane = 0;
  while (lane < 4) {
    // Load the lane once, square it, then store before advancing.
    const float value = in[lane];
    out[lane] = value * value;
    ++lane;
  }
}
// asm output
@syoyo
syoyo / gist:7245357
Created October 31, 2013 07:01
Bullet3 on Radeon R9 280X(1050 MHz OC model)
Demo settings:
SelectedDemo=1, demoname = BoxBox
x_dim=30, y_dim=30, z_dim=30
x_gap=16.299999, y_gap=6.300000, z_gap=16.299999
OpenCL settings:
Preferred cl_device index 1
Preferred cl_platform index-1
Platform info:
@syoyo
syoyo / gist:8399386
Created January 13, 2014 12:21
pocl CLinfo.
CL_DEVICE_NAME: pthread-Intel(R) Xeon(R) CPU E5-2670 0 @ 2.60GHz
CL_DEVICE_VENDOR:
CL_DEVICE_OPENCL_C_VERSION: OpenCL C 1.2
CL_DEVICE_PROFILE: FULL_PROFILE
CL_DEVICE_VERSION: OpenCL 1.2 pocl
CL_DRIVER_VERSION: 0.9
CL_DEVICE_EXTENSIONS: cl_khr_fp64 cl_khr_fp16 cl_khr_byte_addressable_store
CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS: 3
CL_DEVICE_IMAGE_SUPPORT: 1
CL_DEVICE_IMAGE2D_MAX_WIDTH: 8192
@syoyo
syoyo / gist:23569871d2557a68f452
Created January 12, 2015 04:38
Simple pipeline analysis
// == sim start ==
00000001 : fmadd %f102, %f66, %f64, %f112
00000002 : fmadd %f2, %f6, %f4, %f64
-- STALL -- : f112(5), f64(6)
-- STALL -- : f112(4), f64(5)
-- STALL -- : f112(3), f64(4)
-- STALL -- : f112(2), f64(3)
-- STALL -- : f112(1), f64(2)
-- STALL -- : f64(1)
@syoyo
syoyo / gist:07dc264f4c5952a456be
Created April 4, 2015 15:48
exp() approximation in HPC-ACE (const value loading is not yet optimised)
#include <cstdio>
#include <cmath>
#include <emmintrin.h>
#define _mm_set1_pd(x) _mm_set_pd((x), (x))
// Based on http://www.chokkan.org/blog/archives/340
inline __m128d myexp(__m128d v)
{
@syoyo
syoyo / gist:2f89e50edd74d4179d03
Created April 4, 2015 16:39
SIMD log2() approximate function in HPC-ACE (not yet optimised)
// Based on glsl-sse2
inline __m128d mylog2(const __m128d v) {
int ibuf[4];
__m128d o = _mm_set_pd(1.0, 1.0);
__m128i infVal = _mm_set_epi32(0x7FF00000, 0x00000000, 0x7FF00000, 0x00000000);
__m128d c = *(reinterpret_cast<__m128d*>(&infVal));
__m128d f = _mm_sub_pd(_mm_or_pd(_mm_andnot_pd(c, v),
_mm_and_pd(c, o)), o);
//const __m128i iVal = *(reinterpret_cast<const __m128i*>(&v));
//__m128i a = _mm_sub_epi32(_mm_srli_epi32(iVal, 20),