Created
July 8, 2010 00:16
-
-
Save djg/467475 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// One of many test.cpp files I found. I think this one was for examining the generated assembly.
#include "xmmintrin.h" | |
#if 0 | |
// Scalar variant of Vec3: three independent float components, with
// component-wise multiply and add.
struct Vec3
{
    float x, y, z;

    // Leaves x, y and z uninitialized; assign before reading.
    __forceinline Vec3()
    {
    }

    // Builds a vector from its three components.
    __forceinline Vec3(float X, float Y, float Z)
        : x(X), y(Y), z(Z)
    {
    }

    // Component-wise product of *this and rhs.
    __forceinline Vec3 operator*(const Vec3& rhs) const
    {
        const float px = x * rhs.x;
        const float py = y * rhs.y;
        const float pz = z * rhs.z;
        return Vec3(px, py, pz);
    }

    // Component-wise sum of *this and rhs.
    __forceinline Vec3 operator+(const Vec3& rhs) const
    {
        const float sx = x + rhs.x;
        const float sy = y + rhs.y;
        const float sz = z + rhs.z;
        return Vec3(sx, sy, sz);
    }
};

// In this variant arguments are passed by const reference and results are
// returned by value.
typedef const Vec3& Vec3Arg;
typedef Vec3 Vec3Res;
#endif | |
#if 1 | |
// SSE-backed variant of Vec3: the whole vector is stored in a single __m128
// value and each arithmetic operator maps onto one packed intrinsic.
struct Vec3
{
    // Alternative layout kept for reference (scalar access to the lanes
    // through an anonymous union):
    // union
    // {
    __m128 vMM;
    // struct { float x, y, z; };
    // };

    // Leaves vMM uninitialized; assign before reading.
    __forceinline Vec3() {}
    // __forceinline Vec3(float X, float Y, float Z) : x(X), y(Y), z(Z) {};

    // Wraps a raw SSE value; also serves as the implicit conversion used by
    // the operators below.
    __forceinline Vec3(__m128 t) : vMM(t) {}

    // The copy operations take const references so that const objects and
    // temporaries (for example the result of a * b) can be copied in
    // standard C++. A non-const reference parameter only accepts those
    // through a compiler extension.
    __forceinline Vec3(const Vec3& v) : vMM(v.vMM) {}
    __forceinline Vec3& operator=(const Vec3& v) { vMM = v.vMM; return *this; }

    // Lane-wise product of *this and t.
    __forceinline Vec3 operator*(const Vec3& t) const { return Vec3(_mm_mul_ps(vMM, t.vMM)); }
    // Lane-wise sum of *this and t.
    __forceinline Vec3 operator+(const Vec3& t) const { return Vec3(_mm_add_ps(vMM, t.vMM)); }
};

// In this variant arguments and results are const values.
// NOTE(review): the movaps/mulps listing later in this file shows
// `Vec3 const &` parameters, so it was presumably produced with a
// by-reference Vec3Arg -- confirm before comparing against it.
typedef const Vec3 Vec3Arg;
typedef const Vec3 Vec3Res;
#endif | |
#if 0 | |
typedef __m128 Vec3; | |
typedef __m128 Vec3Arg; | |
typedef __m128 Vec3Res; | |
__forceinline Vec3Res operator+(Vec3Arg a, Vec3Arg b) { return _mm_add_ps(a,b); }; | |
__forceinline Vec3Res operator*(Vec3Arg a, Vec3Arg b) { return _mm_mul_ps(a,b); }; | |
#endif | |
// Returns a * b + c, using the operators of whichever Vec3 variant is
// enabled above. Marked noinline, presumably so that it shows up as a
// separate function in the object-file listing below.
__declspec(noinline) Vec3Res MulAdd(Vec3Arg a, Vec3Arg b, Vec3Arg c)
{
    // Bind the intermediate by const reference so no extra copy is required.
    const Vec3& product = a * b;
    return product + c;
}
// Dump of file test.obj | |
// File Type: COFF OBJECT | |
// | |
// ?MulAdd@@YA?AUVec3@@ABU1@00@Z (struct Vec3 __cdecl MulAdd(struct Vec3 const &,struct Vec3 const &,struct Vec3 const &)): | |
// 00000000: 8B 44 24 08 mov eax,dword ptr [esp+8] | |
// 00000004: 8B 4C 24 0C mov ecx,dword ptr [esp+0Ch] | |
// 00000008: F3 0F 10 00 movss xmm0,dword ptr [eax] | |
// 0000000C: F3 0F 10 09 movss xmm1,dword ptr [ecx] | |
// 00000010: F3 0F 10 51 04 movss xmm2,dword ptr [ecx+4] | |
// 00000015: F3 0F 10 59 08 movss xmm3,dword ptr [ecx+8] | |
// 0000001A: 8B 4C 24 10 mov ecx,dword ptr [esp+10h] | |
// 0000001E: 0F 5A C0 cvtps2pd xmm0,xmm0 | |
// 00000021: 0F 5A C9 cvtps2pd xmm1,xmm1 | |
// 00000024: F2 0F 59 C1 mulsd xmm0,xmm1 | |
// 00000028: F3 0F 10 48 04 movss xmm1,dword ptr [eax+4] | |
// 0000002D: 66 0F 5A C0 cvtpd2ps xmm0,xmm0 | |
// 00000031: 0F 5A D2 cvtps2pd xmm2,xmm2 | |
// 00000034: 0F 5A C9 cvtps2pd xmm1,xmm1 | |
// 00000037: F2 0F 59 CA mulsd xmm1,xmm2 | |
// 0000003B: F3 0F 10 50 08 movss xmm2,dword ptr [eax+8] | |
// 00000040: 8B 44 24 04 mov eax,dword ptr [esp+4] | |
// 00000044: F3 0F 5A C0 cvtss2sd xmm0,xmm0 | |
// 00000048: 0F 5A DB cvtps2pd xmm3,xmm3 | |
// 0000004B: 0F 5A D2 cvtps2pd xmm2,xmm2 | |
// 0000004E: F2 0F 59 D3 mulsd xmm2,xmm3 | |
// 00000052: F3 0F 10 19 movss xmm3,dword ptr [ecx] | |
// 00000056: 0F 5A DB cvtps2pd xmm3,xmm3 | |
// 00000059: F2 0F 58 D8 addsd xmm3,xmm0 | |
// 0000005D: 66 0F 5A C3 cvtpd2ps xmm0,xmm3 | |
// 00000061: F3 0F 11 00 movss dword ptr [eax],xmm0 | |
// 00000065: F3 0F 10 41 04 movss xmm0,dword ptr [ecx+4] | |
// 0000006A: 0F 5A C0 cvtps2pd xmm0,xmm0 | |
// 0000006D: 66 0F 5A C9 cvtpd2ps xmm1,xmm1 | |
// 00000071: F3 0F 5A C9 cvtss2sd xmm1,xmm1 | |
// 00000075: F2 0F 58 C1 addsd xmm0,xmm1 | |
// 00000079: 66 0F 5A C0 cvtpd2ps xmm0,xmm0 | |
// 0000007D: F3 0F 11 40 04 movss dword ptr [eax+4],xmm0 | |
// 00000082: F3 0F 10 41 08 movss xmm0,dword ptr [ecx+8] | |
// 00000087: 66 0F 5A D2 cvtpd2ps xmm2,xmm2 | |
// 0000008B: 0F 5A C0 cvtps2pd xmm0,xmm0 | |
// 0000008E: F3 0F 5A CA cvtss2sd xmm1,xmm2 | |
// 00000092: F2 0F 58 C1 addsd xmm0,xmm1 | |
// 00000096: 66 0F 5A C0 cvtpd2ps xmm0,xmm0 | |
// 0000009A: F3 0F 11 40 08 movss dword ptr [eax+8],xmm0 | |
// 0000009F: C3 ret | |
// File Type: COFF OBJECT | |
// | |
// ?MulAdd@@YA?BUVec3@@ABU1@00@Z (struct Vec3 const __cdecl MulAdd(struct Vec3 const &,struct Vec3 const &,struct Vec3 const &)): | |
// 00000000: 55 push ebp | |
// 00000001: 8B EC mov ebp,esp | |
// 00000003: 83 E4 F0 and esp,0FFFFFFF0h | |
// 00000006: 8B 4D 0C mov ecx,dword ptr [ebp+0Ch] | |
// 00000009: 0F 28 01 movaps xmm0,xmmword ptr [ecx] | |
// 0000000C: 8B 55 10 mov edx,dword ptr [ebp+10h] | |
// 0000000F: 0F 28 0A movaps xmm1,xmmword ptr [edx] | |
// 00000012: 8B 4D 14 mov ecx,dword ptr [ebp+14h] | |
// 00000015: 8B 45 08 mov eax,dword ptr [ebp+8] | |
// 00000018: 0F 59 C1 mulps xmm0,xmm1 | |
// 0000001B: 0F 28 09 movaps xmm1,xmmword ptr [ecx] | |
// 0000001E: 0F 58 C1 addps xmm0,xmm1 | |
// 00000021: 0F 29 00 movaps xmmword ptr [eax],xmm0 | |
// 00000024: 8B E5 mov esp,ebp | |
// 00000026: 5D pop ebp | |
// 00000027: C3 ret | |
// File Type: COFF OBJECT | |
// ?MulAdd@@YA?AT__m128@@T1@00@Z (union __m128 __cdecl MulAdd(union __m128,union __m128,union __m128)): | |
// 00000000: 0F 59 C1 mulps xmm0,xmm1 | |
// 00000003: 0F 58 C2 addps xmm0,xmm2 | |
// 00000006: C3 ret |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment