This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from typing import Optional, Union | |
import os | |
import numpy as np | |
import torch | |
import triton | |
import triton.language as tl | |
import math | |
from triton.runtime.jit import TensorWrapper, reinterpret | |
from numpy.random import RandomState |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/include/triton/Conversion/CMakeLists.txt b/include/triton/Conversion/CMakeLists.txt | |
index ae31ac93..691104f3 100644 | |
--- a/include/triton/Conversion/CMakeLists.txt | |
+++ b/include/triton/Conversion/CMakeLists.txt | |
@@ -1,4 +1,4 @@ | |
-add_subdirectory(TritonCPUToLLVM) | |
+# add_subdirectory(TritonCPUToLLVM) | |
add_subdirectory(TritonGPUToLLVM) | |
-add_subdirectory(TritonToTritonCPU) | |
+# add_subdirectory(TritonToTritonCPU) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
This is a version (aka dlmalloc) of malloc/free/realloc written by | |
Doug Lea and released to the public domain, as explained at | |
http://creativecommons.org/publicdomain/zero/1.0/ Send questions, | |
comments, complaints, performance data, etc to dl@cs.oswego.edu | |
* Version 2.8.6 Wed Aug 29 06:57:58 2012 Doug Lea | |
Note: There may be an updated version of this malloc obtainable at | |
ftp://gee.cs.oswego.edu/pub/misc/malloc.c | |
Check before installing! |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Calls @get twice and forwards both results, in call order, to @process,
; returning whatever @process returns.
define i32 @_Z4testv() #0 {
entry:
  %a = call i32 @get()
  %b = call i32 @get()
  ; LLVM IR is in SSA form: every local value is assigned exactly once, so the
  ; result of @process needs its own name instead of redefining %b.
  %result = call i32 @process(i32 %a, i32 %b)
  ret i32 %result
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Worked example: how the chain of xor instructions below folds away.
%a = call i32 @get() | |
%b = call i32 @get() | |
%xor = xor i32 %b, %a | |
%xor1 = xor i32 %a, %xor | |
; => %xor1 = %a ^ %xor
; => %xor1 = %a ^ (%b ^ %a) ; a ^ (b ^ a) = b ^ 0, because a ^ a = 0
; => %xor1 = %b ^ 0 ; b ^ 0 = b
; => %xor1 = %b ; every later use of %xor1 is then replaced with %b
; => %xor1 is deleted
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
define i32 @_Z4testv() #0 { | |
entry: | |
%call = call i32 @_Z3getv() ; a = get(); | |
%call1 = call i32 @_Z3getv() ; b = get(); | |
; temp_swap(a, b) has been eliminated
%xor.i = xor i32 %call1, %call ; xor_swap(a, b) has been turned into xor_swap(b, a)
%xor1.i = xor i32 %call, %xor.i | |
%xor2.i = xor i32 %xor.i, %xor1.i | |
%call2 = call i32 @_Z7processii(i32 %xor2.i, i32 %xor1.i) | |
ret i32 %call2 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; temp_swap<int>: exchanges the two i32 values that %a and %b point to.
; Live ranges: %0 is defined by the first load and last used by the final
; store; %1 is defined by the second load and used by the first store.
define linkonce_odr void @void temp_swap<int>(int&, int&)(i32* %a, i32* %b) #2 { | |
entry:
%0 = load i32, i32* %a, align 4 ; %0 = old *a
%1 = load i32, i32* %b, align 4 ; %1 = old *b
store i32 %1, i32* %a, align 4 ; *a = old *b (reads %1)
store i32 %0, i32* %b, align 4 ; *b = old *a (reads %0)
ret void | |
} | |
define linkonce_odr void @void xor_swap<int>(int&, int&)(i32* %a, i32* %b) #2 { |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
define i32 @_Z4testv() #0 { | |
entry: | |
%a = alloca i32, align 4 | |
%b = alloca i32, align 4 | |
%call = call i32 @_Z3getv() ; int a = get(); | |
store i32 %call, i32* %a, align 4 | |
%call1 = call i32 @_Z3getv() ; int b = get(); | |
store i32 %call1, i32* %b, align 4 | |
%0 = load i32, i32* %a, align 4 ; temp_swap(a, b); | |
%1 = load i32, i32* %b, align 4 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ ./opt -O3 -debug-pass=Structure -o swap.opt.bc swap.bc | |
Pass Arguments: -tti -tbaa -scoped-noalias -assumption-cache-tracker -targetlibinfo -verify -simplifycfg -domtree -sroa -early-cse -basicaa -aa -memdep -memoryssa -gvn-hoist -lower-expect | |
Target Transform Information | |
Type-Based Alias Analysis | |
Scoped NoAlias Alias Analysis | |
Assumption Cache Tracker | |
Target Library Information | |
FunctionPass Manager | |
Module Verifier | |
Simplify the CFG |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; temp_swap<int>: exchanges the two i32 values that %a and %b point to.
; Live ranges: %0 is defined by the first load and last used by the final
; store; %1 is defined by the second load and used by the first store.
define linkonce_odr void @void temp_swap<int>(int&, int&)(i32* %a, i32* %b) #2 { | |
entry:
%0 = load i32, i32* %a, align 4 ; %0 = old *a
%1 = load i32, i32* %b, align 4 ; %1 = old *b
store i32 %1, i32* %a, align 4 ; *a = old *b (reads %1)
store i32 %0, i32* %b, align 4 ; *b = old *a (reads %0)
ret void | |
} | |
define linkonce_odr void @void xor_swap<int>(int&, int&)(i32* %a, i32* %b) #2 { |
NewerOlder