Skip to content

Instantly share code, notes, and snippets.

@tmbdev
Created August 12, 2022 01:48
Show Gist options
  • Select an option

  • Save tmbdev/b1ed5c3964fc4ded0d530dc2869e0aaf to your computer and use it in GitHub Desktop.

Select an option

Save tmbdev/b1ed5c3964fc4ded0d530dc2869e0aaf to your computer and use it in GitHub Desktop.
using Enzyme
using CUDA
using CUDAKernels
using KernelAbstractions
using KernelGradients
using Test
CUDA.allowscalar(false)
device = CUDA.device()
@show device
##
d = 10
n = 20
m = 25
a, b, result = rand(Float32, d, n), rand(Float32, d, m), zeros(Float32, n, m)
ca, cb, cresult = CuArray(a), CuArray(b), CuArray(result)
function kconfig(a::Array)
return (CPU(), 4)
end
function kconfig(a::CuArray)
return (CUDADevice(), 256)
end
function same_device(xs...)
device = KernelAbstractions.get_device(xs[1])
for x in xs
if KernelAbstractions.get_device(x) != device
return false
end
end
return true
end
@kernel function dist_kernel!(@Const(x), @Const(y), result)
i, j = @index(Global, NTuple)
tmp = zero(eltype(result))
for k = 1:size(x, 1)
tmp += (x[k, i] - y[k, j]) ^ 4
end
tmp = tmp ^ (1.0 / 4)
result[i, j] = tmp
end
function dist!(x, y, result)
@assert size(x, 1) == size(y, 1)
@assert size(x, 2) == size(result, 1)
@assert size(y, 2) == size(result, 2)
@assert same_device(x, y, result)
kernel! = dist_kernel!(kconfig(x)...)
ev = kernel!(x, y, result; ndrange=size(result))
wait(ev)
end
function Δdist!(x, y, result, ∂y)
@assert same_device(x, y, result, ∂y)
deriv = Enzyme.autodiff(dist_kernel!(kconfig(x)...))
ev = deriv(x, Duplicated(y, ∂y), result; ndrange=size(result))
wait(ev)
end
println("cpu")
@time dist!(a, b, result)
@time dist!(a, b, result)
∂b = zero(b)
@time Δdist!(a, b, result, ∂b)
@time Δdist!(a, b, result, ∂b)
println("gpu")
@time dist!(ca, cb, cresult)
@time dist!(ca, cb, cresult)
c∂b = zero(cb)
@time Δdist!(ca, cb, cresult, c∂b)
@time Δdist!(ca, cb, cresult, c∂b)
# This file is machine-generated - editing it directly is not advised
julia_version = "1.7.3"
manifest_format = "2.0"
[[deps.AbstractFFTs]]
deps = ["ChainRulesCore", "LinearAlgebra"]
git-tree-sha1 = "69f7020bd72f069c219b5e8c236c1fa90d2cb409"
uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c"
version = "1.2.1"
[[deps.Adapt]]
deps = ["LinearAlgebra"]
git-tree-sha1 = "195c5505521008abea5aee4f96930717958eac6f"
uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e"
version = "3.4.0"
[[deps.ArgTools]]
uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f"
[[deps.Artifacts]]
uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33"
[[deps.Atomix]]
deps = ["UnsafeAtomics"]
git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be"
uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458"
version = "0.1.0"
[[deps.BFloat16s]]
deps = ["LinearAlgebra", "Printf", "Random", "Test"]
git-tree-sha1 = "a598ecb0d717092b5539dbbe890c98bac842b072"
uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b"
version = "0.2.0"
[[deps.Base64]]
uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f"
[[deps.CEnum]]
git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90"
uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82"
version = "0.4.2"
[[deps.CUDA]]
deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "TimerOutputs"]
git-tree-sha1 = "49549e2c28ffb9cc77b3689dc10e46e6271e9452"
uuid = "052768ef-5323-5732-b1bb-66c8b64840ba"
version = "3.12.0"
[[deps.CUDAKernels]]
deps = ["Adapt", "CUDA", "KernelAbstractions", "StaticArrays", "UnsafeAtomicsLLVM"]
git-tree-sha1 = "bbab4d1a4001ec322c384dfff0889cec4118da93"
uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
version = "0.4.3"
[[deps.ChainRulesCore]]
deps = ["Compat", "LinearAlgebra", "SparseArrays"]
git-tree-sha1 = "80ca332f6dcb2508adba68f22f551adb2d00a624"
uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4"
version = "1.15.3"
[[deps.ChangesOfVariables]]
deps = ["ChainRulesCore", "LinearAlgebra", "Test"]
git-tree-sha1 = "38f7a08f19d8810338d4f5085211c7dfa5d5bdd8"
uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0"
version = "0.1.4"
[[deps.Compat]]
deps = ["Dates", "LinearAlgebra", "UUIDs"]
git-tree-sha1 = "924cdca592bc16f14d2f7006754a621735280b74"
uuid = "34da2185-b29b-5c13-b0c7-acf172513d20"
version = "4.1.0"
[[deps.CompilerSupportLibraries_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae"
[[deps.Dates]]
deps = ["Printf"]
uuid = "ade2ca70-3891-5945-98fb-dc099432e06a"
[[deps.DiffRules]]
deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"]
git-tree-sha1 = "28d605d9a0ac17118fe2c5e9ce0fbb76c3ceb120"
uuid = "b552c78f-8df3-52c6-915a-8e097449b14b"
version = "1.11.0"
[[deps.DocStringExtensions]]
deps = ["LibGit2"]
git-tree-sha1 = "5158c2b41018c5f7eb1470d558127ac274eca0c9"
uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae"
version = "0.9.1"
[[deps.Downloads]]
deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"]
uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
[[deps.Enzyme]]
deps = ["Adapt", "CEnum", "Enzyme_jll", "GPUCompiler", "LLVM", "Libdl", "LinearAlgebra", "ObjectFile", "Printf", "Random"]
git-tree-sha1 = "8ab9eb44fbcfc9161b3f81be7814a7618f2a3460"
uuid = "7da242da-08ed-463a-9acd-ee780be4f1d9"
version = "0.10.4"
[[deps.Enzyme_jll]]
deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg", "TOML"]
git-tree-sha1 = "722aa3b554e883118e0e3111629ec40e176cee2c"
uuid = "7cc45869-7501-5eee-bdea-0790c847d4ef"
version = "0.0.33+0"
[[deps.ExprTools]]
git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d"
uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04"
version = "0.1.8"
[[deps.FileWatching]]
uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee"
[[deps.GPUArrays]]
deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"]
git-tree-sha1 = "73145f1d724b5ee0e90098aec39a65e9697429a6"
uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7"
version = "8.4.2"
[[deps.GPUArraysCore]]
deps = ["Adapt"]
git-tree-sha1 = "d88b17a38322e153c519f5a9ed8d91e9baa03d8f"
uuid = "46192b85-c4d5-4398-a991-12ede77f4527"
version = "0.1.1"
[[deps.GPUCompiler]]
deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"]
git-tree-sha1 = "122d7bcc92abf94cf1a86281ad7a4d0e838ab9e0"
uuid = "61eb1bfa-7361-4325-ad38-22787b887f55"
version = "0.16.3"
[[deps.InteractiveUtils]]
deps = ["Markdown"]
uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240"
[[deps.InverseFunctions]]
deps = ["Test"]
git-tree-sha1 = "b3364212fb5d870f724876ffcd34dd8ec6d98918"
uuid = "3587e190-3f89-42d0-90ee-14403ec27112"
version = "0.1.7"
[[deps.IrrationalConstants]]
git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151"
uuid = "92d709cd-6900-40b7-9082-c6be49f344b6"
version = "0.1.1"
[[deps.JLLWrappers]]
deps = ["Preferences"]
git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1"
uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210"
version = "1.4.1"
[[deps.KernelAbstractions]]
deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"]
git-tree-sha1 = "02838ecfc5f925ac408ffe6b359ac59ef8865272"
uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
version = "0.8.3"
[[deps.KernelGradients]]
deps = ["Enzyme", "KernelAbstractions"]
git-tree-sha1 = "6dbcc9f869625fa50e1c7483f1c4200c65f17f9c"
uuid = "e5faadeb-7f6c-408e-9747-a7a26e81c66a"
version = "0.1.2"
[[deps.LLVM]]
deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"]
git-tree-sha1 = "e7e9184b0bf0158ac4e4aa9daf00041b5909bf1a"
uuid = "929cbde3-209d-540e-8aea-75f648917ca0"
version = "4.14.0"
[[deps.LLVMExtra_jll]]
deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg", "TOML"]
git-tree-sha1 = "771bfe376249626d3ca12bcd58ba243d3f961576"
uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab"
version = "0.0.16+0"
[[deps.LazyArtifacts]]
deps = ["Artifacts", "Pkg"]
uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3"
[[deps.LibCURL]]
deps = ["LibCURL_jll", "MozillaCACerts_jll"]
uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21"
[[deps.LibCURL_jll]]
deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"]
uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0"
[[deps.LibGit2]]
deps = ["Base64", "NetworkOptions", "Printf", "SHA"]
uuid = "76f85450-5226-5b5a-8eaa-529ad045b433"
[[deps.LibSSH2_jll]]
deps = ["Artifacts", "Libdl", "MbedTLS_jll"]
uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8"
[[deps.Libdl]]
uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
[[deps.LinearAlgebra]]
deps = ["Libdl", "libblastrampoline_jll"]
uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e"
[[deps.LogExpFunctions]]
deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"]
git-tree-sha1 = "361c2b088575b07946508f135ac556751240091c"
uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688"
version = "0.3.17"
[[deps.Logging]]
uuid = "56ddb016-857b-54e1-b83d-db4d58db5568"
[[deps.MacroTools]]
deps = ["Markdown", "Random"]
git-tree-sha1 = "3d3e902b31198a27340d0bf00d6ac452866021cf"
uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09"
version = "0.5.9"
[[deps.Markdown]]
deps = ["Base64"]
uuid = "d6f4376e-aef5-505a-96c1-9c027394607a"
[[deps.MbedTLS_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1"
[[deps.MozillaCACerts_jll]]
uuid = "14a3606d-f60d-562e-9121-12d972cd8159"
[[deps.NaNMath]]
deps = ["OpenLibm_jll"]
git-tree-sha1 = "a7c3d1da1189a1c2fe843a3bfa04d18d20eb3211"
uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3"
version = "1.0.1"
[[deps.NetworkOptions]]
uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908"
[[deps.ObjectFile]]
deps = ["Reexport", "StructIO"]
git-tree-sha1 = "55ce61d43409b1fb0279d1781bf3b0f22c83ab3b"
uuid = "d8793406-e978-5875-9003-1fc021f44a92"
version = "0.3.7"
[[deps.OpenBLAS_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"]
uuid = "4536629a-c528-5b80-bd46-f80d51c5b363"
[[deps.OpenLibm_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "05823500-19ac-5b8b-9628-191a04bc5112"
[[deps.OpenSpecFun_jll]]
deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"]
git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1"
uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e"
version = "0.5.5+0"
[[deps.Pkg]]
deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"]
uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f"
[[deps.Preferences]]
deps = ["TOML"]
git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d"
uuid = "21216c6a-2e73-6563-6e65-726566657250"
version = "1.3.0"
[[deps.Printf]]
deps = ["Unicode"]
uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7"
[[deps.REPL]]
deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"]
uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb"
[[deps.Random]]
deps = ["SHA", "Serialization"]
uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
[[deps.Random123]]
deps = ["Random", "RandomNumbers"]
git-tree-sha1 = "7a1a306b72cfa60634f03a911405f4e64d1b718b"
uuid = "74087812-796a-5b5d-8853-05524746bad3"
version = "1.6.0"
[[deps.RandomNumbers]]
deps = ["Random", "Requires"]
git-tree-sha1 = "043da614cc7e95c703498a491e2c21f58a2b8111"
uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143"
version = "1.5.3"
[[deps.Reexport]]
git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b"
uuid = "189a3867-3050-52da-a836-e630ba90ab69"
version = "1.2.2"
[[deps.Requires]]
deps = ["UUIDs"]
git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7"
uuid = "ae029012-a4dd-5104-9daa-d747884805df"
version = "1.3.0"
[[deps.SHA]]
uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce"
[[deps.Serialization]]
uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b"
[[deps.Sockets]]
uuid = "6462fe0b-24de-5631-8697-dd941f90decc"
[[deps.SparseArrays]]
deps = ["LinearAlgebra", "Random"]
uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf"
[[deps.SpecialFunctions]]
deps = ["ChainRulesCore", "IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"]
git-tree-sha1 = "d75bda01f8c31ebb72df80a46c88b25d1c79c56d"
uuid = "276daf66-3868-5448-9aa4-cd146d93841b"
version = "2.1.7"
[[deps.StaticArrays]]
deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"]
git-tree-sha1 = "23368a3313d12a2326ad0035f0db0c0966f438ef"
uuid = "90137ffa-7385-5640-81b9-e52037218182"
version = "1.5.2"
[[deps.StaticArraysCore]]
git-tree-sha1 = "66fe9eb253f910fe8cf161953880cfdaef01cdf0"
uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c"
version = "1.0.1"
[[deps.Statistics]]
deps = ["LinearAlgebra", "SparseArrays"]
uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2"
[[deps.StructIO]]
deps = ["Test"]
git-tree-sha1 = "010dc73c7146869c042b49adcdb6bf528c12e859"
uuid = "53d494c1-5632-5724-8f4c-31dff12d585f"
version = "0.3.0"
[[deps.TOML]]
deps = ["Dates"]
uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76"
[[deps.Tar]]
deps = ["ArgTools", "SHA"]
uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e"
[[deps.Test]]
deps = ["InteractiveUtils", "Logging", "Random", "Serialization"]
uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
[[deps.TimerOutputs]]
deps = ["ExprTools", "Printf"]
git-tree-sha1 = "464d64b2510a25e6efe410e7edab14fffdc333df"
uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f"
version = "0.5.20"
[[deps.Tullio]]
deps = ["ChainRulesCore", "DiffRules", "LinearAlgebra", "Requires"]
git-tree-sha1 = "859e2e9a7222553a0c052e423557cedb49376da9"
uuid = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc"
version = "0.3.4"
[[deps.UUIDs]]
deps = ["Random", "SHA"]
uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4"
[[deps.Unicode]]
uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5"
[[deps.UnsafeAtomics]]
git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278"
uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f"
version = "0.2.1"
[[deps.UnsafeAtomicsLLVM]]
deps = ["LLVM", "UnsafeAtomics"]
git-tree-sha1 = "33af9d2031d0dc09e2be9a0d4beefec4466def8e"
uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249"
version = "0.1.0"
[[deps.Zlib_jll]]
deps = ["Libdl"]
uuid = "83775a58-1f1d-513f-b197-d71354ab007a"
[[deps.libblastrampoline_jll]]
deps = ["Artifacts", "Libdl", "OpenBLAS_jll"]
uuid = "8e850b90-86db-534c-a0d3-1478176c7d93"
[[deps.nghttp2_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d"
[[deps.p7zip_jll]]
deps = ["Artifacts", "Libdl"]
uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0"
device = CuDevice(0)
cpu
0.177147 seconds (1.03 M allocations: 55.361 MiB, 4.22% gc time, 99.71% compilation time)
0.000385 seconds (15 allocations: 816 bytes)
10.042157 seconds (24.48 M allocations: 1.296 GiB, 3.90% gc time, 69.11% compilation time)
0.000470 seconds (141 allocations: 2.844 KiB)
gpu
10.081786 seconds (21.57 M allocations: 1.137 GiB, 4.98% gc time, 31.50% compilation time)
0.000098 seconds (62 allocations: 2.859 KiB)
ERROR: LoadError: Enzyme compilation failed.
Current scope:
; Function Attrs: willreturn mustprogress
define void @preprocess_julia_gpu_dist_kernel__5526_inner19({ [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, { i8 addrspace(1)*, i64, [2 x i64], i64 } %3) local_unnamed_addr #12 !dbg !474 {
entry:
%4 = alloca [2 x i64], align 8
%5 = alloca { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, align 8, !dbg !475
%6 = addrspacecast { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5 to { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)*, !dbg !475
%.fca.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 0, 0, !dbg !475
%.fca.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 0, i64 0, !dbg !475
store i64 %.fca.0.0.0.0.extract, i64* %.fca.0.0.0.0.gep, align 8, !dbg !475
%.fca.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 1, 0, !dbg !475
%.fca.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 1, i64 0, !dbg !475
store i64 %.fca.0.0.1.0.extract, i64* %.fca.0.0.1.0.gep, align 8, !dbg !475
%.fca.1.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 0, 0, !dbg !475
%.fca.1.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 0, i64 0, !dbg !475
store i64 %.fca.1.0.0.0.0.extract, i64* %.fca.1.0.0.0.0.gep, align 8, !dbg !475
%.fca.1.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 1, 0, !dbg !475
%.fca.1.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 1, i64 0, !dbg !475
store i64 %.fca.1.0.0.1.0.extract, i64* %.fca.1.0.0.1.0.gep, align 8, !dbg !475
%.fca.0.extract30 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 0, !dbg !475
%.fca.1.extract32 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 1, !dbg !475
%.fca.2.0.extract34 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 0, !dbg !475
%.fca.2.1.extract36 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 1, !dbg !475
%.fca.3.extract38 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 3, !dbg !475
%.fca.0.extract12 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 0, !dbg !475
%.fca.1.extract14 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 1, !dbg !475
%.fca.2.0.extract16 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 0, !dbg !475
%.fca.2.1.extract18 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 1, !dbg !475
%.fca.3.extract20 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 3, !dbg !475
%.fca.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 0, !dbg !475
%.fca.1.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 1, !dbg !475
%.fca.2.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 2, 0, !dbg !475
%.fca.2.1.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 2, 1, !dbg !475
%.fca.3.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 3, !dbg !475
%7 = bitcast [2 x i64]* %4 to i8*
call void @llvm.lifetime.start.p0i8(i64 noundef 16, i8* noundef nonnull align 8 dereferenceable(16) %7) #14
%8 = call {}*** @julia.get_pgcstack() #14
%9 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #14, !dbg !476, !range !65
%10 = icmp sgt i64 %.fca.1.0.0.0.0.extract, 0, !dbg !485
%11 = zext i32 %9 to i64, !dbg !499
%12 = bitcast i8 addrspace(1)* %.fca.0.extract to float addrspace(1)*, !dbg !501
br i1 %10, label %pass.i, label %fail.i, !dbg !501
L302.i: ; preds = %pass.i
call fastcc void @julia___index_Global_NTuple_5574([2 x i64]* noalias nocapture noundef nonnull writeonly sret([2 x i64]) align 8 dereferenceable(16) %4, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(32) %6) #12, !dbg !504
%13 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 0, !dbg !506
%14 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 1, !dbg !509
%.inv = icmp sgt i64 %.fca.2.0.extract34, 0, !dbg !510
%15 = select i1 %.inv, i64 %.fca.2.0.extract34, i64 0, !dbg !510
br i1 %.inv, label %L319.i.preheader, label %L467.i, !dbg !514
L319.i.preheader: ; preds = %L302.i
%16 = load i64, i64* %13, align 8, !tbaa !301
%17 = icmp sgt i64 %.fca.2.1.extract36, 0
%18 = select i1 %17, i64 %.fca.2.1.extract36, i64 0
%19 = icmp sgt i64 %16, 0
%20 = icmp sle i64 %16, %18
%21 = and i1 %19, %20
%22 = add i64 %16, -1
%23 = mul i64 %22, %15
%24 = load i64, i64* %14, align 8
%25 = icmp sgt i64 %.fca.2.0.extract16, 0
%26 = select i1 %25, i64 %.fca.2.0.extract16, i64 0
%27 = icmp sgt i64 %.fca.2.1.extract18, 0
%28 = select i1 %27, i64 %.fca.2.1.extract18, i64 0
%29 = icmp sgt i64 %24, 0
%30 = icmp sle i64 %24, %28
%31 = and i1 %29, %30
%32 = add i64 %24, -1
%33 = mul i64 %32, %26
br label %L319.i, !dbg !515
L319.i: ; preds = %__internal_powf_infinite_cases.exit.i.i, %L319.i.preheader
%iv = phi i64 [ %iv.next, %__internal_powf_infinite_cases.exit.i.i ], [ 0, %L319.i.preheader ]
%value_phi13.i = phi float [ %284, %__internal_powf_infinite_cases.exit.i.i ], [ 0.000000e+00, %L319.i.preheader ]
%iv.next = add nuw nsw i64 %iv, 1, !dbg !519
%34 = icmp ule i64 %iv.next, %15, !dbg !519
%35 = and i1 %34, %21, !dbg !524
br i1 %35, label %L340.i, label %L342.i, !dbg !515
L340.i: ; preds = %L319.i
%36 = add i64 %23, %iv.next, !dbg !525
%37 = shl i64 %36, 2, !dbg !532
%38 = add i64 %37, -4, !dbg !532
%39 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract30, i64 %38, !dbg !537
%40 = bitcast i8 addrspace(1)* %39 to float addrspace(1)*, !dbg !538
%41 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %40, i32 noundef 4) #14, !dbg !538
%42 = icmp ule i64 %iv.next, %26, !dbg !519
%43 = and i1 %42, %31, !dbg !524
br i1 %43, label %L405.i, label %L407.i, !dbg !515
L342.i: ; preds = %L319.i
%44 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !515
unreachable, !dbg !515
L405.i: ; preds = %L340.i
%45 = add i64 %33, %iv.next, !dbg !525
%46 = shl i64 %45, 2, !dbg !532
%47 = add i64 %46, -4, !dbg !532
%48 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract12, i64 %47, !dbg !537
%49 = bitcast i8 addrspace(1)* %48 to float addrspace(1)*, !dbg !538
%50 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %49, i32 noundef 4) #14, !dbg !538
%51 = fsub float %41, %50, !dbg !543
%52 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not59 = icmp eq i32 %52, 0, !dbg !544
br i1 %.not59, label %__nv_fabsf.exit.i.i, label %54, !dbg !544
L407.i: ; preds = %L340.i
%53 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !515
unreachable, !dbg !515
54: ; preds = %L405.i
%55 = call float @llvm.nvvm.fabs.ftz.f(float noundef 0.000000e+00) #16, !dbg !544
br label %__nv_fabsf.exit.i.i, !dbg !544
__nv_fabsf.exit.i.i: ; preds = %54, %L405.i
%.08.i.i = phi float [ %55, %54 ], [ 0.000000e+00, %L405.i ], !dbg !544
%56 = fcmp oeq float %.08.i.i, 1.000000e+00, !dbg !544
%57 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not60 = icmp eq i32 %57, 0, !dbg !544
br i1 %.not60, label %60, label %58, !dbg !544
58: ; preds = %__nv_fabsf.exit.i.i
%59 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544
br label %__nv_fabsf.exit1.i.i, !dbg !544
60: ; preds = %__nv_fabsf.exit.i.i
%61 = call float @llvm.fabs.f32(float %51) #14, !dbg !544
br label %__nv_fabsf.exit1.i.i, !dbg !544
__nv_fabsf.exit1.i.i: ; preds = %60, %58
%.09.i.i = phi float [ %59, %58 ], [ %61, %60 ], !dbg !544
%62 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not61 = icmp eq i32 %62, 0, !dbg !544
%63 = fcmp olt float %.09.i.i, 0x3810000000000000, !dbg !544
%64 = fmul float %.09.i.i, 0x4170000000000000, !dbg !544
%65 = and i1 %63, %.not61, !dbg !544
%.116.i.i = select i1 %65, float %64, float %.09.i.i, !dbg !544
%expo.i.i.1.i.i = select i1 %65, float -1.510000e+02, float -1.270000e+02, !dbg !544
%66 = bitcast float %.116.i.i to i32, !dbg !544
%67 = and i32 %66, 8388607, !dbg !544
%68 = or i32 %67, 1065353216, !dbg !544
%69 = bitcast i32 %68 to float, !dbg !544
%70 = lshr i32 %66, 23, !dbg !544
%71 = uitofp i32 %70 to float, !dbg !544
%72 = fadd float %expo.i.i.1.i.i, %71, !dbg !544
%73 = fcmp ogt float %69, 0x3FF6A09E60000000, !dbg !544
%74 = fmul float %69, 5.000000e-01, !dbg !544
%75 = fadd float %72, 1.000000e+00, !dbg !544
%expo.i.i.2.i.i = select i1 %73, float %75, float %72, !dbg !544
%m.i.i.0.i.i = select i1 %73, float %74, float %69, !dbg !544
%76 = fadd float %m.i.i.0.i.i, -1.000000e+00, !dbg !544
%77 = fadd float %m.i.i.0.i.i, 1.000000e+00, !dbg !544
%78 = call float asm "rcp.approx.ftz.f32 $0,$1;", "=f,f"(float %77) #17, !dbg !544, !srcloc !362
%79 = fmul float %76, 2.000000e+00, !dbg !544
%80 = fmul float %78, %79, !dbg !544
%81 = fmul float %80, %80, !dbg !544
%82 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not62 = icmp eq i32 %82, 0, !dbg !544
br i1 %.not62, label %85, label %83, !dbg !544
83: ; preds = %__nv_fabsf.exit1.i.i
%84 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0x3F631E1FC0000000, float %81, float noundef 0x3F8995EC60000000) #16, !dbg !544
br label %__internal_fmad.exit.i.i.i.i, !dbg !544
85: ; preds = %__nv_fabsf.exit1.i.i
%86 = call float @llvm.fma.f32(float %81, float noundef 0x3F631E1FC0000000, float noundef 0x3F8995EC60000000) #14, !dbg !544
br label %__internal_fmad.exit.i.i.i.i, !dbg !544
__internal_fmad.exit.i.i.i.i: ; preds = %85, %83
%.020.i.i = phi float [ %84, %83 ], [ %86, %85 ], !dbg !544
%87 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not63 = icmp eq i32 %87, 0, !dbg !544
br i1 %.not63, label %90, label %88, !dbg !544
88: ; preds = %__internal_fmad.exit.i.i.i.i
%89 = call float @llvm.nvvm.fma.rn.ftz.f(float %.020.i.i, float %81, float noundef 0x3FB55557A0000000) #16, !dbg !544
br label %__internal_fmad.exit3.i.i.i.i, !dbg !544
90: ; preds = %__internal_fmad.exit.i.i.i.i
%91 = call float @llvm.fma.f32(float %.020.i.i, float %81, float noundef 0x3FB55557A0000000) #14, !dbg !544
br label %__internal_fmad.exit3.i.i.i.i, !dbg !544
__internal_fmad.exit3.i.i.i.i: ; preds = %90, %88
%.021.i.i = phi float [ %89, %88 ], [ %91, %90 ], !dbg !544
%92 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not64 = icmp eq i32 %92, 0, !dbg !544
br i1 %.not64, label %95, label %93, !dbg !544
93: ; preds = %__internal_fmad.exit3.i.i.i.i
%94 = call float @llvm.nvvm.mul.rn.ftz.f(float %.021.i.i, float %81) #16, !dbg !544
br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !544
95: ; preds = %__internal_fmad.exit3.i.i.i.i
%96 = fmul float %81, %.021.i.i, !dbg !544
br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !544
__nv_fmul_rn.exit4.i.i.i.i: ; preds = %95, %93
%.022.i.i = phi float [ %94, %93 ], [ %96, %95 ], !dbg !544
%97 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not65 = icmp eq i32 %97, 0, !dbg !544
br i1 %.not65, label %100, label %98, !dbg !544
98: ; preds = %__nv_fmul_rn.exit4.i.i.i.i
%99 = call float @llvm.nvvm.mul.rn.ftz.f(float %.022.i.i, float %80) #16, !dbg !544
br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !544
100: ; preds = %__nv_fmul_rn.exit4.i.i.i.i
%101 = fmul float %80, %.022.i.i, !dbg !544
br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !544
__nv_fmul_rn.exit5.i.i.i.i: ; preds = %100, %98
%.024.i.i = phi float [ %99, %98 ], [ %101, %100 ], !dbg !544
%102 = fsub float %76, %80, !dbg !544
%103 = fmul float %102, 2.000000e+00, !dbg !544
%104 = fneg float %80, !dbg !544
%105 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not66 = icmp eq i32 %105, 0, !dbg !544
br i1 %.not66, label %108, label %106, !dbg !544
106: ; preds = %__nv_fmul_rn.exit5.i.i.i.i
%107 = call float @llvm.nvvm.fma.rn.ftz.f(float %104, float %76, float %103) #16, !dbg !544
br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !544
108: ; preds = %__nv_fmul_rn.exit5.i.i.i.i
%109 = call float @llvm.fma.f32(float %104, float %76, float %103) #14, !dbg !544
br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !544
__nv_fmaf_rn.exit.i.i.i.i: ; preds = %108, %106
%.025.i.i = phi float [ %107, %106 ], [ %109, %108 ], !dbg !544
%110 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not67 = icmp eq i32 %110, 0, !dbg !544
br i1 %.not67, label %113, label %111, !dbg !544
111: ; preds = %__nv_fmaf_rn.exit.i.i.i.i
%112 = call float @llvm.nvvm.mul.rn.ftz.f(float %78, float %.025.i.i) #16, !dbg !544
br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !544
113: ; preds = %__nv_fmaf_rn.exit.i.i.i.i
%114 = fmul float %78, %.025.i.i, !dbg !544
br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !544
__nv_fmul_rn.exit6.i.i.i.i: ; preds = %113, %111
%.026.i.i = phi float [ %112, %111 ], [ %114, %113 ], !dbg !544
%115 = fadd float %80, %.024.i.i, !dbg !544
%116 = fsub float %80, %115, !dbg !544
%117 = fadd float %.024.i.i, %116, !dbg !544
%118 = fadd float %117, %.026.i.i, !dbg !544
%119 = fadd float %115, %118, !dbg !544
%120 = fsub float %115, %119, !dbg !544
%121 = fadd float %118, %120, !dbg !544
%122 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not68 = icmp eq i32 %122, 0, !dbg !544
br i1 %.not68, label %125, label %123, !dbg !544
123: ; preds = %__nv_fmul_rn.exit6.i.i.i.i
%124 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3FE62E4000000000) #16, !dbg !544
br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !544
125: ; preds = %__nv_fmul_rn.exit6.i.i.i.i
%126 = fmul float %expo.i.i.2.i.i, 0x3FE62E4000000000, !dbg !544
br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !544
__nv_fmul_rn.exit2.i.i.i.i: ; preds = %125, %123
%.019.i.i = phi float [ %124, %123 ], [ %126, %125 ], !dbg !544
%127 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not69 = icmp eq i32 %127, 0, !dbg !544
br i1 %.not69, label %130, label %128, !dbg !544
128: ; preds = %__nv_fmul_rn.exit2.i.i.i.i
%129 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3EB7F7D1C0000000) #16, !dbg !544
br label %__internal_log_ep.exit.i.i.i, !dbg !544
130: ; preds = %__nv_fmul_rn.exit2.i.i.i.i
%131 = fmul float %expo.i.i.2.i.i, 0x3EB7F7D1C0000000, !dbg !544
br label %__internal_log_ep.exit.i.i.i, !dbg !544
__internal_log_ep.exit.i.i.i: ; preds = %130, %128
%.018.i.i = phi float [ %129, %128 ], [ %131, %130 ], !dbg !544
%132 = fadd float %119, %.019.i.i, !dbg !544
%133 = fsub float %.019.i.i, %132, !dbg !544
%134 = fadd float %119, %133, !dbg !544
%135 = fadd float %121, %134, !dbg !544
%136 = fadd float %135, %.018.i.i, !dbg !544
%137 = fadd float %132, %136, !dbg !544
%138 = fsub float %132, %137, !dbg !544
%139 = fadd float %136, %138, !dbg !544
%140 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not70 = icmp eq i32 %140, 0, !dbg !544
br i1 %.not70, label %__nv_fabsf.exit.i6.i.i, label %141, !dbg !544
141: ; preds = %__internal_log_ep.exit.i.i.i
%142 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544
br label %__nv_fabsf.exit.i6.i.i, !dbg !544
__nv_fabsf.exit.i6.i.i: ; preds = %141, %__internal_log_ep.exit.i.i.i
%.027.i.i = phi float [ %142, %141 ], [ 4.000000e+00, %__internal_log_ep.exit.i.i.i ], !dbg !544
%143 = fcmp ogt float %.027.i.i, 0x46FED09BE0000000, !dbg !544
%.013.i.i = select i1 %143, float 0x3F40000000000000, float 4.000000e+00, !dbg !544
%144 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not71 = icmp eq i32 %144, 0, !dbg !544
br i1 %.not71, label %147, label %145, !dbg !544
145: ; preds = %__nv_fabsf.exit.i6.i.i
%146 = call float @llvm.nvvm.mul.rn.ftz.f(float noundef %.013.i.i, float %137) #16, !dbg !544
br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !544
147: ; preds = %__nv_fabsf.exit.i6.i.i
%148 = fmul float %137, %.013.i.i, !dbg !544
br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !544
__nv_fmul_rn.exit.i.i.i.i: ; preds = %147, %145
%.028.i.i = phi float [ %146, %145 ], [ %148, %147 ], !dbg !544
%149 = fneg float %.028.i.i, !dbg !544
%150 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not72 = icmp eq i32 %150, 0, !dbg !544
br i1 %.not72, label %153, label %151, !dbg !544
151: ; preds = %__nv_fmul_rn.exit.i.i.i.i
%152 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %137, float %149) #16, !dbg !544
br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !544
153: ; preds = %__nv_fmul_rn.exit.i.i.i.i
%154 = call float @llvm.fma.f32(float noundef %.013.i.i, float %137, float %149) #14, !dbg !544
br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !544
__nv_fmaf_rn.exit.i3.i.i.i: ; preds = %153, %151
%.029.i.i = phi float [ %152, %151 ], [ %154, %153 ], !dbg !544
%155 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not73 = icmp eq i32 %155, 0, !dbg !544
br i1 %.not73, label %158, label %156, !dbg !544
156: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i
%157 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %139, float %.029.i.i) #16, !dbg !544
br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !544
158: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i
%159 = call float @llvm.fma.f32(float noundef %.013.i.i, float %139, float %.029.i.i) #14, !dbg !544
br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !544
__nv_fmaf_rn.exit1.i.i.i.i: ; preds = %158, %156
%.030.i.i = phi float [ %157, %156 ], [ %159, %158 ], !dbg !544
%160 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not74 = icmp eq i32 %160, 0, !dbg !544
br i1 %.not74, label %163, label %161, !dbg !544
161: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i
%162 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0.000000e+00, float %137, float %.030.i.i) #16, !dbg !544
br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !544
163: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i
%164 = call float @llvm.fma.f32(float %137, float noundef 0.000000e+00, float %.030.i.i) #14, !dbg !544
br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !544
__nv_fmaf_rn.exit2.i.i.i.i: ; preds = %163, %161
%.031.i.i = phi float [ %162, %161 ], [ %164, %163 ], !dbg !544
%165 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not75 = icmp eq i32 %165, 0, !dbg !544
br i1 %.not75, label %168, label %166, !dbg !544
166: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i
%167 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %.031.i.i) #16, !dbg !544
br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !544
168: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i
%169 = fadd float %.028.i.i, %.031.i.i, !dbg !544
br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !544
__nv_fadd_rn.exit.i.i.i.i: ; preds = %168, %166
%.032.i.i = phi float [ %167, %166 ], [ %169, %168 ], !dbg !544
%170 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not76 = icmp eq i32 %170, 0, !dbg !544
br i1 %.not76, label %174, label %171, !dbg !544
171: ; preds = %__nv_fadd_rn.exit.i.i.i.i
%172 = fneg float %.032.i.i, !dbg !544
%173 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %172) #16, !dbg !544
br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !544
174: ; preds = %__nv_fadd_rn.exit.i.i.i.i
%175 = fsub float %.028.i.i, %.032.i.i, !dbg !544
br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !544
__nv_fadd_rn.exit3.i.i.i.i: ; preds = %174, %171
%.033.i.i = phi float [ %173, %171 ], [ %175, %174 ], !dbg !544
%176 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not77 = icmp eq i32 %176, 0, !dbg !544
br i1 %.not77, label %179, label %177, !dbg !544
177: ; preds = %__nv_fadd_rn.exit3.i.i.i.i
%178 = call float @llvm.nvvm.add.rn.ftz.f(float %.033.i.i, float %.031.i.i) #16, !dbg !544
br label %__internal_dsmul.exit.i.i.i, !dbg !544
179: ; preds = %__nv_fadd_rn.exit3.i.i.i.i
%180 = fadd float %.031.i.i, %.033.i.i, !dbg !544
br label %__internal_dsmul.exit.i.i.i, !dbg !544
__internal_dsmul.exit.i.i.i: ; preds = %179, %177
%.034.i.i = phi float [ %178, %177 ], [ %180, %179 ], !dbg !544
%181 = bitcast float %.032.i.i to i32, !dbg !544
%182 = icmp eq i32 %181, 1118925336, !dbg !544
%183 = add i32 %181, -1, !dbg !544
%184 = bitcast i32 %183 to float, !dbg !544
%185 = fadd float %.034.i.i, 0x3EE0000000000000, !dbg !544
%prod.i.044.0.i.i = select i1 %182, float %185, float %.034.i.i, !dbg !544
%prod.i.145.0.i.i = select i1 %182, float %184, float %.032.i.i, !dbg !544
%186 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not78 = icmp eq i32 %186, 0, !dbg !544
br i1 %.not78, label %189, label %187, !dbg !544
187: ; preds = %__internal_dsmul.exit.i.i.i
%188 = call float @llvm.nvvm.mul.rn.ftz.f(float %prod.i.145.0.i.i, float noundef 0x3FF7154760000000) #16, !dbg !544
br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !544
189: ; preds = %__internal_dsmul.exit.i.i.i
%190 = fmul float %prod.i.145.0.i.i, 0x3FF7154760000000, !dbg !544
br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !544
__nv_fmul_rn.exit.i10.i.i.i: ; preds = %189, %187
%.017.i.i = phi float [ %188, %187 ], [ %190, %189 ], !dbg !544
%191 = call float @llvm.trunc.f32(float %.017.i.i) #14, !dbg !544
%192 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not79 = icmp eq i32 %192, 0, !dbg !544
br i1 %.not79, label %195, label %193, !dbg !544
193: ; preds = %__nv_fmul_rn.exit.i10.i.i.i
%194 = call float @llvm.nvvm.fabs.ftz.f(float %191) #16, !dbg !544
br label %__nv_fabsf.exit.i.i.i.i, !dbg !544
195: ; preds = %__nv_fmul_rn.exit.i10.i.i.i
%196 = call float @llvm.fabs.f32(float %191) #14, !dbg !544
br label %__nv_fabsf.exit.i.i.i.i, !dbg !544
__nv_fabsf.exit.i.i.i.i: ; preds = %195, %193
%.023.i.i = phi float [ %194, %193 ], [ %196, %195 ], !dbg !544
%197 = fcmp ogt float %.023.i.i, 1.260000e+02, !dbg !544
%198 = bitcast float %191 to i32, !dbg !544
%199 = and i32 %198, -2147483648, !dbg !544
%200 = or i32 %199, 1123811328, !dbg !544
%201 = bitcast i32 %200 to float, !dbg !544
%j.i.i.0.i.i = select i1 %197, float %201, float %191, !dbg !544
%202 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not80 = icmp eq i32 %202, 0, !dbg !544
br i1 %.not80, label %205, label %203, !dbg !544
203: ; preds = %__nv_fabsf.exit.i.i.i.i
%204 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #16, !dbg !544
br label %__internal_fmad.exit4.i.i.i.i, !dbg !544
205: ; preds = %__nv_fabsf.exit.i.i.i.i
%206 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #14, !dbg !544
br label %__internal_fmad.exit4.i.i.i.i, !dbg !544
__internal_fmad.exit4.i.i.i.i: ; preds = %205, %203
%.035.i.i = phi float [ %204, %203 ], [ %206, %205 ], !dbg !544
%207 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not81 = icmp eq i32 %207, 0, !dbg !544
br i1 %.not81, label %210, label %208, !dbg !544
208: ; preds = %__internal_fmad.exit4.i.i.i.i
%209 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #16, !dbg !544
br label %__internal_expf_kernel.exit.i.i.i, !dbg !544
210: ; preds = %__internal_fmad.exit4.i.i.i.i
%211 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #14, !dbg !544
br label %__internal_expf_kernel.exit.i.i.i, !dbg !544
__internal_expf_kernel.exit.i.i.i: ; preds = %210, %208
%.036.i.i = phi float [ %209, %208 ], [ %211, %210 ], !dbg !544
%212 = fmul float %.036.i.i, 0x3FF7154760000000, !dbg !544
%213 = fadd float %j.i.i.0.i.i, 0x4168000FE0000000, !dbg !544
%214 = bitcast float %213 to i32, !dbg !544
%215 = shl i32 %214, 23, !dbg !544
%216 = bitcast i32 %215 to float, !dbg !544
%217 = call float @llvm.nvvm.ex2.approx.ftz.f(float %212) #16, !dbg !544
%218 = fmul float %217, %216, !dbg !544
%219 = fcmp une float %218, 0x7FF0000000000000, !dbg !544
br i1 %219, label %220, label %__internal_accurate_powf.exit.i.i, !dbg !544
220: ; preds = %__internal_expf_kernel.exit.i.i.i
%221 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not94 = icmp eq i32 %221, 0, !dbg !544
br i1 %.not94, label %224, label %222, !dbg !544
222: ; preds = %220
%223 = call float @llvm.nvvm.fma.rn.ftz.f(float %218, float %prod.i.044.0.i.i, float %218) #16, !dbg !544
br label %__internal_accurate_powf.exit.i.i, !dbg !544
224: ; preds = %220
%225 = call float @llvm.fma.f32(float %218, float %prod.i.044.0.i.i, float %218) #14, !dbg !544
br label %__internal_accurate_powf.exit.i.i, !dbg !544
__internal_accurate_powf.exit.i.i: ; preds = %224, %222, %__internal_expf_kernel.exit.i.i.i
%t.i.0.i.i = phi float [ 0x7FF0000000000000, %__internal_expf_kernel.exit.i.i.i ], [ %223, %222 ], [ %225, %224 ], !dbg !544
%226 = fcmp olt float %51, 0.000000e+00, !dbg !544
%227 = and i1 %226, %56, !dbg !544
%228 = bitcast float %t.i.0.i.i to i32, !dbg !544
%229 = xor i32 %228, -2147483648, !dbg !544
%230 = bitcast i32 %229 to float, !dbg !544
%.010.i.i = select i1 %227, float %230, float %t.i.0.i.i, !dbg !544
%231 = fcmp oeq float %51, 0.000000e+00, !dbg !544
%232 = fadd float %51, %51, !dbg !544
%233 = select i1 %56, float %232, float 0.000000e+00, !dbg !544
%.212.i.i = select i1 %231, float %233, float %.010.i.i, !dbg !544
%234 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not = icmp eq i32 %234, 0, !dbg !544
br i1 %.not, label %237, label %235, !dbg !544
235: ; preds = %__internal_accurate_powf.exit.i.i
%236 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544
br label %__nv_fabsf.exit.i.i.i, !dbg !544
237: ; preds = %__internal_accurate_powf.exit.i.i
%238 = call float @llvm.fabs.f32(float %51) #14, !dbg !544
br label %__nv_fabsf.exit.i.i.i, !dbg !544
__nv_fabsf.exit.i.i.i: ; preds = %237, %235
%.01.i.i = phi float [ %236, %235 ], [ %238, %237 ], !dbg !544
%239 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not82 = icmp eq i32 %239, 0, !dbg !544
br i1 %.not82, label %__nv_fabsf.exit1.i.i.i, label %240, !dbg !544
240: ; preds = %__nv_fabsf.exit.i.i.i
%241 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544
br label %__nv_fabsf.exit1.i.i.i, !dbg !544
__nv_fabsf.exit1.i.i.i: ; preds = %240, %__nv_fabsf.exit.i.i.i
%.02.i.i = phi float [ %241, %240 ], [ 4.000000e+00, %__nv_fabsf.exit.i.i.i ], !dbg !544
%242 = fadd float %.01.i.i, %.02.i.i, !dbg !544
%243 = bitcast float %242 to i32, !dbg !544
%244 = icmp sgt i32 %243, 2139095039, !dbg !544
br i1 %244, label %245, label %__internal_powf_infinite_cases.exit.i.i, !dbg !544
245: ; preds = %__nv_fabsf.exit1.i.i.i
%246 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not88 = icmp eq i32 %246, 0, !dbg !544
br i1 %.not88, label %249, label %247, !dbg !544
247: ; preds = %245
%248 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544
br label %__nv_isnanf.exit.i.i.i, !dbg !544
249: ; preds = %245
%250 = call float @llvm.fabs.f32(float %51) #14, !dbg !544
br label %__nv_isnanf.exit.i.i.i, !dbg !544
__nv_isnanf.exit.i.i.i: ; preds = %249, %247
%.06.i.i = phi float [ %248, %247 ], [ %250, %249 ], !dbg !544
%251 = fcmp ugt float %.06.i.i, 0x7FF0000000000000, !dbg !544
br i1 %251, label %257, label %252, !dbg !544
252: ; preds = %__nv_isnanf.exit.i.i.i
%253 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not89 = icmp eq i32 %253, 0, !dbg !544
br i1 %.not89, label %__nv_isnanf.exit11.i.i.i, label %254, !dbg !544
254: ; preds = %252
%255 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544
br label %__nv_isnanf.exit11.i.i.i, !dbg !544
__nv_isnanf.exit11.i.i.i: ; preds = %254, %252
%.07.i.i = phi float [ %255, %254 ], [ 4.000000e+00, %252 ], !dbg !544
%256 = fcmp ugt float %.07.i.i, 0x7FF0000000000000, !dbg !544
br i1 %256, label %257, label %259, !dbg !544
257: ; preds = %__nv_isnanf.exit11.i.i.i, %__nv_isnanf.exit.i.i.i
%258 = fadd float %51, 4.000000e+00, !dbg !544
br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544
259: ; preds = %__nv_isnanf.exit11.i.i.i
%260 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not90 = icmp eq i32 %260, 0, !dbg !544
br i1 %.not90, label %__nv_isinff.exit8.i.i.i, label %261, !dbg !544
261: ; preds = %259
%262 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544
br label %__nv_isinff.exit8.i.i.i, !dbg !544
__nv_isinff.exit8.i.i.i: ; preds = %261, %259
%.05.i.i = phi float [ %262, %261 ], [ 4.000000e+00, %259 ], !dbg !544
%263 = fcmp oeq float %.05.i.i, 0x7FF0000000000000, !dbg !544
br i1 %263, label %264, label %274, !dbg !544
264: ; preds = %__nv_isinff.exit8.i.i.i
%265 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not93 = icmp eq i32 %265, 0, !dbg !544
br i1 %.not93, label %268, label %266, !dbg !544
266: ; preds = %264
%267 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544
br label %__nv_fabsf.exit5.i.i.i, !dbg !544
268: ; preds = %264
%269 = call float @llvm.fabs.f32(float %51) #14, !dbg !544
br label %__nv_fabsf.exit5.i.i.i, !dbg !544
__nv_fabsf.exit5.i.i.i: ; preds = %268, %266
%.04.i.i = phi float [ %267, %266 ], [ %269, %268 ], !dbg !544
%270 = fcmp ogt float %.04.i.i, 1.000000e+00, !dbg !544
%271 = fcmp oeq float %51, -1.000000e+00, !dbg !544
%272 = select i1 %270, float 0x7FF0000000000000, float 0.000000e+00, !dbg !544
%273 = select i1 %271, float 1.000000e+00, float %272, !dbg !544
br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544
274: ; preds = %__nv_isinff.exit8.i.i.i
%275 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not91 = icmp eq i32 %275, 0, !dbg !544
br i1 %.not91, label %278, label %276, !dbg !544
276: ; preds = %274
%277 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544
br label %__nv_isinff.exit.i.i.i, !dbg !544
278: ; preds = %274
%279 = call float @llvm.fabs.f32(float %51) #14, !dbg !544
br label %__nv_isinff.exit.i.i.i, !dbg !544
__nv_isinff.exit.i.i.i: ; preds = %278, %276
%.03.i.i = phi float [ %277, %276 ], [ %279, %278 ], !dbg !544
%280 = fcmp oeq float %.03.i.i, 0x7FF0000000000000, !dbg !544
br i1 %280, label %281, label %__internal_powf_infinite_cases.exit.i.i, !dbg !544
281: ; preds = %__nv_isinff.exit.i.i.i
%282 = select i1 %227, float 0xFFF0000000000000, float 0x7FF0000000000000, !dbg !544
br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544
__internal_powf_infinite_cases.exit.i.i: ; preds = %281, %__nv_isinff.exit.i.i.i, %__nv_fabsf.exit5.i.i.i, %257, %__nv_fabsf.exit1.i.i.i
%.3.i.i = phi float [ %.212.i.i, %__nv_fabsf.exit1.i.i.i ], [ %258, %257 ], [ %273, %__nv_fabsf.exit5.i.i.i ], [ %282, %281 ], [ %.212.i.i, %__nv_isinff.exit.i.i.i ], !dbg !544
%283 = fcmp oeq float %51, 1.000000e+00, !dbg !544
%t.0.i.i = select i1 %283, float 1.000000e+00, float %.3.i.i, !dbg !544
%284 = fadd float %value_phi13.i, %t.0.i.i, !dbg !547
%.not83 = icmp eq i64 %iv.next, %15, !dbg !548
%285 = add nuw i64 %iv.next, 1, !dbg !550
br i1 %.not83, label %L467.i.loopexit, label %L319.i, !dbg !551
L467.i.loopexit: ; preds = %__internal_powf_infinite_cases.exit.i.i
br label %L467.i, !dbg !552
L467.i: ; preds = %L467.i.loopexit, %L302.i
%value_phi17.i = phi float [ 0.000000e+00, %L302.i ], [ %284, %L467.i.loopexit ]
%286 = fpext float %value_phi17.i to double, !dbg !552
%287 = call i32 @llvm.nvvm.d2i.hi(double %286) #16, !dbg !558
%288 = call i32 @llvm.nvvm.d2i.hi(double noundef 2.500000e-01) #16, !dbg !558
%289 = and i32 %288, 2146435072, !dbg !558
%290 = icmp eq i32 %289, 1072693248, !dbg !558
%291 = call double @llvm.fabs.f64(double %286) #14, !dbg !558
%292 = call fastcc double @__internal_accurate_pow(double %291) #16, !dbg !558
%293 = icmp slt i32 %287, 0, !dbg !558
%294 = and i1 %293, %290, !dbg !558
br i1 %294, label %295, label %300, !dbg !558
295: ; preds = %L467.i
%296 = call i32 @llvm.nvvm.d2i.hi(double %292) #16, !dbg !558
%297 = call i32 @llvm.nvvm.d2i.lo(double %292) #16, !dbg !558
%298 = xor i32 %296, -2147483648, !dbg !558
%299 = call double @llvm.nvvm.lohi.i2d(i32 %297, i32 %298) #16, !dbg !558
br label %300, !dbg !558
300: ; preds = %295, %L467.i
%t.0.i37.i = phi double [ %299, %295 ], [ %292, %L467.i ], !dbg !558
%301 = fcmp oeq float %value_phi17.i, 0.000000e+00, !dbg !558
br i1 %301, label %302, label %306, !dbg !558
302: ; preds = %300
%spec.select = select i1 %290, i32 %287, i32 0, !dbg !558
%303 = icmp slt i32 %288, 0, !dbg !558
%304 = or i32 %spec.select, 2146435072, !dbg !558
%thi.1.i.i = select i1 %303, i32 %304, i32 %spec.select, !dbg !558
%305 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.1.i.i) #16, !dbg !558
br label %307, !dbg !558
306: ; preds = %300
%spec.select97 = select i1 %293, double 0xFFF8000000000000, double %t.0.i37.i, !dbg !558
br label %307, !dbg !558
307: ; preds = %306, %302
%t.2.i.i = phi double [ %305, %302 ], [ %spec.select97, %306 ], !dbg !558
%308 = fadd double %286, 2.500000e-01, !dbg !558
%309 = call i32 @llvm.nvvm.d2i.hi(double %308) #16, !dbg !558
%310 = and i32 %309, 2146435072, !dbg !558
%311 = icmp eq i32 %310, 2146435072, !dbg !558
br i1 %311, label %312, label %__nv_pow.exit.i, !dbg !558
312: ; preds = %307
%313 = fcmp ugt double %291, 0x7FF0000000000000, !dbg !558
br i1 %313, label %__nv_pow.exit.i, label %314, !dbg !558
314: ; preds = %312
%315 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%316 = icmp eq i32 %315, 200, !dbg !558
br i1 %316, label %.critedge, label %317, !dbg !558
317: ; preds = %314
%318 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%319 = icmp eq i32 %318, 350, !dbg !558
br i1 %319, label %.critedge, label %320, !dbg !558
320: ; preds = %317
%321 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%322 = icmp eq i32 %321, 370, !dbg !558
br i1 %322, label %.critedge, label %__nv_isinfd.exit4.i.i, !dbg !558
__nv_isinfd.exit4.i.i: ; preds = %320
%323 = call i32 @llvm.nvvm.d2i.lo(double noundef 2.500000e-01) #16, !dbg !558
%324 = and i32 %288, 2147483647, !dbg !558
%325 = icmp eq i32 %324, 2146435072, !dbg !558
%326 = icmp eq i32 %323, 0, !dbg !558
%327 = and i1 %326, %325, !dbg !558
br i1 %327, label %328, label %.critedge, !dbg !558
328: ; preds = %__nv_isinfd.exit4.i.i
%329 = fcmp ogt double %291, 1.000000e+00, !dbg !558
%thi.2.i.i = select i1 %329, i32 2146435072, i32 0, !dbg !558
%330 = icmp slt i32 %288, 0, !dbg !558
%331 = xor i32 %thi.2.i.i, 2146435072
%spec.select8 = select i1 %330, i32 %331, i32 %thi.2.i.i, !dbg !558
%332 = fcmp oeq float %value_phi17.i, -1.000000e+00, !dbg !558
%thi.4.i.i = select i1 %332, i32 1072693248, i32 %spec.select8, !dbg !558
%333 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.4.i.i) #16, !dbg !558
br label %__nv_pow.exit.i, !dbg !558
.critedge: ; preds = %__nv_isinfd.exit4.i.i, %320, %317, %314
%334 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%335 = icmp eq i32 %334, 200, !dbg !558
br i1 %335, label %342, label %336, !dbg !558
336: ; preds = %.critedge
%337 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%338 = icmp eq i32 %337, 350, !dbg !558
br i1 %338, label %342, label %339, !dbg !558
339: ; preds = %336
%340 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%341 = icmp eq i32 %340, 370, !dbg !558
br i1 %341, label %342, label %344, !dbg !558
342: ; preds = %339, %336, %.critedge
%343 = fcmp oeq double %291, 0x7FF0000000000000, !dbg !558
br label %__nv_isinfd.exit.i.i, !dbg !558
344: ; preds = %339
%345 = call i32 @llvm.nvvm.d2i.lo(double %286) #16, !dbg !558
%346 = and i32 %287, 2147483647, !dbg !558
%347 = icmp eq i32 %346, 2146435072, !dbg !558
%348 = icmp eq i32 %345, 0, !dbg !558
%349 = and i1 %348, %347, !dbg !558
br label %__nv_isinfd.exit.i.i, !dbg !558
__nv_isinfd.exit.i.i: ; preds = %344, %342
%.0.i39.i.in = phi i1 [ %343, %342 ], [ %349, %344 ]
br i1 %.0.i39.i.in, label %350, label %__nv_pow.exit.i, !dbg !558
350: ; preds = %__nv_isinfd.exit.i.i
%.inv87 = icmp slt i32 %288, 0, !dbg !558
%spec.select9 = select i1 %.inv87, i32 0, i32 2146435072, !dbg !558
%351 = and i32 %288, 2147483647, !dbg !558
%352 = icmp ne i32 %351, 1071644672, !dbg !558
%or.cond11 = and i1 %352, %294, !dbg !558
%353 = or i32 %spec.select9, -2147483648, !dbg !558
%thi.6.i.i = select i1 %or.cond11, i32 %353, i32 %spec.select9, !dbg !558
%354 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.6.i.i) #16, !dbg !558
br label %__nv_pow.exit.i, !dbg !558
__nv_pow.exit.i: ; preds = %350, %__nv_isinfd.exit.i.i, %328, %312, %307
%t.6.i.i = phi double [ %t.2.i.i, %307 ], [ %333, %328 ], [ %354, %350 ], [ %t.2.i.i, %__nv_isinfd.exit.i.i ], [ %308, %312 ], !dbg !558
%355 = icmp sgt i64 %.fca.2.0.extract, 0, !dbg !559
%356 = select i1 %355, i64 %.fca.2.0.extract, i64 0, !dbg !559
%357 = load i64, i64* %13, align 8, !dbg !569, !tbaa !301
%358 = load i64, i64* %14, align 8, !dbg !573, !tbaa !301
%359 = add i64 %358, -1, !dbg !573
%360 = mul i64 %359, %356, !dbg !576
%361 = add i64 %360, %357, !dbg !577
%362 = icmp sgt i64 %.fca.3.extract, 0, !dbg !578
%363 = select i1 %362, i64 %.fca.3.extract, i64 0, !dbg !578
%364 = icmp slt i64 %361, 1, !dbg !587
%365 = icmp sgt i64 %361, %363, !dbg !587
%366 = or i1 %364, %365, !dbg !589
br i1 %366, label %L493.i, label %L491.i, !dbg !589
L491.i: ; preds = %__nv_pow.exit.i
%367 = fcmp oeq float %value_phi17.i, 1.000000e+00, !dbg !558
%368 = fptrunc double %t.6.i.i to float, !dbg !590
%369 = select i1 %367, float 1.000000e+00, float %368, !dbg !590
%370 = add nsw i64 %361, -1, !dbg !592
%371 = getelementptr inbounds float, float addrspace(1)* %12, i64 %370, !dbg !599
store float %369, float addrspace(1)* %371, align 4, !dbg !599, !tbaa !435
br label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !600
L493.i: ; preds = %__nv_pow.exit.i
%372 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5571() #15, !dbg !589
unreachable, !dbg !589
fail.i: ; preds = %entry
call fastcc void @gpu_report_exception() #14, !dbg !501
call fastcc void @gpu_signal_exception() #14, !dbg !501
call void asm sideeffect "exit;", ""() #16, !dbg !501
unreachable, !dbg !501
pass.i: ; preds = %entry
%373 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #14, !dbg !601, !range !127
%374 = udiv i64 %11, %.fca.1.0.0.0.0.extract, !dbg !501
%375 = mul i64 %374, %.fca.1.0.0.0.0.extract, !dbg !606
%376 = sub i64 %11, %375, !dbg !608
%377 = zext i32 %373 to i64, !dbg !499
%378 = lshr i64 %377, 8, !dbg !501
%379 = and i64 %377, 255, !dbg !608
%380 = add nuw nsw i64 %379, 1, !dbg !609
%381 = shl i64 %376, 8, !dbg !610
%382 = add i64 %380, %381, !dbg !614
%383 = add nuw nsw i64 %374, 1, !dbg !615
%384 = add nuw nsw i64 %383, %378, !dbg !614
%385 = icmp sgt i64 %382, 0, !dbg !618
%386 = icmp sle i64 %382, %.fca.0.0.0.0.extract, !dbg !618
%387 = and i1 %385, %386, !dbg !623
%388 = icmp sle i64 %384, %.fca.0.0.1.0.extract, !dbg !618
%389 = and i1 %388, %387, !dbg !624
br i1 %389, label %L302.i, label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !626
julia_gpu_dist_kernel__5526_inner.exit: ; preds = %pass.i, %L491.i
call void @llvm.lifetime.end.p0i8(i64 noundef 16, i8* noundef nonnull align 8 dereferenceable(16) %7) #14, !dbg !627
ret void, !dbg !475
}
; Function Attrs: willreturn mustprogress
define void @preprocess_julia_gpu_dist_kernel__5526_inner19({ [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, { i8 addrspace(1)*, i64, [2 x i64], i64 } %3) local_unnamed_addr #12 !dbg !474 {
entry:
%4 = alloca [2 x i64], align 8
%5 = alloca { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, align 8, !dbg !475
%6 = addrspacecast { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5 to { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)*, !dbg !475
%.fca.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 0, 0, !dbg !475
%.fca.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 0, i64 0, !dbg !475
store i64 %.fca.0.0.0.0.extract, i64* %.fca.0.0.0.0.gep, align 8, !dbg !475
%.fca.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 1, 0, !dbg !475
%.fca.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 1, i64 0, !dbg !475
store i64 %.fca.0.0.1.0.extract, i64* %.fca.0.0.1.0.gep, align 8, !dbg !475
%.fca.1.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 0, 0, !dbg !475
%.fca.1.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 0, i64 0, !dbg !475
store i64 %.fca.1.0.0.0.0.extract, i64* %.fca.1.0.0.0.0.gep, align 8, !dbg !475
%.fca.1.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 1, 0, !dbg !475
%.fca.1.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 1, i64 0, !dbg !475
store i64 %.fca.1.0.0.1.0.extract, i64* %.fca.1.0.0.1.0.gep, align 8, !dbg !475
%.fca.0.extract30 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 0, !dbg !475
%.fca.1.extract32 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 1, !dbg !475
%.fca.2.0.extract34 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 0, !dbg !475
%.fca.2.1.extract36 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 1, !dbg !475
%.fca.3.extract38 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 3, !dbg !475
%.fca.0.extract12 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 0, !dbg !475
%.fca.1.extract14 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 1, !dbg !475
%.fca.2.0.extract16 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 0, !dbg !475
%.fca.2.1.extract18 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 1, !dbg !475
%.fca.3.extract20 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 3, !dbg !475
%.fca.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 0, !dbg !475
%.fca.1.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 1, !dbg !475
%.fca.2.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 2, 0, !dbg !475
%.fca.2.1.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 2, 1, !dbg !475
%.fca.3.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 3, !dbg !475
%7 = bitcast [2 x i64]* %4 to i8*
call void @llvm.lifetime.start.p0i8(i64 noundef 16, i8* noundef nonnull align 8 dereferenceable(16) %7) #14
%8 = call {}*** @julia.get_pgcstack() #14
%9 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #14, !dbg !476, !range !65
%10 = icmp sgt i64 %.fca.1.0.0.0.0.extract, 0, !dbg !485
%11 = zext i32 %9 to i64, !dbg !499
%12 = bitcast i8 addrspace(1)* %.fca.0.extract to float addrspace(1)*, !dbg !501
br i1 %10, label %pass.i, label %fail.i, !dbg !501
L302.i: ; preds = %pass.i
call fastcc void @julia___index_Global_NTuple_5574([2 x i64]* noalias nocapture noundef nonnull writeonly sret([2 x i64]) align 8 dereferenceable(16) %4, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(32) %6) #12, !dbg !504
%13 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 0, !dbg !506
%14 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 1, !dbg !509
%.inv = icmp sgt i64 %.fca.2.0.extract34, 0, !dbg !510
%15 = select i1 %.inv, i64 %.fca.2.0.extract34, i64 0, !dbg !510
br i1 %.inv, label %L319.i.preheader, label %L467.i, !dbg !514
L319.i.preheader: ; preds = %L302.i
%16 = load i64, i64* %13, align 8, !tbaa !301
%17 = icmp sgt i64 %.fca.2.1.extract36, 0
%18 = select i1 %17, i64 %.fca.2.1.extract36, i64 0
%19 = icmp sgt i64 %16, 0
%20 = icmp sle i64 %16, %18
%21 = and i1 %19, %20
%22 = add i64 %16, -1
%23 = mul i64 %22, %15
%24 = load i64, i64* %14, align 8
%25 = icmp sgt i64 %.fca.2.0.extract16, 0
%26 = select i1 %25, i64 %.fca.2.0.extract16, i64 0
%27 = icmp sgt i64 %.fca.2.1.extract18, 0
%28 = select i1 %27, i64 %.fca.2.1.extract18, i64 0
%29 = icmp sgt i64 %24, 0
%30 = icmp sle i64 %24, %28
%31 = and i1 %29, %30
%32 = add i64 %24, -1
%33 = mul i64 %32, %26
br label %L319.i, !dbg !515
L319.i: ; preds = %__internal_powf_infinite_cases.exit.i.i, %L319.i.preheader
%iv = phi i64 [ %iv.next, %__internal_powf_infinite_cases.exit.i.i ], [ 0, %L319.i.preheader ]
%value_phi13.i = phi float [ %284, %__internal_powf_infinite_cases.exit.i.i ], [ 0.000000e+00, %L319.i.preheader ]
%iv.next = add nuw nsw i64 %iv, 1, !dbg !519
%34 = icmp ule i64 %iv.next, %15, !dbg !519
%35 = and i1 %34, %21, !dbg !524
br i1 %35, label %L340.i, label %L342.i, !dbg !515
L340.i: ; preds = %L319.i
%36 = add i64 %23, %iv.next, !dbg !525
%37 = shl i64 %36, 2, !dbg !532
%38 = add i64 %37, -4, !dbg !532
%39 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract30, i64 %38, !dbg !537
%40 = bitcast i8 addrspace(1)* %39 to float addrspace(1)*, !dbg !538
%41 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %40, i32 noundef 4) #14, !dbg !538
%42 = icmp ule i64 %iv.next, %26, !dbg !519
%43 = and i1 %42, %31, !dbg !524
br i1 %43, label %L405.i, label %L407.i, !dbg !515
L342.i: ; preds = %L319.i
%44 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !515
unreachable, !dbg !515
L405.i: ; preds = %L340.i
%45 = add i64 %33, %iv.next, !dbg !525
%46 = shl i64 %45, 2, !dbg !532
%47 = add i64 %46, -4, !dbg !532
%48 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract12, i64 %47, !dbg !537
%49 = bitcast i8 addrspace(1)* %48 to float addrspace(1)*, !dbg !538
%50 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %49, i32 noundef 4) #14, !dbg !538
%51 = fsub float %41, %50, !dbg !543
%52 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not59 = icmp eq i32 %52, 0, !dbg !544
br i1 %.not59, label %__nv_fabsf.exit.i.i, label %54, !dbg !544
L407.i: ; preds = %L340.i
%53 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !515
unreachable, !dbg !515
54: ; preds = %L405.i
%55 = call float @llvm.nvvm.fabs.ftz.f(float noundef 0.000000e+00) #16, !dbg !544
br label %__nv_fabsf.exit.i.i, !dbg !544
__nv_fabsf.exit.i.i: ; preds = %54, %L405.i
%.08.i.i = phi float [ %55, %54 ], [ 0.000000e+00, %L405.i ], !dbg !544
%56 = fcmp oeq float %.08.i.i, 1.000000e+00, !dbg !544
%57 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not60 = icmp eq i32 %57, 0, !dbg !544
br i1 %.not60, label %60, label %58, !dbg !544
58: ; preds = %__nv_fabsf.exit.i.i
%59 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544
br label %__nv_fabsf.exit1.i.i, !dbg !544
60: ; preds = %__nv_fabsf.exit.i.i
%61 = call float @llvm.fabs.f32(float %51) #14, !dbg !544
br label %__nv_fabsf.exit1.i.i, !dbg !544
__nv_fabsf.exit1.i.i: ; preds = %60, %58
%.09.i.i = phi float [ %59, %58 ], [ %61, %60 ], !dbg !544
%62 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not61 = icmp eq i32 %62, 0, !dbg !544
%63 = fcmp olt float %.09.i.i, 0x3810000000000000, !dbg !544
%64 = fmul float %.09.i.i, 0x4170000000000000, !dbg !544
%65 = and i1 %63, %.not61, !dbg !544
%.116.i.i = select i1 %65, float %64, float %.09.i.i, !dbg !544
%expo.i.i.1.i.i = select i1 %65, float -1.510000e+02, float -1.270000e+02, !dbg !544
%66 = bitcast float %.116.i.i to i32, !dbg !544
%67 = and i32 %66, 8388607, !dbg !544
%68 = or i32 %67, 1065353216, !dbg !544
%69 = bitcast i32 %68 to float, !dbg !544
%70 = lshr i32 %66, 23, !dbg !544
%71 = uitofp i32 %70 to float, !dbg !544
%72 = fadd float %expo.i.i.1.i.i, %71, !dbg !544
%73 = fcmp ogt float %69, 0x3FF6A09E60000000, !dbg !544
%74 = fmul float %69, 5.000000e-01, !dbg !544
%75 = fadd float %72, 1.000000e+00, !dbg !544
%expo.i.i.2.i.i = select i1 %73, float %75, float %72, !dbg !544
%m.i.i.0.i.i = select i1 %73, float %74, float %69, !dbg !544
%76 = fadd float %m.i.i.0.i.i, -1.000000e+00, !dbg !544
%77 = fadd float %m.i.i.0.i.i, 1.000000e+00, !dbg !544
%78 = call float asm "rcp.approx.ftz.f32 $0,$1;", "=f,f"(float %77) #17, !dbg !544, !srcloc !362
%79 = fmul float %76, 2.000000e+00, !dbg !544
%80 = fmul float %78, %79, !dbg !544
%81 = fmul float %80, %80, !dbg !544
%82 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not62 = icmp eq i32 %82, 0, !dbg !544
br i1 %.not62, label %85, label %83, !dbg !544
83: ; preds = %__nv_fabsf.exit1.i.i
%84 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0x3F631E1FC0000000, float %81, float noundef 0x3F8995EC60000000) #16, !dbg !544
br label %__internal_fmad.exit.i.i.i.i, !dbg !544
85: ; preds = %__nv_fabsf.exit1.i.i
%86 = call float @llvm.fma.f32(float %81, float noundef 0x3F631E1FC0000000, float noundef 0x3F8995EC60000000) #14, !dbg !544
br label %__internal_fmad.exit.i.i.i.i, !dbg !544
__internal_fmad.exit.i.i.i.i: ; preds = %85, %83
%.020.i.i = phi float [ %84, %83 ], [ %86, %85 ], !dbg !544
%87 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not63 = icmp eq i32 %87, 0, !dbg !544
br i1 %.not63, label %90, label %88, !dbg !544
88: ; preds = %__internal_fmad.exit.i.i.i.i
%89 = call float @llvm.nvvm.fma.rn.ftz.f(float %.020.i.i, float %81, float noundef 0x3FB55557A0000000) #16, !dbg !544
br label %__internal_fmad.exit3.i.i.i.i, !dbg !544
90: ; preds = %__internal_fmad.exit.i.i.i.i
%91 = call float @llvm.fma.f32(float %.020.i.i, float %81, float noundef 0x3FB55557A0000000) #14, !dbg !544
br label %__internal_fmad.exit3.i.i.i.i, !dbg !544
__internal_fmad.exit3.i.i.i.i: ; preds = %90, %88
%.021.i.i = phi float [ %89, %88 ], [ %91, %90 ], !dbg !544
%92 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not64 = icmp eq i32 %92, 0, !dbg !544
br i1 %.not64, label %95, label %93, !dbg !544
93: ; preds = %__internal_fmad.exit3.i.i.i.i
%94 = call float @llvm.nvvm.mul.rn.ftz.f(float %.021.i.i, float %81) #16, !dbg !544
br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !544
95: ; preds = %__internal_fmad.exit3.i.i.i.i
%96 = fmul float %81, %.021.i.i, !dbg !544
br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !544
__nv_fmul_rn.exit4.i.i.i.i: ; preds = %95, %93
%.022.i.i = phi float [ %94, %93 ], [ %96, %95 ], !dbg !544
%97 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not65 = icmp eq i32 %97, 0, !dbg !544
br i1 %.not65, label %100, label %98, !dbg !544
98: ; preds = %__nv_fmul_rn.exit4.i.i.i.i
%99 = call float @llvm.nvvm.mul.rn.ftz.f(float %.022.i.i, float %80) #16, !dbg !544
br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !544
100: ; preds = %__nv_fmul_rn.exit4.i.i.i.i
%101 = fmul float %80, %.022.i.i, !dbg !544
br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !544
__nv_fmul_rn.exit5.i.i.i.i: ; preds = %100, %98
%.024.i.i = phi float [ %99, %98 ], [ %101, %100 ], !dbg !544
%102 = fsub float %76, %80, !dbg !544
%103 = fmul float %102, 2.000000e+00, !dbg !544
%104 = fneg float %80, !dbg !544
%105 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not66 = icmp eq i32 %105, 0, !dbg !544
br i1 %.not66, label %108, label %106, !dbg !544
106: ; preds = %__nv_fmul_rn.exit5.i.i.i.i
%107 = call float @llvm.nvvm.fma.rn.ftz.f(float %104, float %76, float %103) #16, !dbg !544
br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !544
108: ; preds = %__nv_fmul_rn.exit5.i.i.i.i
%109 = call float @llvm.fma.f32(float %104, float %76, float %103) #14, !dbg !544
br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !544
__nv_fmaf_rn.exit.i.i.i.i: ; preds = %108, %106
%.025.i.i = phi float [ %107, %106 ], [ %109, %108 ], !dbg !544
%110 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not67 = icmp eq i32 %110, 0, !dbg !544
br i1 %.not67, label %113, label %111, !dbg !544
111: ; preds = %__nv_fmaf_rn.exit.i.i.i.i
%112 = call float @llvm.nvvm.mul.rn.ftz.f(float %78, float %.025.i.i) #16, !dbg !544
br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !544
113: ; preds = %__nv_fmaf_rn.exit.i.i.i.i
%114 = fmul float %78, %.025.i.i, !dbg !544
br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !544
__nv_fmul_rn.exit6.i.i.i.i: ; preds = %113, %111
%.026.i.i = phi float [ %112, %111 ], [ %114, %113 ], !dbg !544
%115 = fadd float %80, %.024.i.i, !dbg !544
%116 = fsub float %80, %115, !dbg !544
%117 = fadd float %.024.i.i, %116, !dbg !544
%118 = fadd float %117, %.026.i.i, !dbg !544
%119 = fadd float %115, %118, !dbg !544
%120 = fsub float %115, %119, !dbg !544
%121 = fadd float %118, %120, !dbg !544
%122 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not68 = icmp eq i32 %122, 0, !dbg !544
br i1 %.not68, label %125, label %123, !dbg !544
123: ; preds = %__nv_fmul_rn.exit6.i.i.i.i
%124 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3FE62E4000000000) #16, !dbg !544
br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !544
125: ; preds = %__nv_fmul_rn.exit6.i.i.i.i
%126 = fmul float %expo.i.i.2.i.i, 0x3FE62E4000000000, !dbg !544
br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !544
__nv_fmul_rn.exit2.i.i.i.i: ; preds = %125, %123
%.019.i.i = phi float [ %124, %123 ], [ %126, %125 ], !dbg !544
%127 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not69 = icmp eq i32 %127, 0, !dbg !544
br i1 %.not69, label %130, label %128, !dbg !544
128: ; preds = %__nv_fmul_rn.exit2.i.i.i.i
%129 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3EB7F7D1C0000000) #16, !dbg !544
br label %__internal_log_ep.exit.i.i.i, !dbg !544
130: ; preds = %__nv_fmul_rn.exit2.i.i.i.i
%131 = fmul float %expo.i.i.2.i.i, 0x3EB7F7D1C0000000, !dbg !544
br label %__internal_log_ep.exit.i.i.i, !dbg !544
__internal_log_ep.exit.i.i.i: ; preds = %130, %128
%.018.i.i = phi float [ %129, %128 ], [ %131, %130 ], !dbg !544
%132 = fadd float %119, %.019.i.i, !dbg !544
%133 = fsub float %.019.i.i, %132, !dbg !544
%134 = fadd float %119, %133, !dbg !544
%135 = fadd float %121, %134, !dbg !544
%136 = fadd float %135, %.018.i.i, !dbg !544
%137 = fadd float %132, %136, !dbg !544
%138 = fsub float %132, %137, !dbg !544
%139 = fadd float %136, %138, !dbg !544
%140 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not70 = icmp eq i32 %140, 0, !dbg !544
br i1 %.not70, label %__nv_fabsf.exit.i6.i.i, label %141, !dbg !544
141: ; preds = %__internal_log_ep.exit.i.i.i
%142 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544
br label %__nv_fabsf.exit.i6.i.i, !dbg !544
__nv_fabsf.exit.i6.i.i: ; preds = %141, %__internal_log_ep.exit.i.i.i
%.027.i.i = phi float [ %142, %141 ], [ 4.000000e+00, %__internal_log_ep.exit.i.i.i ], !dbg !544
%143 = fcmp ogt float %.027.i.i, 0x46FED09BE0000000, !dbg !544
%.013.i.i = select i1 %143, float 0x3F40000000000000, float 4.000000e+00, !dbg !544
%144 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not71 = icmp eq i32 %144, 0, !dbg !544
br i1 %.not71, label %147, label %145, !dbg !544
145: ; preds = %__nv_fabsf.exit.i6.i.i
%146 = call float @llvm.nvvm.mul.rn.ftz.f(float noundef %.013.i.i, float %137) #16, !dbg !544
br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !544
147: ; preds = %__nv_fabsf.exit.i6.i.i
%148 = fmul float %137, %.013.i.i, !dbg !544
br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !544
__nv_fmul_rn.exit.i.i.i.i: ; preds = %147, %145
%.028.i.i = phi float [ %146, %145 ], [ %148, %147 ], !dbg !544
%149 = fneg float %.028.i.i, !dbg !544
%150 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not72 = icmp eq i32 %150, 0, !dbg !544
br i1 %.not72, label %153, label %151, !dbg !544
151: ; preds = %__nv_fmul_rn.exit.i.i.i.i
%152 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %137, float %149) #16, !dbg !544
br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !544
153: ; preds = %__nv_fmul_rn.exit.i.i.i.i
%154 = call float @llvm.fma.f32(float noundef %.013.i.i, float %137, float %149) #14, !dbg !544
br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !544
__nv_fmaf_rn.exit.i3.i.i.i: ; preds = %153, %151
%.029.i.i = phi float [ %152, %151 ], [ %154, %153 ], !dbg !544
%155 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not73 = icmp eq i32 %155, 0, !dbg !544
br i1 %.not73, label %158, label %156, !dbg !544
156: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i
%157 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %139, float %.029.i.i) #16, !dbg !544
br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !544
158: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i
%159 = call float @llvm.fma.f32(float noundef %.013.i.i, float %139, float %.029.i.i) #14, !dbg !544
br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !544
__nv_fmaf_rn.exit1.i.i.i.i: ; preds = %158, %156
%.030.i.i = phi float [ %157, %156 ], [ %159, %158 ], !dbg !544
%160 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not74 = icmp eq i32 %160, 0, !dbg !544
br i1 %.not74, label %163, label %161, !dbg !544
161: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i
%162 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0.000000e+00, float %137, float %.030.i.i) #16, !dbg !544
br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !544
163: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i
%164 = call float @llvm.fma.f32(float %137, float noundef 0.000000e+00, float %.030.i.i) #14, !dbg !544
br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !544
__nv_fmaf_rn.exit2.i.i.i.i: ; preds = %163, %161
%.031.i.i = phi float [ %162, %161 ], [ %164, %163 ], !dbg !544
%165 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not75 = icmp eq i32 %165, 0, !dbg !544
br i1 %.not75, label %168, label %166, !dbg !544
166: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i
%167 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %.031.i.i) #16, !dbg !544
br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !544
168: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i
%169 = fadd float %.028.i.i, %.031.i.i, !dbg !544
br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !544
__nv_fadd_rn.exit.i.i.i.i: ; preds = %168, %166
%.032.i.i = phi float [ %167, %166 ], [ %169, %168 ], !dbg !544
%170 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not76 = icmp eq i32 %170, 0, !dbg !544
br i1 %.not76, label %174, label %171, !dbg !544
171: ; preds = %__nv_fadd_rn.exit.i.i.i.i
%172 = fneg float %.032.i.i, !dbg !544
%173 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %172) #16, !dbg !544
br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !544
174: ; preds = %__nv_fadd_rn.exit.i.i.i.i
%175 = fsub float %.028.i.i, %.032.i.i, !dbg !544
br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !544
__nv_fadd_rn.exit3.i.i.i.i: ; preds = %174, %171
%.033.i.i = phi float [ %173, %171 ], [ %175, %174 ], !dbg !544
%176 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not77 = icmp eq i32 %176, 0, !dbg !544
br i1 %.not77, label %179, label %177, !dbg !544
177: ; preds = %__nv_fadd_rn.exit3.i.i.i.i
%178 = call float @llvm.nvvm.add.rn.ftz.f(float %.033.i.i, float %.031.i.i) #16, !dbg !544
br label %__internal_dsmul.exit.i.i.i, !dbg !544
179: ; preds = %__nv_fadd_rn.exit3.i.i.i.i
%180 = fadd float %.031.i.i, %.033.i.i, !dbg !544
br label %__internal_dsmul.exit.i.i.i, !dbg !544
__internal_dsmul.exit.i.i.i: ; preds = %179, %177
%.034.i.i = phi float [ %178, %177 ], [ %180, %179 ], !dbg !544
%181 = bitcast float %.032.i.i to i32, !dbg !544
%182 = icmp eq i32 %181, 1118925336, !dbg !544
%183 = add i32 %181, -1, !dbg !544
%184 = bitcast i32 %183 to float, !dbg !544
%185 = fadd float %.034.i.i, 0x3EE0000000000000, !dbg !544
%prod.i.044.0.i.i = select i1 %182, float %185, float %.034.i.i, !dbg !544
%prod.i.145.0.i.i = select i1 %182, float %184, float %.032.i.i, !dbg !544
%186 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not78 = icmp eq i32 %186, 0, !dbg !544
br i1 %.not78, label %189, label %187, !dbg !544
187: ; preds = %__internal_dsmul.exit.i.i.i
%188 = call float @llvm.nvvm.mul.rn.ftz.f(float %prod.i.145.0.i.i, float noundef 0x3FF7154760000000) #16, !dbg !544
br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !544
189: ; preds = %__internal_dsmul.exit.i.i.i
%190 = fmul float %prod.i.145.0.i.i, 0x3FF7154760000000, !dbg !544
br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !544
__nv_fmul_rn.exit.i10.i.i.i: ; preds = %189, %187
%.017.i.i = phi float [ %188, %187 ], [ %190, %189 ], !dbg !544
%191 = call float @llvm.trunc.f32(float %.017.i.i) #14, !dbg !544
%192 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not79 = icmp eq i32 %192, 0, !dbg !544
br i1 %.not79, label %195, label %193, !dbg !544
193: ; preds = %__nv_fmul_rn.exit.i10.i.i.i
%194 = call float @llvm.nvvm.fabs.ftz.f(float %191) #16, !dbg !544
br label %__nv_fabsf.exit.i.i.i.i, !dbg !544
195: ; preds = %__nv_fmul_rn.exit.i10.i.i.i
%196 = call float @llvm.fabs.f32(float %191) #14, !dbg !544
br label %__nv_fabsf.exit.i.i.i.i, !dbg !544
__nv_fabsf.exit.i.i.i.i: ; preds = %195, %193
%.023.i.i = phi float [ %194, %193 ], [ %196, %195 ], !dbg !544
%197 = fcmp ogt float %.023.i.i, 1.260000e+02, !dbg !544
%198 = bitcast float %191 to i32, !dbg !544
%199 = and i32 %198, -2147483648, !dbg !544
%200 = or i32 %199, 1123811328, !dbg !544
%201 = bitcast i32 %200 to float, !dbg !544
%j.i.i.0.i.i = select i1 %197, float %201, float %191, !dbg !544
%202 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not80 = icmp eq i32 %202, 0, !dbg !544
br i1 %.not80, label %205, label %203, !dbg !544
203: ; preds = %__nv_fabsf.exit.i.i.i.i
%204 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #16, !dbg !544
br label %__internal_fmad.exit4.i.i.i.i, !dbg !544
205: ; preds = %__nv_fabsf.exit.i.i.i.i
%206 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #14, !dbg !544
br label %__internal_fmad.exit4.i.i.i.i, !dbg !544
__internal_fmad.exit4.i.i.i.i: ; preds = %205, %203
%.035.i.i = phi float [ %204, %203 ], [ %206, %205 ], !dbg !544
%207 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not81 = icmp eq i32 %207, 0, !dbg !544
br i1 %.not81, label %210, label %208, !dbg !544
208: ; preds = %__internal_fmad.exit4.i.i.i.i
%209 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #16, !dbg !544
br label %__internal_expf_kernel.exit.i.i.i, !dbg !544
210: ; preds = %__internal_fmad.exit4.i.i.i.i
%211 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #14, !dbg !544
br label %__internal_expf_kernel.exit.i.i.i, !dbg !544
__internal_expf_kernel.exit.i.i.i: ; preds = %210, %208
%.036.i.i = phi float [ %209, %208 ], [ %211, %210 ], !dbg !544
%212 = fmul float %.036.i.i, 0x3FF7154760000000, !dbg !544
%213 = fadd float %j.i.i.0.i.i, 0x4168000FE0000000, !dbg !544
%214 = bitcast float %213 to i32, !dbg !544
%215 = shl i32 %214, 23, !dbg !544
%216 = bitcast i32 %215 to float, !dbg !544
%217 = call float @llvm.nvvm.ex2.approx.ftz.f(float %212) #16, !dbg !544
%218 = fmul float %217, %216, !dbg !544
%219 = fcmp une float %218, 0x7FF0000000000000, !dbg !544
br i1 %219, label %220, label %__internal_accurate_powf.exit.i.i, !dbg !544
220: ; preds = %__internal_expf_kernel.exit.i.i.i
%221 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not94 = icmp eq i32 %221, 0, !dbg !544
br i1 %.not94, label %224, label %222, !dbg !544
222: ; preds = %220
%223 = call float @llvm.nvvm.fma.rn.ftz.f(float %218, float %prod.i.044.0.i.i, float %218) #16, !dbg !544
br label %__internal_accurate_powf.exit.i.i, !dbg !544
224: ; preds = %220
%225 = call float @llvm.fma.f32(float %218, float %prod.i.044.0.i.i, float %218) #14, !dbg !544
br label %__internal_accurate_powf.exit.i.i, !dbg !544
__internal_accurate_powf.exit.i.i: ; preds = %224, %222, %__internal_expf_kernel.exit.i.i.i
%t.i.0.i.i = phi float [ 0x7FF0000000000000, %__internal_expf_kernel.exit.i.i.i ], [ %223, %222 ], [ %225, %224 ], !dbg !544
%226 = fcmp olt float %51, 0.000000e+00, !dbg !544
%227 = and i1 %226, %56, !dbg !544
%228 = bitcast float %t.i.0.i.i to i32, !dbg !544
%229 = xor i32 %228, -2147483648, !dbg !544
%230 = bitcast i32 %229 to float, !dbg !544
%.010.i.i = select i1 %227, float %230, float %t.i.0.i.i, !dbg !544
%231 = fcmp oeq float %51, 0.000000e+00, !dbg !544
%232 = fadd float %51, %51, !dbg !544
%233 = select i1 %56, float %232, float 0.000000e+00, !dbg !544
%.212.i.i = select i1 %231, float %233, float %.010.i.i, !dbg !544
%234 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not = icmp eq i32 %234, 0, !dbg !544
br i1 %.not, label %237, label %235, !dbg !544
235: ; preds = %__internal_accurate_powf.exit.i.i
%236 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544
br label %__nv_fabsf.exit.i.i.i, !dbg !544
237: ; preds = %__internal_accurate_powf.exit.i.i
%238 = call float @llvm.fabs.f32(float %51) #14, !dbg !544
br label %__nv_fabsf.exit.i.i.i, !dbg !544
__nv_fabsf.exit.i.i.i: ; preds = %237, %235
%.01.i.i = phi float [ %236, %235 ], [ %238, %237 ], !dbg !544
%239 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not82 = icmp eq i32 %239, 0, !dbg !544
br i1 %.not82, label %__nv_fabsf.exit1.i.i.i, label %240, !dbg !544
240: ; preds = %__nv_fabsf.exit.i.i.i
%241 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544
br label %__nv_fabsf.exit1.i.i.i, !dbg !544
__nv_fabsf.exit1.i.i.i: ; preds = %240, %__nv_fabsf.exit.i.i.i
%.02.i.i = phi float [ %241, %240 ], [ 4.000000e+00, %__nv_fabsf.exit.i.i.i ], !dbg !544
%242 = fadd float %.01.i.i, %.02.i.i, !dbg !544
%243 = bitcast float %242 to i32, !dbg !544
%244 = icmp sgt i32 %243, 2139095039, !dbg !544
br i1 %244, label %245, label %__internal_powf_infinite_cases.exit.i.i, !dbg !544
245: ; preds = %__nv_fabsf.exit1.i.i.i
%246 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not88 = icmp eq i32 %246, 0, !dbg !544
br i1 %.not88, label %249, label %247, !dbg !544
247: ; preds = %245
%248 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544
br label %__nv_isnanf.exit.i.i.i, !dbg !544
249: ; preds = %245
%250 = call float @llvm.fabs.f32(float %51) #14, !dbg !544
br label %__nv_isnanf.exit.i.i.i, !dbg !544
__nv_isnanf.exit.i.i.i: ; preds = %249, %247
%.06.i.i = phi float [ %248, %247 ], [ %250, %249 ], !dbg !544
%251 = fcmp ugt float %.06.i.i, 0x7FF0000000000000, !dbg !544
br i1 %251, label %257, label %252, !dbg !544
252: ; preds = %__nv_isnanf.exit.i.i.i
%253 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not89 = icmp eq i32 %253, 0, !dbg !544
br i1 %.not89, label %__nv_isnanf.exit11.i.i.i, label %254, !dbg !544
254: ; preds = %252
%255 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544
br label %__nv_isnanf.exit11.i.i.i, !dbg !544
__nv_isnanf.exit11.i.i.i: ; preds = %254, %252
%.07.i.i = phi float [ %255, %254 ], [ 4.000000e+00, %252 ], !dbg !544
%256 = fcmp ugt float %.07.i.i, 0x7FF0000000000000, !dbg !544
br i1 %256, label %257, label %259, !dbg !544
257: ; preds = %__nv_isnanf.exit11.i.i.i, %__nv_isnanf.exit.i.i.i
%258 = fadd float %51, 4.000000e+00, !dbg !544
br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544
259: ; preds = %__nv_isnanf.exit11.i.i.i
%260 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not90 = icmp eq i32 %260, 0, !dbg !544
br i1 %.not90, label %__nv_isinff.exit8.i.i.i, label %261, !dbg !544
261: ; preds = %259
%262 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544
br label %__nv_isinff.exit8.i.i.i, !dbg !544
__nv_isinff.exit8.i.i.i: ; preds = %261, %259
%.05.i.i = phi float [ %262, %261 ], [ 4.000000e+00, %259 ], !dbg !544
%263 = fcmp oeq float %.05.i.i, 0x7FF0000000000000, !dbg !544
br i1 %263, label %264, label %274, !dbg !544
264: ; preds = %__nv_isinff.exit8.i.i.i
%265 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not93 = icmp eq i32 %265, 0, !dbg !544
br i1 %.not93, label %268, label %266, !dbg !544
266: ; preds = %264
%267 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544
br label %__nv_fabsf.exit5.i.i.i, !dbg !544
268: ; preds = %264
%269 = call float @llvm.fabs.f32(float %51) #14, !dbg !544
br label %__nv_fabsf.exit5.i.i.i, !dbg !544
__nv_fabsf.exit5.i.i.i: ; preds = %268, %266
%.04.i.i = phi float [ %267, %266 ], [ %269, %268 ], !dbg !544
%270 = fcmp ogt float %.04.i.i, 1.000000e+00, !dbg !544
%271 = fcmp oeq float %51, -1.000000e+00, !dbg !544
%272 = select i1 %270, float 0x7FF0000000000000, float 0.000000e+00, !dbg !544
%273 = select i1 %271, float 1.000000e+00, float %272, !dbg !544
br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544
274: ; preds = %__nv_isinff.exit8.i.i.i
%275 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544
%.not91 = icmp eq i32 %275, 0, !dbg !544
br i1 %.not91, label %278, label %276, !dbg !544
276: ; preds = %274
%277 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544
br label %__nv_isinff.exit.i.i.i, !dbg !544
278: ; preds = %274
%279 = call float @llvm.fabs.f32(float %51) #14, !dbg !544
br label %__nv_isinff.exit.i.i.i, !dbg !544
__nv_isinff.exit.i.i.i: ; preds = %278, %276
%.03.i.i = phi float [ %277, %276 ], [ %279, %278 ], !dbg !544
%280 = fcmp oeq float %.03.i.i, 0x7FF0000000000000, !dbg !544
br i1 %280, label %281, label %__internal_powf_infinite_cases.exit.i.i, !dbg !544
281: ; preds = %__nv_isinff.exit.i.i.i
%282 = select i1 %227, float 0xFFF0000000000000, float 0x7FF0000000000000, !dbg !544
br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544
__internal_powf_infinite_cases.exit.i.i: ; preds = %281, %__nv_isinff.exit.i.i.i, %__nv_fabsf.exit5.i.i.i, %257, %__nv_fabsf.exit1.i.i.i
%.3.i.i = phi float [ %.212.i.i, %__nv_fabsf.exit1.i.i.i ], [ %258, %257 ], [ %273, %__nv_fabsf.exit5.i.i.i ], [ %282, %281 ], [ %.212.i.i, %__nv_isinff.exit.i.i.i ], !dbg !544
%283 = fcmp oeq float %51, 1.000000e+00, !dbg !544
%t.0.i.i = select i1 %283, float 1.000000e+00, float %.3.i.i, !dbg !544
%284 = fadd float %value_phi13.i, %t.0.i.i, !dbg !547
%.not83 = icmp eq i64 %iv.next, %15, !dbg !548
%285 = add nuw i64 %iv.next, 1, !dbg !550
br i1 %.not83, label %L467.i.loopexit, label %L319.i, !dbg !551
L467.i.loopexit: ; preds = %__internal_powf_infinite_cases.exit.i.i
br label %L467.i, !dbg !552
L467.i: ; preds = %L467.i.loopexit, %L302.i
%value_phi17.i = phi float [ 0.000000e+00, %L302.i ], [ %284, %L467.i.loopexit ]
%286 = fpext float %value_phi17.i to double, !dbg !552
%287 = call i32 @llvm.nvvm.d2i.hi(double %286) #16, !dbg !558
%288 = call i32 @llvm.nvvm.d2i.hi(double noundef 2.500000e-01) #16, !dbg !558
%289 = and i32 %288, 2146435072, !dbg !558
%290 = icmp eq i32 %289, 1072693248, !dbg !558
%291 = call double @llvm.fabs.f64(double %286) #14, !dbg !558
%292 = call fastcc double @__internal_accurate_pow(double %291) #16, !dbg !558
%293 = icmp slt i32 %287, 0, !dbg !558
%294 = and i1 %293, %290, !dbg !558
br i1 %294, label %295, label %300, !dbg !558
295: ; preds = %L467.i
%296 = call i32 @llvm.nvvm.d2i.hi(double %292) #16, !dbg !558
%297 = call i32 @llvm.nvvm.d2i.lo(double %292) #16, !dbg !558
%298 = xor i32 %296, -2147483648, !dbg !558
%299 = call double @llvm.nvvm.lohi.i2d(i32 %297, i32 %298) #16, !dbg !558
br label %300, !dbg !558
300: ; preds = %295, %L467.i
%t.0.i37.i = phi double [ %299, %295 ], [ %292, %L467.i ], !dbg !558
%301 = fcmp oeq float %value_phi17.i, 0.000000e+00, !dbg !558
br i1 %301, label %302, label %306, !dbg !558
302: ; preds = %300
%spec.select = select i1 %290, i32 %287, i32 0, !dbg !558
%303 = icmp slt i32 %288, 0, !dbg !558
%304 = or i32 %spec.select, 2146435072, !dbg !558
%thi.1.i.i = select i1 %303, i32 %304, i32 %spec.select, !dbg !558
%305 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.1.i.i) #16, !dbg !558
br label %307, !dbg !558
306: ; preds = %300
%spec.select97 = select i1 %293, double 0xFFF8000000000000, double %t.0.i37.i, !dbg !558
br label %307, !dbg !558
307: ; preds = %306, %302
%t.2.i.i = phi double [ %305, %302 ], [ %spec.select97, %306 ], !dbg !558
%308 = fadd double %286, 2.500000e-01, !dbg !558
%309 = call i32 @llvm.nvvm.d2i.hi(double %308) #16, !dbg !558
%310 = and i32 %309, 2146435072, !dbg !558
%311 = icmp eq i32 %310, 2146435072, !dbg !558
br i1 %311, label %312, label %__nv_pow.exit.i, !dbg !558
312: ; preds = %307
%313 = fcmp ugt double %291, 0x7FF0000000000000, !dbg !558
br i1 %313, label %__nv_pow.exit.i, label %314, !dbg !558
314: ; preds = %312
%315 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%316 = icmp eq i32 %315, 200, !dbg !558
br i1 %316, label %.critedge, label %317, !dbg !558
317: ; preds = %314
%318 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%319 = icmp eq i32 %318, 350, !dbg !558
br i1 %319, label %.critedge, label %320, !dbg !558
320: ; preds = %317
%321 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%322 = icmp eq i32 %321, 370, !dbg !558
br i1 %322, label %.critedge, label %__nv_isinfd.exit4.i.i, !dbg !558
__nv_isinfd.exit4.i.i: ; preds = %320
%323 = call i32 @llvm.nvvm.d2i.lo(double noundef 2.500000e-01) #16, !dbg !558
%324 = and i32 %288, 2147483647, !dbg !558
%325 = icmp eq i32 %324, 2146435072, !dbg !558
%326 = icmp eq i32 %323, 0, !dbg !558
%327 = and i1 %326, %325, !dbg !558
br i1 %327, label %328, label %.critedge, !dbg !558
328: ; preds = %__nv_isinfd.exit4.i.i
%329 = fcmp ogt double %291, 1.000000e+00, !dbg !558
%thi.2.i.i = select i1 %329, i32 2146435072, i32 0, !dbg !558
%330 = icmp slt i32 %288, 0, !dbg !558
%331 = xor i32 %thi.2.i.i, 2146435072
%spec.select8 = select i1 %330, i32 %331, i32 %thi.2.i.i, !dbg !558
%332 = fcmp oeq float %value_phi17.i, -1.000000e+00, !dbg !558
%thi.4.i.i = select i1 %332, i32 1072693248, i32 %spec.select8, !dbg !558
%333 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.4.i.i) #16, !dbg !558
br label %__nv_pow.exit.i, !dbg !558
.critedge: ; preds = %__nv_isinfd.exit4.i.i, %320, %317, %314
%334 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%335 = icmp eq i32 %334, 200, !dbg !558
br i1 %335, label %342, label %336, !dbg !558
336: ; preds = %.critedge
%337 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%338 = icmp eq i32 %337, 350, !dbg !558
br i1 %338, label %342, label %339, !dbg !558
339: ; preds = %336
%340 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558
%341 = icmp eq i32 %340, 370, !dbg !558
br i1 %341, label %342, label %344, !dbg !558
342: ; preds = %339, %336, %.critedge
%343 = fcmp oeq double %291, 0x7FF0000000000000, !dbg !558
br label %__nv_isinfd.exit.i.i, !dbg !558
344: ; preds = %339
%345 = call i32 @llvm.nvvm.d2i.lo(double %286) #16, !dbg !558
%346 = and i32 %287, 2147483647, !dbg !558
%347 = icmp eq i32 %346, 2146435072, !dbg !558
%348 = icmp eq i32 %345, 0, !dbg !558
%349 = and i1 %348, %347, !dbg !558
br label %__nv_isinfd.exit.i.i, !dbg !558
__nv_isinfd.exit.i.i: ; preds = %344, %342
%.0.i39.i.in = phi i1 [ %343, %342 ], [ %349, %344 ]
br i1 %.0.i39.i.in, label %350, label %__nv_pow.exit.i, !dbg !558
350: ; preds = %__nv_isinfd.exit.i.i
%.inv87 = icmp slt i32 %288, 0, !dbg !558
%spec.select9 = select i1 %.inv87, i32 0, i32 2146435072, !dbg !558
%351 = and i32 %288, 2147483647, !dbg !558
%352 = icmp ne i32 %351, 1071644672, !dbg !558
%or.cond11 = and i1 %352, %294, !dbg !558
%353 = or i32 %spec.select9, -2147483648, !dbg !558
%thi.6.i.i = select i1 %or.cond11, i32 %353, i32 %spec.select9, !dbg !558
%354 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.6.i.i) #16, !dbg !558
br label %__nv_pow.exit.i, !dbg !558
__nv_pow.exit.i: ; preds = %350, %__nv_isinfd.exit.i.i, %328, %312, %307
%t.6.i.i = phi double [ %t.2.i.i, %307 ], [ %333, %328 ], [ %354, %350 ], [ %t.2.i.i, %__nv_isinfd.exit.i.i ], [ %308, %312 ], !dbg !558
%355 = icmp sgt i64 %.fca.2.0.extract, 0, !dbg !559
%356 = select i1 %355, i64 %.fca.2.0.extract, i64 0, !dbg !559
%357 = load i64, i64* %13, align 8, !dbg !569, !tbaa !301
%358 = load i64, i64* %14, align 8, !dbg !573, !tbaa !301
%359 = add i64 %358, -1, !dbg !573
%360 = mul i64 %359, %356, !dbg !576
%361 = add i64 %360, %357, !dbg !577
%362 = icmp sgt i64 %.fca.3.extract, 0, !dbg !578
%363 = select i1 %362, i64 %.fca.3.extract, i64 0, !dbg !578
%364 = icmp slt i64 %361, 1, !dbg !587
%365 = icmp sgt i64 %361, %363, !dbg !587
%366 = or i1 %364, %365, !dbg !589
br i1 %366, label %L493.i, label %L491.i, !dbg !589
L491.i: ; preds = %__nv_pow.exit.i
%367 = fcmp oeq float %value_phi17.i, 1.000000e+00, !dbg !558
%368 = fptrunc double %t.6.i.i to float, !dbg !590
%369 = select i1 %367, float 1.000000e+00, float %368, !dbg !590
%370 = add nsw i64 %361, -1, !dbg !592
%371 = getelementptr inbounds float, float addrspace(1)* %12, i64 %370, !dbg !599
store float %369, float addrspace(1)* %371, align 4, !dbg !599, !tbaa !435
br label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !600
L493.i: ; preds = %__nv_pow.exit.i
%372 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5571() #15, !dbg !589
unreachable, !dbg !589
fail.i: ; preds = %entry
call fastcc void @gpu_report_exception() #14, !dbg !501
call fastcc void @gpu_signal_exception() #14, !dbg !501
call void asm sideeffect "exit;", ""() #16, !dbg !501
unreachable, !dbg !501
pass.i: ; preds = %entry
%373 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #14, !dbg !601, !range !127
%374 = udiv i64 %11, %.fca.1.0.0.0.0.extract, !dbg !501
%375 = mul i64 %374, %.fca.1.0.0.0.0.extract, !dbg !606
%376 = sub i64 %11, %375, !dbg !608
%377 = zext i32 %373 to i64, !dbg !499
%378 = lshr i64 %377, 8, !dbg !501
%379 = and i64 %377, 255, !dbg !608
%380 = add nuw nsw i64 %379, 1, !dbg !609
%381 = shl i64 %376, 8, !dbg !610
%382 = add i64 %380, %381, !dbg !614
%383 = add nuw nsw i64 %374, 1, !dbg !615
%384 = add nuw nsw i64 %383, %378, !dbg !614
%385 = icmp sgt i64 %382, 0, !dbg !618
%386 = icmp sle i64 %382, %.fca.0.0.0.0.extract, !dbg !618
%387 = and i1 %385, %386, !dbg !623
%388 = icmp sle i64 %384, %.fca.0.0.1.0.extract, !dbg !618
%389 = and i1 %388, %387, !dbg !624
br i1 %389, label %L302.i, label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !626
julia_gpu_dist_kernel__5526_inner.exit: ; preds = %pass.i, %L491.i
call void @llvm.lifetime.end.p0i8(i64 noundef 16, i8* noundef nonnull align 8 dereferenceable(16) %7) #14, !dbg !627
ret void, !dbg !475
}
; Function Attrs: willreturn mustprogress
define internal void @diffejulia_gpu_dist_kernel__5526_inner19({ [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, { i8 addrspace(1)*, i64, [2 x i64], i64 } %"'", { i8 addrspace(1)*, i64, [2 x i64], i64 } %3) local_unnamed_addr #12 !dbg !628 {
entry:
%"'ipa79" = alloca [2 x i64], align 8
store [2 x i64] zeroinitializer, [2 x i64]* %"'ipa79", align 8
%4 = alloca [2 x i64], align 8
%"'ipa" = alloca { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, align 8, !dbg !629
store { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } zeroinitializer, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa", align 8, !dbg !629
%5 = alloca { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, align 8, !dbg !629
%"'ipc" = addrspacecast { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa" to { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)*, !dbg !629
%6 = addrspacecast { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5 to { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)*, !dbg !629
%.fca.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 0, 0, !dbg !629
%".fca.0.0.0.0.gep'ipg" = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa", i64 0, i32 0, i64 0, i64 0, i64 0, !dbg !629
%.fca.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 0, i64 0, !dbg !629
store i64 %.fca.0.0.0.0.extract, i64* %".fca.0.0.0.0.gep'ipg", align 8, !dbg !629
store i64 %.fca.0.0.0.0.extract, i64* %.fca.0.0.0.0.gep, align 8, !dbg !629
%.fca.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 1, 0, !dbg !629
%".fca.0.0.1.0.gep'ipg" = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa", i64 0, i32 0, i64 0, i64 1, i64 0, !dbg !629
%.fca.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 1, i64 0, !dbg !629
store i64 %.fca.0.0.1.0.extract, i64* %".fca.0.0.1.0.gep'ipg", align 8, !dbg !629
store i64 %.fca.0.0.1.0.extract, i64* %.fca.0.0.1.0.gep, align 8, !dbg !629
%.fca.1.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 0, 0, !dbg !629
%".fca.1.0.0.0.0.gep'ipg" = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa", i64 0, i32 1, i32 0, i64 0, i64 0, i64 0, !dbg !629
%.fca.1.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 0, i64 0, !dbg !629
store i64 %.fca.1.0.0.0.0.extract, i64* %".fca.1.0.0.0.0.gep'ipg", align 8, !dbg !629
store i64 %.fca.1.0.0.0.0.extract, i64* %.fca.1.0.0.0.0.gep, align 8, !dbg !629
%.fca.1.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 1, 0, !dbg !629
%".fca.1.0.0.1.0.gep'ipg" = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa", i64 0, i32 1, i32 0, i64 0, i64 1, i64 0, !dbg !629
%.fca.1.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 1, i64 0, !dbg !629
store i64 %.fca.1.0.0.1.0.extract, i64* %".fca.1.0.0.1.0.gep'ipg", align 8, !dbg !629
store i64 %.fca.1.0.0.1.0.extract, i64* %.fca.1.0.0.1.0.gep, align 8, !dbg !629
%.fca.0.extract30 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 0, !dbg !629
%.fca.1.extract32_replacementA = phi i64 , !dbg !629
%.fca.2.0.extract34 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 0, !dbg !629
%.fca.2.1.extract36 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 1, !dbg !629
%.fca.3.extract38_replacementA = phi i64 , !dbg !629
%".fca.0.extract12'ipev" = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %"'", 0, !dbg !629
%.fca.0.extract12 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 0, !dbg !629
%.fca.1.extract14_replacementA = phi i64 , !dbg !629
%.fca.2.0.extract16 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 0, !dbg !629
%.fca.2.1.extract18 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 1, !dbg !629
%.fca.3.extract20_replacementA = phi i64 , !dbg !629
%.fca.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 0, !dbg !629
%.fca.1.extract_replacementA = phi i64 , !dbg !629
%.fca.2.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 2, 0, !dbg !629
%.fca.2.1.extract_replacementA = phi i64 , !dbg !629
%.fca.3.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 3, !dbg !629
%_replacementA78 = phi i8*
%_replacementA = phi {}***
%7 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #14, !dbg !630, !range !65
%8 = icmp sgt i64 %.fca.1.0.0.0.0.extract, 0, !dbg !639
%9 = zext i32 %7 to i64, !dbg !653
%10 = bitcast i8 addrspace(1)* %.fca.0.extract to float addrspace(1)*, !dbg !655
br i1 %8, label %pass.i, label %fail.i, !dbg !655
L302.i: ; preds = %pass.i
%_augmented = call fastcc i64 @augmented_julia___index_Global_NTuple_5574([2 x i64]* %4, [2 x i64]* %"'ipa79", { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* %6, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* %"'ipc"), !dbg !658
store i64 %_augmented, i64* %_augmented_cache, align 8, !dbg !660, !invariant.group !663
%11 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 0, !dbg !660
%12 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 1, !dbg !664
%.inv = icmp sgt i64 %.fca.2.0.extract34, 0, !dbg !665
%13 = select i1 %.inv, i64 %.fca.2.0.extract34, i64 0, !dbg !665
br i1 %.inv, label %L319.i.preheader, label %L467.i, !dbg !669
L319.i.preheader: ; preds = %L302.i
%14 = load i64, i64* %11, align 8, !tbaa !301
%15 = icmp sgt i64 %.fca.2.1.extract36, 0
%16 = select i1 %15, i64 %.fca.2.1.extract36, i64 0
%17 = icmp sgt i64 %14, 0
%18 = icmp sle i64 %14, %16
%19 = and i1 %17, %18
%20 = add i64 %14, -1
%21 = mul i64 %20, %13
%22 = load i64, i64* %12, align 8
%23 = icmp sgt i64 %.fca.2.0.extract16, 0
%24 = select i1 %23, i64 %.fca.2.0.extract16, i64 0
%25 = icmp sgt i64 %.fca.2.1.extract18, 0
%26 = select i1 %25, i64 %.fca.2.1.extract18, i64 0
%27 = icmp sgt i64 %22, 0
%28 = icmp sle i64 %22, %26
%29 = and i1 %27, %28
%30 = add i64 %22, -1
%31 = mul i64 %30, %24
store i64 %31, i64* %_cache, align 8, !dbg !670, !invariant.group !674
%32 = add nsw i64 %13, -1, !dbg !670
%33 = add nuw i64 %32, 1, !dbg !670
%malloccall = tail call noalias nonnull i8* @malloc(i64 %33)
%.not59_malloccache = bitcast i8* %malloccall to i1*
call void @llvm.memset.p0i8.i64(i8* %malloccall, i8 0, i64 %33, i1 false), !dbg !670
store i1* %.not59_malloccache, i1** %.not59_cache, align 1, !dbg !670, !invariant.group !675
br label %L319.i, !dbg !670
L319.i: ; preds = %__internal_powf_infinite_cases.exit.i.i, %L319.i.preheader
%iv = phi i64 [ %iv.next, %__internal_powf_infinite_cases.exit.i.i ], [ 0, %L319.i.preheader ]
%value_phi13.i = phi float [ %286, %__internal_powf_infinite_cases.exit.i.i ], [ 0.000000e+00, %L319.i.preheader ]
%iv.next = add nuw nsw i64 %iv, 1, !dbg !676
%34 = icmp ule i64 %iv.next, %13, !dbg !676
%35 = and i1 %34, %19, !dbg !681
br i1 %35, label %L340.i, label %L342.i, !dbg !670
L340.i: ; preds = %L319.i
%36 = add i64 %21, %iv.next, !dbg !682
%37 = shl i64 %36, 2, !dbg !689
%38 = add i64 %37, -4, !dbg !689
%39 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract30, i64 %38, !dbg !694
%40 = bitcast i8 addrspace(1)* %39 to float addrspace(1)*, !dbg !695
%41 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %40, i32 noundef 4) #14, !dbg !695
%42 = icmp ule i64 %iv.next, %24, !dbg !676
%43 = and i1 %42, %29, !dbg !681
br i1 %43, label %L405.i, label %L407.i, !dbg !670
L342.i: ; preds = %L319.i
%44 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !670
unreachable
L405.i: ; preds = %L340.i
%45 = add i64 %31, %iv.next, !dbg !682
%46 = shl i64 %45, 2, !dbg !689
%47 = add i64 %46, -4, !dbg !689
%"'ipg" = getelementptr i8, i8 addrspace(1)* %".fca.0.extract12'ipev", i64 %47, !dbg !694
%48 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract12, i64 %47, !dbg !694
%"'ipc82" = bitcast i8 addrspace(1)* %"'ipg" to float addrspace(1)*, !dbg !695
%49 = bitcast i8 addrspace(1)* %48 to float addrspace(1)*, !dbg !695
%50 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %49, i32 noundef 4) #14, !dbg !695
%51 = fsub float %41, %50, !dbg !700
%52 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not59 = icmp eq i32 %52, 0, !dbg !701
%53 = load i1*, i1** %.not59_cache, align 8, !dbg !701, !dereferenceable !704, !invariant.group !675
%54 = getelementptr inbounds i1, i1* %53, i64 %iv, !dbg !701
store i1 %.not59, i1* %54, align 1, !dbg !701, !invariant.group !705
br i1 %.not59, label %__nv_fabsf.exit.i.i, label %56, !dbg !701
L407.i: ; preds = %L340.i
%55 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !670
unreachable
56: ; preds = %L405.i
%57 = call float @llvm.nvvm.fabs.ftz.f(float noundef 0.000000e+00) #16, !dbg !701
br label %__nv_fabsf.exit.i.i, !dbg !701
__nv_fabsf.exit.i.i: ; preds = %56, %L405.i
%.08.i.i = phi float [ %57, %56 ], [ 0.000000e+00, %L405.i ], !dbg !701
%58 = fcmp oeq float %.08.i.i, 1.000000e+00, !dbg !701
%59 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not60 = icmp eq i32 %59, 0, !dbg !701
br i1 %.not60, label %62, label %60, !dbg !701
60: ; preds = %__nv_fabsf.exit.i.i
%61 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !701
br label %__nv_fabsf.exit1.i.i, !dbg !701
62: ; preds = %__nv_fabsf.exit.i.i
%63 = call float @llvm.fabs.f32(float %51) #14, !dbg !701
br label %__nv_fabsf.exit1.i.i, !dbg !701
__nv_fabsf.exit1.i.i: ; preds = %62, %60
%.09.i.i = phi float [ %61, %60 ], [ %63, %62 ], !dbg !701
%64 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%"'ip_phi" = phi i32 , !dbg !701
%.not61 = icmp eq i32 %64, 0, !dbg !701
%65 = fcmp olt float %.09.i.i, 0x3810000000000000, !dbg !701
%66 = fmul float %.09.i.i, 0x4170000000000000, !dbg !701
%67 = and i1 %65, %.not61, !dbg !701
%.116.i.i = select i1 %67, float %66, float %.09.i.i, !dbg !701
%expo.i.i.1.i.i = select i1 %67, float -1.510000e+02, float -1.270000e+02, !dbg !701
%68 = bitcast float %.116.i.i to i32, !dbg !701
%69 = and i32 %68, 8388607, !dbg !701
%70 = or i32 %69, 1065353216, !dbg !701
%71 = bitcast i32 %70 to float, !dbg !701
%72 = lshr i32 %68, 23, !dbg !701
%73 = uitofp i32 %72 to float, !dbg !701
%74 = fadd float %expo.i.i.1.i.i, %73, !dbg !701
%75 = fcmp ogt float %71, 0x3FF6A09E60000000, !dbg !701
%76 = fmul float %71, 5.000000e-01, !dbg !701
%77 = fadd float %74, 1.000000e+00, !dbg !701
%expo.i.i.2.i.i = select i1 %75, float %77, float %74, !dbg !701
%m.i.i.0.i.i = select i1 %75, float %76, float %71, !dbg !701
%78 = fadd float %m.i.i.0.i.i, -1.000000e+00, !dbg !701
%79 = fadd float %m.i.i.0.i.i, 1.000000e+00, !dbg !701
%80 = call float asm "rcp.approx.ftz.f32 $0,$1;", "=f,f"(float %79) #17, !dbg !701, !srcloc !362
%81 = fmul float %78, 2.000000e+00, !dbg !701
%82 = fmul float %80, %81, !dbg !701
%83 = fmul float %82, %82, !dbg !701
%84 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not62 = icmp eq i32 %84, 0, !dbg !701
br i1 %.not62, label %87, label %85, !dbg !701
85: ; preds = %__nv_fabsf.exit1.i.i
%86 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0x3F631E1FC0000000, float %83, float noundef 0x3F8995EC60000000) #16, !dbg !701
br label %__internal_fmad.exit.i.i.i.i, !dbg !701
87: ; preds = %__nv_fabsf.exit1.i.i
%88 = call float @llvm.fma.f32(float %83, float noundef 0x3F631E1FC0000000, float noundef 0x3F8995EC60000000) #14, !dbg !701
br label %__internal_fmad.exit.i.i.i.i, !dbg !701
__internal_fmad.exit.i.i.i.i: ; preds = %87, %85
%.020.i.i = phi float [ %86, %85 ], [ %88, %87 ], !dbg !701
%89 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not63 = icmp eq i32 %89, 0, !dbg !701
br i1 %.not63, label %92, label %90, !dbg !701
90: ; preds = %__internal_fmad.exit.i.i.i.i
%91 = call float @llvm.nvvm.fma.rn.ftz.f(float %.020.i.i, float %83, float noundef 0x3FB55557A0000000) #16, !dbg !701
br label %__internal_fmad.exit3.i.i.i.i, !dbg !701
92: ; preds = %__internal_fmad.exit.i.i.i.i
%93 = call float @llvm.fma.f32(float %.020.i.i, float %83, float noundef 0x3FB55557A0000000) #14, !dbg !701
br label %__internal_fmad.exit3.i.i.i.i, !dbg !701
__internal_fmad.exit3.i.i.i.i: ; preds = %92, %90
%.021.i.i = phi float [ %91, %90 ], [ %93, %92 ], !dbg !701
%94 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not64 = icmp eq i32 %94, 0, !dbg !701
br i1 %.not64, label %97, label %95, !dbg !701
95: ; preds = %__internal_fmad.exit3.i.i.i.i
%96 = call float @llvm.nvvm.mul.rn.ftz.f(float %.021.i.i, float %83) #16, !dbg !701
br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !701
97: ; preds = %__internal_fmad.exit3.i.i.i.i
%98 = fmul float %83, %.021.i.i, !dbg !701
br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !701
__nv_fmul_rn.exit4.i.i.i.i: ; preds = %97, %95
%.022.i.i = phi float [ %96, %95 ], [ %98, %97 ], !dbg !701
%99 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not65 = icmp eq i32 %99, 0, !dbg !701
br i1 %.not65, label %102, label %100, !dbg !701
100: ; preds = %__nv_fmul_rn.exit4.i.i.i.i
%101 = call float @llvm.nvvm.mul.rn.ftz.f(float %.022.i.i, float %82) #16, !dbg !701
br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !701
102: ; preds = %__nv_fmul_rn.exit4.i.i.i.i
%103 = fmul float %82, %.022.i.i, !dbg !701
br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !701
__nv_fmul_rn.exit5.i.i.i.i: ; preds = %102, %100
%.024.i.i = phi float [ %101, %100 ], [ %103, %102 ], !dbg !701
%104 = fsub float %78, %82, !dbg !701
%105 = fmul float %104, 2.000000e+00, !dbg !701
%106 = fneg float %82, !dbg !701
%107 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not66 = icmp eq i32 %107, 0, !dbg !701
br i1 %.not66, label %110, label %108, !dbg !701
108: ; preds = %__nv_fmul_rn.exit5.i.i.i.i
%109 = call float @llvm.nvvm.fma.rn.ftz.f(float %106, float %78, float %105) #16, !dbg !701
br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !701
110: ; preds = %__nv_fmul_rn.exit5.i.i.i.i
%111 = call float @llvm.fma.f32(float %106, float %78, float %105) #14, !dbg !701
br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !701
__nv_fmaf_rn.exit.i.i.i.i: ; preds = %110, %108
%.025.i.i = phi float [ %109, %108 ], [ %111, %110 ], !dbg !701
%112 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not67 = icmp eq i32 %112, 0, !dbg !701
br i1 %.not67, label %115, label %113, !dbg !701
113: ; preds = %__nv_fmaf_rn.exit.i.i.i.i
%114 = call float @llvm.nvvm.mul.rn.ftz.f(float %80, float %.025.i.i) #16, !dbg !701
br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !701
115: ; preds = %__nv_fmaf_rn.exit.i.i.i.i
%116 = fmul float %80, %.025.i.i, !dbg !701
br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !701
__nv_fmul_rn.exit6.i.i.i.i: ; preds = %115, %113
%.026.i.i = phi float [ %114, %113 ], [ %116, %115 ], !dbg !701
%117 = fadd float %82, %.024.i.i, !dbg !701
%118 = fsub float %82, %117, !dbg !701
%119 = fadd float %.024.i.i, %118, !dbg !701
%120 = fadd float %119, %.026.i.i, !dbg !701
%121 = fadd float %117, %120, !dbg !701
%122 = fsub float %117, %121, !dbg !701
%123 = fadd float %120, %122, !dbg !701
%124 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not68 = icmp eq i32 %124, 0, !dbg !701
br i1 %.not68, label %127, label %125, !dbg !701
125: ; preds = %__nv_fmul_rn.exit6.i.i.i.i
%126 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3FE62E4000000000) #16, !dbg !701
br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !701
127: ; preds = %__nv_fmul_rn.exit6.i.i.i.i
%128 = fmul float %expo.i.i.2.i.i, 0x3FE62E4000000000, !dbg !701
br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !701
__nv_fmul_rn.exit2.i.i.i.i: ; preds = %127, %125
%.019.i.i = phi float [ %126, %125 ], [ %128, %127 ], !dbg !701
%129 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not69 = icmp eq i32 %129, 0, !dbg !701
br i1 %.not69, label %132, label %130, !dbg !701
130: ; preds = %__nv_fmul_rn.exit2.i.i.i.i
%131 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3EB7F7D1C0000000) #16, !dbg !701
br label %__internal_log_ep.exit.i.i.i, !dbg !701
132: ; preds = %__nv_fmul_rn.exit2.i.i.i.i
%133 = fmul float %expo.i.i.2.i.i, 0x3EB7F7D1C0000000, !dbg !701
br label %__internal_log_ep.exit.i.i.i, !dbg !701
__internal_log_ep.exit.i.i.i: ; preds = %132, %130
%.018.i.i = phi float [ %131, %130 ], [ %133, %132 ], !dbg !701
%134 = fadd float %121, %.019.i.i, !dbg !701
%135 = fsub float %.019.i.i, %134, !dbg !701
%136 = fadd float %121, %135, !dbg !701
%137 = fadd float %123, %136, !dbg !701
%138 = fadd float %137, %.018.i.i, !dbg !701
%139 = fadd float %134, %138, !dbg !701
%140 = fsub float %134, %139, !dbg !701
%141 = fadd float %138, %140, !dbg !701
%142 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not70 = icmp eq i32 %142, 0, !dbg !701
br i1 %.not70, label %__nv_fabsf.exit.i6.i.i, label %143, !dbg !701
143: ; preds = %__internal_log_ep.exit.i.i.i
%144 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !701
br label %__nv_fabsf.exit.i6.i.i, !dbg !701
__nv_fabsf.exit.i6.i.i: ; preds = %143, %__internal_log_ep.exit.i.i.i
%.027.i.i = phi float [ %144, %143 ], [ 4.000000e+00, %__internal_log_ep.exit.i.i.i ], !dbg !701
%145 = fcmp ogt float %.027.i.i, 0x46FED09BE0000000, !dbg !701
%.013.i.i = select i1 %145, float 0x3F40000000000000, float 4.000000e+00, !dbg !701
%146 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not71 = icmp eq i32 %146, 0, !dbg !701
br i1 %.not71, label %149, label %147, !dbg !701
147: ; preds = %__nv_fabsf.exit.i6.i.i
%148 = call float @llvm.nvvm.mul.rn.ftz.f(float noundef %.013.i.i, float %139) #16, !dbg !701
br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !701
149: ; preds = %__nv_fabsf.exit.i6.i.i
%150 = fmul float %139, %.013.i.i, !dbg !701
br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !701
__nv_fmul_rn.exit.i.i.i.i: ; preds = %149, %147
%.028.i.i = phi float [ %148, %147 ], [ %150, %149 ], !dbg !701
%151 = fneg float %.028.i.i, !dbg !701
%152 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not72 = icmp eq i32 %152, 0, !dbg !701
br i1 %.not72, label %155, label %153, !dbg !701
153: ; preds = %__nv_fmul_rn.exit.i.i.i.i
%154 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %139, float %151) #16, !dbg !701
br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !701
155: ; preds = %__nv_fmul_rn.exit.i.i.i.i
%156 = call float @llvm.fma.f32(float noundef %.013.i.i, float %139, float %151) #14, !dbg !701
br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !701
__nv_fmaf_rn.exit.i3.i.i.i: ; preds = %155, %153
%.029.i.i = phi float [ %154, %153 ], [ %156, %155 ], !dbg !701
%157 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not73 = icmp eq i32 %157, 0, !dbg !701
br i1 %.not73, label %160, label %158, !dbg !701
158: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i
%159 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %141, float %.029.i.i) #16, !dbg !701
br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !701
160: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i
%161 = call float @llvm.fma.f32(float noundef %.013.i.i, float %141, float %.029.i.i) #14, !dbg !701
br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !701
__nv_fmaf_rn.exit1.i.i.i.i: ; preds = %160, %158
%.030.i.i = phi float [ %159, %158 ], [ %161, %160 ], !dbg !701
%162 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not74 = icmp eq i32 %162, 0, !dbg !701
br i1 %.not74, label %165, label %163, !dbg !701
163: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i
%164 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0.000000e+00, float %139, float %.030.i.i) #16, !dbg !701
br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !701
165: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i
%166 = call float @llvm.fma.f32(float %139, float noundef 0.000000e+00, float %.030.i.i) #14, !dbg !701
br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !701
__nv_fmaf_rn.exit2.i.i.i.i: ; preds = %165, %163
%.031.i.i = phi float [ %164, %163 ], [ %166, %165 ], !dbg !701
%167 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not75 = icmp eq i32 %167, 0, !dbg !701
br i1 %.not75, label %170, label %168, !dbg !701
168: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i
%169 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %.031.i.i) #16, !dbg !701
br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !701
170: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i
%171 = fadd float %.028.i.i, %.031.i.i, !dbg !701
br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !701
__nv_fadd_rn.exit.i.i.i.i: ; preds = %170, %168
%.032.i.i = phi float [ %169, %168 ], [ %171, %170 ], !dbg !701
%172 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not76 = icmp eq i32 %172, 0, !dbg !701
br i1 %.not76, label %176, label %173, !dbg !701
173: ; preds = %__nv_fadd_rn.exit.i.i.i.i
%174 = fneg float %.032.i.i, !dbg !701
%175 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %174) #16, !dbg !701
br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !701
176: ; preds = %__nv_fadd_rn.exit.i.i.i.i
%177 = fsub float %.028.i.i, %.032.i.i, !dbg !701
br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !701
__nv_fadd_rn.exit3.i.i.i.i: ; preds = %176, %173
%.033.i.i = phi float [ %175, %173 ], [ %177, %176 ], !dbg !701
%178 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not77 = icmp eq i32 %178, 0, !dbg !701
br i1 %.not77, label %181, label %179, !dbg !701
179: ; preds = %__nv_fadd_rn.exit3.i.i.i.i
%180 = call float @llvm.nvvm.add.rn.ftz.f(float %.033.i.i, float %.031.i.i) #16, !dbg !701
br label %__internal_dsmul.exit.i.i.i, !dbg !701
181: ; preds = %__nv_fadd_rn.exit3.i.i.i.i
%182 = fadd float %.031.i.i, %.033.i.i, !dbg !701
br label %__internal_dsmul.exit.i.i.i, !dbg !701
__internal_dsmul.exit.i.i.i: ; preds = %181, %179
%.034.i.i = phi float [ %180, %179 ], [ %182, %181 ], !dbg !701
%183 = bitcast float %.032.i.i to i32, !dbg !701
%184 = icmp eq i32 %183, 1118925336, !dbg !701
%185 = add i32 %183, -1, !dbg !701
%186 = bitcast i32 %185 to float, !dbg !701
%187 = fadd float %.034.i.i, 0x3EE0000000000000, !dbg !701
%prod.i.044.0.i.i = select i1 %184, float %187, float %.034.i.i, !dbg !701
%prod.i.145.0.i.i = select i1 %184, float %186, float %.032.i.i, !dbg !701
%188 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not78 = icmp eq i32 %188, 0, !dbg !701
br i1 %.not78, label %191, label %189, !dbg !701
189: ; preds = %__internal_dsmul.exit.i.i.i
%190 = call float @llvm.nvvm.mul.rn.ftz.f(float %prod.i.145.0.i.i, float noundef 0x3FF7154760000000) #16, !dbg !701
br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !701
191: ; preds = %__internal_dsmul.exit.i.i.i
%192 = fmul float %prod.i.145.0.i.i, 0x3FF7154760000000, !dbg !701
br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !701
__nv_fmul_rn.exit.i10.i.i.i: ; preds = %191, %189
%.017.i.i = phi float [ %190, %189 ], [ %192, %191 ], !dbg !701
%193 = call float @llvm.trunc.f32(float %.017.i.i) #14, !dbg !701
%194 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not79 = icmp eq i32 %194, 0, !dbg !701
br i1 %.not79, label %197, label %195, !dbg !701
195: ; preds = %__nv_fmul_rn.exit.i10.i.i.i
%196 = call float @llvm.nvvm.fabs.ftz.f(float %193) #16, !dbg !701
br label %__nv_fabsf.exit.i.i.i.i, !dbg !701
197: ; preds = %__nv_fmul_rn.exit.i10.i.i.i
%198 = call float @llvm.fabs.f32(float %193) #14, !dbg !701
br label %__nv_fabsf.exit.i.i.i.i, !dbg !701
__nv_fabsf.exit.i.i.i.i: ; preds = %197, %195
%.023.i.i = phi float [ %196, %195 ], [ %198, %197 ], !dbg !701
%199 = fcmp ogt float %.023.i.i, 1.260000e+02, !dbg !701
%200 = bitcast float %193 to i32, !dbg !701
%201 = and i32 %200, -2147483648, !dbg !701
%202 = or i32 %201, 1123811328, !dbg !701
%203 = bitcast i32 %202 to float, !dbg !701
%j.i.i.0.i.i = select i1 %199, float %203, float %193, !dbg !701
%204 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not80 = icmp eq i32 %204, 0, !dbg !701
br i1 %.not80, label %207, label %205, !dbg !701
205: ; preds = %__nv_fabsf.exit.i.i.i.i
%206 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #16, !dbg !701
br label %__internal_fmad.exit4.i.i.i.i, !dbg !701
207: ; preds = %__nv_fabsf.exit.i.i.i.i
%208 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #14, !dbg !701
br label %__internal_fmad.exit4.i.i.i.i, !dbg !701
__internal_fmad.exit4.i.i.i.i: ; preds = %207, %205
%.035.i.i = phi float [ %206, %205 ], [ %208, %207 ], !dbg !701
%209 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not81 = icmp eq i32 %209, 0, !dbg !701
br i1 %.not81, label %212, label %210, !dbg !701
210: ; preds = %__internal_fmad.exit4.i.i.i.i
%211 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #16, !dbg !701
br label %__internal_expf_kernel.exit.i.i.i, !dbg !701
212: ; preds = %__internal_fmad.exit4.i.i.i.i
%213 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #14, !dbg !701
br label %__internal_expf_kernel.exit.i.i.i, !dbg !701
__internal_expf_kernel.exit.i.i.i: ; preds = %212, %210
%.036.i.i = phi float [ %211, %210 ], [ %213, %212 ], !dbg !701
%214 = fmul float %.036.i.i, 0x3FF7154760000000, !dbg !701
%215 = fadd float %j.i.i.0.i.i, 0x4168000FE0000000, !dbg !701
%216 = bitcast float %215 to i32, !dbg !701
%217 = shl i32 %216, 23, !dbg !701
%218 = bitcast i32 %217 to float, !dbg !701
%219 = call float @llvm.nvvm.ex2.approx.ftz.f(float %214) #16, !dbg !701
%220 = fmul float %219, %218, !dbg !701
%221 = fcmp une float %220, 0x7FF0000000000000, !dbg !701
br i1 %221, label %222, label %__internal_accurate_powf.exit.i.i, !dbg !701
222: ; preds = %__internal_expf_kernel.exit.i.i.i
%223 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not94 = icmp eq i32 %223, 0, !dbg !701
br i1 %.not94, label %226, label %224, !dbg !701
224: ; preds = %222
%225 = call float @llvm.nvvm.fma.rn.ftz.f(float %220, float %prod.i.044.0.i.i, float %220) #16, !dbg !701
br label %__internal_accurate_powf.exit.i.i, !dbg !701
226: ; preds = %222
%227 = call float @llvm.fma.f32(float %220, float %prod.i.044.0.i.i, float %220) #14, !dbg !701
br label %__internal_accurate_powf.exit.i.i, !dbg !701
__internal_accurate_powf.exit.i.i: ; preds = %226, %224, %__internal_expf_kernel.exit.i.i.i
%t.i.0.i.i = phi float [ 0x7FF0000000000000, %__internal_expf_kernel.exit.i.i.i ], [ %225, %224 ], [ %227, %226 ], !dbg !701
%228 = fcmp olt float %51, 0.000000e+00, !dbg !701
%229 = and i1 %228, %58, !dbg !701
%230 = bitcast float %t.i.0.i.i to i32, !dbg !701
%231 = xor i32 %230, -2147483648, !dbg !701
%232 = bitcast i32 %231 to float, !dbg !701
%.010.i.i = select i1 %229, float %232, float %t.i.0.i.i, !dbg !701
%233 = fcmp oeq float %51, 0.000000e+00, !dbg !701
%234 = fadd float %51, %51, !dbg !701
%235 = select i1 %58, float %234, float 0.000000e+00, !dbg !701
%.212.i.i = select i1 %233, float %235, float %.010.i.i, !dbg !701
%236 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not = icmp eq i32 %236, 0, !dbg !701
br i1 %.not, label %239, label %237, !dbg !701
237: ; preds = %__internal_accurate_powf.exit.i.i
%238 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !701
br label %__nv_fabsf.exit.i.i.i, !dbg !701
239: ; preds = %__internal_accurate_powf.exit.i.i
%240 = call float @llvm.fabs.f32(float %51) #14, !dbg !701
br label %__nv_fabsf.exit.i.i.i, !dbg !701
__nv_fabsf.exit.i.i.i: ; preds = %239, %237
%.01.i.i = phi float [ %238, %237 ], [ %240, %239 ], !dbg !701
%241 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not82 = icmp eq i32 %241, 0, !dbg !701
br i1 %.not82, label %__nv_fabsf.exit1.i.i.i, label %242, !dbg !701
242: ; preds = %__nv_fabsf.exit.i.i.i
%243 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !701
br label %__nv_fabsf.exit1.i.i.i, !dbg !701
__nv_fabsf.exit1.i.i.i: ; preds = %242, %__nv_fabsf.exit.i.i.i
%.02.i.i = phi float [ %243, %242 ], [ 4.000000e+00, %__nv_fabsf.exit.i.i.i ], !dbg !701
%244 = fadd float %.01.i.i, %.02.i.i, !dbg !701
%245 = bitcast float %244 to i32, !dbg !701
%246 = icmp sgt i32 %245, 2139095039, !dbg !701
br i1 %246, label %247, label %__internal_powf_infinite_cases.exit.i.i, !dbg !701
247: ; preds = %__nv_fabsf.exit1.i.i.i
%248 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not88 = icmp eq i32 %248, 0, !dbg !701
br i1 %.not88, label %251, label %249, !dbg !701
249: ; preds = %247
%250 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !701
br label %__nv_isnanf.exit.i.i.i, !dbg !701
251: ; preds = %247
%252 = call float @llvm.fabs.f32(float %51) #14, !dbg !701
br label %__nv_isnanf.exit.i.i.i, !dbg !701
__nv_isnanf.exit.i.i.i: ; preds = %251, %249
%.06.i.i = phi float [ %250, %249 ], [ %252, %251 ], !dbg !701
%253 = fcmp ugt float %.06.i.i, 0x7FF0000000000000, !dbg !701
br i1 %253, label %259, label %254, !dbg !701
254: ; preds = %__nv_isnanf.exit.i.i.i
%255 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not89 = icmp eq i32 %255, 0, !dbg !701
br i1 %.not89, label %__nv_isnanf.exit11.i.i.i, label %256, !dbg !701
256: ; preds = %254
%257 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !701
br label %__nv_isnanf.exit11.i.i.i, !dbg !701
__nv_isnanf.exit11.i.i.i: ; preds = %256, %254
%.07.i.i = phi float [ %257, %256 ], [ 4.000000e+00, %254 ], !dbg !701
%258 = fcmp ugt float %.07.i.i, 0x7FF0000000000000, !dbg !701
br i1 %258, label %259, label %261, !dbg !701
259: ; preds = %__nv_isnanf.exit11.i.i.i, %__nv_isnanf.exit.i.i.i
%260 = fadd float %51, 4.000000e+00, !dbg !701
br label %__internal_powf_infinite_cases.exit.i.i, !dbg !701
261: ; preds = %__nv_isnanf.exit11.i.i.i
%262 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not90 = icmp eq i32 %262, 0, !dbg !701
br i1 %.not90, label %__nv_isinff.exit8.i.i.i, label %263, !dbg !701
263: ; preds = %261
%264 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !701
br label %__nv_isinff.exit8.i.i.i, !dbg !701
__nv_isinff.exit8.i.i.i: ; preds = %263, %261
%.05.i.i = phi float [ %264, %263 ], [ 4.000000e+00, %261 ], !dbg !701
%265 = fcmp oeq float %.05.i.i, 0x7FF0000000000000, !dbg !701
br i1 %265, label %266, label %276, !dbg !701
266: ; preds = %__nv_isinff.exit8.i.i.i
%267 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not93 = icmp eq i32 %267, 0, !dbg !701
br i1 %.not93, label %270, label %268, !dbg !701
268: ; preds = %266
%269 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !701
br label %__nv_fabsf.exit5.i.i.i, !dbg !701
270: ; preds = %266
%271 = call float @llvm.fabs.f32(float %51) #14, !dbg !701
br label %__nv_fabsf.exit5.i.i.i, !dbg !701
__nv_fabsf.exit5.i.i.i: ; preds = %270, %268
%.04.i.i = phi float [ %269, %268 ], [ %271, %270 ], !dbg !701
%272 = fcmp ogt float %.04.i.i, 1.000000e+00, !dbg !701
%273 = fcmp oeq float %51, -1.000000e+00, !dbg !701
%274 = select i1 %272, float 0x7FF0000000000000, float 0.000000e+00, !dbg !701
%275 = select i1 %273, float 1.000000e+00, float %274, !dbg !701
br label %__internal_powf_infinite_cases.exit.i.i, !dbg !701
276: ; preds = %__nv_isinff.exit8.i.i.i
%277 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701
%.not91 = icmp eq i32 %277, 0, !dbg !701
br i1 %.not91, label %280, label %278, !dbg !701
278: ; preds = %276
%279 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !701
br label %__nv_isinff.exit.i.i.i, !dbg !701
280: ; preds = %276
%281 = call float @llvm.fabs.f32(float %51) #14, !dbg !701
br label %__nv_isinff.exit.i.i.i, !dbg !701
__nv_isinff.exit.i.i.i: ; preds = %280, %278
%.03.i.i = phi float [ %279, %278 ], [ %281, %280 ], !dbg !701
%282 = fcmp oeq float %.03.i.i, 0x7FF0000000000000, !dbg !701
br i1 %282, label %283, label %__internal_powf_infinite_cases.exit.i.i, !dbg !701
283: ; preds = %__nv_isinff.exit.i.i.i
%284 = select i1 %229, float 0xFFF0000000000000, float 0x7FF0000000000000, !dbg !701
br label %__internal_powf_infinite_cases.exit.i.i, !dbg !701
__internal_powf_infinite_cases.exit.i.i: ; preds = %283, %__nv_isinff.exit.i.i.i, %__nv_fabsf.exit5.i.i.i, %259, %__nv_fabsf.exit1.i.i.i
%.3.i.i = phi float [ %.212.i.i, %__nv_fabsf.exit1.i.i.i ], [ %260, %259 ], [ %275, %__nv_fabsf.exit5.i.i.i ], [ %284, %283 ], [ %.212.i.i, %__nv_isinff.exit.i.i.i ], !dbg !701
%285 = fcmp oeq float %51, 1.000000e+00, !dbg !701
%t.0.i.i = select i1 %285, float 1.000000e+00, float %.3.i.i, !dbg !701
%286 = fadd float %value_phi13.i, %t.0.i.i, !dbg !706
%.not83 = icmp eq i64 %iv.next, %13, !dbg !707
%287 = add nuw i64 %iv.next, 1, !dbg !709
br i1 %.not83, label %L467.i.loopexit, label %L319.i, !dbg !710
L467.i.loopexit: ; preds = %__internal_powf_infinite_cases.exit.i.i
br label %L467.i, !dbg !711
L467.i: ; preds = %L467.i.loopexit, %L302.i
%value_phi17.i = phi float [ 0.000000e+00, %L302.i ], [ %286, %L467.i.loopexit ]
%288 = fpext float %value_phi17.i to double, !dbg !711
%289 = call i32 @llvm.nvvm.d2i.hi(double %288) #16, !dbg !717
%290 = call i32 @llvm.nvvm.d2i.hi(double noundef 2.500000e-01) #16, !dbg !717
%291 = and i32 %290, 2146435072, !dbg !717
%292 = icmp eq i32 %291, 1072693248, !dbg !717
%293 = call double @llvm.fabs.f64(double %288) #14, !dbg !717
%294 = call fastcc double @__internal_accurate_pow(double %293) #16, !dbg !717
%295 = icmp slt i32 %289, 0, !dbg !717
%296 = and i1 %295, %292, !dbg !717
br i1 %296, label %297, label %302, !dbg !717
297: ; preds = %L467.i
%298 = call i32 @llvm.nvvm.d2i.hi(double %294) #16, !dbg !717
%299 = call i32 @llvm.nvvm.d2i.lo(double %294) #16, !dbg !717
%300 = xor i32 %298, -2147483648, !dbg !717
%301 = call double @llvm.nvvm.lohi.i2d(i32 %299, i32 %300) #16, !dbg !717
br label %302, !dbg !717
302: ; preds = %297, %L467.i
%t.0.i37.i = phi double [ %301, %297 ], [ %294, %L467.i ], !dbg !717
%303 = fcmp oeq float %value_phi17.i, 0.000000e+00, !dbg !717
br i1 %303, label %304, label %308, !dbg !717
304: ; preds = %302
%spec.select = select i1 %292, i32 %289, i32 0, !dbg !717
%305 = icmp slt i32 %290, 0, !dbg !717
%306 = or i32 %spec.select, 2146435072, !dbg !717
%thi.1.i.i = select i1 %305, i32 %306, i32 %spec.select, !dbg !717
%307 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.1.i.i) #16, !dbg !717
br label %309, !dbg !717
308: ; preds = %302
%spec.select97 = select i1 %295, double 0xFFF8000000000000, double %t.0.i37.i, !dbg !717
br label %309, !dbg !717
309: ; preds = %308, %304
%t.2.i.i = phi double [ %307, %304 ], [ %spec.select97, %308 ], !dbg !717
%310 = fadd double %288, 2.500000e-01, !dbg !717
%311 = call i32 @llvm.nvvm.d2i.hi(double %310) #16, !dbg !717
%312 = and i32 %311, 2146435072, !dbg !717
%313 = icmp eq i32 %312, 2146435072, !dbg !717
br i1 %313, label %314, label %__nv_pow.exit.i, !dbg !717
314: ; preds = %309
%315 = fcmp ugt double %293, 0x7FF0000000000000, !dbg !717
br i1 %315, label %__nv_pow.exit.i, label %316, !dbg !717
316: ; preds = %314
%317 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717
%318 = icmp eq i32 %317, 200, !dbg !717
br i1 %318, label %.critedge, label %319, !dbg !717
319: ; preds = %316
%320 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717
%321 = icmp eq i32 %320, 350, !dbg !717
br i1 %321, label %.critedge, label %322, !dbg !717
322: ; preds = %319
%323 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717
%324 = icmp eq i32 %323, 370, !dbg !717
br i1 %324, label %.critedge, label %__nv_isinfd.exit4.i.i, !dbg !717
__nv_isinfd.exit4.i.i: ; preds = %322
%325 = call i32 @llvm.nvvm.d2i.lo(double noundef 2.500000e-01) #16, !dbg !717
%326 = and i32 %290, 2147483647, !dbg !717
%327 = icmp eq i32 %326, 2146435072, !dbg !717
%328 = icmp eq i32 %325, 0, !dbg !717
%329 = and i1 %328, %327, !dbg !717
br i1 %329, label %330, label %.critedge, !dbg !717
330: ; preds = %__nv_isinfd.exit4.i.i
%331 = fcmp ogt double %293, 1.000000e+00, !dbg !717
%thi.2.i.i = select i1 %331, i32 2146435072, i32 0, !dbg !717
%332 = icmp slt i32 %290, 0, !dbg !717
%333 = xor i32 %thi.2.i.i, 2146435072
%spec.select8 = select i1 %332, i32 %333, i32 %thi.2.i.i, !dbg !717
%334 = fcmp oeq float %value_phi17.i, -1.000000e+00, !dbg !717
%thi.4.i.i = select i1 %334, i32 1072693248, i32 %spec.select8, !dbg !717
%335 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.4.i.i) #16, !dbg !717
br label %__nv_pow.exit.i, !dbg !717
.critedge: ; preds = %__nv_isinfd.exit4.i.i, %322, %319, %316
%336 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717
%337 = icmp eq i32 %336, 200, !dbg !717
br i1 %337, label %344, label %338, !dbg !717
338: ; preds = %.critedge
%339 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717
%340 = icmp eq i32 %339, 350, !dbg !717
br i1 %340, label %344, label %341, !dbg !717
341: ; preds = %338
%342 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717
%343 = icmp eq i32 %342, 370, !dbg !717
br i1 %343, label %344, label %346, !dbg !717
344: ; preds = %341, %338, %.critedge
%345 = fcmp oeq double %293, 0x7FF0000000000000, !dbg !717
br label %__nv_isinfd.exit.i.i, !dbg !717
346: ; preds = %341
%347 = call i32 @llvm.nvvm.d2i.lo(double %288) #16, !dbg !717
%348 = and i32 %289, 2147483647, !dbg !717
%349 = icmp eq i32 %348, 2146435072, !dbg !717
%350 = icmp eq i32 %347, 0, !dbg !717
%351 = and i1 %350, %349, !dbg !717
br label %__nv_isinfd.exit.i.i, !dbg !717
__nv_isinfd.exit.i.i: ; preds = %346, %344
%.0.i39.i.in = phi i1 [ %345, %344 ], [ %351, %346 ]
br i1 %.0.i39.i.in, label %352, label %__nv_pow.exit.i, !dbg !717
352: ; preds = %__nv_isinfd.exit.i.i
%.inv87 = icmp slt i32 %290, 0, !dbg !717
%spec.select9 = select i1 %.inv87, i32 0, i32 2146435072, !dbg !717
%353 = and i32 %290, 2147483647, !dbg !717
%354 = icmp ne i32 %353, 1071644672, !dbg !717
%or.cond11 = and i1 %354, %296, !dbg !717
%355 = or i32 %spec.select9, -2147483648, !dbg !717
%thi.6.i.i = select i1 %or.cond11, i32 %355, i32 %spec.select9, !dbg !717
%356 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.6.i.i) #16, !dbg !717
br label %__nv_pow.exit.i, !dbg !717
__nv_pow.exit.i: ; preds = %352, %__nv_isinfd.exit.i.i, %330, %314, %309
%t.6.i.i = phi double [ %t.2.i.i, %309 ], [ %335, %330 ], [ %356, %352 ], [ %t.2.i.i, %__nv_isinfd.exit.i.i ], [ %310, %314 ], !dbg !717
%357 = icmp sgt i64 %.fca.2.0.extract, 0, !dbg !718
%358 = select i1 %357, i64 %.fca.2.0.extract, i64 0, !dbg !718
%359 = load i64, i64* %11, align 8, !dbg !728, !tbaa !301
%360 = load i64, i64* %12, align 8, !dbg !732, !tbaa !301
%361 = add i64 %360, -1, !dbg !732
%362 = mul i64 %361, %358, !dbg !735
%363 = add i64 %362, %359, !dbg !736
%364 = icmp sgt i64 %.fca.3.extract, 0, !dbg !737
%365 = select i1 %364, i64 %.fca.3.extract, i64 0, !dbg !737
%366 = icmp slt i64 %363, 1, !dbg !746
%367 = icmp sgt i64 %363, %365, !dbg !746
%368 = or i1 %366, %367, !dbg !748
br i1 %368, label %L493.i, label %L491.i, !dbg !748
L491.i: ; preds = %__nv_pow.exit.i
%369 = fcmp oeq float %value_phi17.i, 1.000000e+00, !dbg !717
%370 = fptrunc double %t.6.i.i to float, !dbg !749
%371 = select i1 %369, float 1.000000e+00, float %370, !dbg !749
%372 = add nsw i64 %363, -1, !dbg !751
%373 = getelementptr inbounds float, float addrspace(1)* %10, i64 %372, !dbg !758
store float %371, float addrspace(1)* %373, align 4, !dbg !758, !tbaa !435
br label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !759
L493.i: ; preds = %__nv_pow.exit.i
%374 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5571() #15, !dbg !748
unreachable, !dbg !748
fail.i: ; preds = %entry
call fastcc void @gpu_report_exception() #14, !dbg !655
call fastcc void @gpu_signal_exception() #14, !dbg !655
call void asm sideeffect "exit;", ""() #16, !dbg !655
unreachable, !dbg !655
pass.i: ; preds = %entry
%375 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #14, !dbg !760, !range !127
%376 = udiv i64 %9, %.fca.1.0.0.0.0.extract, !dbg !655
%377 = mul i64 %376, %.fca.1.0.0.0.0.extract, !dbg !765
%378 = sub i64 %9, %377, !dbg !767
%379 = zext i32 %375 to i64, !dbg !653
%380 = lshr i64 %379, 8, !dbg !655
%381 = and i64 %379, 255, !dbg !767
%382 = add nuw nsw i64 %381, 1, !dbg !768
%383 = shl i64 %378, 8, !dbg !769
%384 = add i64 %382, %383, !dbg !773
%385 = add nuw nsw i64 %376, 1, !dbg !774
%386 = add nuw nsw i64 %385, %380, !dbg !773
%387 = icmp sgt i64 %384, 0, !dbg !777
%388 = icmp sle i64 %384, %.fca.0.0.0.0.extract, !dbg !777
%389 = and i1 %387, %388, !dbg !782
%390 = icmp sle i64 %386, %.fca.0.0.1.0.extract, !dbg !777
%391 = and i1 %390, %389, !dbg !783
br i1 %391, label %L302.i, label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !785
julia_gpu_dist_kernel__5526_inner.exit: ; preds = %pass.i, %L491.i
call void @llvm.lifetime.end.p0i8(i64 noundef 16, i8* noundef nonnull align 8 dereferenceable(16) %_replacementA78) #14, !dbg !786
br label %invertjulia_gpu_dist_kernel__5526_inner.exit, !dbg !629
allocsForInversion: ; No predecessors!
%"iv'ac" = alloca i64, align 8
%_augmented_cache = alloca i64, align 8
store i64 0, i64* %_augmented_cache, align 8
%"value_phi13.i'de" = alloca float, align 4
store float 0.000000e+00, float* %"value_phi13.i'de", align 4
%"'de" = alloca float, align 4
store float 0.000000e+00, float* %"'de", align 4
%"'de80" = alloca float, align 4
store float 0.000000e+00, float* %"'de80", align 4
%"'de81" = alloca float, align 4
store float 0.000000e+00, float* %"'de81", align 4
%_cache = alloca i64, align 8
store i64 0, i64* %_cache, align 8
%.not59_cache = alloca i1*, align 8
%"'de87" = alloca float, align 4
store float 0.000000e+00, float* %"'de87", align 4
invertentry: ; No predecessors!
ret void
invertL302.i: ; preds = %invertL319.i.preheader
%392 = load i64, i64* %_augmented_cache, align 8, !invariant.group !663
call fastcc void @diffejulia___index_Global_NTuple_5574([2 x i64]* %4, [2 x i64]* %"'ipa79", { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* %6, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* %"'ipc", i64 %392), !dbg !658
br label %invertpass.i
invertL319.i.preheader: ; preds = %invertL319.i
%393 = load i64, i64* %"iv'ac", align 8
%forfree = load i1*, i1** %.not59_cache, align 1, !dereferenceable !787, !invariant.group !675
%394 = bitcast i1* %forfree to i8*
tail call void @free(i8* nonnull %394), !dbg !629
br label %invertL302.i
invertL319.i: ; preds = %invertL340.i
%395 = load float, float* %"value_phi13.i'de", align 4
store float 0.000000e+00, float* %"value_phi13.i'de", align 4
%396 = load i64, i64* %"iv'ac", align 8
%397 = icmp eq i64 %396, 0
%398 = xor i1 %397, true
%399 = select fast i1 %398, float %395, float 0.000000e+00
%400 = load float, float* %"'de", align 4
%401 = fadd fast float %400, %395
%402 = select fast i1 %397, float %400, float %401
store float %402, float* %"'de", align 4
br i1 %397, label %invertL319.i.preheader, label %incinvertL319.i
incinvertL319.i: ; preds = %invertL319.i
%403 = load i64, i64* %"iv'ac", align 8
%404 = add nsw i64 %403, -1
store i64 %404, i64* %"iv'ac", align 8
br label %invert__internal_powf_infinite_cases.exit.i.i
invertL340.i: ; preds = %invertL405.i
br label %invertL319.i
invertL342.i: ; No predecessors!
invertL405.i: ; preds = %invert__nv_fabsf.exit.i.i, %invert
%405 = load float, float* %"'de80", align 4
%406 = fneg fast float %405
store float 0.000000e+00, float* %"'de80", align 4
%407 = load float, float* %"'de81", align 4
%408 = fadd fast float %407, %406
store float %408, float* %"'de81", align 4
%409 = load float, float* %"'de81", align 4
store float 0.000000e+00, float* %"'de81", align 4
%410 = load i64, i64* %"iv'ac", align 8
%411 = load i64, i64* %_cache, align 8, !invariant.group !674
%iv.next_unwrap = add nuw nsw i64 %410, 1
%_unwrap = add i64 %411, %iv.next_unwrap
%_unwrap83 = shl i64 %_unwrap, 2
%_unwrap84 = add i64 %_unwrap83, -4
%"'ipg_unwrap" = getelementptr i8, i8 addrspace(1)* %".fca.0.extract12'ipev", i64 %_unwrap84
%"'ipc82_unwrap" = bitcast i8 addrspace(1)* %"'ipg_unwrap" to float addrspace(1)*
%412 = atomicrmw fadd float addrspace(1)* %"'ipc82_unwrap", float %409 monotonic
br label %invertL340.i
invertL407.i: ; No predecessors!
invert: ; preds = %invert__nv_fabsf.exit.i.i
br label %invertL405.i
invert__nv_fabsf.exit.i.i: ; No predecessors!
%413 = load i64, i64* %"iv'ac", align 8
%.fca.2.0.extract34_unwrap = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 0
%.inv_unwrap = icmp sgt i64 %.fca.2.0.extract34_unwrap, 0
%_unwrap85 = select i1 %.inv_unwrap, i64 %.fca.2.0.extract34_unwrap, i64 0
%_unwrap86 = add nsw i64 %_unwrap85, -1
%414 = add nuw i64 %_unwrap86, 1
%415 = load i1*, i1** %.not59_cache, align 8, !dereferenceable !704, !invariant.group !675
%416 = getelementptr inbounds i1, i1* %415, i64 %413
%417 = load i1, i1* %416, align 1, !invariant.group !705
br i1 %417, label %invertL405.i, label %invert
invert1: ; No predecessors!
%418 = load float, float* %"'de87", align 4
store float 0.000000e+00, float* %"'de87", align 4
invert2: ; No predecessors!
invert__nv_fabsf.exit1.i.i: ; No predecessors!
invert3: ; No predecessors!
invert4: ; No predecessors!
invert__internal_fmad.exit.i.i.i.i: ; No predecessors!
invert5: ; No predecessors!
invert6: ; No predecessors!
invert__internal_fmad.exit3.i.i.i.i: ; No predecessors!
invert7: ; No predecessors!
invert8: ; No predecessors!
invert__nv_fmul_rn.exit4.i.i.i.i: ; No predecessors!
invert9: ; No predecessors!
invert10: ; No predecessors!
invert__nv_fmul_rn.exit5.i.i.i.i: ; No predecessors!
invert11: ; No predecessors!
invert12: ; No predecessors!
invert__nv_fmaf_rn.exit.i.i.i.i: ; No predecessors!
invert13: ; No predecessors!
invert14: ; No predecessors!
invert__nv_fmul_rn.exit6.i.i.i.i: ; No predecessors!
invert15: ; No predecessors!
invert16: ; No predecessors!
invert__nv_fmul_rn.exit2.i.i.i.i: ; No predecessors!
invert17: ; No predecessors!
invert18: ; No predecessors!
invert__internal_log_ep.exit.i.i.i: ; No predecessors!
invert19: ; No predecessors!
invert__nv_fabsf.exit.i6.i.i: ; No predecessors!
invert20: ; No predecessors!
invert21: ; No predecessors!
invert__nv_fmul_rn.exit.i.i.i.i: ; No predecessors!
invert22: ; No predecessors!
invert23: ; No predecessors!
invert__nv_fmaf_rn.exit.i3.i.i.i: ; No predecessors!
invert24: ; No predecessors!
invert25: ; No predecessors!
invert__nv_fmaf_rn.exit1.i.i.i.i: ; No predecessors!
invert26: ; No predecessors!
invert27: ; No predecessors!
invert__nv_fmaf_rn.exit2.i.i.i.i: ; No predecessors!
invert28: ; No predecessors!
invert29: ; No predecessors!
invert__nv_fadd_rn.exit.i.i.i.i: ; No predecessors!
invert30: ; No predecessors!
invert31: ; No predecessors!
invert__nv_fadd_rn.exit3.i.i.i.i: ; No predecessors!
invert32: ; No predecessors!
invert33: ; No predecessors!
invert__internal_dsmul.exit.i.i.i: ; No predecessors!
invert34: ; No predecessors!
invert35: ; No predecessors!
invert__nv_fmul_rn.exit.i10.i.i.i: ; No predecessors!
invert36: ; No predecessors!
invert37: ; No predecessors!
invert__nv_fabsf.exit.i.i.i.i: ; No predecessors!
invert38: ; No predecessors!
invert39: ; No predecessors!
invert__internal_fmad.exit4.i.i.i.i: ; No predecessors!
invert40: ; No predecessors!
invert41: ; No predecessors!
invert__internal_expf_kernel.exit.i.i.i: ; No predecessors!
invert42: ; No predecessors!
invert43: ; No predecessors!
invert44: ; No predecessors!
invert__internal_accurate_powf.exit.i.i: ; No predecessors!
invert45: ; No predecessors!
invert46: ; No predecessors!
invert__nv_fabsf.exit.i.i.i: ; No predecessors!
invert47: ; No predecessors!
invert__nv_fabsf.exit1.i.i.i: ; No predecessors!
invert48: ; No predecessors!
invert49: ; No predecessors!
invert50: ; No predecessors!
invert__nv_isnanf.exit.i.i.i: ; No predecessors!
invert51: ; No predecessors!
invert52: ; No predecessors!
invert__nv_isnanf.exit11.i.i.i: ; No predecessors!
invert53: ; No predecessors!
invert54: ; No predecessors!
invert55: ; No predecessors!
invert__nv_isinff.exit8.i.i.i: ; No predecessors!
invert56: ; No predecessors!
invert57: ; No predecessors!
invert58: ; No predecessors!
invert__nv_fabsf.exit5.i.i.i: ; No predecessors!
invert59: ; No predecessors!
invert60: ; No predecessors!
invert61: ; No predecessors!
invert__nv_isinff.exit.i.i.i: ; No predecessors!
invert62: ; No predecessors!
invert__internal_powf_infinite_cases.exit.i.i: ; preds = %incinvertL319.i
invertL467.i.loopexit: ; No predecessors!
invertL467.i: ; No predecessors!
invert63: ; No predecessors!
invert64: ; No predecessors!
invert65: ; No predecessors!
invert66: ; No predecessors!
invert67: ; No predecessors!
invert68: ; No predecessors!
invert69: ; No predecessors!
invert70: ; No predecessors!
invert71: ; No predecessors!
invert__nv_isinfd.exit4.i.i: ; No predecessors!
invert72: ; No predecessors!
invert.critedge: ; No predecessors!
invert73: ; No predecessors!
invert74: ; No predecessors!
invert75: ; No predecessors!
invert76: ; No predecessors!
invert__nv_isinfd.exit.i.i: ; No predecessors!
invert77: ; No predecessors!
invert__nv_pow.exit.i: ; No predecessors!
invertL491.i: ; No predecessors!
invertL493.i: ; No predecessors!
invertfail.i: ; No predecessors!
invertpass.i: ; preds = %invertL302.i
invertjulia_gpu_dist_kernel__5526_inner.exit: ; preds = %julia_gpu_dist_kernel__5526_inner.exit
}
cannot handle (reverse) unknown intrinsic
llvm.nvvm.fabs.ftz.f
%59 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !187
Stacktrace:
[1] #^
@ ~/.julia/packages/CUDA/DfvRa/src/device/intrinsics/math.jl:233
[2] #^
@ ~/.julia/packages/CUDA/DfvRa/src/device/intrinsics/math.jl:243
[3] literal_pow
@ ./intfuncs.jl:316
[4] macro expansion
@ ~/proj/mnistjl/distances.jl:43
[5] gpu_dist_kernel!
@ ~/.julia/packages/KernelAbstractions/1ZLga/src/macros.jl:80
[6] gpu_dist_kernel! (repeats 2 times)
@ ./none:0
Stacktrace:
[1] julia_error(cstr::Cstring, val::Ptr{LLVM.API.LLVMOpaqueValue}, errtype::Enzyme.API.ErrorType, data::Ptr{Nothing})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/di3zM/src/compiler.jl:2636
[2] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, width::Int64, additionalArg::Ptr{Nothing}, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool)
@ Enzyme.API ~/.julia/packages/Enzyme/di3zM/src/api.jl:111
[3] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}}}}, mod::LLVM.Module, primalf::LLVM.Function, adjoint::GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{Const{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}}, Const{CuDeviceMatrix{Float32, 1}}, Duplicated{CuDeviceMatrix{Float32, 1}}, Const{CuDeviceMatrix{Float32, 1}}}}, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, dupClosure::Bool, wrap::Bool, modifiedBetween::Bool, returnPrimal::Bool)
@ Enzyme.Compiler ~/.julia/packages/Enzyme/di3zM/src/compiler.jl:3271
[4] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}}}}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, ctx::LLVM.Context, strip::Bool, validate::Bool, only_entry::Bool, parent_job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}})
@ Enzyme.Compiler ~/.julia/packages/Enzyme/di3zM/src/compiler.jl:4158
[5] (::GPUCompiler.var"#114#117"{LLVM.Context, GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}}, GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}}}}})()
@ GPUCompiler ~/.julia/packages/GPUCompiler/jVY4I/src/driver.jl:296
[6] get!(default::GPUCompiler.var"#114#117"{LLVM.Context, GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}}, GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}}}}}, h::Dict{GPUCompiler.CompilerJob, String}, key::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}}}})
@ Base ./dict.jl:464
[7] macro expansion
@ ~/.julia/packages/GPUCompiler/jVY4I/src/driver.jl:295 [inlined]
[8] emit_llvm(job::GPUCompiler.CompilerJob, method_instance::Any; libraries::Bool, deferred_codegen::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, ctx::LLVM.Context)
@ GPUCompiler ~/.julia/packages/GPUCompiler/jVY4I/src/utils.jl:64
[9] cufunction_compile(job::GPUCompiler.CompilerJob, ctx::LLVM.Context)
@ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:353
[10] #224
@ ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:347 [inlined]
[11] JuliaContext(f::CUDA.var"#224#225"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}}})
@ GPUCompiler ~/.julia/packages/GPUCompiler/jVY4I/src/driver.jl:76
[12] cufunction_compile(job::GPUCompiler.CompilerJob)
@ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:346
[13] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link))
@ GPUCompiler ~/.julia/packages/GPUCompiler/jVY4I/src/cache.jl:90
[14] cufunction(f::KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}})
@ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:299
[15] cufunction(f::KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}})
@ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:293
[16] macro expansion
@ ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:102 [inlined]
[17] (::KernelAbstractions.Kernel{CUDADevice, KernelAbstractions.NDIteration.StaticSize{(256,)}, KernelAbstractions.NDIteration.DynamicSize, KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}})(::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Vararg{Any}; ndrange::Tuple{Int64, Int64}, dependencies::CUDAKernels.CudaEvent, workgroupsize::Nothing, progress::Function)
@ CUDAKernels ~/.julia/packages/CUDAKernels/YWLqR/src/CUDAKernels.jl:273
[18] Δdist!(x::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, y::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, result::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ∂y::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer})
@ Main ~/proj/mnistjl/distances.jl:62
[19] top-level scope
@ ./timing.jl:220
in expression starting at /home/tmb/proj/mnistjl/distances.jl:77
[deps]
CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba"
CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57"
Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9"
KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c"
KernelGradients = "e5faadeb-7f6c-408e-9747-a7a26e81c66a"
Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40"
Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment