Created
August 12, 2022 01:48
-
-
Save tmbdev/b1ed5c3964fc4ded0d530dc2869e0aaf to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using Enzyme | |
| using CUDA | |
| using CUDAKernels | |
| using KernelAbstractions | |
| using KernelGradients | |
| using Test | |
# Disallow scalar indexing of GPU arrays so that any accidental element-wise
# access raises an error instead of running silently.
CUDA.allowscalar(false)
device = CUDA.device()
@show device

##
# Problem size: `a` holds n columns and `b` holds m columns, each of length d.
d, n, m = 10, 20, 25

# Host inputs and the n-by-m output buffer.
a = rand(Float32, d, n)
b = rand(Float32, d, m)
result = zeros(Float32, n, m)

# Device copies of the same three arrays.
ca = CuArray(a)
cb = CuArray(b)
cresult = CuArray(result)
"""
    kconfig(a::Array)

Return the positional arguments used to instantiate a kernel for a host `Array`:
the `CPU()` backend followed by `4` (splatted into the kernel constructor by the
callers, where the second value is the workgroup size).
"""
kconfig(a::Array) = (CPU(), 4)
"""
    kconfig(a::CuArray)

Return the positional arguments used to instantiate a kernel for a `CuArray`:
the `CUDADevice()` backend followed by `256` (splatted into the kernel constructor
by the callers, where the second value is the workgroup size).
"""
kconfig(a::CuArray) = (CUDADevice(), 256)
"""
    same_device(xs...)

Return `true` when `KernelAbstractions.get_device` reports an equal device for
every argument, and `false` as soon as one argument differs from the first.

Calling it with no arguments returns `true` (there is nothing that could
disagree) instead of failing while indexing the empty argument tuple.
"""
function same_device(xs...)
    isempty(xs) && return true
    reference = KernelAbstractions.get_device(first(xs))
    # The first argument trivially matches itself, so only the tail is compared.
    return all(x -> KernelAbstractions.get_device(x) == reference, Base.tail(xs))
end
# Kernel body executed once per global index (i, j) of the launch range.
# It accumulates the fourth powers of the element-wise differences between
# column i of `x` and column j of `y`, takes the fourth root of that sum, and
# stores it in `result[i, j]`. `x` and `y` are only read (`@Const`); `result`
# is written.
@kernel function dist_kernel!(@Const(x), @Const(y), result)
    i, j = @index(Global, NTuple)
    T = eltype(result)
    acc = zero(T)
    for k in axes(x, 1)
        acc += (x[k, i] - y[k, j]) ^ 4
    end
    # Build the exponent from T rather than from the Float64 literal `1.0 / 4`:
    # with Float32 data the literal promoted the accumulator to Float64, so the
    # root was evaluated in double precision and only rounded back on store.
    result[i, j] = acc ^ (one(T) / 4)
end
"""
    dist!(x, y, result)

Launch `dist_kernel!` over `size(result)` so that `result[i, j]` is filled from
column `i` of `x` and column `j` of `y`, then block until the kernel's event has
completed. The backend and workgroup size are chosen by `kconfig(x)`.

Returns whatever `wait` returns for the kernel event.

# Throws
- `DimensionMismatch` if `x` and `y` differ in their first dimension, or if
  `result` is not `size(x, 2)` by `size(y, 2)`.
- `ArgumentError` if `same_device` reports that the arrays are on different devices.
"""
function dist!(x, y, result)
    # Explicit throws instead of `@assert`: these are argument checks that must
    # stay active, and the kernel indexes all three arrays from `size(result)`.
    size(x, 1) == size(y, 1) || throw(DimensionMismatch(
        "x and y must have the same first dimension, got $(size(x, 1)) and $(size(y, 1))"))
    size(x, 2) == size(result, 1) || throw(DimensionMismatch(
        "size(result, 1) must equal size(x, 2) = $(size(x, 2)), got $(size(result, 1))"))
    size(y, 2) == size(result, 2) || throw(DimensionMismatch(
        "size(result, 2) must equal size(y, 2) = $(size(y, 2)), got $(size(result, 2))"))
    same_device(x, y, result) ||
        throw(ArgumentError("x, y and result must be on the same device"))

    kernel! = dist_kernel!(kconfig(x)...)
    ev = kernel!(x, y, result; ndrange=size(result))
    return wait(ev)
end
"""
    Δdist!(x, y, result, ∂y)

Build the Enzyme-differentiated version of `dist_kernel!` (configured by
`kconfig(x)`), launch it over `size(result)` with `y` paired with its shadow
`∂y` via `Duplicated`, and block until the kernel's event has completed.

Returns whatever `wait` returns for the kernel event.

# Throws
- `DimensionMismatch` if the shapes of `x`, `y` and `result` are inconsistent
  (same rules as `dist!`), or if `∂y` does not have the same size as `y`.
- `ArgumentError` if `same_device` reports that the arrays are on different devices.
"""
function Δdist!(x, y, result, ∂y)
    # Same shape contract as the forward pass; checked here as well because the
    # differentiated kernel indexes the arrays from `size(result)` too.
    size(x, 1) == size(y, 1) || throw(DimensionMismatch(
        "x and y must have the same first dimension, got $(size(x, 1)) and $(size(y, 1))"))
    size(x, 2) == size(result, 1) || throw(DimensionMismatch(
        "size(result, 1) must equal size(x, 2) = $(size(x, 2)), got $(size(result, 1))"))
    size(y, 2) == size(result, 2) || throw(DimensionMismatch(
        "size(result, 2) must equal size(y, 2) = $(size(y, 2)), got $(size(result, 2))"))
    # The shadow is paired element-for-element with `y`, so it must match its shape.
    size(∂y) == size(y) || throw(DimensionMismatch(
        "∂y must have the same size as y, got $(size(∂y)) and $(size(y))"))
    same_device(x, y, result, ∂y) ||
        throw(ArgumentError("x, y, result and ∂y must be on the same device"))

    deriv = Enzyme.autodiff(dist_kernel!(kconfig(x)...))
    # NOTE(review): `x` and `result` are passed without an Enzyme activity
    # annotation. Confirm how KernelGradients treats bare arrays, and whether
    # `result` needs a seeded shadow (`Duplicated`) for `∂y` to receive a
    # meaningful gradient.
    ev = deriv(x, Duplicated(y, ∂y), result; ndrange=size(result))
    return wait(ev)
end
# Every call is timed twice in a row: the first timing includes compilation,
# the second one measures the already-compiled path.

println("cpu")
for _ in 1:2
    @time dist!(a, b, result)
end
∂b = zero(b)
for _ in 1:2
    @time Δdist!(a, b, result, ∂b)
end

println("gpu")
for _ in 1:2
    @time dist!(ca, cb, cresult)
end
c∂b = zero(cb)
for _ in 1:2
    @time Δdist!(ca, cb, cresult, c∂b)
end
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # This file is machine-generated - editing it directly is not advised | |
| julia_version = "1.7.3" | |
| manifest_format = "2.0" | |
| [[deps.AbstractFFTs]] | |
| deps = ["ChainRulesCore", "LinearAlgebra"] | |
| git-tree-sha1 = "69f7020bd72f069c219b5e8c236c1fa90d2cb409" | |
| uuid = "621f4979-c628-5d54-868e-fcf4e3e8185c" | |
| version = "1.2.1" | |
| [[deps.Adapt]] | |
| deps = ["LinearAlgebra"] | |
| git-tree-sha1 = "195c5505521008abea5aee4f96930717958eac6f" | |
| uuid = "79e6a3ab-5dfb-504d-930d-738a2a938a0e" | |
| version = "3.4.0" | |
| [[deps.ArgTools]] | |
| uuid = "0dad84c5-d112-42e6-8d28-ef12dabb789f" | |
| [[deps.Artifacts]] | |
| uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" | |
| [[deps.Atomix]] | |
| deps = ["UnsafeAtomics"] | |
| git-tree-sha1 = "c06a868224ecba914baa6942988e2f2aade419be" | |
| uuid = "a9b6321e-bd34-4604-b9c9-b65b8de01458" | |
| version = "0.1.0" | |
| [[deps.BFloat16s]] | |
| deps = ["LinearAlgebra", "Printf", "Random", "Test"] | |
| git-tree-sha1 = "a598ecb0d717092b5539dbbe890c98bac842b072" | |
| uuid = "ab4f0b2a-ad5b-11e8-123f-65d77653426b" | |
| version = "0.2.0" | |
| [[deps.Base64]] | |
| uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" | |
| [[deps.CEnum]] | |
| git-tree-sha1 = "eb4cb44a499229b3b8426dcfb5dd85333951ff90" | |
| uuid = "fa961155-64e5-5f13-b03f-caf6b980ea82" | |
| version = "0.4.2" | |
| [[deps.CUDA]] | |
| deps = ["AbstractFFTs", "Adapt", "BFloat16s", "CEnum", "CompilerSupportLibraries_jll", "ExprTools", "GPUArrays", "GPUCompiler", "LLVM", "LazyArtifacts", "Libdl", "LinearAlgebra", "Logging", "Printf", "Random", "Random123", "RandomNumbers", "Reexport", "Requires", "SparseArrays", "SpecialFunctions", "TimerOutputs"] | |
| git-tree-sha1 = "49549e2c28ffb9cc77b3689dc10e46e6271e9452" | |
| uuid = "052768ef-5323-5732-b1bb-66c8b64840ba" | |
| version = "3.12.0" | |
| [[deps.CUDAKernels]] | |
| deps = ["Adapt", "CUDA", "KernelAbstractions", "StaticArrays", "UnsafeAtomicsLLVM"] | |
| git-tree-sha1 = "bbab4d1a4001ec322c384dfff0889cec4118da93" | |
| uuid = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57" | |
| version = "0.4.3" | |
| [[deps.ChainRulesCore]] | |
| deps = ["Compat", "LinearAlgebra", "SparseArrays"] | |
| git-tree-sha1 = "80ca332f6dcb2508adba68f22f551adb2d00a624" | |
| uuid = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" | |
| version = "1.15.3" | |
| [[deps.ChangesOfVariables]] | |
| deps = ["ChainRulesCore", "LinearAlgebra", "Test"] | |
| git-tree-sha1 = "38f7a08f19d8810338d4f5085211c7dfa5d5bdd8" | |
| uuid = "9e997f8a-9a97-42d5-a9f1-ce6bfc15e2c0" | |
| version = "0.1.4" | |
| [[deps.Compat]] | |
| deps = ["Dates", "LinearAlgebra", "UUIDs"] | |
| git-tree-sha1 = "924cdca592bc16f14d2f7006754a621735280b74" | |
| uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" | |
| version = "4.1.0" | |
| [[deps.CompilerSupportLibraries_jll]] | |
| deps = ["Artifacts", "Libdl"] | |
| uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" | |
| [[deps.Dates]] | |
| deps = ["Printf"] | |
| uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" | |
| [[deps.DiffRules]] | |
| deps = ["IrrationalConstants", "LogExpFunctions", "NaNMath", "Random", "SpecialFunctions"] | |
| git-tree-sha1 = "28d605d9a0ac17118fe2c5e9ce0fbb76c3ceb120" | |
| uuid = "b552c78f-8df3-52c6-915a-8e097449b14b" | |
| version = "1.11.0" | |
| [[deps.DocStringExtensions]] | |
| deps = ["LibGit2"] | |
| git-tree-sha1 = "5158c2b41018c5f7eb1470d558127ac274eca0c9" | |
| uuid = "ffbed154-4ef7-542d-bbb7-c09d3a79fcae" | |
| version = "0.9.1" | |
| [[deps.Downloads]] | |
| deps = ["ArgTools", "FileWatching", "LibCURL", "NetworkOptions"] | |
| uuid = "f43a241f-c20a-4ad4-852c-f6b1247861c6" | |
| [[deps.Enzyme]] | |
| deps = ["Adapt", "CEnum", "Enzyme_jll", "GPUCompiler", "LLVM", "Libdl", "LinearAlgebra", "ObjectFile", "Printf", "Random"] | |
| git-tree-sha1 = "8ab9eb44fbcfc9161b3f81be7814a7618f2a3460" | |
| uuid = "7da242da-08ed-463a-9acd-ee780be4f1d9" | |
| version = "0.10.4" | |
| [[deps.Enzyme_jll]] | |
| deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg", "TOML"] | |
| git-tree-sha1 = "722aa3b554e883118e0e3111629ec40e176cee2c" | |
| uuid = "7cc45869-7501-5eee-bdea-0790c847d4ef" | |
| version = "0.0.33+0" | |
| [[deps.ExprTools]] | |
| git-tree-sha1 = "56559bbef6ca5ea0c0818fa5c90320398a6fbf8d" | |
| uuid = "e2ba6199-217a-4e67-a87a-7c52f15ade04" | |
| version = "0.1.8" | |
| [[deps.FileWatching]] | |
| uuid = "7b1f6079-737a-58dc-b8bc-7a2ca5c1b5ee" | |
| [[deps.GPUArrays]] | |
| deps = ["Adapt", "GPUArraysCore", "LLVM", "LinearAlgebra", "Printf", "Random", "Reexport", "Serialization", "Statistics"] | |
| git-tree-sha1 = "73145f1d724b5ee0e90098aec39a65e9697429a6" | |
| uuid = "0c68f7d7-f131-5f86-a1c3-88cf8149b2d7" | |
| version = "8.4.2" | |
| [[deps.GPUArraysCore]] | |
| deps = ["Adapt"] | |
| git-tree-sha1 = "d88b17a38322e153c519f5a9ed8d91e9baa03d8f" | |
| uuid = "46192b85-c4d5-4398-a991-12ede77f4527" | |
| version = "0.1.1" | |
| [[deps.GPUCompiler]] | |
| deps = ["ExprTools", "InteractiveUtils", "LLVM", "Libdl", "Logging", "TimerOutputs", "UUIDs"] | |
| git-tree-sha1 = "122d7bcc92abf94cf1a86281ad7a4d0e838ab9e0" | |
| uuid = "61eb1bfa-7361-4325-ad38-22787b887f55" | |
| version = "0.16.3" | |
| [[deps.InteractiveUtils]] | |
| deps = ["Markdown"] | |
| uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" | |
| [[deps.InverseFunctions]] | |
| deps = ["Test"] | |
| git-tree-sha1 = "b3364212fb5d870f724876ffcd34dd8ec6d98918" | |
| uuid = "3587e190-3f89-42d0-90ee-14403ec27112" | |
| version = "0.1.7" | |
| [[deps.IrrationalConstants]] | |
| git-tree-sha1 = "7fd44fd4ff43fc60815f8e764c0f352b83c49151" | |
| uuid = "92d709cd-6900-40b7-9082-c6be49f344b6" | |
| version = "0.1.1" | |
| [[deps.JLLWrappers]] | |
| deps = ["Preferences"] | |
| git-tree-sha1 = "abc9885a7ca2052a736a600f7fa66209f96506e1" | |
| uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" | |
| version = "1.4.1" | |
| [[deps.KernelAbstractions]] | |
| deps = ["Adapt", "Atomix", "InteractiveUtils", "LinearAlgebra", "MacroTools", "SparseArrays", "StaticArrays", "UUIDs", "UnsafeAtomics", "UnsafeAtomicsLLVM"] | |
| git-tree-sha1 = "02838ecfc5f925ac408ffe6b359ac59ef8865272" | |
| uuid = "63c18a36-062a-441e-b654-da1e3ab1ce7c" | |
| version = "0.8.3" | |
| [[deps.KernelGradients]] | |
| deps = ["Enzyme", "KernelAbstractions"] | |
| git-tree-sha1 = "6dbcc9f869625fa50e1c7483f1c4200c65f17f9c" | |
| uuid = "e5faadeb-7f6c-408e-9747-a7a26e81c66a" | |
| version = "0.1.2" | |
| [[deps.LLVM]] | |
| deps = ["CEnum", "LLVMExtra_jll", "Libdl", "Printf", "Unicode"] | |
| git-tree-sha1 = "e7e9184b0bf0158ac4e4aa9daf00041b5909bf1a" | |
| uuid = "929cbde3-209d-540e-8aea-75f648917ca0" | |
| version = "4.14.0" | |
| [[deps.LLVMExtra_jll]] | |
| deps = ["Artifacts", "JLLWrappers", "LazyArtifacts", "Libdl", "Pkg", "TOML"] | |
| git-tree-sha1 = "771bfe376249626d3ca12bcd58ba243d3f961576" | |
| uuid = "dad2f222-ce93-54a1-a47d-0025e8a3acab" | |
| version = "0.0.16+0" | |
| [[deps.LazyArtifacts]] | |
| deps = ["Artifacts", "Pkg"] | |
| uuid = "4af54fe1-eca0-43a8-85a7-787d91b784e3" | |
| [[deps.LibCURL]] | |
| deps = ["LibCURL_jll", "MozillaCACerts_jll"] | |
| uuid = "b27032c2-a3e7-50c8-80cd-2d36dbcbfd21" | |
| [[deps.LibCURL_jll]] | |
| deps = ["Artifacts", "LibSSH2_jll", "Libdl", "MbedTLS_jll", "Zlib_jll", "nghttp2_jll"] | |
| uuid = "deac9b47-8bc7-5906-a0fe-35ac56dc84c0" | |
| [[deps.LibGit2]] | |
| deps = ["Base64", "NetworkOptions", "Printf", "SHA"] | |
| uuid = "76f85450-5226-5b5a-8eaa-529ad045b433" | |
| [[deps.LibSSH2_jll]] | |
| deps = ["Artifacts", "Libdl", "MbedTLS_jll"] | |
| uuid = "29816b5a-b9ab-546f-933c-edad1886dfa8" | |
| [[deps.Libdl]] | |
| uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" | |
| [[deps.LinearAlgebra]] | |
| deps = ["Libdl", "libblastrampoline_jll"] | |
| uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" | |
| [[deps.LogExpFunctions]] | |
| deps = ["ChainRulesCore", "ChangesOfVariables", "DocStringExtensions", "InverseFunctions", "IrrationalConstants", "LinearAlgebra"] | |
| git-tree-sha1 = "361c2b088575b07946508f135ac556751240091c" | |
| uuid = "2ab3a3ac-af41-5b50-aa03-7779005ae688" | |
| version = "0.3.17" | |
| [[deps.Logging]] | |
| uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" | |
| [[deps.MacroTools]] | |
| deps = ["Markdown", "Random"] | |
| git-tree-sha1 = "3d3e902b31198a27340d0bf00d6ac452866021cf" | |
| uuid = "1914dd2f-81c6-5fcd-8719-6d5c9610ff09" | |
| version = "0.5.9" | |
| [[deps.Markdown]] | |
| deps = ["Base64"] | |
| uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" | |
| [[deps.MbedTLS_jll]] | |
| deps = ["Artifacts", "Libdl"] | |
| uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" | |
| [[deps.MozillaCACerts_jll]] | |
| uuid = "14a3606d-f60d-562e-9121-12d972cd8159" | |
| [[deps.NaNMath]] | |
| deps = ["OpenLibm_jll"] | |
| git-tree-sha1 = "a7c3d1da1189a1c2fe843a3bfa04d18d20eb3211" | |
| uuid = "77ba4419-2d1f-58cd-9bb1-8ffee604a2e3" | |
| version = "1.0.1" | |
| [[deps.NetworkOptions]] | |
| uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" | |
| [[deps.ObjectFile]] | |
| deps = ["Reexport", "StructIO"] | |
| git-tree-sha1 = "55ce61d43409b1fb0279d1781bf3b0f22c83ab3b" | |
| uuid = "d8793406-e978-5875-9003-1fc021f44a92" | |
| version = "0.3.7" | |
| [[deps.OpenBLAS_jll]] | |
| deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] | |
| uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" | |
| [[deps.OpenLibm_jll]] | |
| deps = ["Artifacts", "Libdl"] | |
| uuid = "05823500-19ac-5b8b-9628-191a04bc5112" | |
| [[deps.OpenSpecFun_jll]] | |
| deps = ["Artifacts", "CompilerSupportLibraries_jll", "JLLWrappers", "Libdl", "Pkg"] | |
| git-tree-sha1 = "13652491f6856acfd2db29360e1bbcd4565d04f1" | |
| uuid = "efe28fd5-8261-553b-a9e1-b2916fc3738e" | |
| version = "0.5.5+0" | |
| [[deps.Pkg]] | |
| deps = ["Artifacts", "Dates", "Downloads", "LibGit2", "Libdl", "Logging", "Markdown", "Printf", "REPL", "Random", "SHA", "Serialization", "TOML", "Tar", "UUIDs", "p7zip_jll"] | |
| uuid = "44cfe95a-1eb2-52ea-b672-e2afdf69b78f" | |
| [[deps.Preferences]] | |
| deps = ["TOML"] | |
| git-tree-sha1 = "47e5f437cc0e7ef2ce8406ce1e7e24d44915f88d" | |
| uuid = "21216c6a-2e73-6563-6e65-726566657250" | |
| version = "1.3.0" | |
| [[deps.Printf]] | |
| deps = ["Unicode"] | |
| uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" | |
| [[deps.REPL]] | |
| deps = ["InteractiveUtils", "Markdown", "Sockets", "Unicode"] | |
| uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" | |
| [[deps.Random]] | |
| deps = ["SHA", "Serialization"] | |
| uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" | |
| [[deps.Random123]] | |
| deps = ["Random", "RandomNumbers"] | |
| git-tree-sha1 = "7a1a306b72cfa60634f03a911405f4e64d1b718b" | |
| uuid = "74087812-796a-5b5d-8853-05524746bad3" | |
| version = "1.6.0" | |
| [[deps.RandomNumbers]] | |
| deps = ["Random", "Requires"] | |
| git-tree-sha1 = "043da614cc7e95c703498a491e2c21f58a2b8111" | |
| uuid = "e6cf234a-135c-5ec9-84dd-332b85af5143" | |
| version = "1.5.3" | |
| [[deps.Reexport]] | |
| git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" | |
| uuid = "189a3867-3050-52da-a836-e630ba90ab69" | |
| version = "1.2.2" | |
| [[deps.Requires]] | |
| deps = ["UUIDs"] | |
| git-tree-sha1 = "838a3a4188e2ded87a4f9f184b4b0d78a1e91cb7" | |
| uuid = "ae029012-a4dd-5104-9daa-d747884805df" | |
| version = "1.3.0" | |
| [[deps.SHA]] | |
| uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" | |
| [[deps.Serialization]] | |
| uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" | |
| [[deps.Sockets]] | |
| uuid = "6462fe0b-24de-5631-8697-dd941f90decc" | |
| [[deps.SparseArrays]] | |
| deps = ["LinearAlgebra", "Random"] | |
| uuid = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" | |
| [[deps.SpecialFunctions]] | |
| deps = ["ChainRulesCore", "IrrationalConstants", "LogExpFunctions", "OpenLibm_jll", "OpenSpecFun_jll"] | |
| git-tree-sha1 = "d75bda01f8c31ebb72df80a46c88b25d1c79c56d" | |
| uuid = "276daf66-3868-5448-9aa4-cd146d93841b" | |
| version = "2.1.7" | |
| [[deps.StaticArrays]] | |
| deps = ["LinearAlgebra", "Random", "StaticArraysCore", "Statistics"] | |
| git-tree-sha1 = "23368a3313d12a2326ad0035f0db0c0966f438ef" | |
| uuid = "90137ffa-7385-5640-81b9-e52037218182" | |
| version = "1.5.2" | |
| [[deps.StaticArraysCore]] | |
| git-tree-sha1 = "66fe9eb253f910fe8cf161953880cfdaef01cdf0" | |
| uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" | |
| version = "1.0.1" | |
| [[deps.Statistics]] | |
| deps = ["LinearAlgebra", "SparseArrays"] | |
| uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" | |
| [[deps.StructIO]] | |
| deps = ["Test"] | |
| git-tree-sha1 = "010dc73c7146869c042b49adcdb6bf528c12e859" | |
| uuid = "53d494c1-5632-5724-8f4c-31dff12d585f" | |
| version = "0.3.0" | |
| [[deps.TOML]] | |
| deps = ["Dates"] | |
| uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" | |
| [[deps.Tar]] | |
| deps = ["ArgTools", "SHA"] | |
| uuid = "a4e569a6-e804-4fa4-b0f3-eef7a1d5b13e" | |
| [[deps.Test]] | |
| deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] | |
| uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" | |
| [[deps.TimerOutputs]] | |
| deps = ["ExprTools", "Printf"] | |
| git-tree-sha1 = "464d64b2510a25e6efe410e7edab14fffdc333df" | |
| uuid = "a759f4b9-e2f1-59dc-863e-4aeb61b1ea8f" | |
| version = "0.5.20" | |
| [[deps.Tullio]] | |
| deps = ["ChainRulesCore", "DiffRules", "LinearAlgebra", "Requires"] | |
| git-tree-sha1 = "859e2e9a7222553a0c052e423557cedb49376da9" | |
| uuid = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" | |
| version = "0.3.4" | |
| [[deps.UUIDs]] | |
| deps = ["Random", "SHA"] | |
| uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" | |
| [[deps.Unicode]] | |
| uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" | |
| [[deps.UnsafeAtomics]] | |
| git-tree-sha1 = "6331ac3440856ea1988316b46045303bef658278" | |
| uuid = "013be700-e6cd-48c3-b4a1-df204f14c38f" | |
| version = "0.2.1" | |
| [[deps.UnsafeAtomicsLLVM]] | |
| deps = ["LLVM", "UnsafeAtomics"] | |
| git-tree-sha1 = "33af9d2031d0dc09e2be9a0d4beefec4466def8e" | |
| uuid = "d80eeb9a-aca5-4d75-85e5-170c8b632249" | |
| version = "0.1.0" | |
| [[deps.Zlib_jll]] | |
| deps = ["Libdl"] | |
| uuid = "83775a58-1f1d-513f-b197-d71354ab007a" | |
| [[deps.libblastrampoline_jll]] | |
| deps = ["Artifacts", "Libdl", "OpenBLAS_jll"] | |
| uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" | |
| [[deps.nghttp2_jll]] | |
| deps = ["Artifacts", "Libdl"] | |
| uuid = "8e850ede-7688-5339-a07c-302acd2aaf8d" | |
| [[deps.p7zip_jll]] | |
| deps = ["Artifacts", "Libdl"] | |
| uuid = "3f19e933-33d8-53b3-aaab-bd5110c3b7a0" |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| device = CuDevice(0) | |
| cpu | |
| 0.177147 seconds (1.03 M allocations: 55.361 MiB, 4.22% gc time, 99.71% compilation time) | |
| 0.000385 seconds (15 allocations: 816 bytes) | |
| 10.042157 seconds (24.48 M allocations: 1.296 GiB, 3.90% gc time, 69.11% compilation time) | |
| 0.000470 seconds (141 allocations: 2.844 KiB) | |
| gpu | |
| 10.081786 seconds (21.57 M allocations: 1.137 GiB, 4.98% gc time, 31.50% compilation time) | |
| 0.000098 seconds (62 allocations: 2.859 KiB) | |
| ERROR: LoadError: Enzyme compilation failed. | |
| Current scope: | |
| ; Function Attrs: willreturn mustprogress | |
| define void @preprocess_julia_gpu_dist_kernel__5526_inner19({ [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, { i8 addrspace(1)*, i64, [2 x i64], i64 } %3) local_unnamed_addr #12 !dbg !474 { | |
| entry: | |
| %4 = alloca [2 x i64], align 8 | |
| %5 = alloca { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, align 8, !dbg !475 | |
| %6 = addrspacecast { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5 to { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)*, !dbg !475 | |
| %.fca.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 0, 0, !dbg !475 | |
| %.fca.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 0, i64 0, !dbg !475 | |
| store i64 %.fca.0.0.0.0.extract, i64* %.fca.0.0.0.0.gep, align 8, !dbg !475 | |
| %.fca.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 1, 0, !dbg !475 | |
| %.fca.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 1, i64 0, !dbg !475 | |
| store i64 %.fca.0.0.1.0.extract, i64* %.fca.0.0.1.0.gep, align 8, !dbg !475 | |
| %.fca.1.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 0, 0, !dbg !475 | |
| %.fca.1.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 0, i64 0, !dbg !475 | |
| store i64 %.fca.1.0.0.0.0.extract, i64* %.fca.1.0.0.0.0.gep, align 8, !dbg !475 | |
| %.fca.1.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 1, 0, !dbg !475 | |
| %.fca.1.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 1, i64 0, !dbg !475 | |
| store i64 %.fca.1.0.0.1.0.extract, i64* %.fca.1.0.0.1.0.gep, align 8, !dbg !475 | |
| %.fca.0.extract30 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 0, !dbg !475 | |
| %.fca.1.extract32 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 1, !dbg !475 | |
| %.fca.2.0.extract34 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 0, !dbg !475 | |
| %.fca.2.1.extract36 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 1, !dbg !475 | |
| %.fca.3.extract38 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 3, !dbg !475 | |
| %.fca.0.extract12 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 0, !dbg !475 | |
| %.fca.1.extract14 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 1, !dbg !475 | |
| %.fca.2.0.extract16 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 0, !dbg !475 | |
| %.fca.2.1.extract18 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 1, !dbg !475 | |
| %.fca.3.extract20 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 3, !dbg !475 | |
| %.fca.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 0, !dbg !475 | |
| %.fca.1.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 1, !dbg !475 | |
| %.fca.2.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 2, 0, !dbg !475 | |
| %.fca.2.1.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 2, 1, !dbg !475 | |
| %.fca.3.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 3, !dbg !475 | |
| %7 = bitcast [2 x i64]* %4 to i8* | |
| call void @llvm.lifetime.start.p0i8(i64 noundef 16, i8* noundef nonnull align 8 dereferenceable(16) %7) #14 | |
| %8 = call {}*** @julia.get_pgcstack() #14 | |
| %9 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #14, !dbg !476, !range !65 | |
| %10 = icmp sgt i64 %.fca.1.0.0.0.0.extract, 0, !dbg !485 | |
| %11 = zext i32 %9 to i64, !dbg !499 | |
| %12 = bitcast i8 addrspace(1)* %.fca.0.extract to float addrspace(1)*, !dbg !501 | |
| br i1 %10, label %pass.i, label %fail.i, !dbg !501 | |
| L302.i: ; preds = %pass.i | |
| call fastcc void @julia___index_Global_NTuple_5574([2 x i64]* noalias nocapture noundef nonnull writeonly sret([2 x i64]) align 8 dereferenceable(16) %4, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(32) %6) #12, !dbg !504 | |
| %13 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 0, !dbg !506 | |
| %14 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 1, !dbg !509 | |
| %.inv = icmp sgt i64 %.fca.2.0.extract34, 0, !dbg !510 | |
| %15 = select i1 %.inv, i64 %.fca.2.0.extract34, i64 0, !dbg !510 | |
| br i1 %.inv, label %L319.i.preheader, label %L467.i, !dbg !514 | |
| L319.i.preheader: ; preds = %L302.i | |
| %16 = load i64, i64* %13, align 8, !tbaa !301 | |
| %17 = icmp sgt i64 %.fca.2.1.extract36, 0 | |
| %18 = select i1 %17, i64 %.fca.2.1.extract36, i64 0 | |
| %19 = icmp sgt i64 %16, 0 | |
| %20 = icmp sle i64 %16, %18 | |
| %21 = and i1 %19, %20 | |
| %22 = add i64 %16, -1 | |
| %23 = mul i64 %22, %15 | |
| %24 = load i64, i64* %14, align 8 | |
| %25 = icmp sgt i64 %.fca.2.0.extract16, 0 | |
| %26 = select i1 %25, i64 %.fca.2.0.extract16, i64 0 | |
| %27 = icmp sgt i64 %.fca.2.1.extract18, 0 | |
| %28 = select i1 %27, i64 %.fca.2.1.extract18, i64 0 | |
| %29 = icmp sgt i64 %24, 0 | |
| %30 = icmp sle i64 %24, %28 | |
| %31 = and i1 %29, %30 | |
| %32 = add i64 %24, -1 | |
| %33 = mul i64 %32, %26 | |
| br label %L319.i, !dbg !515 | |
| L319.i: ; preds = %__internal_powf_infinite_cases.exit.i.i, %L319.i.preheader | |
| %iv = phi i64 [ %iv.next, %__internal_powf_infinite_cases.exit.i.i ], [ 0, %L319.i.preheader ] | |
| %value_phi13.i = phi float [ %284, %__internal_powf_infinite_cases.exit.i.i ], [ 0.000000e+00, %L319.i.preheader ] | |
| %iv.next = add nuw nsw i64 %iv, 1, !dbg !519 | |
| %34 = icmp ule i64 %iv.next, %15, !dbg !519 | |
| %35 = and i1 %34, %21, !dbg !524 | |
| br i1 %35, label %L340.i, label %L342.i, !dbg !515 | |
| L340.i: ; preds = %L319.i | |
| %36 = add i64 %23, %iv.next, !dbg !525 | |
| %37 = shl i64 %36, 2, !dbg !532 | |
| %38 = add i64 %37, -4, !dbg !532 | |
| %39 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract30, i64 %38, !dbg !537 | |
| %40 = bitcast i8 addrspace(1)* %39 to float addrspace(1)*, !dbg !538 | |
| %41 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %40, i32 noundef 4) #14, !dbg !538 | |
| %42 = icmp ule i64 %iv.next, %26, !dbg !519 | |
| %43 = and i1 %42, %31, !dbg !524 | |
| br i1 %43, label %L405.i, label %L407.i, !dbg !515 | |
| L342.i: ; preds = %L319.i | |
| %44 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !515 | |
| unreachable, !dbg !515 | |
| L405.i: ; preds = %L340.i | |
| %45 = add i64 %33, %iv.next, !dbg !525 | |
| %46 = shl i64 %45, 2, !dbg !532 | |
| %47 = add i64 %46, -4, !dbg !532 | |
| %48 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract12, i64 %47, !dbg !537 | |
| %49 = bitcast i8 addrspace(1)* %48 to float addrspace(1)*, !dbg !538 | |
| %50 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %49, i32 noundef 4) #14, !dbg !538 | |
| %51 = fsub float %41, %50, !dbg !543 | |
| %52 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not59 = icmp eq i32 %52, 0, !dbg !544 | |
| br i1 %.not59, label %__nv_fabsf.exit.i.i, label %54, !dbg !544 | |
| L407.i: ; preds = %L340.i | |
| %53 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !515 | |
| unreachable, !dbg !515 | |
| 54: ; preds = %L405.i | |
| %55 = call float @llvm.nvvm.fabs.ftz.f(float noundef 0.000000e+00) #16, !dbg !544 | |
| br label %__nv_fabsf.exit.i.i, !dbg !544 | |
| __nv_fabsf.exit.i.i: ; preds = %54, %L405.i | |
| %.08.i.i = phi float [ %55, %54 ], [ 0.000000e+00, %L405.i ], !dbg !544 | |
| %56 = fcmp oeq float %.08.i.i, 1.000000e+00, !dbg !544 | |
| %57 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not60 = icmp eq i32 %57, 0, !dbg !544 | |
| br i1 %.not60, label %60, label %58, !dbg !544 | |
| 58: ; preds = %__nv_fabsf.exit.i.i | |
| %59 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544 | |
| br label %__nv_fabsf.exit1.i.i, !dbg !544 | |
| 60: ; preds = %__nv_fabsf.exit.i.i | |
| %61 = call float @llvm.fabs.f32(float %51) #14, !dbg !544 | |
| br label %__nv_fabsf.exit1.i.i, !dbg !544 | |
| __nv_fabsf.exit1.i.i: ; preds = %60, %58 | |
| %.09.i.i = phi float [ %59, %58 ], [ %61, %60 ], !dbg !544 | |
| %62 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not61 = icmp eq i32 %62, 0, !dbg !544 | |
| %63 = fcmp olt float %.09.i.i, 0x3810000000000000, !dbg !544 | |
| %64 = fmul float %.09.i.i, 0x4170000000000000, !dbg !544 | |
| %65 = and i1 %63, %.not61, !dbg !544 | |
| %.116.i.i = select i1 %65, float %64, float %.09.i.i, !dbg !544 | |
| %expo.i.i.1.i.i = select i1 %65, float -1.510000e+02, float -1.270000e+02, !dbg !544 | |
| %66 = bitcast float %.116.i.i to i32, !dbg !544 | |
| %67 = and i32 %66, 8388607, !dbg !544 | |
| %68 = or i32 %67, 1065353216, !dbg !544 | |
| %69 = bitcast i32 %68 to float, !dbg !544 | |
| %70 = lshr i32 %66, 23, !dbg !544 | |
| %71 = uitofp i32 %70 to float, !dbg !544 | |
| %72 = fadd float %expo.i.i.1.i.i, %71, !dbg !544 | |
| %73 = fcmp ogt float %69, 0x3FF6A09E60000000, !dbg !544 | |
| %74 = fmul float %69, 5.000000e-01, !dbg !544 | |
| %75 = fadd float %72, 1.000000e+00, !dbg !544 | |
| %expo.i.i.2.i.i = select i1 %73, float %75, float %72, !dbg !544 | |
| %m.i.i.0.i.i = select i1 %73, float %74, float %69, !dbg !544 | |
| %76 = fadd float %m.i.i.0.i.i, -1.000000e+00, !dbg !544 | |
| %77 = fadd float %m.i.i.0.i.i, 1.000000e+00, !dbg !544 | |
| %78 = call float asm "rcp.approx.ftz.f32 $0,$1;", "=f,f"(float %77) #17, !dbg !544, !srcloc !362 | |
| %79 = fmul float %76, 2.000000e+00, !dbg !544 | |
| %80 = fmul float %78, %79, !dbg !544 | |
| %81 = fmul float %80, %80, !dbg !544 | |
| %82 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not62 = icmp eq i32 %82, 0, !dbg !544 | |
| br i1 %.not62, label %85, label %83, !dbg !544 | |
| 83: ; preds = %__nv_fabsf.exit1.i.i | |
| %84 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0x3F631E1FC0000000, float %81, float noundef 0x3F8995EC60000000) #16, !dbg !544 | |
| br label %__internal_fmad.exit.i.i.i.i, !dbg !544 | |
| 85: ; preds = %__nv_fabsf.exit1.i.i | |
| %86 = call float @llvm.fma.f32(float %81, float noundef 0x3F631E1FC0000000, float noundef 0x3F8995EC60000000) #14, !dbg !544 | |
| br label %__internal_fmad.exit.i.i.i.i, !dbg !544 | |
| __internal_fmad.exit.i.i.i.i: ; preds = %85, %83 | |
| %.020.i.i = phi float [ %84, %83 ], [ %86, %85 ], !dbg !544 | |
| %87 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not63 = icmp eq i32 %87, 0, !dbg !544 | |
| br i1 %.not63, label %90, label %88, !dbg !544 | |
| 88: ; preds = %__internal_fmad.exit.i.i.i.i | |
| %89 = call float @llvm.nvvm.fma.rn.ftz.f(float %.020.i.i, float %81, float noundef 0x3FB55557A0000000) #16, !dbg !544 | |
| br label %__internal_fmad.exit3.i.i.i.i, !dbg !544 | |
| 90: ; preds = %__internal_fmad.exit.i.i.i.i | |
| %91 = call float @llvm.fma.f32(float %.020.i.i, float %81, float noundef 0x3FB55557A0000000) #14, !dbg !544 | |
| br label %__internal_fmad.exit3.i.i.i.i, !dbg !544 | |
| __internal_fmad.exit3.i.i.i.i: ; preds = %90, %88 | |
| %.021.i.i = phi float [ %89, %88 ], [ %91, %90 ], !dbg !544 | |
| %92 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not64 = icmp eq i32 %92, 0, !dbg !544 | |
| br i1 %.not64, label %95, label %93, !dbg !544 | |
| 93: ; preds = %__internal_fmad.exit3.i.i.i.i | |
| %94 = call float @llvm.nvvm.mul.rn.ftz.f(float %.021.i.i, float %81) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !544 | |
| 95: ; preds = %__internal_fmad.exit3.i.i.i.i | |
| %96 = fmul float %81, %.021.i.i, !dbg !544 | |
| br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit4.i.i.i.i: ; preds = %95, %93 | |
| %.022.i.i = phi float [ %94, %93 ], [ %96, %95 ], !dbg !544 | |
| %97 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not65 = icmp eq i32 %97, 0, !dbg !544 | |
| br i1 %.not65, label %100, label %98, !dbg !544 | |
| 98: ; preds = %__nv_fmul_rn.exit4.i.i.i.i | |
| %99 = call float @llvm.nvvm.mul.rn.ftz.f(float %.022.i.i, float %80) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !544 | |
| 100: ; preds = %__nv_fmul_rn.exit4.i.i.i.i | |
| %101 = fmul float %80, %.022.i.i, !dbg !544 | |
| br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit5.i.i.i.i: ; preds = %100, %98 | |
| %.024.i.i = phi float [ %99, %98 ], [ %101, %100 ], !dbg !544 | |
| %102 = fsub float %76, %80, !dbg !544 | |
| %103 = fmul float %102, 2.000000e+00, !dbg !544 | |
| %104 = fneg float %80, !dbg !544 | |
| %105 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not66 = icmp eq i32 %105, 0, !dbg !544 | |
| br i1 %.not66, label %108, label %106, !dbg !544 | |
| 106: ; preds = %__nv_fmul_rn.exit5.i.i.i.i | |
| %107 = call float @llvm.nvvm.fma.rn.ftz.f(float %104, float %76, float %103) #16, !dbg !544 | |
| br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !544 | |
| 108: ; preds = %__nv_fmul_rn.exit5.i.i.i.i | |
| %109 = call float @llvm.fma.f32(float %104, float %76, float %103) #14, !dbg !544 | |
| br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !544 | |
| __nv_fmaf_rn.exit.i.i.i.i: ; preds = %108, %106 | |
| %.025.i.i = phi float [ %107, %106 ], [ %109, %108 ], !dbg !544 | |
| %110 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not67 = icmp eq i32 %110, 0, !dbg !544 | |
| br i1 %.not67, label %113, label %111, !dbg !544 | |
| 111: ; preds = %__nv_fmaf_rn.exit.i.i.i.i | |
| %112 = call float @llvm.nvvm.mul.rn.ftz.f(float %78, float %.025.i.i) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !544 | |
| 113: ; preds = %__nv_fmaf_rn.exit.i.i.i.i | |
| %114 = fmul float %78, %.025.i.i, !dbg !544 | |
| br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit6.i.i.i.i: ; preds = %113, %111 | |
| %.026.i.i = phi float [ %112, %111 ], [ %114, %113 ], !dbg !544 | |
| %115 = fadd float %80, %.024.i.i, !dbg !544 | |
| %116 = fsub float %80, %115, !dbg !544 | |
| %117 = fadd float %.024.i.i, %116, !dbg !544 | |
| %118 = fadd float %117, %.026.i.i, !dbg !544 | |
| %119 = fadd float %115, %118, !dbg !544 | |
| %120 = fsub float %115, %119, !dbg !544 | |
| %121 = fadd float %118, %120, !dbg !544 | |
| %122 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not68 = icmp eq i32 %122, 0, !dbg !544 | |
| br i1 %.not68, label %125, label %123, !dbg !544 | |
| 123: ; preds = %__nv_fmul_rn.exit6.i.i.i.i | |
| %124 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3FE62E4000000000) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !544 | |
| 125: ; preds = %__nv_fmul_rn.exit6.i.i.i.i | |
| %126 = fmul float %expo.i.i.2.i.i, 0x3FE62E4000000000, !dbg !544 | |
| br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit2.i.i.i.i: ; preds = %125, %123 | |
| %.019.i.i = phi float [ %124, %123 ], [ %126, %125 ], !dbg !544 | |
| %127 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not69 = icmp eq i32 %127, 0, !dbg !544 | |
| br i1 %.not69, label %130, label %128, !dbg !544 | |
| 128: ; preds = %__nv_fmul_rn.exit2.i.i.i.i | |
| %129 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3EB7F7D1C0000000) #16, !dbg !544 | |
| br label %__internal_log_ep.exit.i.i.i, !dbg !544 | |
| 130: ; preds = %__nv_fmul_rn.exit2.i.i.i.i | |
| %131 = fmul float %expo.i.i.2.i.i, 0x3EB7F7D1C0000000, !dbg !544 | |
| br label %__internal_log_ep.exit.i.i.i, !dbg !544 | |
| __internal_log_ep.exit.i.i.i: ; preds = %130, %128 | |
| %.018.i.i = phi float [ %129, %128 ], [ %131, %130 ], !dbg !544 | |
| %132 = fadd float %119, %.019.i.i, !dbg !544 | |
| %133 = fsub float %.019.i.i, %132, !dbg !544 | |
| %134 = fadd float %119, %133, !dbg !544 | |
| %135 = fadd float %121, %134, !dbg !544 | |
| %136 = fadd float %135, %.018.i.i, !dbg !544 | |
| %137 = fadd float %132, %136, !dbg !544 | |
| %138 = fsub float %132, %137, !dbg !544 | |
| %139 = fadd float %136, %138, !dbg !544 | |
| %140 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not70 = icmp eq i32 %140, 0, !dbg !544 | |
| br i1 %.not70, label %__nv_fabsf.exit.i6.i.i, label %141, !dbg !544 | |
| 141: ; preds = %__internal_log_ep.exit.i.i.i | |
| %142 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544 | |
| br label %__nv_fabsf.exit.i6.i.i, !dbg !544 | |
| __nv_fabsf.exit.i6.i.i: ; preds = %141, %__internal_log_ep.exit.i.i.i | |
| %.027.i.i = phi float [ %142, %141 ], [ 4.000000e+00, %__internal_log_ep.exit.i.i.i ], !dbg !544 | |
| %143 = fcmp ogt float %.027.i.i, 0x46FED09BE0000000, !dbg !544 | |
| %.013.i.i = select i1 %143, float 0x3F40000000000000, float 4.000000e+00, !dbg !544 | |
| %144 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not71 = icmp eq i32 %144, 0, !dbg !544 | |
| br i1 %.not71, label %147, label %145, !dbg !544 | |
| 145: ; preds = %__nv_fabsf.exit.i6.i.i | |
| %146 = call float @llvm.nvvm.mul.rn.ftz.f(float noundef %.013.i.i, float %137) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !544 | |
| 147: ; preds = %__nv_fabsf.exit.i6.i.i | |
| %148 = fmul float %137, %.013.i.i, !dbg !544 | |
| br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit.i.i.i.i: ; preds = %147, %145 | |
| %.028.i.i = phi float [ %146, %145 ], [ %148, %147 ], !dbg !544 | |
| %149 = fneg float %.028.i.i, !dbg !544 | |
| %150 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not72 = icmp eq i32 %150, 0, !dbg !544 | |
| br i1 %.not72, label %153, label %151, !dbg !544 | |
| 151: ; preds = %__nv_fmul_rn.exit.i.i.i.i | |
| %152 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %137, float %149) #16, !dbg !544 | |
| br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !544 | |
| 153: ; preds = %__nv_fmul_rn.exit.i.i.i.i | |
| %154 = call float @llvm.fma.f32(float noundef %.013.i.i, float %137, float %149) #14, !dbg !544 | |
| br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !544 | |
| __nv_fmaf_rn.exit.i3.i.i.i: ; preds = %153, %151 | |
| %.029.i.i = phi float [ %152, %151 ], [ %154, %153 ], !dbg !544 | |
| %155 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not73 = icmp eq i32 %155, 0, !dbg !544 | |
| br i1 %.not73, label %158, label %156, !dbg !544 | |
| 156: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i | |
| %157 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %139, float %.029.i.i) #16, !dbg !544 | |
| br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !544 | |
| 158: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i | |
| %159 = call float @llvm.fma.f32(float noundef %.013.i.i, float %139, float %.029.i.i) #14, !dbg !544 | |
| br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !544 | |
| __nv_fmaf_rn.exit1.i.i.i.i: ; preds = %158, %156 | |
| %.030.i.i = phi float [ %157, %156 ], [ %159, %158 ], !dbg !544 | |
| %160 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not74 = icmp eq i32 %160, 0, !dbg !544 | |
| br i1 %.not74, label %163, label %161, !dbg !544 | |
| 161: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i | |
| %162 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0.000000e+00, float %137, float %.030.i.i) #16, !dbg !544 | |
| br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !544 | |
| 163: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i | |
| %164 = call float @llvm.fma.f32(float %137, float noundef 0.000000e+00, float %.030.i.i) #14, !dbg !544 | |
| br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !544 | |
| __nv_fmaf_rn.exit2.i.i.i.i: ; preds = %163, %161 | |
| %.031.i.i = phi float [ %162, %161 ], [ %164, %163 ], !dbg !544 | |
| %165 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not75 = icmp eq i32 %165, 0, !dbg !544 | |
| br i1 %.not75, label %168, label %166, !dbg !544 | |
| 166: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i | |
| %167 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %.031.i.i) #16, !dbg !544 | |
| br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !544 | |
| 168: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i | |
| %169 = fadd float %.028.i.i, %.031.i.i, !dbg !544 | |
| br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !544 | |
| __nv_fadd_rn.exit.i.i.i.i: ; preds = %168, %166 | |
| %.032.i.i = phi float [ %167, %166 ], [ %169, %168 ], !dbg !544 | |
| %170 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not76 = icmp eq i32 %170, 0, !dbg !544 | |
| br i1 %.not76, label %174, label %171, !dbg !544 | |
| 171: ; preds = %__nv_fadd_rn.exit.i.i.i.i | |
| %172 = fneg float %.032.i.i, !dbg !544 | |
| %173 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %172) #16, !dbg !544 | |
| br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !544 | |
| 174: ; preds = %__nv_fadd_rn.exit.i.i.i.i | |
| %175 = fsub float %.028.i.i, %.032.i.i, !dbg !544 | |
| br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !544 | |
| __nv_fadd_rn.exit3.i.i.i.i: ; preds = %174, %171 | |
| %.033.i.i = phi float [ %173, %171 ], [ %175, %174 ], !dbg !544 | |
| %176 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not77 = icmp eq i32 %176, 0, !dbg !544 | |
| br i1 %.not77, label %179, label %177, !dbg !544 | |
| 177: ; preds = %__nv_fadd_rn.exit3.i.i.i.i | |
| %178 = call float @llvm.nvvm.add.rn.ftz.f(float %.033.i.i, float %.031.i.i) #16, !dbg !544 | |
| br label %__internal_dsmul.exit.i.i.i, !dbg !544 | |
| 179: ; preds = %__nv_fadd_rn.exit3.i.i.i.i | |
| %180 = fadd float %.031.i.i, %.033.i.i, !dbg !544 | |
| br label %__internal_dsmul.exit.i.i.i, !dbg !544 | |
| __internal_dsmul.exit.i.i.i: ; preds = %179, %177 | |
| %.034.i.i = phi float [ %178, %177 ], [ %180, %179 ], !dbg !544 | |
| %181 = bitcast float %.032.i.i to i32, !dbg !544 | |
| %182 = icmp eq i32 %181, 1118925336, !dbg !544 | |
| %183 = add i32 %181, -1, !dbg !544 | |
| %184 = bitcast i32 %183 to float, !dbg !544 | |
| %185 = fadd float %.034.i.i, 0x3EE0000000000000, !dbg !544 | |
| %prod.i.044.0.i.i = select i1 %182, float %185, float %.034.i.i, !dbg !544 | |
| %prod.i.145.0.i.i = select i1 %182, float %184, float %.032.i.i, !dbg !544 | |
| %186 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not78 = icmp eq i32 %186, 0, !dbg !544 | |
| br i1 %.not78, label %189, label %187, !dbg !544 | |
| 187: ; preds = %__internal_dsmul.exit.i.i.i | |
| %188 = call float @llvm.nvvm.mul.rn.ftz.f(float %prod.i.145.0.i.i, float noundef 0x3FF7154760000000) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !544 | |
| 189: ; preds = %__internal_dsmul.exit.i.i.i | |
| %190 = fmul float %prod.i.145.0.i.i, 0x3FF7154760000000, !dbg !544 | |
| br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit.i10.i.i.i: ; preds = %189, %187 | |
| %.017.i.i = phi float [ %188, %187 ], [ %190, %189 ], !dbg !544 | |
| %191 = call float @llvm.trunc.f32(float %.017.i.i) #14, !dbg !544 | |
| %192 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not79 = icmp eq i32 %192, 0, !dbg !544 | |
| br i1 %.not79, label %195, label %193, !dbg !544 | |
| 193: ; preds = %__nv_fmul_rn.exit.i10.i.i.i | |
| %194 = call float @llvm.nvvm.fabs.ftz.f(float %191) #16, !dbg !544 | |
| br label %__nv_fabsf.exit.i.i.i.i, !dbg !544 | |
| 195: ; preds = %__nv_fmul_rn.exit.i10.i.i.i | |
| %196 = call float @llvm.fabs.f32(float %191) #14, !dbg !544 | |
| br label %__nv_fabsf.exit.i.i.i.i, !dbg !544 | |
| __nv_fabsf.exit.i.i.i.i: ; preds = %195, %193 | |
| %.023.i.i = phi float [ %194, %193 ], [ %196, %195 ], !dbg !544 | |
| %197 = fcmp ogt float %.023.i.i, 1.260000e+02, !dbg !544 | |
| %198 = bitcast float %191 to i32, !dbg !544 | |
| %199 = and i32 %198, -2147483648, !dbg !544 | |
| %200 = or i32 %199, 1123811328, !dbg !544 | |
| %201 = bitcast i32 %200 to float, !dbg !544 | |
| %j.i.i.0.i.i = select i1 %197, float %201, float %191, !dbg !544 | |
| %202 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not80 = icmp eq i32 %202, 0, !dbg !544 | |
| br i1 %.not80, label %205, label %203, !dbg !544 | |
| 203: ; preds = %__nv_fabsf.exit.i.i.i.i | |
| %204 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #16, !dbg !544 | |
| br label %__internal_fmad.exit4.i.i.i.i, !dbg !544 | |
| 205: ; preds = %__nv_fabsf.exit.i.i.i.i | |
| %206 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #14, !dbg !544 | |
| br label %__internal_fmad.exit4.i.i.i.i, !dbg !544 | |
| __internal_fmad.exit4.i.i.i.i: ; preds = %205, %203 | |
| %.035.i.i = phi float [ %204, %203 ], [ %206, %205 ], !dbg !544 | |
| %207 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not81 = icmp eq i32 %207, 0, !dbg !544 | |
| br i1 %.not81, label %210, label %208, !dbg !544 | |
| 208: ; preds = %__internal_fmad.exit4.i.i.i.i | |
| %209 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #16, !dbg !544 | |
| br label %__internal_expf_kernel.exit.i.i.i, !dbg !544 | |
| 210: ; preds = %__internal_fmad.exit4.i.i.i.i | |
| %211 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #14, !dbg !544 | |
| br label %__internal_expf_kernel.exit.i.i.i, !dbg !544 | |
| __internal_expf_kernel.exit.i.i.i: ; preds = %210, %208 | |
| %.036.i.i = phi float [ %209, %208 ], [ %211, %210 ], !dbg !544 | |
| %212 = fmul float %.036.i.i, 0x3FF7154760000000, !dbg !544 | |
| %213 = fadd float %j.i.i.0.i.i, 0x4168000FE0000000, !dbg !544 | |
| %214 = bitcast float %213 to i32, !dbg !544 | |
| %215 = shl i32 %214, 23, !dbg !544 | |
| %216 = bitcast i32 %215 to float, !dbg !544 | |
| %217 = call float @llvm.nvvm.ex2.approx.ftz.f(float %212) #16, !dbg !544 | |
| %218 = fmul float %217, %216, !dbg !544 | |
| %219 = fcmp une float %218, 0x7FF0000000000000, !dbg !544 | |
| br i1 %219, label %220, label %__internal_accurate_powf.exit.i.i, !dbg !544 | |
| 220: ; preds = %__internal_expf_kernel.exit.i.i.i | |
| %221 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not94 = icmp eq i32 %221, 0, !dbg !544 | |
| br i1 %.not94, label %224, label %222, !dbg !544 | |
| 222: ; preds = %220 | |
| %223 = call float @llvm.nvvm.fma.rn.ftz.f(float %218, float %prod.i.044.0.i.i, float %218) #16, !dbg !544 | |
| br label %__internal_accurate_powf.exit.i.i, !dbg !544 | |
| 224: ; preds = %220 | |
| %225 = call float @llvm.fma.f32(float %218, float %prod.i.044.0.i.i, float %218) #14, !dbg !544 | |
| br label %__internal_accurate_powf.exit.i.i, !dbg !544 | |
| __internal_accurate_powf.exit.i.i: ; preds = %224, %222, %__internal_expf_kernel.exit.i.i.i | |
| %t.i.0.i.i = phi float [ 0x7FF0000000000000, %__internal_expf_kernel.exit.i.i.i ], [ %223, %222 ], [ %225, %224 ], !dbg !544 | |
| %226 = fcmp olt float %51, 0.000000e+00, !dbg !544 | |
| %227 = and i1 %226, %56, !dbg !544 | |
| %228 = bitcast float %t.i.0.i.i to i32, !dbg !544 | |
| %229 = xor i32 %228, -2147483648, !dbg !544 | |
| %230 = bitcast i32 %229 to float, !dbg !544 | |
| %.010.i.i = select i1 %227, float %230, float %t.i.0.i.i, !dbg !544 | |
| %231 = fcmp oeq float %51, 0.000000e+00, !dbg !544 | |
| %232 = fadd float %51, %51, !dbg !544 | |
| %233 = select i1 %56, float %232, float 0.000000e+00, !dbg !544 | |
| %.212.i.i = select i1 %231, float %233, float %.010.i.i, !dbg !544 | |
| %234 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not = icmp eq i32 %234, 0, !dbg !544 | |
| br i1 %.not, label %237, label %235, !dbg !544 | |
| 235: ; preds = %__internal_accurate_powf.exit.i.i | |
| %236 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544 | |
| br label %__nv_fabsf.exit.i.i.i, !dbg !544 | |
| 237: ; preds = %__internal_accurate_powf.exit.i.i | |
| %238 = call float @llvm.fabs.f32(float %51) #14, !dbg !544 | |
| br label %__nv_fabsf.exit.i.i.i, !dbg !544 | |
| __nv_fabsf.exit.i.i.i: ; preds = %237, %235 | |
| %.01.i.i = phi float [ %236, %235 ], [ %238, %237 ], !dbg !544 | |
| %239 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not82 = icmp eq i32 %239, 0, !dbg !544 | |
| br i1 %.not82, label %__nv_fabsf.exit1.i.i.i, label %240, !dbg !544 | |
| 240: ; preds = %__nv_fabsf.exit.i.i.i | |
| %241 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544 | |
| br label %__nv_fabsf.exit1.i.i.i, !dbg !544 | |
| __nv_fabsf.exit1.i.i.i: ; preds = %240, %__nv_fabsf.exit.i.i.i | |
| %.02.i.i = phi float [ %241, %240 ], [ 4.000000e+00, %__nv_fabsf.exit.i.i.i ], !dbg !544 | |
| %242 = fadd float %.01.i.i, %.02.i.i, !dbg !544 | |
| %243 = bitcast float %242 to i32, !dbg !544 | |
| %244 = icmp sgt i32 %243, 2139095039, !dbg !544 | |
| br i1 %244, label %245, label %__internal_powf_infinite_cases.exit.i.i, !dbg !544 | |
| 245: ; preds = %__nv_fabsf.exit1.i.i.i | |
| %246 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not88 = icmp eq i32 %246, 0, !dbg !544 | |
| br i1 %.not88, label %249, label %247, !dbg !544 | |
| 247: ; preds = %245 | |
| %248 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544 | |
| br label %__nv_isnanf.exit.i.i.i, !dbg !544 | |
| 249: ; preds = %245 | |
| %250 = call float @llvm.fabs.f32(float %51) #14, !dbg !544 | |
| br label %__nv_isnanf.exit.i.i.i, !dbg !544 | |
| __nv_isnanf.exit.i.i.i: ; preds = %249, %247 | |
| %.06.i.i = phi float [ %248, %247 ], [ %250, %249 ], !dbg !544 | |
| %251 = fcmp ugt float %.06.i.i, 0x7FF0000000000000, !dbg !544 | |
| br i1 %251, label %257, label %252, !dbg !544 | |
| 252: ; preds = %__nv_isnanf.exit.i.i.i | |
| %253 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not89 = icmp eq i32 %253, 0, !dbg !544 | |
| br i1 %.not89, label %__nv_isnanf.exit11.i.i.i, label %254, !dbg !544 | |
| 254: ; preds = %252 | |
| %255 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544 | |
| br label %__nv_isnanf.exit11.i.i.i, !dbg !544 | |
| __nv_isnanf.exit11.i.i.i: ; preds = %254, %252 | |
| %.07.i.i = phi float [ %255, %254 ], [ 4.000000e+00, %252 ], !dbg !544 | |
| %256 = fcmp ugt float %.07.i.i, 0x7FF0000000000000, !dbg !544 | |
| br i1 %256, label %257, label %259, !dbg !544 | |
| 257: ; preds = %__nv_isnanf.exit11.i.i.i, %__nv_isnanf.exit.i.i.i | |
| %258 = fadd float %51, 4.000000e+00, !dbg !544 | |
| br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544 | |
| 259: ; preds = %__nv_isnanf.exit11.i.i.i | |
| %260 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not90 = icmp eq i32 %260, 0, !dbg !544 | |
| br i1 %.not90, label %__nv_isinff.exit8.i.i.i, label %261, !dbg !544 | |
| 261: ; preds = %259 | |
| %262 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544 | |
| br label %__nv_isinff.exit8.i.i.i, !dbg !544 | |
| __nv_isinff.exit8.i.i.i: ; preds = %261, %259 | |
| %.05.i.i = phi float [ %262, %261 ], [ 4.000000e+00, %259 ], !dbg !544 | |
| %263 = fcmp oeq float %.05.i.i, 0x7FF0000000000000, !dbg !544 | |
| br i1 %263, label %264, label %274, !dbg !544 | |
| 264: ; preds = %__nv_isinff.exit8.i.i.i | |
| %265 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not93 = icmp eq i32 %265, 0, !dbg !544 | |
| br i1 %.not93, label %268, label %266, !dbg !544 | |
| 266: ; preds = %264 | |
| %267 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544 | |
| br label %__nv_fabsf.exit5.i.i.i, !dbg !544 | |
| 268: ; preds = %264 | |
| %269 = call float @llvm.fabs.f32(float %51) #14, !dbg !544 | |
| br label %__nv_fabsf.exit5.i.i.i, !dbg !544 | |
| __nv_fabsf.exit5.i.i.i: ; preds = %268, %266 | |
| %.04.i.i = phi float [ %267, %266 ], [ %269, %268 ], !dbg !544 | |
| %270 = fcmp ogt float %.04.i.i, 1.000000e+00, !dbg !544 | |
| %271 = fcmp oeq float %51, -1.000000e+00, !dbg !544 | |
| %272 = select i1 %270, float 0x7FF0000000000000, float 0.000000e+00, !dbg !544 | |
| %273 = select i1 %271, float 1.000000e+00, float %272, !dbg !544 | |
| br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544 | |
| 274: ; preds = %__nv_isinff.exit8.i.i.i | |
| %275 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not91 = icmp eq i32 %275, 0, !dbg !544 | |
| br i1 %.not91, label %278, label %276, !dbg !544 | |
| 276: ; preds = %274 | |
| %277 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544 | |
| br label %__nv_isinff.exit.i.i.i, !dbg !544 | |
| 278: ; preds = %274 | |
| %279 = call float @llvm.fabs.f32(float %51) #14, !dbg !544 | |
| br label %__nv_isinff.exit.i.i.i, !dbg !544 | |
| __nv_isinff.exit.i.i.i: ; preds = %278, %276 | |
| %.03.i.i = phi float [ %277, %276 ], [ %279, %278 ], !dbg !544 | |
| %280 = fcmp oeq float %.03.i.i, 0x7FF0000000000000, !dbg !544 | |
| br i1 %280, label %281, label %__internal_powf_infinite_cases.exit.i.i, !dbg !544 | |
| 281: ; preds = %__nv_isinff.exit.i.i.i | |
| %282 = select i1 %227, float 0xFFF0000000000000, float 0x7FF0000000000000, !dbg !544 | |
| br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544 | |
| __internal_powf_infinite_cases.exit.i.i: ; preds = %281, %__nv_isinff.exit.i.i.i, %__nv_fabsf.exit5.i.i.i, %257, %__nv_fabsf.exit1.i.i.i | |
| %.3.i.i = phi float [ %.212.i.i, %__nv_fabsf.exit1.i.i.i ], [ %258, %257 ], [ %273, %__nv_fabsf.exit5.i.i.i ], [ %282, %281 ], [ %.212.i.i, %__nv_isinff.exit.i.i.i ], !dbg !544 | |
| %283 = fcmp oeq float %51, 1.000000e+00, !dbg !544 | |
| %t.0.i.i = select i1 %283, float 1.000000e+00, float %.3.i.i, !dbg !544 | |
| %284 = fadd float %value_phi13.i, %t.0.i.i, !dbg !547 | |
| %.not83 = icmp eq i64 %iv.next, %15, !dbg !548 | |
| %285 = add nuw i64 %iv.next, 1, !dbg !550 | |
| br i1 %.not83, label %L467.i.loopexit, label %L319.i, !dbg !551 | |
| L467.i.loopexit: ; preds = %__internal_powf_infinite_cases.exit.i.i | |
| br label %L467.i, !dbg !552 | |
| L467.i: ; preds = %L467.i.loopexit, %L302.i | |
| %value_phi17.i = phi float [ 0.000000e+00, %L302.i ], [ %284, %L467.i.loopexit ] | |
| %286 = fpext float %value_phi17.i to double, !dbg !552 | |
| %287 = call i32 @llvm.nvvm.d2i.hi(double %286) #16, !dbg !558 | |
| %288 = call i32 @llvm.nvvm.d2i.hi(double noundef 2.500000e-01) #16, !dbg !558 | |
| %289 = and i32 %288, 2146435072, !dbg !558 | |
| %290 = icmp eq i32 %289, 1072693248, !dbg !558 | |
| %291 = call double @llvm.fabs.f64(double %286) #14, !dbg !558 | |
| %292 = call fastcc double @__internal_accurate_pow(double %291) #16, !dbg !558 | |
| %293 = icmp slt i32 %287, 0, !dbg !558 | |
| %294 = and i1 %293, %290, !dbg !558 | |
| br i1 %294, label %295, label %300, !dbg !558 | |
| 295: ; preds = %L467.i | |
| %296 = call i32 @llvm.nvvm.d2i.hi(double %292) #16, !dbg !558 | |
| %297 = call i32 @llvm.nvvm.d2i.lo(double %292) #16, !dbg !558 | |
| %298 = xor i32 %296, -2147483648, !dbg !558 | |
| %299 = call double @llvm.nvvm.lohi.i2d(i32 %297, i32 %298) #16, !dbg !558 | |
| br label %300, !dbg !558 | |
| 300: ; preds = %295, %L467.i | |
| %t.0.i37.i = phi double [ %299, %295 ], [ %292, %L467.i ], !dbg !558 | |
| %301 = fcmp oeq float %value_phi17.i, 0.000000e+00, !dbg !558 | |
| br i1 %301, label %302, label %306, !dbg !558 | |
| 302: ; preds = %300 | |
| %spec.select = select i1 %290, i32 %287, i32 0, !dbg !558 | |
| %303 = icmp slt i32 %288, 0, !dbg !558 | |
| %304 = or i32 %spec.select, 2146435072, !dbg !558 | |
| %thi.1.i.i = select i1 %303, i32 %304, i32 %spec.select, !dbg !558 | |
| %305 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.1.i.i) #16, !dbg !558 | |
| br label %307, !dbg !558 | |
| 306: ; preds = %300 | |
| %spec.select97 = select i1 %293, double 0xFFF8000000000000, double %t.0.i37.i, !dbg !558 | |
| br label %307, !dbg !558 | |
| 307: ; preds = %306, %302 | |
| %t.2.i.i = phi double [ %305, %302 ], [ %spec.select97, %306 ], !dbg !558 | |
| %308 = fadd double %286, 2.500000e-01, !dbg !558 | |
| %309 = call i32 @llvm.nvvm.d2i.hi(double %308) #16, !dbg !558 | |
| %310 = and i32 %309, 2146435072, !dbg !558 | |
| %311 = icmp eq i32 %310, 2146435072, !dbg !558 | |
| br i1 %311, label %312, label %__nv_pow.exit.i, !dbg !558 | |
| 312: ; preds = %307 | |
| %313 = fcmp ugt double %291, 0x7FF0000000000000, !dbg !558 | |
| br i1 %313, label %__nv_pow.exit.i, label %314, !dbg !558 | |
| 314: ; preds = %312 | |
| %315 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %316 = icmp eq i32 %315, 200, !dbg !558 | |
| br i1 %316, label %.critedge, label %317, !dbg !558 | |
| 317: ; preds = %314 | |
| %318 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %319 = icmp eq i32 %318, 350, !dbg !558 | |
| br i1 %319, label %.critedge, label %320, !dbg !558 | |
| 320: ; preds = %317 | |
| %321 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %322 = icmp eq i32 %321, 370, !dbg !558 | |
| br i1 %322, label %.critedge, label %__nv_isinfd.exit4.i.i, !dbg !558 | |
| __nv_isinfd.exit4.i.i: ; preds = %320 | |
| %323 = call i32 @llvm.nvvm.d2i.lo(double noundef 2.500000e-01) #16, !dbg !558 | |
| %324 = and i32 %288, 2147483647, !dbg !558 | |
| %325 = icmp eq i32 %324, 2146435072, !dbg !558 | |
| %326 = icmp eq i32 %323, 0, !dbg !558 | |
| %327 = and i1 %326, %325, !dbg !558 | |
| br i1 %327, label %328, label %.critedge, !dbg !558 | |
| 328: ; preds = %__nv_isinfd.exit4.i.i | |
| %329 = fcmp ogt double %291, 1.000000e+00, !dbg !558 | |
| %thi.2.i.i = select i1 %329, i32 2146435072, i32 0, !dbg !558 | |
| %330 = icmp slt i32 %288, 0, !dbg !558 | |
| %331 = xor i32 %thi.2.i.i, 2146435072 | |
| %spec.select8 = select i1 %330, i32 %331, i32 %thi.2.i.i, !dbg !558 | |
| %332 = fcmp oeq float %value_phi17.i, -1.000000e+00, !dbg !558 | |
| %thi.4.i.i = select i1 %332, i32 1072693248, i32 %spec.select8, !dbg !558 | |
| %333 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.4.i.i) #16, !dbg !558 | |
| br label %__nv_pow.exit.i, !dbg !558 | |
| .critedge: ; preds = %__nv_isinfd.exit4.i.i, %320, %317, %314 | |
| %334 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %335 = icmp eq i32 %334, 200, !dbg !558 | |
| br i1 %335, label %342, label %336, !dbg !558 | |
| 336: ; preds = %.critedge | |
| %337 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %338 = icmp eq i32 %337, 350, !dbg !558 | |
| br i1 %338, label %342, label %339, !dbg !558 | |
| 339: ; preds = %336 | |
| %340 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %341 = icmp eq i32 %340, 370, !dbg !558 | |
| br i1 %341, label %342, label %344, !dbg !558 | |
| 342: ; preds = %339, %336, %.critedge | |
| %343 = fcmp oeq double %291, 0x7FF0000000000000, !dbg !558 | |
| br label %__nv_isinfd.exit.i.i, !dbg !558 | |
| 344: ; preds = %339 | |
| %345 = call i32 @llvm.nvvm.d2i.lo(double %286) #16, !dbg !558 | |
| %346 = and i32 %287, 2147483647, !dbg !558 | |
| %347 = icmp eq i32 %346, 2146435072, !dbg !558 | |
| %348 = icmp eq i32 %345, 0, !dbg !558 | |
| %349 = and i1 %348, %347, !dbg !558 | |
| br label %__nv_isinfd.exit.i.i, !dbg !558 | |
| __nv_isinfd.exit.i.i: ; preds = %344, %342 | |
| %.0.i39.i.in = phi i1 [ %343, %342 ], [ %349, %344 ] | |
| br i1 %.0.i39.i.in, label %350, label %__nv_pow.exit.i, !dbg !558 | |
| 350: ; preds = %__nv_isinfd.exit.i.i | |
| %.inv87 = icmp slt i32 %288, 0, !dbg !558 | |
| %spec.select9 = select i1 %.inv87, i32 0, i32 2146435072, !dbg !558 | |
| %351 = and i32 %288, 2147483647, !dbg !558 | |
| %352 = icmp ne i32 %351, 1071644672, !dbg !558 | |
| %or.cond11 = and i1 %352, %294, !dbg !558 | |
| %353 = or i32 %spec.select9, -2147483648, !dbg !558 | |
| %thi.6.i.i = select i1 %or.cond11, i32 %353, i32 %spec.select9, !dbg !558 | |
| %354 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.6.i.i) #16, !dbg !558 | |
| br label %__nv_pow.exit.i, !dbg !558 | |
| __nv_pow.exit.i: ; preds = %350, %__nv_isinfd.exit.i.i, %328, %312, %307 | |
| %t.6.i.i = phi double [ %t.2.i.i, %307 ], [ %333, %328 ], [ %354, %350 ], [ %t.2.i.i, %__nv_isinfd.exit.i.i ], [ %308, %312 ], !dbg !558 | |
| %355 = icmp sgt i64 %.fca.2.0.extract, 0, !dbg !559 | |
| %356 = select i1 %355, i64 %.fca.2.0.extract, i64 0, !dbg !559 | |
| %357 = load i64, i64* %13, align 8, !dbg !569, !tbaa !301 | |
| %358 = load i64, i64* %14, align 8, !dbg !573, !tbaa !301 | |
| %359 = add i64 %358, -1, !dbg !573 | |
| %360 = mul i64 %359, %356, !dbg !576 | |
| %361 = add i64 %360, %357, !dbg !577 | |
| %362 = icmp sgt i64 %.fca.3.extract, 0, !dbg !578 | |
| %363 = select i1 %362, i64 %.fca.3.extract, i64 0, !dbg !578 | |
| %364 = icmp slt i64 %361, 1, !dbg !587 | |
| %365 = icmp sgt i64 %361, %363, !dbg !587 | |
| %366 = or i1 %364, %365, !dbg !589 | |
| br i1 %366, label %L493.i, label %L491.i, !dbg !589 | |
| L491.i: ; preds = %__nv_pow.exit.i | |
| %367 = fcmp oeq float %value_phi17.i, 1.000000e+00, !dbg !558 | |
| %368 = fptrunc double %t.6.i.i to float, !dbg !590 | |
| %369 = select i1 %367, float 1.000000e+00, float %368, !dbg !590 | |
| %370 = add nsw i64 %361, -1, !dbg !592 | |
| %371 = getelementptr inbounds float, float addrspace(1)* %12, i64 %370, !dbg !599 | |
| store float %369, float addrspace(1)* %371, align 4, !dbg !599, !tbaa !435 | |
| br label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !600 | |
| L493.i: ; preds = %__nv_pow.exit.i | |
| %372 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5571() #15, !dbg !589 | |
| unreachable, !dbg !589 | |
| fail.i: ; preds = %entry | |
| call fastcc void @gpu_report_exception() #14, !dbg !501 | |
| call fastcc void @gpu_signal_exception() #14, !dbg !501 | |
| call void asm sideeffect "exit;", ""() #16, !dbg !501 | |
| unreachable, !dbg !501 | |
| pass.i: ; preds = %entry | |
| %373 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #14, !dbg !601, !range !127 | |
| %374 = udiv i64 %11, %.fca.1.0.0.0.0.extract, !dbg !501 | |
| %375 = mul i64 %374, %.fca.1.0.0.0.0.extract, !dbg !606 | |
| %376 = sub i64 %11, %375, !dbg !608 | |
| %377 = zext i32 %373 to i64, !dbg !499 | |
| %378 = lshr i64 %377, 8, !dbg !501 | |
| %379 = and i64 %377, 255, !dbg !608 | |
| %380 = add nuw nsw i64 %379, 1, !dbg !609 | |
| %381 = shl i64 %376, 8, !dbg !610 | |
| %382 = add i64 %380, %381, !dbg !614 | |
| %383 = add nuw nsw i64 %374, 1, !dbg !615 | |
| %384 = add nuw nsw i64 %383, %378, !dbg !614 | |
| %385 = icmp sgt i64 %382, 0, !dbg !618 | |
| %386 = icmp sle i64 %382, %.fca.0.0.0.0.extract, !dbg !618 | |
| %387 = and i1 %385, %386, !dbg !623 | |
| %388 = icmp sle i64 %384, %.fca.0.0.1.0.extract, !dbg !618 | |
| %389 = and i1 %388, %387, !dbg !624 | |
| br i1 %389, label %L302.i, label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !626 | |
| julia_gpu_dist_kernel__5526_inner.exit: ; preds = %pass.i, %L491.i | |
| call void @llvm.lifetime.end.p0i8(i64 noundef 16, i8* noundef nonnull align 8 dereferenceable(16) %7) #14, !dbg !627 | |
| ret void, !dbg !475 | |
| } | |
| ; Function Attrs: willreturn mustprogress | |
| define void @preprocess_julia_gpu_dist_kernel__5526_inner19({ [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, { i8 addrspace(1)*, i64, [2 x i64], i64 } %3) local_unnamed_addr #12 !dbg !474 { | |
| entry: | |
| %4 = alloca [2 x i64], align 8 | |
| %5 = alloca { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, align 8, !dbg !475 | |
| %6 = addrspacecast { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5 to { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)*, !dbg !475 | |
| %.fca.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 0, 0, !dbg !475 | |
| %.fca.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 0, i64 0, !dbg !475 | |
| store i64 %.fca.0.0.0.0.extract, i64* %.fca.0.0.0.0.gep, align 8, !dbg !475 | |
| %.fca.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 1, 0, !dbg !475 | |
| %.fca.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 1, i64 0, !dbg !475 | |
| store i64 %.fca.0.0.1.0.extract, i64* %.fca.0.0.1.0.gep, align 8, !dbg !475 | |
| %.fca.1.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 0, 0, !dbg !475 | |
| %.fca.1.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 0, i64 0, !dbg !475 | |
| store i64 %.fca.1.0.0.0.0.extract, i64* %.fca.1.0.0.0.0.gep, align 8, !dbg !475 | |
| %.fca.1.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 1, 0, !dbg !475 | |
| %.fca.1.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 1, i64 0, !dbg !475 | |
| store i64 %.fca.1.0.0.1.0.extract, i64* %.fca.1.0.0.1.0.gep, align 8, !dbg !475 | |
| %.fca.0.extract30 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 0, !dbg !475 | |
| %.fca.1.extract32 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 1, !dbg !475 | |
| %.fca.2.0.extract34 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 0, !dbg !475 | |
| %.fca.2.1.extract36 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 1, !dbg !475 | |
| %.fca.3.extract38 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 3, !dbg !475 | |
| %.fca.0.extract12 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 0, !dbg !475 | |
| %.fca.1.extract14 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 1, !dbg !475 | |
| %.fca.2.0.extract16 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 0, !dbg !475 | |
| %.fca.2.1.extract18 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 1, !dbg !475 | |
| %.fca.3.extract20 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 3, !dbg !475 | |
| %.fca.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 0, !dbg !475 | |
| %.fca.1.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 1, !dbg !475 | |
| %.fca.2.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 2, 0, !dbg !475 | |
| %.fca.2.1.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 2, 1, !dbg !475 | |
| %.fca.3.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 3, !dbg !475 | |
| %7 = bitcast [2 x i64]* %4 to i8* | |
| call void @llvm.lifetime.start.p0i8(i64 noundef 16, i8* noundef nonnull align 8 dereferenceable(16) %7) #14 | |
| %8 = call {}*** @julia.get_pgcstack() #14 | |
| %9 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #14, !dbg !476, !range !65 | |
| %10 = icmp sgt i64 %.fca.1.0.0.0.0.extract, 0, !dbg !485 | |
| %11 = zext i32 %9 to i64, !dbg !499 | |
| %12 = bitcast i8 addrspace(1)* %.fca.0.extract to float addrspace(1)*, !dbg !501 | |
| br i1 %10, label %pass.i, label %fail.i, !dbg !501 | |
| L302.i: ; preds = %pass.i | |
| call fastcc void @julia___index_Global_NTuple_5574([2 x i64]* noalias nocapture noundef nonnull writeonly sret([2 x i64]) align 8 dereferenceable(16) %4, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* nocapture noundef nonnull readonly align 8 dereferenceable(32) %6) #12, !dbg !504 | |
| %13 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 0, !dbg !506 | |
| %14 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 1, !dbg !509 | |
| %.inv = icmp sgt i64 %.fca.2.0.extract34, 0, !dbg !510 | |
| %15 = select i1 %.inv, i64 %.fca.2.0.extract34, i64 0, !dbg !510 | |
| br i1 %.inv, label %L319.i.preheader, label %L467.i, !dbg !514 | |
| L319.i.preheader: ; preds = %L302.i | |
| %16 = load i64, i64* %13, align 8, !tbaa !301 | |
| %17 = icmp sgt i64 %.fca.2.1.extract36, 0 | |
| %18 = select i1 %17, i64 %.fca.2.1.extract36, i64 0 | |
| %19 = icmp sgt i64 %16, 0 | |
| %20 = icmp sle i64 %16, %18 | |
| %21 = and i1 %19, %20 | |
| %22 = add i64 %16, -1 | |
| %23 = mul i64 %22, %15 | |
| %24 = load i64, i64* %14, align 8 | |
| %25 = icmp sgt i64 %.fca.2.0.extract16, 0 | |
| %26 = select i1 %25, i64 %.fca.2.0.extract16, i64 0 | |
| %27 = icmp sgt i64 %.fca.2.1.extract18, 0 | |
| %28 = select i1 %27, i64 %.fca.2.1.extract18, i64 0 | |
| %29 = icmp sgt i64 %24, 0 | |
| %30 = icmp sle i64 %24, %28 | |
| %31 = and i1 %29, %30 | |
| %32 = add i64 %24, -1 | |
| %33 = mul i64 %32, %26 | |
| br label %L319.i, !dbg !515 | |
| L319.i: ; preds = %__internal_powf_infinite_cases.exit.i.i, %L319.i.preheader | |
| %iv = phi i64 [ %iv.next, %__internal_powf_infinite_cases.exit.i.i ], [ 0, %L319.i.preheader ] | |
| %value_phi13.i = phi float [ %284, %__internal_powf_infinite_cases.exit.i.i ], [ 0.000000e+00, %L319.i.preheader ] | |
| %iv.next = add nuw nsw i64 %iv, 1, !dbg !519 | |
| %34 = icmp ule i64 %iv.next, %15, !dbg !519 | |
| %35 = and i1 %34, %21, !dbg !524 | |
| br i1 %35, label %L340.i, label %L342.i, !dbg !515 | |
| L340.i: ; preds = %L319.i | |
| %36 = add i64 %23, %iv.next, !dbg !525 | |
| %37 = shl i64 %36, 2, !dbg !532 | |
| %38 = add i64 %37, -4, !dbg !532 | |
| %39 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract30, i64 %38, !dbg !537 | |
| %40 = bitcast i8 addrspace(1)* %39 to float addrspace(1)*, !dbg !538 | |
| %41 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %40, i32 noundef 4) #14, !dbg !538 | |
| %42 = icmp ule i64 %iv.next, %26, !dbg !519 | |
| %43 = and i1 %42, %31, !dbg !524 | |
| br i1 %43, label %L405.i, label %L407.i, !dbg !515 | |
| L342.i: ; preds = %L319.i | |
| %44 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !515 | |
| unreachable, !dbg !515 | |
| L405.i: ; preds = %L340.i | |
| %45 = add i64 %33, %iv.next, !dbg !525 | |
| %46 = shl i64 %45, 2, !dbg !532 | |
| %47 = add i64 %46, -4, !dbg !532 | |
| %48 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract12, i64 %47, !dbg !537 | |
| %49 = bitcast i8 addrspace(1)* %48 to float addrspace(1)*, !dbg !538 | |
| %50 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %49, i32 noundef 4) #14, !dbg !538 | |
| %51 = fsub float %41, %50, !dbg !543 | |
| %52 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not59 = icmp eq i32 %52, 0, !dbg !544 | |
| br i1 %.not59, label %__nv_fabsf.exit.i.i, label %54, !dbg !544 | |
| L407.i: ; preds = %L340.i | |
| %53 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !515 | |
| unreachable, !dbg !515 | |
| 54: ; preds = %L405.i | |
| %55 = call float @llvm.nvvm.fabs.ftz.f(float noundef 0.000000e+00) #16, !dbg !544 | |
| br label %__nv_fabsf.exit.i.i, !dbg !544 | |
| __nv_fabsf.exit.i.i: ; preds = %54, %L405.i | |
| %.08.i.i = phi float [ %55, %54 ], [ 0.000000e+00, %L405.i ], !dbg !544 | |
| %56 = fcmp oeq float %.08.i.i, 1.000000e+00, !dbg !544 | |
| %57 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not60 = icmp eq i32 %57, 0, !dbg !544 | |
| br i1 %.not60, label %60, label %58, !dbg !544 | |
| 58: ; preds = %__nv_fabsf.exit.i.i | |
| %59 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544 | |
| br label %__nv_fabsf.exit1.i.i, !dbg !544 | |
| 60: ; preds = %__nv_fabsf.exit.i.i | |
| %61 = call float @llvm.fabs.f32(float %51) #14, !dbg !544 | |
| br label %__nv_fabsf.exit1.i.i, !dbg !544 | |
| __nv_fabsf.exit1.i.i: ; preds = %60, %58 | |
| %.09.i.i = phi float [ %59, %58 ], [ %61, %60 ], !dbg !544 | |
| %62 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not61 = icmp eq i32 %62, 0, !dbg !544 | |
| %63 = fcmp olt float %.09.i.i, 0x3810000000000000, !dbg !544 | |
| %64 = fmul float %.09.i.i, 0x4170000000000000, !dbg !544 | |
| %65 = and i1 %63, %.not61, !dbg !544 | |
| %.116.i.i = select i1 %65, float %64, float %.09.i.i, !dbg !544 | |
| %expo.i.i.1.i.i = select i1 %65, float -1.510000e+02, float -1.270000e+02, !dbg !544 | |
| %66 = bitcast float %.116.i.i to i32, !dbg !544 | |
| %67 = and i32 %66, 8388607, !dbg !544 | |
| %68 = or i32 %67, 1065353216, !dbg !544 | |
| %69 = bitcast i32 %68 to float, !dbg !544 | |
| %70 = lshr i32 %66, 23, !dbg !544 | |
| %71 = uitofp i32 %70 to float, !dbg !544 | |
| %72 = fadd float %expo.i.i.1.i.i, %71, !dbg !544 | |
| %73 = fcmp ogt float %69, 0x3FF6A09E60000000, !dbg !544 | |
| %74 = fmul float %69, 5.000000e-01, !dbg !544 | |
| %75 = fadd float %72, 1.000000e+00, !dbg !544 | |
| %expo.i.i.2.i.i = select i1 %73, float %75, float %72, !dbg !544 | |
| %m.i.i.0.i.i = select i1 %73, float %74, float %69, !dbg !544 | |
| %76 = fadd float %m.i.i.0.i.i, -1.000000e+00, !dbg !544 | |
| %77 = fadd float %m.i.i.0.i.i, 1.000000e+00, !dbg !544 | |
| %78 = call float asm "rcp.approx.ftz.f32 $0,$1;", "=f,f"(float %77) #17, !dbg !544, !srcloc !362 | |
| %79 = fmul float %76, 2.000000e+00, !dbg !544 | |
| %80 = fmul float %78, %79, !dbg !544 | |
| %81 = fmul float %80, %80, !dbg !544 | |
| %82 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not62 = icmp eq i32 %82, 0, !dbg !544 | |
| br i1 %.not62, label %85, label %83, !dbg !544 | |
| 83: ; preds = %__nv_fabsf.exit1.i.i | |
| %84 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0x3F631E1FC0000000, float %81, float noundef 0x3F8995EC60000000) #16, !dbg !544 | |
| br label %__internal_fmad.exit.i.i.i.i, !dbg !544 | |
| 85: ; preds = %__nv_fabsf.exit1.i.i | |
| %86 = call float @llvm.fma.f32(float %81, float noundef 0x3F631E1FC0000000, float noundef 0x3F8995EC60000000) #14, !dbg !544 | |
| br label %__internal_fmad.exit.i.i.i.i, !dbg !544 | |
| __internal_fmad.exit.i.i.i.i: ; preds = %85, %83 | |
| %.020.i.i = phi float [ %84, %83 ], [ %86, %85 ], !dbg !544 | |
| %87 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not63 = icmp eq i32 %87, 0, !dbg !544 | |
| br i1 %.not63, label %90, label %88, !dbg !544 | |
| 88: ; preds = %__internal_fmad.exit.i.i.i.i | |
| %89 = call float @llvm.nvvm.fma.rn.ftz.f(float %.020.i.i, float %81, float noundef 0x3FB55557A0000000) #16, !dbg !544 | |
| br label %__internal_fmad.exit3.i.i.i.i, !dbg !544 | |
| 90: ; preds = %__internal_fmad.exit.i.i.i.i | |
| %91 = call float @llvm.fma.f32(float %.020.i.i, float %81, float noundef 0x3FB55557A0000000) #14, !dbg !544 | |
| br label %__internal_fmad.exit3.i.i.i.i, !dbg !544 | |
| __internal_fmad.exit3.i.i.i.i: ; preds = %90, %88 | |
| %.021.i.i = phi float [ %89, %88 ], [ %91, %90 ], !dbg !544 | |
| %92 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not64 = icmp eq i32 %92, 0, !dbg !544 | |
| br i1 %.not64, label %95, label %93, !dbg !544 | |
| 93: ; preds = %__internal_fmad.exit3.i.i.i.i | |
| %94 = call float @llvm.nvvm.mul.rn.ftz.f(float %.021.i.i, float %81) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !544 | |
| 95: ; preds = %__internal_fmad.exit3.i.i.i.i | |
| %96 = fmul float %81, %.021.i.i, !dbg !544 | |
| br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit4.i.i.i.i: ; preds = %95, %93 | |
| %.022.i.i = phi float [ %94, %93 ], [ %96, %95 ], !dbg !544 | |
| %97 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not65 = icmp eq i32 %97, 0, !dbg !544 | |
| br i1 %.not65, label %100, label %98, !dbg !544 | |
| 98: ; preds = %__nv_fmul_rn.exit4.i.i.i.i | |
| %99 = call float @llvm.nvvm.mul.rn.ftz.f(float %.022.i.i, float %80) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !544 | |
| 100: ; preds = %__nv_fmul_rn.exit4.i.i.i.i | |
| %101 = fmul float %80, %.022.i.i, !dbg !544 | |
| br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit5.i.i.i.i: ; preds = %100, %98 | |
| %.024.i.i = phi float [ %99, %98 ], [ %101, %100 ], !dbg !544 | |
| %102 = fsub float %76, %80, !dbg !544 | |
| %103 = fmul float %102, 2.000000e+00, !dbg !544 | |
| %104 = fneg float %80, !dbg !544 | |
| %105 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not66 = icmp eq i32 %105, 0, !dbg !544 | |
| br i1 %.not66, label %108, label %106, !dbg !544 | |
| 106: ; preds = %__nv_fmul_rn.exit5.i.i.i.i | |
| %107 = call float @llvm.nvvm.fma.rn.ftz.f(float %104, float %76, float %103) #16, !dbg !544 | |
| br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !544 | |
| 108: ; preds = %__nv_fmul_rn.exit5.i.i.i.i | |
| %109 = call float @llvm.fma.f32(float %104, float %76, float %103) #14, !dbg !544 | |
| br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !544 | |
| __nv_fmaf_rn.exit.i.i.i.i: ; preds = %108, %106 | |
| %.025.i.i = phi float [ %107, %106 ], [ %109, %108 ], !dbg !544 | |
| %110 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not67 = icmp eq i32 %110, 0, !dbg !544 | |
| br i1 %.not67, label %113, label %111, !dbg !544 | |
| 111: ; preds = %__nv_fmaf_rn.exit.i.i.i.i | |
| %112 = call float @llvm.nvvm.mul.rn.ftz.f(float %78, float %.025.i.i) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !544 | |
| 113: ; preds = %__nv_fmaf_rn.exit.i.i.i.i | |
| %114 = fmul float %78, %.025.i.i, !dbg !544 | |
| br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit6.i.i.i.i: ; preds = %113, %111 | |
| %.026.i.i = phi float [ %112, %111 ], [ %114, %113 ], !dbg !544 | |
| %115 = fadd float %80, %.024.i.i, !dbg !544 | |
| %116 = fsub float %80, %115, !dbg !544 | |
| %117 = fadd float %.024.i.i, %116, !dbg !544 | |
| %118 = fadd float %117, %.026.i.i, !dbg !544 | |
| %119 = fadd float %115, %118, !dbg !544 | |
| %120 = fsub float %115, %119, !dbg !544 | |
| %121 = fadd float %118, %120, !dbg !544 | |
| %122 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not68 = icmp eq i32 %122, 0, !dbg !544 | |
| br i1 %.not68, label %125, label %123, !dbg !544 | |
| 123: ; preds = %__nv_fmul_rn.exit6.i.i.i.i | |
| %124 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3FE62E4000000000) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !544 | |
| 125: ; preds = %__nv_fmul_rn.exit6.i.i.i.i | |
| %126 = fmul float %expo.i.i.2.i.i, 0x3FE62E4000000000, !dbg !544 | |
| br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit2.i.i.i.i: ; preds = %125, %123 | |
| %.019.i.i = phi float [ %124, %123 ], [ %126, %125 ], !dbg !544 | |
| %127 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not69 = icmp eq i32 %127, 0, !dbg !544 | |
| br i1 %.not69, label %130, label %128, !dbg !544 | |
| 128: ; preds = %__nv_fmul_rn.exit2.i.i.i.i | |
| %129 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3EB7F7D1C0000000) #16, !dbg !544 | |
| br label %__internal_log_ep.exit.i.i.i, !dbg !544 | |
| 130: ; preds = %__nv_fmul_rn.exit2.i.i.i.i | |
| %131 = fmul float %expo.i.i.2.i.i, 0x3EB7F7D1C0000000, !dbg !544 | |
| br label %__internal_log_ep.exit.i.i.i, !dbg !544 | |
| __internal_log_ep.exit.i.i.i: ; preds = %130, %128 | |
| %.018.i.i = phi float [ %129, %128 ], [ %131, %130 ], !dbg !544 | |
| %132 = fadd float %119, %.019.i.i, !dbg !544 | |
| %133 = fsub float %.019.i.i, %132, !dbg !544 | |
| %134 = fadd float %119, %133, !dbg !544 | |
| %135 = fadd float %121, %134, !dbg !544 | |
| %136 = fadd float %135, %.018.i.i, !dbg !544 | |
| %137 = fadd float %132, %136, !dbg !544 | |
| %138 = fsub float %132, %137, !dbg !544 | |
| %139 = fadd float %136, %138, !dbg !544 | |
| %140 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not70 = icmp eq i32 %140, 0, !dbg !544 | |
| br i1 %.not70, label %__nv_fabsf.exit.i6.i.i, label %141, !dbg !544 | |
| 141: ; preds = %__internal_log_ep.exit.i.i.i | |
| %142 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544 | |
| br label %__nv_fabsf.exit.i6.i.i, !dbg !544 | |
| __nv_fabsf.exit.i6.i.i: ; preds = %141, %__internal_log_ep.exit.i.i.i | |
| %.027.i.i = phi float [ %142, %141 ], [ 4.000000e+00, %__internal_log_ep.exit.i.i.i ], !dbg !544 | |
| %143 = fcmp ogt float %.027.i.i, 0x46FED09BE0000000, !dbg !544 | |
| %.013.i.i = select i1 %143, float 0x3F40000000000000, float 4.000000e+00, !dbg !544 | |
| %144 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not71 = icmp eq i32 %144, 0, !dbg !544 | |
| br i1 %.not71, label %147, label %145, !dbg !544 | |
| 145: ; preds = %__nv_fabsf.exit.i6.i.i | |
| %146 = call float @llvm.nvvm.mul.rn.ftz.f(float noundef %.013.i.i, float %137) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !544 | |
| 147: ; preds = %__nv_fabsf.exit.i6.i.i | |
| %148 = fmul float %137, %.013.i.i, !dbg !544 | |
| br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit.i.i.i.i: ; preds = %147, %145 | |
| %.028.i.i = phi float [ %146, %145 ], [ %148, %147 ], !dbg !544 | |
| %149 = fneg float %.028.i.i, !dbg !544 | |
| %150 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not72 = icmp eq i32 %150, 0, !dbg !544 | |
| br i1 %.not72, label %153, label %151, !dbg !544 | |
| 151: ; preds = %__nv_fmul_rn.exit.i.i.i.i | |
| %152 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %137, float %149) #16, !dbg !544 | |
| br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !544 | |
| 153: ; preds = %__nv_fmul_rn.exit.i.i.i.i | |
| %154 = call float @llvm.fma.f32(float noundef %.013.i.i, float %137, float %149) #14, !dbg !544 | |
| br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !544 | |
| __nv_fmaf_rn.exit.i3.i.i.i: ; preds = %153, %151 | |
| %.029.i.i = phi float [ %152, %151 ], [ %154, %153 ], !dbg !544 | |
| %155 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not73 = icmp eq i32 %155, 0, !dbg !544 | |
| br i1 %.not73, label %158, label %156, !dbg !544 | |
| 156: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i | |
| %157 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %139, float %.029.i.i) #16, !dbg !544 | |
| br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !544 | |
| 158: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i | |
| %159 = call float @llvm.fma.f32(float noundef %.013.i.i, float %139, float %.029.i.i) #14, !dbg !544 | |
| br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !544 | |
| __nv_fmaf_rn.exit1.i.i.i.i: ; preds = %158, %156 | |
| %.030.i.i = phi float [ %157, %156 ], [ %159, %158 ], !dbg !544 | |
| %160 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not74 = icmp eq i32 %160, 0, !dbg !544 | |
| br i1 %.not74, label %163, label %161, !dbg !544 | |
| 161: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i | |
| %162 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0.000000e+00, float %137, float %.030.i.i) #16, !dbg !544 | |
| br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !544 | |
| 163: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i | |
| %164 = call float @llvm.fma.f32(float %137, float noundef 0.000000e+00, float %.030.i.i) #14, !dbg !544 | |
| br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !544 | |
| __nv_fmaf_rn.exit2.i.i.i.i: ; preds = %163, %161 | |
| %.031.i.i = phi float [ %162, %161 ], [ %164, %163 ], !dbg !544 | |
| %165 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not75 = icmp eq i32 %165, 0, !dbg !544 | |
| br i1 %.not75, label %168, label %166, !dbg !544 | |
| 166: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i | |
| %167 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %.031.i.i) #16, !dbg !544 | |
| br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !544 | |
| 168: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i | |
| %169 = fadd float %.028.i.i, %.031.i.i, !dbg !544 | |
| br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !544 | |
| __nv_fadd_rn.exit.i.i.i.i: ; preds = %168, %166 | |
| %.032.i.i = phi float [ %167, %166 ], [ %169, %168 ], !dbg !544 | |
| %170 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not76 = icmp eq i32 %170, 0, !dbg !544 | |
| br i1 %.not76, label %174, label %171, !dbg !544 | |
| 171: ; preds = %__nv_fadd_rn.exit.i.i.i.i | |
| %172 = fneg float %.032.i.i, !dbg !544 | |
| %173 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %172) #16, !dbg !544 | |
| br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !544 | |
| 174: ; preds = %__nv_fadd_rn.exit.i.i.i.i | |
| %175 = fsub float %.028.i.i, %.032.i.i, !dbg !544 | |
| br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !544 | |
| __nv_fadd_rn.exit3.i.i.i.i: ; preds = %174, %171 | |
| %.033.i.i = phi float [ %173, %171 ], [ %175, %174 ], !dbg !544 | |
| %176 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not77 = icmp eq i32 %176, 0, !dbg !544 | |
| br i1 %.not77, label %179, label %177, !dbg !544 | |
| 177: ; preds = %__nv_fadd_rn.exit3.i.i.i.i | |
| %178 = call float @llvm.nvvm.add.rn.ftz.f(float %.033.i.i, float %.031.i.i) #16, !dbg !544 | |
| br label %__internal_dsmul.exit.i.i.i, !dbg !544 | |
| 179: ; preds = %__nv_fadd_rn.exit3.i.i.i.i | |
| %180 = fadd float %.031.i.i, %.033.i.i, !dbg !544 | |
| br label %__internal_dsmul.exit.i.i.i, !dbg !544 | |
| __internal_dsmul.exit.i.i.i: ; preds = %179, %177 | |
| %.034.i.i = phi float [ %178, %177 ], [ %180, %179 ], !dbg !544 | |
| %181 = bitcast float %.032.i.i to i32, !dbg !544 | |
| %182 = icmp eq i32 %181, 1118925336, !dbg !544 | |
| %183 = add i32 %181, -1, !dbg !544 | |
| %184 = bitcast i32 %183 to float, !dbg !544 | |
| %185 = fadd float %.034.i.i, 0x3EE0000000000000, !dbg !544 | |
| %prod.i.044.0.i.i = select i1 %182, float %185, float %.034.i.i, !dbg !544 | |
| %prod.i.145.0.i.i = select i1 %182, float %184, float %.032.i.i, !dbg !544 | |
| %186 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not78 = icmp eq i32 %186, 0, !dbg !544 | |
| br i1 %.not78, label %189, label %187, !dbg !544 | |
| 187: ; preds = %__internal_dsmul.exit.i.i.i | |
| %188 = call float @llvm.nvvm.mul.rn.ftz.f(float %prod.i.145.0.i.i, float noundef 0x3FF7154760000000) #16, !dbg !544 | |
| br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !544 | |
| 189: ; preds = %__internal_dsmul.exit.i.i.i | |
| %190 = fmul float %prod.i.145.0.i.i, 0x3FF7154760000000, !dbg !544 | |
| br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !544 | |
| __nv_fmul_rn.exit.i10.i.i.i: ; preds = %189, %187 | |
| %.017.i.i = phi float [ %188, %187 ], [ %190, %189 ], !dbg !544 | |
| %191 = call float @llvm.trunc.f32(float %.017.i.i) #14, !dbg !544 | |
| %192 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not79 = icmp eq i32 %192, 0, !dbg !544 | |
| br i1 %.not79, label %195, label %193, !dbg !544 | |
| 193: ; preds = %__nv_fmul_rn.exit.i10.i.i.i | |
| %194 = call float @llvm.nvvm.fabs.ftz.f(float %191) #16, !dbg !544 | |
| br label %__nv_fabsf.exit.i.i.i.i, !dbg !544 | |
| 195: ; preds = %__nv_fmul_rn.exit.i10.i.i.i | |
| %196 = call float @llvm.fabs.f32(float %191) #14, !dbg !544 | |
| br label %__nv_fabsf.exit.i.i.i.i, !dbg !544 | |
| __nv_fabsf.exit.i.i.i.i: ; preds = %195, %193 | |
| %.023.i.i = phi float [ %194, %193 ], [ %196, %195 ], !dbg !544 | |
| %197 = fcmp ogt float %.023.i.i, 1.260000e+02, !dbg !544 | |
| %198 = bitcast float %191 to i32, !dbg !544 | |
| %199 = and i32 %198, -2147483648, !dbg !544 | |
| %200 = or i32 %199, 1123811328, !dbg !544 | |
| %201 = bitcast i32 %200 to float, !dbg !544 | |
| %j.i.i.0.i.i = select i1 %197, float %201, float %191, !dbg !544 | |
| %202 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not80 = icmp eq i32 %202, 0, !dbg !544 | |
| br i1 %.not80, label %205, label %203, !dbg !544 | |
| 203: ; preds = %__nv_fabsf.exit.i.i.i.i | |
| %204 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #16, !dbg !544 | |
| br label %__internal_fmad.exit4.i.i.i.i, !dbg !544 | |
| 205: ; preds = %__nv_fabsf.exit.i.i.i.i | |
| %206 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #14, !dbg !544 | |
| br label %__internal_fmad.exit4.i.i.i.i, !dbg !544 | |
| __internal_fmad.exit4.i.i.i.i: ; preds = %205, %203 | |
| %.035.i.i = phi float [ %204, %203 ], [ %206, %205 ], !dbg !544 | |
| %207 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not81 = icmp eq i32 %207, 0, !dbg !544 | |
| br i1 %.not81, label %210, label %208, !dbg !544 | |
| 208: ; preds = %__internal_fmad.exit4.i.i.i.i | |
| %209 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #16, !dbg !544 | |
| br label %__internal_expf_kernel.exit.i.i.i, !dbg !544 | |
| 210: ; preds = %__internal_fmad.exit4.i.i.i.i | |
| %211 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #14, !dbg !544 | |
| br label %__internal_expf_kernel.exit.i.i.i, !dbg !544 | |
| __internal_expf_kernel.exit.i.i.i: ; preds = %210, %208 | |
| %.036.i.i = phi float [ %209, %208 ], [ %211, %210 ], !dbg !544 | |
| %212 = fmul float %.036.i.i, 0x3FF7154760000000, !dbg !544 | |
| %213 = fadd float %j.i.i.0.i.i, 0x4168000FE0000000, !dbg !544 | |
| %214 = bitcast float %213 to i32, !dbg !544 | |
| %215 = shl i32 %214, 23, !dbg !544 | |
| %216 = bitcast i32 %215 to float, !dbg !544 | |
| %217 = call float @llvm.nvvm.ex2.approx.ftz.f(float %212) #16, !dbg !544 | |
| %218 = fmul float %217, %216, !dbg !544 | |
| %219 = fcmp une float %218, 0x7FF0000000000000, !dbg !544 | |
| br i1 %219, label %220, label %__internal_accurate_powf.exit.i.i, !dbg !544 | |
| 220: ; preds = %__internal_expf_kernel.exit.i.i.i | |
| %221 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not94 = icmp eq i32 %221, 0, !dbg !544 | |
| br i1 %.not94, label %224, label %222, !dbg !544 | |
| 222: ; preds = %220 | |
| %223 = call float @llvm.nvvm.fma.rn.ftz.f(float %218, float %prod.i.044.0.i.i, float %218) #16, !dbg !544 | |
| br label %__internal_accurate_powf.exit.i.i, !dbg !544 | |
| 224: ; preds = %220 | |
| %225 = call float @llvm.fma.f32(float %218, float %prod.i.044.0.i.i, float %218) #14, !dbg !544 | |
| br label %__internal_accurate_powf.exit.i.i, !dbg !544 | |
| __internal_accurate_powf.exit.i.i: ; preds = %224, %222, %__internal_expf_kernel.exit.i.i.i | |
| %t.i.0.i.i = phi float [ 0x7FF0000000000000, %__internal_expf_kernel.exit.i.i.i ], [ %223, %222 ], [ %225, %224 ], !dbg !544 | |
| %226 = fcmp olt float %51, 0.000000e+00, !dbg !544 | |
| %227 = and i1 %226, %56, !dbg !544 | |
| %228 = bitcast float %t.i.0.i.i to i32, !dbg !544 | |
| %229 = xor i32 %228, -2147483648, !dbg !544 | |
| %230 = bitcast i32 %229 to float, !dbg !544 | |
| %.010.i.i = select i1 %227, float %230, float %t.i.0.i.i, !dbg !544 | |
| %231 = fcmp oeq float %51, 0.000000e+00, !dbg !544 | |
| %232 = fadd float %51, %51, !dbg !544 | |
| %233 = select i1 %56, float %232, float 0.000000e+00, !dbg !544 | |
| %.212.i.i = select i1 %231, float %233, float %.010.i.i, !dbg !544 | |
| %234 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not = icmp eq i32 %234, 0, !dbg !544 | |
| br i1 %.not, label %237, label %235, !dbg !544 | |
| 235: ; preds = %__internal_accurate_powf.exit.i.i | |
| %236 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544 | |
| br label %__nv_fabsf.exit.i.i.i, !dbg !544 | |
| 237: ; preds = %__internal_accurate_powf.exit.i.i | |
| %238 = call float @llvm.fabs.f32(float %51) #14, !dbg !544 | |
| br label %__nv_fabsf.exit.i.i.i, !dbg !544 | |
| __nv_fabsf.exit.i.i.i: ; preds = %237, %235 | |
| %.01.i.i = phi float [ %236, %235 ], [ %238, %237 ], !dbg !544 | |
| %239 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not82 = icmp eq i32 %239, 0, !dbg !544 | |
| br i1 %.not82, label %__nv_fabsf.exit1.i.i.i, label %240, !dbg !544 | |
| 240: ; preds = %__nv_fabsf.exit.i.i.i | |
| %241 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544 | |
| br label %__nv_fabsf.exit1.i.i.i, !dbg !544 | |
| __nv_fabsf.exit1.i.i.i: ; preds = %240, %__nv_fabsf.exit.i.i.i | |
| %.02.i.i = phi float [ %241, %240 ], [ 4.000000e+00, %__nv_fabsf.exit.i.i.i ], !dbg !544 | |
| %242 = fadd float %.01.i.i, %.02.i.i, !dbg !544 | |
| %243 = bitcast float %242 to i32, !dbg !544 | |
| %244 = icmp sgt i32 %243, 2139095039, !dbg !544 | |
| br i1 %244, label %245, label %__internal_powf_infinite_cases.exit.i.i, !dbg !544 | |
| 245: ; preds = %__nv_fabsf.exit1.i.i.i | |
| %246 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not88 = icmp eq i32 %246, 0, !dbg !544 | |
| br i1 %.not88, label %249, label %247, !dbg !544 | |
| 247: ; preds = %245 | |
| %248 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544 | |
| br label %__nv_isnanf.exit.i.i.i, !dbg !544 | |
| 249: ; preds = %245 | |
| %250 = call float @llvm.fabs.f32(float %51) #14, !dbg !544 | |
| br label %__nv_isnanf.exit.i.i.i, !dbg !544 | |
| __nv_isnanf.exit.i.i.i: ; preds = %249, %247 | |
| %.06.i.i = phi float [ %248, %247 ], [ %250, %249 ], !dbg !544 | |
| %251 = fcmp ugt float %.06.i.i, 0x7FF0000000000000, !dbg !544 | |
| br i1 %251, label %257, label %252, !dbg !544 | |
| 252: ; preds = %__nv_isnanf.exit.i.i.i | |
| %253 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not89 = icmp eq i32 %253, 0, !dbg !544 | |
| br i1 %.not89, label %__nv_isnanf.exit11.i.i.i, label %254, !dbg !544 | |
| 254: ; preds = %252 | |
| %255 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544 | |
| br label %__nv_isnanf.exit11.i.i.i, !dbg !544 | |
| __nv_isnanf.exit11.i.i.i: ; preds = %254, %252 | |
| %.07.i.i = phi float [ %255, %254 ], [ 4.000000e+00, %252 ], !dbg !544 | |
| %256 = fcmp ugt float %.07.i.i, 0x7FF0000000000000, !dbg !544 | |
| br i1 %256, label %257, label %259, !dbg !544 | |
| 257: ; preds = %__nv_isnanf.exit11.i.i.i, %__nv_isnanf.exit.i.i.i | |
| %258 = fadd float %51, 4.000000e+00, !dbg !544 | |
| br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544 | |
| 259: ; preds = %__nv_isnanf.exit11.i.i.i | |
| %260 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not90 = icmp eq i32 %260, 0, !dbg !544 | |
| br i1 %.not90, label %__nv_isinff.exit8.i.i.i, label %261, !dbg !544 | |
| 261: ; preds = %259 | |
| %262 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !544 | |
| br label %__nv_isinff.exit8.i.i.i, !dbg !544 | |
| __nv_isinff.exit8.i.i.i: ; preds = %261, %259 | |
| %.05.i.i = phi float [ %262, %261 ], [ 4.000000e+00, %259 ], !dbg !544 | |
| %263 = fcmp oeq float %.05.i.i, 0x7FF0000000000000, !dbg !544 | |
| br i1 %263, label %264, label %274, !dbg !544 | |
| 264: ; preds = %__nv_isinff.exit8.i.i.i | |
| %265 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not93 = icmp eq i32 %265, 0, !dbg !544 | |
| br i1 %.not93, label %268, label %266, !dbg !544 | |
| 266: ; preds = %264 | |
| %267 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544 | |
| br label %__nv_fabsf.exit5.i.i.i, !dbg !544 | |
| 268: ; preds = %264 | |
| %269 = call float @llvm.fabs.f32(float %51) #14, !dbg !544 | |
| br label %__nv_fabsf.exit5.i.i.i, !dbg !544 | |
| __nv_fabsf.exit5.i.i.i: ; preds = %268, %266 | |
| %.04.i.i = phi float [ %267, %266 ], [ %269, %268 ], !dbg !544 | |
| %270 = fcmp ogt float %.04.i.i, 1.000000e+00, !dbg !544 | |
| %271 = fcmp oeq float %51, -1.000000e+00, !dbg !544 | |
| %272 = select i1 %270, float 0x7FF0000000000000, float 0.000000e+00, !dbg !544 | |
| %273 = select i1 %271, float 1.000000e+00, float %272, !dbg !544 | |
| br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544 | |
| 274: ; preds = %__nv_isinff.exit8.i.i.i | |
| %275 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !544 | |
| %.not91 = icmp eq i32 %275, 0, !dbg !544 | |
| br i1 %.not91, label %278, label %276, !dbg !544 | |
| 276: ; preds = %274 | |
| %277 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !544 | |
| br label %__nv_isinff.exit.i.i.i, !dbg !544 | |
| 278: ; preds = %274 | |
| %279 = call float @llvm.fabs.f32(float %51) #14, !dbg !544 | |
| br label %__nv_isinff.exit.i.i.i, !dbg !544 | |
| __nv_isinff.exit.i.i.i: ; preds = %278, %276 | |
| %.03.i.i = phi float [ %277, %276 ], [ %279, %278 ], !dbg !544 | |
| %280 = fcmp oeq float %.03.i.i, 0x7FF0000000000000, !dbg !544 | |
| br i1 %280, label %281, label %__internal_powf_infinite_cases.exit.i.i, !dbg !544 | |
| 281: ; preds = %__nv_isinff.exit.i.i.i | |
| %282 = select i1 %227, float 0xFFF0000000000000, float 0x7FF0000000000000, !dbg !544 | |
| br label %__internal_powf_infinite_cases.exit.i.i, !dbg !544 | |
| __internal_powf_infinite_cases.exit.i.i: ; preds = %281, %__nv_isinff.exit.i.i.i, %__nv_fabsf.exit5.i.i.i, %257, %__nv_fabsf.exit1.i.i.i | |
| %.3.i.i = phi float [ %.212.i.i, %__nv_fabsf.exit1.i.i.i ], [ %258, %257 ], [ %273, %__nv_fabsf.exit5.i.i.i ], [ %282, %281 ], [ %.212.i.i, %__nv_isinff.exit.i.i.i ], !dbg !544 | |
| %283 = fcmp oeq float %51, 1.000000e+00, !dbg !544 | |
| %t.0.i.i = select i1 %283, float 1.000000e+00, float %.3.i.i, !dbg !544 | |
| %284 = fadd float %value_phi13.i, %t.0.i.i, !dbg !547 | |
| %.not83 = icmp eq i64 %iv.next, %15, !dbg !548 | |
| %285 = add nuw i64 %iv.next, 1, !dbg !550 | |
| br i1 %.not83, label %L467.i.loopexit, label %L319.i, !dbg !551 | |
| L467.i.loopexit: ; preds = %__internal_powf_infinite_cases.exit.i.i | |
| br label %L467.i, !dbg !552 | |
| L467.i: ; preds = %L467.i.loopexit, %L302.i | |
| %value_phi17.i = phi float [ 0.000000e+00, %L302.i ], [ %284, %L467.i.loopexit ] | |
| %286 = fpext float %value_phi17.i to double, !dbg !552 | |
| %287 = call i32 @llvm.nvvm.d2i.hi(double %286) #16, !dbg !558 | |
| %288 = call i32 @llvm.nvvm.d2i.hi(double noundef 2.500000e-01) #16, !dbg !558 | |
| %289 = and i32 %288, 2146435072, !dbg !558 | |
| %290 = icmp eq i32 %289, 1072693248, !dbg !558 | |
| %291 = call double @llvm.fabs.f64(double %286) #14, !dbg !558 | |
| %292 = call fastcc double @__internal_accurate_pow(double %291) #16, !dbg !558 | |
| %293 = icmp slt i32 %287, 0, !dbg !558 | |
| %294 = and i1 %293, %290, !dbg !558 | |
| br i1 %294, label %295, label %300, !dbg !558 | |
| 295: ; preds = %L467.i | |
| %296 = call i32 @llvm.nvvm.d2i.hi(double %292) #16, !dbg !558 | |
| %297 = call i32 @llvm.nvvm.d2i.lo(double %292) #16, !dbg !558 | |
| %298 = xor i32 %296, -2147483648, !dbg !558 | |
| %299 = call double @llvm.nvvm.lohi.i2d(i32 %297, i32 %298) #16, !dbg !558 | |
| br label %300, !dbg !558 | |
| 300: ; preds = %295, %L467.i | |
| %t.0.i37.i = phi double [ %299, %295 ], [ %292, %L467.i ], !dbg !558 | |
| %301 = fcmp oeq float %value_phi17.i, 0.000000e+00, !dbg !558 | |
| br i1 %301, label %302, label %306, !dbg !558 | |
| 302: ; preds = %300 | |
| %spec.select = select i1 %290, i32 %287, i32 0, !dbg !558 | |
| %303 = icmp slt i32 %288, 0, !dbg !558 | |
| %304 = or i32 %spec.select, 2146435072, !dbg !558 | |
| %thi.1.i.i = select i1 %303, i32 %304, i32 %spec.select, !dbg !558 | |
| %305 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.1.i.i) #16, !dbg !558 | |
| br label %307, !dbg !558 | |
| 306: ; preds = %300 | |
| %spec.select97 = select i1 %293, double 0xFFF8000000000000, double %t.0.i37.i, !dbg !558 | |
| br label %307, !dbg !558 | |
| 307: ; preds = %306, %302 | |
| %t.2.i.i = phi double [ %305, %302 ], [ %spec.select97, %306 ], !dbg !558 | |
| %308 = fadd double %286, 2.500000e-01, !dbg !558 | |
| %309 = call i32 @llvm.nvvm.d2i.hi(double %308) #16, !dbg !558 | |
| %310 = and i32 %309, 2146435072, !dbg !558 | |
| %311 = icmp eq i32 %310, 2146435072, !dbg !558 | |
| br i1 %311, label %312, label %__nv_pow.exit.i, !dbg !558 | |
| 312: ; preds = %307 | |
| %313 = fcmp ugt double %291, 0x7FF0000000000000, !dbg !558 | |
| br i1 %313, label %__nv_pow.exit.i, label %314, !dbg !558 | |
| 314: ; preds = %312 | |
| %315 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %316 = icmp eq i32 %315, 200, !dbg !558 | |
| br i1 %316, label %.critedge, label %317, !dbg !558 | |
| 317: ; preds = %314 | |
| %318 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %319 = icmp eq i32 %318, 350, !dbg !558 | |
| br i1 %319, label %.critedge, label %320, !dbg !558 | |
| 320: ; preds = %317 | |
| %321 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %322 = icmp eq i32 %321, 370, !dbg !558 | |
| br i1 %322, label %.critedge, label %__nv_isinfd.exit4.i.i, !dbg !558 | |
| __nv_isinfd.exit4.i.i: ; preds = %320 | |
| %323 = call i32 @llvm.nvvm.d2i.lo(double noundef 2.500000e-01) #16, !dbg !558 | |
| %324 = and i32 %288, 2147483647, !dbg !558 | |
| %325 = icmp eq i32 %324, 2146435072, !dbg !558 | |
| %326 = icmp eq i32 %323, 0, !dbg !558 | |
| %327 = and i1 %326, %325, !dbg !558 | |
| br i1 %327, label %328, label %.critedge, !dbg !558 | |
| 328: ; preds = %__nv_isinfd.exit4.i.i | |
| %329 = fcmp ogt double %291, 1.000000e+00, !dbg !558 | |
| %thi.2.i.i = select i1 %329, i32 2146435072, i32 0, !dbg !558 | |
| %330 = icmp slt i32 %288, 0, !dbg !558 | |
| %331 = xor i32 %thi.2.i.i, 2146435072 | |
| %spec.select8 = select i1 %330, i32 %331, i32 %thi.2.i.i, !dbg !558 | |
| %332 = fcmp oeq float %value_phi17.i, -1.000000e+00, !dbg !558 | |
| %thi.4.i.i = select i1 %332, i32 1072693248, i32 %spec.select8, !dbg !558 | |
| %333 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.4.i.i) #16, !dbg !558 | |
| br label %__nv_pow.exit.i, !dbg !558 | |
| .critedge: ; preds = %__nv_isinfd.exit4.i.i, %320, %317, %314 | |
| %334 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %335 = icmp eq i32 %334, 200, !dbg !558 | |
| br i1 %335, label %342, label %336, !dbg !558 | |
| 336: ; preds = %.critedge | |
| %337 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %338 = icmp eq i32 %337, 350, !dbg !558 | |
| br i1 %338, label %342, label %339, !dbg !558 | |
| 339: ; preds = %336 | |
| %340 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !558 | |
| %341 = icmp eq i32 %340, 370, !dbg !558 | |
| br i1 %341, label %342, label %344, !dbg !558 | |
| 342: ; preds = %339, %336, %.critedge | |
| %343 = fcmp oeq double %291, 0x7FF0000000000000, !dbg !558 | |
| br label %__nv_isinfd.exit.i.i, !dbg !558 | |
| 344: ; preds = %339 | |
| %345 = call i32 @llvm.nvvm.d2i.lo(double %286) #16, !dbg !558 | |
| %346 = and i32 %287, 2147483647, !dbg !558 | |
| %347 = icmp eq i32 %346, 2146435072, !dbg !558 | |
| %348 = icmp eq i32 %345, 0, !dbg !558 | |
| %349 = and i1 %348, %347, !dbg !558 | |
| br label %__nv_isinfd.exit.i.i, !dbg !558 | |
| __nv_isinfd.exit.i.i: ; preds = %344, %342 | |
| %.0.i39.i.in = phi i1 [ %343, %342 ], [ %349, %344 ] | |
| br i1 %.0.i39.i.in, label %350, label %__nv_pow.exit.i, !dbg !558 | |
| 350: ; preds = %__nv_isinfd.exit.i.i | |
| %.inv87 = icmp slt i32 %288, 0, !dbg !558 | |
| %spec.select9 = select i1 %.inv87, i32 0, i32 2146435072, !dbg !558 | |
| %351 = and i32 %288, 2147483647, !dbg !558 | |
| %352 = icmp ne i32 %351, 1071644672, !dbg !558 | |
| %or.cond11 = and i1 %352, %294, !dbg !558 | |
| %353 = or i32 %spec.select9, -2147483648, !dbg !558 | |
| %thi.6.i.i = select i1 %or.cond11, i32 %353, i32 %spec.select9, !dbg !558 | |
| %354 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.6.i.i) #16, !dbg !558 | |
| br label %__nv_pow.exit.i, !dbg !558 | |
| __nv_pow.exit.i: ; preds = %350, %__nv_isinfd.exit.i.i, %328, %312, %307 | |
| %t.6.i.i = phi double [ %t.2.i.i, %307 ], [ %333, %328 ], [ %354, %350 ], [ %t.2.i.i, %__nv_isinfd.exit.i.i ], [ %308, %312 ], !dbg !558 | |
| %355 = icmp sgt i64 %.fca.2.0.extract, 0, !dbg !559 | |
| %356 = select i1 %355, i64 %.fca.2.0.extract, i64 0, !dbg !559 | |
| %357 = load i64, i64* %13, align 8, !dbg !569, !tbaa !301 | |
| %358 = load i64, i64* %14, align 8, !dbg !573, !tbaa !301 | |
| %359 = add i64 %358, -1, !dbg !573 | |
| %360 = mul i64 %359, %356, !dbg !576 | |
| %361 = add i64 %360, %357, !dbg !577 | |
| %362 = icmp sgt i64 %.fca.3.extract, 0, !dbg !578 | |
| %363 = select i1 %362, i64 %.fca.3.extract, i64 0, !dbg !578 | |
| %364 = icmp slt i64 %361, 1, !dbg !587 | |
| %365 = icmp sgt i64 %361, %363, !dbg !587 | |
| %366 = or i1 %364, %365, !dbg !589 | |
| br i1 %366, label %L493.i, label %L491.i, !dbg !589 | |
| L491.i: ; preds = %__nv_pow.exit.i | |
| %367 = fcmp oeq float %value_phi17.i, 1.000000e+00, !dbg !558 | |
| %368 = fptrunc double %t.6.i.i to float, !dbg !590 | |
| %369 = select i1 %367, float 1.000000e+00, float %368, !dbg !590 | |
| %370 = add nsw i64 %361, -1, !dbg !592 | |
| %371 = getelementptr inbounds float, float addrspace(1)* %12, i64 %370, !dbg !599 | |
| store float %369, float addrspace(1)* %371, align 4, !dbg !599, !tbaa !435 | |
| br label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !600 | |
| L493.i: ; preds = %__nv_pow.exit.i | |
| %372 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5571() #15, !dbg !589 | |
| unreachable, !dbg !589 | |
| fail.i: ; preds = %entry | |
| call fastcc void @gpu_report_exception() #14, !dbg !501 | |
| call fastcc void @gpu_signal_exception() #14, !dbg !501 | |
| call void asm sideeffect "exit;", ""() #16, !dbg !501 | |
| unreachable, !dbg !501 | |
| pass.i: ; preds = %entry | |
| %373 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #14, !dbg !601, !range !127 | |
| %374 = udiv i64 %11, %.fca.1.0.0.0.0.extract, !dbg !501 | |
| %375 = mul i64 %374, %.fca.1.0.0.0.0.extract, !dbg !606 | |
| %376 = sub i64 %11, %375, !dbg !608 | |
| %377 = zext i32 %373 to i64, !dbg !499 | |
| %378 = lshr i64 %377, 8, !dbg !501 | |
| %379 = and i64 %377, 255, !dbg !608 | |
| %380 = add nuw nsw i64 %379, 1, !dbg !609 | |
| %381 = shl i64 %376, 8, !dbg !610 | |
| %382 = add i64 %380, %381, !dbg !614 | |
| %383 = add nuw nsw i64 %374, 1, !dbg !615 | |
| %384 = add nuw nsw i64 %383, %378, !dbg !614 | |
| %385 = icmp sgt i64 %382, 0, !dbg !618 | |
| %386 = icmp sle i64 %382, %.fca.0.0.0.0.extract, !dbg !618 | |
| %387 = and i1 %385, %386, !dbg !623 | |
| %388 = icmp sle i64 %384, %.fca.0.0.1.0.extract, !dbg !618 | |
| %389 = and i1 %388, %387, !dbg !624 | |
| br i1 %389, label %L302.i, label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !626 | |
| julia_gpu_dist_kernel__5526_inner.exit: ; preds = %pass.i, %L491.i | |
| call void @llvm.lifetime.end.p0i8(i64 noundef 16, i8* noundef nonnull align 8 dereferenceable(16) %7) #14, !dbg !627 | |
| ret void, !dbg !475 | |
| } | |
| ; Function Attrs: willreturn mustprogress | |
| define internal void @diffejulia_gpu_dist_kernel__5526_inner19({ [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, { i8 addrspace(1)*, i64, [2 x i64], i64 } %"'", { i8 addrspace(1)*, i64, [2 x i64], i64 } %3) local_unnamed_addr #12 !dbg !628 { | |
| entry: | |
| %"'ipa79" = alloca [2 x i64], align 8 | |
| store [2 x i64] zeroinitializer, [2 x i64]* %"'ipa79", align 8 | |
| %4 = alloca [2 x i64], align 8 | |
| %"'ipa" = alloca { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, align 8, !dbg !629 | |
| store { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } zeroinitializer, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa", align 8, !dbg !629 | |
| %5 = alloca { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, align 8, !dbg !629 | |
| %"'ipc" = addrspacecast { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa" to { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)*, !dbg !629 | |
| %6 = addrspacecast { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5 to { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)*, !dbg !629 | |
| %.fca.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 0, 0, !dbg !629 | |
| %".fca.0.0.0.0.gep'ipg" = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa", i64 0, i32 0, i64 0, i64 0, i64 0, !dbg !629 | |
| %.fca.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 0, i64 0, !dbg !629 | |
| store i64 %.fca.0.0.0.0.extract, i64* %".fca.0.0.0.0.gep'ipg", align 8, !dbg !629 | |
| store i64 %.fca.0.0.0.0.extract, i64* %.fca.0.0.0.0.gep, align 8, !dbg !629 | |
| %.fca.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 0, 0, 1, 0, !dbg !629 | |
| %".fca.0.0.1.0.gep'ipg" = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa", i64 0, i32 0, i64 0, i64 1, i64 0, !dbg !629 | |
| %.fca.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 0, i64 0, i64 1, i64 0, !dbg !629 | |
| store i64 %.fca.0.0.1.0.extract, i64* %".fca.0.0.1.0.gep'ipg", align 8, !dbg !629 | |
| store i64 %.fca.0.0.1.0.extract, i64* %.fca.0.0.1.0.gep, align 8, !dbg !629 | |
| %.fca.1.0.0.0.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 0, 0, !dbg !629 | |
| %".fca.1.0.0.0.0.gep'ipg" = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa", i64 0, i32 1, i32 0, i64 0, i64 0, i64 0, !dbg !629 | |
| %.fca.1.0.0.0.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 0, i64 0, !dbg !629 | |
| store i64 %.fca.1.0.0.0.0.extract, i64* %".fca.1.0.0.0.0.gep'ipg", align 8, !dbg !629 | |
| store i64 %.fca.1.0.0.0.0.extract, i64* %.fca.1.0.0.0.0.gep, align 8, !dbg !629 | |
| %.fca.1.0.0.1.0.extract = extractvalue { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } %0, 1, 0, 0, 1, 0, !dbg !629 | |
| %".fca.1.0.0.1.0.gep'ipg" = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %"'ipa", i64 0, i32 1, i32 0, i64 0, i64 1, i64 0, !dbg !629 | |
| %.fca.1.0.0.1.0.gep = getelementptr inbounds { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } }* %5, i64 0, i32 1, i32 0, i64 0, i64 1, i64 0, !dbg !629 | |
| store i64 %.fca.1.0.0.1.0.extract, i64* %".fca.1.0.0.1.0.gep'ipg", align 8, !dbg !629 | |
| store i64 %.fca.1.0.0.1.0.extract, i64* %.fca.1.0.0.1.0.gep, align 8, !dbg !629 | |
| %.fca.0.extract30 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 0, !dbg !629 | |
| %.fca.1.extract32_replacementA = phi i64 , !dbg !629 | |
| %.fca.2.0.extract34 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 0, !dbg !629 | |
| %.fca.2.1.extract36 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 1, !dbg !629 | |
| %.fca.3.extract38_replacementA = phi i64 , !dbg !629 | |
| %".fca.0.extract12'ipev" = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %"'", 0, !dbg !629 | |
| %.fca.0.extract12 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 0, !dbg !629 | |
| %.fca.1.extract14_replacementA = phi i64 , !dbg !629 | |
| %.fca.2.0.extract16 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 0, !dbg !629 | |
| %.fca.2.1.extract18 = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %2, 2, 1, !dbg !629 | |
| %.fca.3.extract20_replacementA = phi i64 , !dbg !629 | |
| %.fca.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 0, !dbg !629 | |
| %.fca.1.extract_replacementA = phi i64 , !dbg !629 | |
| %.fca.2.0.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 2, 0, !dbg !629 | |
| %.fca.2.1.extract_replacementA = phi i64 , !dbg !629 | |
| %.fca.3.extract = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %3, 3, !dbg !629 | |
| %_replacementA78 = phi i8* | |
| %_replacementA = phi {}*** | |
| %7 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x() #14, !dbg !630, !range !65 | |
| %8 = icmp sgt i64 %.fca.1.0.0.0.0.extract, 0, !dbg !639 | |
| %9 = zext i32 %7 to i64, !dbg !653 | |
| %10 = bitcast i8 addrspace(1)* %.fca.0.extract to float addrspace(1)*, !dbg !655 | |
| br i1 %8, label %pass.i, label %fail.i, !dbg !655 | |
| L302.i: ; preds = %pass.i | |
| %_augmented = call fastcc i64 @augmented_julia___index_Global_NTuple_5574([2 x i64]* %4, [2 x i64]* %"'ipa79", { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* %6, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* %"'ipc"), !dbg !658 | |
| store i64 %_augmented, i64* %_augmented_cache, align 8, !dbg !660, !invariant.group !663 | |
| %11 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 0, !dbg !660 | |
| %12 = getelementptr inbounds [2 x i64], [2 x i64]* %4, i64 0, i64 1, !dbg !664 | |
| %.inv = icmp sgt i64 %.fca.2.0.extract34, 0, !dbg !665 | |
| %13 = select i1 %.inv, i64 %.fca.2.0.extract34, i64 0, !dbg !665 | |
| br i1 %.inv, label %L319.i.preheader, label %L467.i, !dbg !669 | |
| L319.i.preheader: ; preds = %L302.i | |
| %14 = load i64, i64* %11, align 8, !tbaa !301 | |
| %15 = icmp sgt i64 %.fca.2.1.extract36, 0 | |
| %16 = select i1 %15, i64 %.fca.2.1.extract36, i64 0 | |
| %17 = icmp sgt i64 %14, 0 | |
| %18 = icmp sle i64 %14, %16 | |
| %19 = and i1 %17, %18 | |
| %20 = add i64 %14, -1 | |
| %21 = mul i64 %20, %13 | |
| %22 = load i64, i64* %12, align 8 | |
| %23 = icmp sgt i64 %.fca.2.0.extract16, 0 | |
| %24 = select i1 %23, i64 %.fca.2.0.extract16, i64 0 | |
| %25 = icmp sgt i64 %.fca.2.1.extract18, 0 | |
| %26 = select i1 %25, i64 %.fca.2.1.extract18, i64 0 | |
| %27 = icmp sgt i64 %22, 0 | |
| %28 = icmp sle i64 %22, %26 | |
| %29 = and i1 %27, %28 | |
| %30 = add i64 %22, -1 | |
| %31 = mul i64 %30, %24 | |
| store i64 %31, i64* %_cache, align 8, !dbg !670, !invariant.group !674 | |
| %32 = add nsw i64 %13, -1, !dbg !670 | |
| %33 = add nuw i64 %32, 1, !dbg !670 | |
| %malloccall = tail call noalias nonnull i8* @malloc(i64 %33) | |
| %.not59_malloccache = bitcast i8* %malloccall to i1* | |
| call void @llvm.memset.p0i8.i64(i8* %malloccall, i8 0, i64 %33, i1 false), !dbg !670 | |
| store i1* %.not59_malloccache, i1** %.not59_cache, align 1, !dbg !670, !invariant.group !675 | |
| br label %L319.i, !dbg !670 | |
| L319.i: ; preds = %__internal_powf_infinite_cases.exit.i.i, %L319.i.preheader | |
| %iv = phi i64 [ %iv.next, %__internal_powf_infinite_cases.exit.i.i ], [ 0, %L319.i.preheader ] | |
| %value_phi13.i = phi float [ %286, %__internal_powf_infinite_cases.exit.i.i ], [ 0.000000e+00, %L319.i.preheader ] | |
| %iv.next = add nuw nsw i64 %iv, 1, !dbg !676 | |
| %34 = icmp ule i64 %iv.next, %13, !dbg !676 | |
| %35 = and i1 %34, %19, !dbg !681 | |
| br i1 %35, label %L340.i, label %L342.i, !dbg !670 | |
| L340.i: ; preds = %L319.i | |
| %36 = add i64 %21, %iv.next, !dbg !682 | |
| %37 = shl i64 %36, 2, !dbg !689 | |
| %38 = add i64 %37, -4, !dbg !689 | |
| %39 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract30, i64 %38, !dbg !694 | |
| %40 = bitcast i8 addrspace(1)* %39 to float addrspace(1)*, !dbg !695 | |
| %41 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %40, i32 noundef 4) #14, !dbg !695 | |
| %42 = icmp ule i64 %iv.next, %24, !dbg !676 | |
| %43 = and i1 %42, %29, !dbg !681 | |
| br i1 %43, label %L405.i, label %L407.i, !dbg !670 | |
| L342.i: ; preds = %L319.i | |
| %44 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !670 | |
| unreachable | |
| L405.i: ; preds = %L340.i | |
| %45 = add i64 %31, %iv.next, !dbg !682 | |
| %46 = shl i64 %45, 2, !dbg !689 | |
| %47 = add i64 %46, -4, !dbg !689 | |
| %"'ipg" = getelementptr i8, i8 addrspace(1)* %".fca.0.extract12'ipev", i64 %47, !dbg !694 | |
| %48 = getelementptr i8, i8 addrspace(1)* %.fca.0.extract12, i64 %47, !dbg !694 | |
| %"'ipc82" = bitcast i8 addrspace(1)* %"'ipg" to float addrspace(1)*, !dbg !695 | |
| %49 = bitcast i8 addrspace(1)* %48 to float addrspace(1)*, !dbg !695 | |
| %50 = call float @llvm.nvvm.ldg.global.f.f32.p1f32(float addrspace(1)* %49, i32 noundef 4) #14, !dbg !695 | |
| %51 = fsub float %41, %50, !dbg !700 | |
| %52 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not59 = icmp eq i32 %52, 0, !dbg !701 | |
| %53 = load i1*, i1** %.not59_cache, align 8, !dbg !701, !dereferenceable !704, !invariant.group !675 | |
| %54 = getelementptr inbounds i1, i1* %53, i64 %iv, !dbg !701 | |
| store i1 %.not59, i1* %54, align 1, !dbg !701, !invariant.group !705 | |
| br i1 %.not59, label %__nv_fabsf.exit.i.i, label %56, !dbg !701 | |
| L407.i: ; preds = %L340.i | |
| %55 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5568() #15, !dbg !670 | |
| unreachable | |
| 56: ; preds = %L405.i | |
| %57 = call float @llvm.nvvm.fabs.ftz.f(float noundef 0.000000e+00) #16, !dbg !701 | |
| br label %__nv_fabsf.exit.i.i, !dbg !701 | |
| __nv_fabsf.exit.i.i: ; preds = %56, %L405.i | |
| %.08.i.i = phi float [ %57, %56 ], [ 0.000000e+00, %L405.i ], !dbg !701 | |
| %58 = fcmp oeq float %.08.i.i, 1.000000e+00, !dbg !701 | |
| %59 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not60 = icmp eq i32 %59, 0, !dbg !701 | |
| br i1 %.not60, label %62, label %60, !dbg !701 | |
| 60: ; preds = %__nv_fabsf.exit.i.i | |
| %61 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !701 | |
| br label %__nv_fabsf.exit1.i.i, !dbg !701 | |
| 62: ; preds = %__nv_fabsf.exit.i.i | |
| %63 = call float @llvm.fabs.f32(float %51) #14, !dbg !701 | |
| br label %__nv_fabsf.exit1.i.i, !dbg !701 | |
| __nv_fabsf.exit1.i.i: ; preds = %62, %60 | |
| %.09.i.i = phi float [ %61, %60 ], [ %63, %62 ], !dbg !701 | |
| %64 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %"'ip_phi" = phi i32 , !dbg !701 | |
| %.not61 = icmp eq i32 %64, 0, !dbg !701 | |
| %65 = fcmp olt float %.09.i.i, 0x3810000000000000, !dbg !701 | |
| %66 = fmul float %.09.i.i, 0x4170000000000000, !dbg !701 | |
| %67 = and i1 %65, %.not61, !dbg !701 | |
| %.116.i.i = select i1 %67, float %66, float %.09.i.i, !dbg !701 | |
| %expo.i.i.1.i.i = select i1 %67, float -1.510000e+02, float -1.270000e+02, !dbg !701 | |
| %68 = bitcast float %.116.i.i to i32, !dbg !701 | |
| %69 = and i32 %68, 8388607, !dbg !701 | |
| %70 = or i32 %69, 1065353216, !dbg !701 | |
| %71 = bitcast i32 %70 to float, !dbg !701 | |
| %72 = lshr i32 %68, 23, !dbg !701 | |
| %73 = uitofp i32 %72 to float, !dbg !701 | |
| %74 = fadd float %expo.i.i.1.i.i, %73, !dbg !701 | |
| %75 = fcmp ogt float %71, 0x3FF6A09E60000000, !dbg !701 | |
| %76 = fmul float %71, 5.000000e-01, !dbg !701 | |
| %77 = fadd float %74, 1.000000e+00, !dbg !701 | |
| %expo.i.i.2.i.i = select i1 %75, float %77, float %74, !dbg !701 | |
| %m.i.i.0.i.i = select i1 %75, float %76, float %71, !dbg !701 | |
| %78 = fadd float %m.i.i.0.i.i, -1.000000e+00, !dbg !701 | |
| %79 = fadd float %m.i.i.0.i.i, 1.000000e+00, !dbg !701 | |
| %80 = call float asm "rcp.approx.ftz.f32 $0,$1;", "=f,f"(float %79) #17, !dbg !701, !srcloc !362 | |
| %81 = fmul float %78, 2.000000e+00, !dbg !701 | |
| %82 = fmul float %80, %81, !dbg !701 | |
| %83 = fmul float %82, %82, !dbg !701 | |
| %84 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not62 = icmp eq i32 %84, 0, !dbg !701 | |
| br i1 %.not62, label %87, label %85, !dbg !701 | |
| 85: ; preds = %__nv_fabsf.exit1.i.i | |
| %86 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0x3F631E1FC0000000, float %83, float noundef 0x3F8995EC60000000) #16, !dbg !701 | |
| br label %__internal_fmad.exit.i.i.i.i, !dbg !701 | |
| 87: ; preds = %__nv_fabsf.exit1.i.i | |
| %88 = call float @llvm.fma.f32(float %83, float noundef 0x3F631E1FC0000000, float noundef 0x3F8995EC60000000) #14, !dbg !701 | |
| br label %__internal_fmad.exit.i.i.i.i, !dbg !701 | |
| __internal_fmad.exit.i.i.i.i: ; preds = %87, %85 | |
| %.020.i.i = phi float [ %86, %85 ], [ %88, %87 ], !dbg !701 | |
| %89 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not63 = icmp eq i32 %89, 0, !dbg !701 | |
| br i1 %.not63, label %92, label %90, !dbg !701 | |
| 90: ; preds = %__internal_fmad.exit.i.i.i.i | |
| %91 = call float @llvm.nvvm.fma.rn.ftz.f(float %.020.i.i, float %83, float noundef 0x3FB55557A0000000) #16, !dbg !701 | |
| br label %__internal_fmad.exit3.i.i.i.i, !dbg !701 | |
| 92: ; preds = %__internal_fmad.exit.i.i.i.i | |
| %93 = call float @llvm.fma.f32(float %.020.i.i, float %83, float noundef 0x3FB55557A0000000) #14, !dbg !701 | |
| br label %__internal_fmad.exit3.i.i.i.i, !dbg !701 | |
| __internal_fmad.exit3.i.i.i.i: ; preds = %92, %90 | |
| %.021.i.i = phi float [ %91, %90 ], [ %93, %92 ], !dbg !701 | |
| %94 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not64 = icmp eq i32 %94, 0, !dbg !701 | |
| br i1 %.not64, label %97, label %95, !dbg !701 | |
| 95: ; preds = %__internal_fmad.exit3.i.i.i.i | |
| %96 = call float @llvm.nvvm.mul.rn.ftz.f(float %.021.i.i, float %83) #16, !dbg !701 | |
| br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !701 | |
| 97: ; preds = %__internal_fmad.exit3.i.i.i.i | |
| %98 = fmul float %83, %.021.i.i, !dbg !701 | |
| br label %__nv_fmul_rn.exit4.i.i.i.i, !dbg !701 | |
| __nv_fmul_rn.exit4.i.i.i.i: ; preds = %97, %95 | |
| %.022.i.i = phi float [ %96, %95 ], [ %98, %97 ], !dbg !701 | |
| %99 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not65 = icmp eq i32 %99, 0, !dbg !701 | |
| br i1 %.not65, label %102, label %100, !dbg !701 | |
| 100: ; preds = %__nv_fmul_rn.exit4.i.i.i.i | |
| %101 = call float @llvm.nvvm.mul.rn.ftz.f(float %.022.i.i, float %82) #16, !dbg !701 | |
| br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !701 | |
| 102: ; preds = %__nv_fmul_rn.exit4.i.i.i.i | |
| %103 = fmul float %82, %.022.i.i, !dbg !701 | |
| br label %__nv_fmul_rn.exit5.i.i.i.i, !dbg !701 | |
| __nv_fmul_rn.exit5.i.i.i.i: ; preds = %102, %100 | |
| %.024.i.i = phi float [ %101, %100 ], [ %103, %102 ], !dbg !701 | |
| %104 = fsub float %78, %82, !dbg !701 | |
| %105 = fmul float %104, 2.000000e+00, !dbg !701 | |
| %106 = fneg float %82, !dbg !701 | |
| %107 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not66 = icmp eq i32 %107, 0, !dbg !701 | |
| br i1 %.not66, label %110, label %108, !dbg !701 | |
| 108: ; preds = %__nv_fmul_rn.exit5.i.i.i.i | |
| %109 = call float @llvm.nvvm.fma.rn.ftz.f(float %106, float %78, float %105) #16, !dbg !701 | |
| br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !701 | |
| 110: ; preds = %__nv_fmul_rn.exit5.i.i.i.i | |
| %111 = call float @llvm.fma.f32(float %106, float %78, float %105) #14, !dbg !701 | |
| br label %__nv_fmaf_rn.exit.i.i.i.i, !dbg !701 | |
| __nv_fmaf_rn.exit.i.i.i.i: ; preds = %110, %108 | |
| %.025.i.i = phi float [ %109, %108 ], [ %111, %110 ], !dbg !701 | |
| %112 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not67 = icmp eq i32 %112, 0, !dbg !701 | |
| br i1 %.not67, label %115, label %113, !dbg !701 | |
| 113: ; preds = %__nv_fmaf_rn.exit.i.i.i.i | |
| %114 = call float @llvm.nvvm.mul.rn.ftz.f(float %80, float %.025.i.i) #16, !dbg !701 | |
| br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !701 | |
| 115: ; preds = %__nv_fmaf_rn.exit.i.i.i.i | |
| %116 = fmul float %80, %.025.i.i, !dbg !701 | |
| br label %__nv_fmul_rn.exit6.i.i.i.i, !dbg !701 | |
| __nv_fmul_rn.exit6.i.i.i.i: ; preds = %115, %113 | |
| %.026.i.i = phi float [ %114, %113 ], [ %116, %115 ], !dbg !701 | |
| %117 = fadd float %82, %.024.i.i, !dbg !701 | |
| %118 = fsub float %82, %117, !dbg !701 | |
| %119 = fadd float %.024.i.i, %118, !dbg !701 | |
| %120 = fadd float %119, %.026.i.i, !dbg !701 | |
| %121 = fadd float %117, %120, !dbg !701 | |
| %122 = fsub float %117, %121, !dbg !701 | |
| %123 = fadd float %120, %122, !dbg !701 | |
| %124 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not68 = icmp eq i32 %124, 0, !dbg !701 | |
| br i1 %.not68, label %127, label %125, !dbg !701 | |
| 125: ; preds = %__nv_fmul_rn.exit6.i.i.i.i | |
| %126 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3FE62E4000000000) #16, !dbg !701 | |
| br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !701 | |
| 127: ; preds = %__nv_fmul_rn.exit6.i.i.i.i | |
| %128 = fmul float %expo.i.i.2.i.i, 0x3FE62E4000000000, !dbg !701 | |
| br label %__nv_fmul_rn.exit2.i.i.i.i, !dbg !701 | |
| __nv_fmul_rn.exit2.i.i.i.i: ; preds = %127, %125 | |
| %.019.i.i = phi float [ %126, %125 ], [ %128, %127 ], !dbg !701 | |
| %129 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not69 = icmp eq i32 %129, 0, !dbg !701 | |
| br i1 %.not69, label %132, label %130, !dbg !701 | |
| 130: ; preds = %__nv_fmul_rn.exit2.i.i.i.i | |
| %131 = call float @llvm.nvvm.mul.rn.ftz.f(float %expo.i.i.2.i.i, float noundef 0x3EB7F7D1C0000000) #16, !dbg !701 | |
| br label %__internal_log_ep.exit.i.i.i, !dbg !701 | |
| 132: ; preds = %__nv_fmul_rn.exit2.i.i.i.i | |
| %133 = fmul float %expo.i.i.2.i.i, 0x3EB7F7D1C0000000, !dbg !701 | |
| br label %__internal_log_ep.exit.i.i.i, !dbg !701 | |
| __internal_log_ep.exit.i.i.i: ; preds = %132, %130 | |
| %.018.i.i = phi float [ %131, %130 ], [ %133, %132 ], !dbg !701 | |
| %134 = fadd float %121, %.019.i.i, !dbg !701 | |
| %135 = fsub float %.019.i.i, %134, !dbg !701 | |
| %136 = fadd float %121, %135, !dbg !701 | |
| %137 = fadd float %123, %136, !dbg !701 | |
| %138 = fadd float %137, %.018.i.i, !dbg !701 | |
| %139 = fadd float %134, %138, !dbg !701 | |
| %140 = fsub float %134, %139, !dbg !701 | |
| %141 = fadd float %138, %140, !dbg !701 | |
| %142 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not70 = icmp eq i32 %142, 0, !dbg !701 | |
| br i1 %.not70, label %__nv_fabsf.exit.i6.i.i, label %143, !dbg !701 | |
| 143: ; preds = %__internal_log_ep.exit.i.i.i | |
| %144 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !701 | |
| br label %__nv_fabsf.exit.i6.i.i, !dbg !701 | |
| __nv_fabsf.exit.i6.i.i: ; preds = %143, %__internal_log_ep.exit.i.i.i | |
| %.027.i.i = phi float [ %144, %143 ], [ 4.000000e+00, %__internal_log_ep.exit.i.i.i ], !dbg !701 | |
| %145 = fcmp ogt float %.027.i.i, 0x46FED09BE0000000, !dbg !701 | |
| %.013.i.i = select i1 %145, float 0x3F40000000000000, float 4.000000e+00, !dbg !701 | |
| %146 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not71 = icmp eq i32 %146, 0, !dbg !701 | |
| br i1 %.not71, label %149, label %147, !dbg !701 | |
| 147: ; preds = %__nv_fabsf.exit.i6.i.i | |
| %148 = call float @llvm.nvvm.mul.rn.ftz.f(float noundef %.013.i.i, float %139) #16, !dbg !701 | |
| br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !701 | |
| 149: ; preds = %__nv_fabsf.exit.i6.i.i | |
| %150 = fmul float %139, %.013.i.i, !dbg !701 | |
| br label %__nv_fmul_rn.exit.i.i.i.i, !dbg !701 | |
| __nv_fmul_rn.exit.i.i.i.i: ; preds = %149, %147 | |
| %.028.i.i = phi float [ %148, %147 ], [ %150, %149 ], !dbg !701 | |
| %151 = fneg float %.028.i.i, !dbg !701 | |
| %152 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not72 = icmp eq i32 %152, 0, !dbg !701 | |
| br i1 %.not72, label %155, label %153, !dbg !701 | |
| 153: ; preds = %__nv_fmul_rn.exit.i.i.i.i | |
| %154 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %139, float %151) #16, !dbg !701 | |
| br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !701 | |
| 155: ; preds = %__nv_fmul_rn.exit.i.i.i.i | |
| %156 = call float @llvm.fma.f32(float noundef %.013.i.i, float %139, float %151) #14, !dbg !701 | |
| br label %__nv_fmaf_rn.exit.i3.i.i.i, !dbg !701 | |
| __nv_fmaf_rn.exit.i3.i.i.i: ; preds = %155, %153 | |
| %.029.i.i = phi float [ %154, %153 ], [ %156, %155 ], !dbg !701 | |
| %157 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not73 = icmp eq i32 %157, 0, !dbg !701 | |
| br i1 %.not73, label %160, label %158, !dbg !701 | |
| 158: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i | |
| %159 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef %.013.i.i, float %141, float %.029.i.i) #16, !dbg !701 | |
| br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !701 | |
| 160: ; preds = %__nv_fmaf_rn.exit.i3.i.i.i | |
| %161 = call float @llvm.fma.f32(float noundef %.013.i.i, float %141, float %.029.i.i) #14, !dbg !701 | |
| br label %__nv_fmaf_rn.exit1.i.i.i.i, !dbg !701 | |
| __nv_fmaf_rn.exit1.i.i.i.i: ; preds = %160, %158 | |
| %.030.i.i = phi float [ %159, %158 ], [ %161, %160 ], !dbg !701 | |
| %162 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not74 = icmp eq i32 %162, 0, !dbg !701 | |
| br i1 %.not74, label %165, label %163, !dbg !701 | |
| 163: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i | |
| %164 = call float @llvm.nvvm.fma.rn.ftz.f(float noundef 0.000000e+00, float %139, float %.030.i.i) #16, !dbg !701 | |
| br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !701 | |
| 165: ; preds = %__nv_fmaf_rn.exit1.i.i.i.i | |
| %166 = call float @llvm.fma.f32(float %139, float noundef 0.000000e+00, float %.030.i.i) #14, !dbg !701 | |
| br label %__nv_fmaf_rn.exit2.i.i.i.i, !dbg !701 | |
| __nv_fmaf_rn.exit2.i.i.i.i: ; preds = %165, %163 | |
| %.031.i.i = phi float [ %164, %163 ], [ %166, %165 ], !dbg !701 | |
| %167 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not75 = icmp eq i32 %167, 0, !dbg !701 | |
| br i1 %.not75, label %170, label %168, !dbg !701 | |
| 168: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i | |
| %169 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %.031.i.i) #16, !dbg !701 | |
| br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !701 | |
| 170: ; preds = %__nv_fmaf_rn.exit2.i.i.i.i | |
| %171 = fadd float %.028.i.i, %.031.i.i, !dbg !701 | |
| br label %__nv_fadd_rn.exit.i.i.i.i, !dbg !701 | |
| __nv_fadd_rn.exit.i.i.i.i: ; preds = %170, %168 | |
| %.032.i.i = phi float [ %169, %168 ], [ %171, %170 ], !dbg !701 | |
| %172 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not76 = icmp eq i32 %172, 0, !dbg !701 | |
| br i1 %.not76, label %176, label %173, !dbg !701 | |
| 173: ; preds = %__nv_fadd_rn.exit.i.i.i.i | |
| %174 = fneg float %.032.i.i, !dbg !701 | |
| %175 = call float @llvm.nvvm.add.rn.ftz.f(float %.028.i.i, float %174) #16, !dbg !701 | |
| br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !701 | |
| 176: ; preds = %__nv_fadd_rn.exit.i.i.i.i | |
| %177 = fsub float %.028.i.i, %.032.i.i, !dbg !701 | |
| br label %__nv_fadd_rn.exit3.i.i.i.i, !dbg !701 | |
| __nv_fadd_rn.exit3.i.i.i.i: ; preds = %176, %173 | |
| %.033.i.i = phi float [ %175, %173 ], [ %177, %176 ], !dbg !701 | |
| %178 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not77 = icmp eq i32 %178, 0, !dbg !701 | |
| br i1 %.not77, label %181, label %179, !dbg !701 | |
| 179: ; preds = %__nv_fadd_rn.exit3.i.i.i.i | |
| %180 = call float @llvm.nvvm.add.rn.ftz.f(float %.033.i.i, float %.031.i.i) #16, !dbg !701 | |
| br label %__internal_dsmul.exit.i.i.i, !dbg !701 | |
| 181: ; preds = %__nv_fadd_rn.exit3.i.i.i.i | |
| %182 = fadd float %.031.i.i, %.033.i.i, !dbg !701 | |
| br label %__internal_dsmul.exit.i.i.i, !dbg !701 | |
| __internal_dsmul.exit.i.i.i: ; preds = %181, %179 | |
| %.034.i.i = phi float [ %180, %179 ], [ %182, %181 ], !dbg !701 | |
| %183 = bitcast float %.032.i.i to i32, !dbg !701 | |
| %184 = icmp eq i32 %183, 1118925336, !dbg !701 | |
| %185 = add i32 %183, -1, !dbg !701 | |
| %186 = bitcast i32 %185 to float, !dbg !701 | |
| %187 = fadd float %.034.i.i, 0x3EE0000000000000, !dbg !701 | |
| %prod.i.044.0.i.i = select i1 %184, float %187, float %.034.i.i, !dbg !701 | |
| %prod.i.145.0.i.i = select i1 %184, float %186, float %.032.i.i, !dbg !701 | |
| %188 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not78 = icmp eq i32 %188, 0, !dbg !701 | |
| br i1 %.not78, label %191, label %189, !dbg !701 | |
| 189: ; preds = %__internal_dsmul.exit.i.i.i | |
| %190 = call float @llvm.nvvm.mul.rn.ftz.f(float %prod.i.145.0.i.i, float noundef 0x3FF7154760000000) #16, !dbg !701 | |
| br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !701 | |
| 191: ; preds = %__internal_dsmul.exit.i.i.i | |
| %192 = fmul float %prod.i.145.0.i.i, 0x3FF7154760000000, !dbg !701 | |
| br label %__nv_fmul_rn.exit.i10.i.i.i, !dbg !701 | |
| __nv_fmul_rn.exit.i10.i.i.i: ; preds = %191, %189 | |
| %.017.i.i = phi float [ %190, %189 ], [ %192, %191 ], !dbg !701 | |
| %193 = call float @llvm.trunc.f32(float %.017.i.i) #14, !dbg !701 | |
| %194 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not79 = icmp eq i32 %194, 0, !dbg !701 | |
| br i1 %.not79, label %197, label %195, !dbg !701 | |
| 195: ; preds = %__nv_fmul_rn.exit.i10.i.i.i | |
| %196 = call float @llvm.nvvm.fabs.ftz.f(float %193) #16, !dbg !701 | |
| br label %__nv_fabsf.exit.i.i.i.i, !dbg !701 | |
| 197: ; preds = %__nv_fmul_rn.exit.i10.i.i.i | |
| %198 = call float @llvm.fabs.f32(float %193) #14, !dbg !701 | |
| br label %__nv_fabsf.exit.i.i.i.i, !dbg !701 | |
| __nv_fabsf.exit.i.i.i.i: ; preds = %197, %195 | |
| %.023.i.i = phi float [ %196, %195 ], [ %198, %197 ], !dbg !701 | |
| %199 = fcmp ogt float %.023.i.i, 1.260000e+02, !dbg !701 | |
| %200 = bitcast float %193 to i32, !dbg !701 | |
| %201 = and i32 %200, -2147483648, !dbg !701 | |
| %202 = or i32 %201, 1123811328, !dbg !701 | |
| %203 = bitcast i32 %202 to float, !dbg !701 | |
| %j.i.i.0.i.i = select i1 %199, float %203, float %193, !dbg !701 | |
| %204 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not80 = icmp eq i32 %204, 0, !dbg !701 | |
| br i1 %.not80, label %207, label %205, !dbg !701 | |
| 205: ; preds = %__nv_fabsf.exit.i.i.i.i | |
| %206 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #16, !dbg !701 | |
| br label %__internal_fmad.exit4.i.i.i.i, !dbg !701 | |
| 207: ; preds = %__nv_fabsf.exit.i.i.i.i | |
| %208 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0xBFE62E4300000000, float %prod.i.145.0.i.i) #14, !dbg !701 | |
| br label %__internal_fmad.exit4.i.i.i.i, !dbg !701 | |
| __internal_fmad.exit4.i.i.i.i: ; preds = %207, %205 | |
| %.035.i.i = phi float [ %206, %205 ], [ %208, %207 ], !dbg !701 | |
| %209 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not81 = icmp eq i32 %209, 0, !dbg !701 | |
| br i1 %.not81, label %212, label %210, !dbg !701 | |
| 210: ; preds = %__internal_fmad.exit4.i.i.i.i | |
| %211 = call float @llvm.nvvm.fma.rn.ftz.f(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #16, !dbg !701 | |
| br label %__internal_expf_kernel.exit.i.i.i, !dbg !701 | |
| 212: ; preds = %__internal_fmad.exit4.i.i.i.i | |
| %213 = call float @llvm.fma.f32(float %j.i.i.0.i.i, float noundef 0x3E205C6100000000, float %.035.i.i) #14, !dbg !701 | |
| br label %__internal_expf_kernel.exit.i.i.i, !dbg !701 | |
| __internal_expf_kernel.exit.i.i.i: ; preds = %212, %210 | |
| %.036.i.i = phi float [ %211, %210 ], [ %213, %212 ], !dbg !701 | |
| %214 = fmul float %.036.i.i, 0x3FF7154760000000, !dbg !701 | |
| %215 = fadd float %j.i.i.0.i.i, 0x4168000FE0000000, !dbg !701 | |
| %216 = bitcast float %215 to i32, !dbg !701 | |
| %217 = shl i32 %216, 23, !dbg !701 | |
| %218 = bitcast i32 %217 to float, !dbg !701 | |
| %219 = call float @llvm.nvvm.ex2.approx.ftz.f(float %214) #16, !dbg !701 | |
| %220 = fmul float %219, %218, !dbg !701 | |
| %221 = fcmp une float %220, 0x7FF0000000000000, !dbg !701 | |
| br i1 %221, label %222, label %__internal_accurate_powf.exit.i.i, !dbg !701 | |
| 222: ; preds = %__internal_expf_kernel.exit.i.i.i | |
| %223 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not94 = icmp eq i32 %223, 0, !dbg !701 | |
| br i1 %.not94, label %226, label %224, !dbg !701 | |
| 224: ; preds = %222 | |
| %225 = call float @llvm.nvvm.fma.rn.ftz.f(float %220, float %prod.i.044.0.i.i, float %220) #16, !dbg !701 | |
| br label %__internal_accurate_powf.exit.i.i, !dbg !701 | |
| 226: ; preds = %222 | |
| %227 = call float @llvm.fma.f32(float %220, float %prod.i.044.0.i.i, float %220) #14, !dbg !701 | |
| br label %__internal_accurate_powf.exit.i.i, !dbg !701 | |
| __internal_accurate_powf.exit.i.i: ; preds = %226, %224, %__internal_expf_kernel.exit.i.i.i | |
| %t.i.0.i.i = phi float [ 0x7FF0000000000000, %__internal_expf_kernel.exit.i.i.i ], [ %225, %224 ], [ %227, %226 ], !dbg !701 | |
| %228 = fcmp olt float %51, 0.000000e+00, !dbg !701 | |
| %229 = and i1 %228, %58, !dbg !701 | |
| %230 = bitcast float %t.i.0.i.i to i32, !dbg !701 | |
| %231 = xor i32 %230, -2147483648, !dbg !701 | |
| %232 = bitcast i32 %231 to float, !dbg !701 | |
| %.010.i.i = select i1 %229, float %232, float %t.i.0.i.i, !dbg !701 | |
| %233 = fcmp oeq float %51, 0.000000e+00, !dbg !701 | |
| %234 = fadd float %51, %51, !dbg !701 | |
| %235 = select i1 %58, float %234, float 0.000000e+00, !dbg !701 | |
| %.212.i.i = select i1 %233, float %235, float %.010.i.i, !dbg !701 | |
| %236 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not = icmp eq i32 %236, 0, !dbg !701 | |
| br i1 %.not, label %239, label %237, !dbg !701 | |
| 237: ; preds = %__internal_accurate_powf.exit.i.i | |
| %238 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !701 | |
| br label %__nv_fabsf.exit.i.i.i, !dbg !701 | |
| 239: ; preds = %__internal_accurate_powf.exit.i.i | |
| %240 = call float @llvm.fabs.f32(float %51) #14, !dbg !701 | |
| br label %__nv_fabsf.exit.i.i.i, !dbg !701 | |
| __nv_fabsf.exit.i.i.i: ; preds = %239, %237 | |
| %.01.i.i = phi float [ %238, %237 ], [ %240, %239 ], !dbg !701 | |
| %241 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not82 = icmp eq i32 %241, 0, !dbg !701 | |
| br i1 %.not82, label %__nv_fabsf.exit1.i.i.i, label %242, !dbg !701 | |
| 242: ; preds = %__nv_fabsf.exit.i.i.i | |
| %243 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !701 | |
| br label %__nv_fabsf.exit1.i.i.i, !dbg !701 | |
| __nv_fabsf.exit1.i.i.i: ; preds = %242, %__nv_fabsf.exit.i.i.i | |
| %.02.i.i = phi float [ %243, %242 ], [ 4.000000e+00, %__nv_fabsf.exit.i.i.i ], !dbg !701 | |
| %244 = fadd float %.01.i.i, %.02.i.i, !dbg !701 | |
| %245 = bitcast float %244 to i32, !dbg !701 | |
| %246 = icmp sgt i32 %245, 2139095039, !dbg !701 | |
| br i1 %246, label %247, label %__internal_powf_infinite_cases.exit.i.i, !dbg !701 | |
| 247: ; preds = %__nv_fabsf.exit1.i.i.i | |
| %248 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not88 = icmp eq i32 %248, 0, !dbg !701 | |
| br i1 %.not88, label %251, label %249, !dbg !701 | |
| 249: ; preds = %247 | |
| %250 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !701 | |
| br label %__nv_isnanf.exit.i.i.i, !dbg !701 | |
| 251: ; preds = %247 | |
| %252 = call float @llvm.fabs.f32(float %51) #14, !dbg !701 | |
| br label %__nv_isnanf.exit.i.i.i, !dbg !701 | |
| __nv_isnanf.exit.i.i.i: ; preds = %251, %249 | |
| %.06.i.i = phi float [ %250, %249 ], [ %252, %251 ], !dbg !701 | |
| %253 = fcmp ugt float %.06.i.i, 0x7FF0000000000000, !dbg !701 | |
| br i1 %253, label %259, label %254, !dbg !701 | |
| 254: ; preds = %__nv_isnanf.exit.i.i.i | |
| %255 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not89 = icmp eq i32 %255, 0, !dbg !701 | |
| br i1 %.not89, label %__nv_isnanf.exit11.i.i.i, label %256, !dbg !701 | |
| 256: ; preds = %254 | |
| %257 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !701 | |
| br label %__nv_isnanf.exit11.i.i.i, !dbg !701 | |
| __nv_isnanf.exit11.i.i.i: ; preds = %256, %254 | |
| %.07.i.i = phi float [ %257, %256 ], [ 4.000000e+00, %254 ], !dbg !701 | |
| %258 = fcmp ugt float %.07.i.i, 0x7FF0000000000000, !dbg !701 | |
| br i1 %258, label %259, label %261, !dbg !701 | |
| 259: ; preds = %__nv_isnanf.exit11.i.i.i, %__nv_isnanf.exit.i.i.i | |
| %260 = fadd float %51, 4.000000e+00, !dbg !701 | |
| br label %__internal_powf_infinite_cases.exit.i.i, !dbg !701 | |
| 261: ; preds = %__nv_isnanf.exit11.i.i.i | |
| %262 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not90 = icmp eq i32 %262, 0, !dbg !701 | |
| br i1 %.not90, label %__nv_isinff.exit8.i.i.i, label %263, !dbg !701 | |
| 263: ; preds = %261 | |
| %264 = call float @llvm.nvvm.fabs.ftz.f(float noundef 4.000000e+00) #16, !dbg !701 | |
| br label %__nv_isinff.exit8.i.i.i, !dbg !701 | |
| __nv_isinff.exit8.i.i.i: ; preds = %263, %261 | |
| %.05.i.i = phi float [ %264, %263 ], [ 4.000000e+00, %261 ], !dbg !701 | |
| %265 = fcmp oeq float %.05.i.i, 0x7FF0000000000000, !dbg !701 | |
| br i1 %265, label %266, label %276, !dbg !701 | |
| 266: ; preds = %__nv_isinff.exit8.i.i.i | |
| %267 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not93 = icmp eq i32 %267, 0, !dbg !701 | |
| br i1 %.not93, label %270, label %268, !dbg !701 | |
| 268: ; preds = %266 | |
| %269 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !701 | |
| br label %__nv_fabsf.exit5.i.i.i, !dbg !701 | |
| 270: ; preds = %266 | |
| %271 = call float @llvm.fabs.f32(float %51) #14, !dbg !701 | |
| br label %__nv_fabsf.exit5.i.i.i, !dbg !701 | |
| __nv_fabsf.exit5.i.i.i: ; preds = %270, %268 | |
| %.04.i.i = phi float [ %269, %268 ], [ %271, %270 ], !dbg !701 | |
| %272 = fcmp ogt float %.04.i.i, 1.000000e+00, !dbg !701 | |
| %273 = fcmp oeq float %51, -1.000000e+00, !dbg !701 | |
| %274 = select i1 %272, float 0x7FF0000000000000, float 0.000000e+00, !dbg !701 | |
| %275 = select i1 %273, float 1.000000e+00, float %274, !dbg !701 | |
| br label %__internal_powf_infinite_cases.exit.i.i, !dbg !701 | |
| 276: ; preds = %__nv_isinff.exit8.i.i.i | |
| %277 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([11 x i8], [11 x i8]* @.str, i64 0, i64 0)) #16, !dbg !701 | |
| %.not91 = icmp eq i32 %277, 0, !dbg !701 | |
| br i1 %.not91, label %280, label %278, !dbg !701 | |
| 278: ; preds = %276 | |
| %279 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !701 | |
| br label %__nv_isinff.exit.i.i.i, !dbg !701 | |
| 280: ; preds = %276 | |
| %281 = call float @llvm.fabs.f32(float %51) #14, !dbg !701 | |
| br label %__nv_isinff.exit.i.i.i, !dbg !701 | |
| __nv_isinff.exit.i.i.i: ; preds = %280, %278 | |
| %.03.i.i = phi float [ %279, %278 ], [ %281, %280 ], !dbg !701 | |
| %282 = fcmp oeq float %.03.i.i, 0x7FF0000000000000, !dbg !701 | |
| br i1 %282, label %283, label %__internal_powf_infinite_cases.exit.i.i, !dbg !701 | |
| 283: ; preds = %__nv_isinff.exit.i.i.i | |
| %284 = select i1 %229, float 0xFFF0000000000000, float 0x7FF0000000000000, !dbg !701 | |
| br label %__internal_powf_infinite_cases.exit.i.i, !dbg !701 | |
| __internal_powf_infinite_cases.exit.i.i: ; preds = %283, %__nv_isinff.exit.i.i.i, %__nv_fabsf.exit5.i.i.i, %259, %__nv_fabsf.exit1.i.i.i | |
| %.3.i.i = phi float [ %.212.i.i, %__nv_fabsf.exit1.i.i.i ], [ %260, %259 ], [ %275, %__nv_fabsf.exit5.i.i.i ], [ %284, %283 ], [ %.212.i.i, %__nv_isinff.exit.i.i.i ], !dbg !701 | |
| %285 = fcmp oeq float %51, 1.000000e+00, !dbg !701 | |
| %t.0.i.i = select i1 %285, float 1.000000e+00, float %.3.i.i, !dbg !701 | |
| %286 = fadd float %value_phi13.i, %t.0.i.i, !dbg !706 | |
| %.not83 = icmp eq i64 %iv.next, %13, !dbg !707 | |
| %287 = add nuw i64 %iv.next, 1, !dbg !709 | |
| br i1 %.not83, label %L467.i.loopexit, label %L319.i, !dbg !710 | |
| L467.i.loopexit: ; preds = %__internal_powf_infinite_cases.exit.i.i | |
| br label %L467.i, !dbg !711 | |
| L467.i: ; preds = %L467.i.loopexit, %L302.i | |
| %value_phi17.i = phi float [ 0.000000e+00, %L302.i ], [ %286, %L467.i.loopexit ] | |
| %288 = fpext float %value_phi17.i to double, !dbg !711 | |
| %289 = call i32 @llvm.nvvm.d2i.hi(double %288) #16, !dbg !717 | |
| %290 = call i32 @llvm.nvvm.d2i.hi(double noundef 2.500000e-01) #16, !dbg !717 | |
| %291 = and i32 %290, 2146435072, !dbg !717 | |
| %292 = icmp eq i32 %291, 1072693248, !dbg !717 | |
| %293 = call double @llvm.fabs.f64(double %288) #14, !dbg !717 | |
| %294 = call fastcc double @__internal_accurate_pow(double %293) #16, !dbg !717 | |
| %295 = icmp slt i32 %289, 0, !dbg !717 | |
| %296 = and i1 %295, %292, !dbg !717 | |
| br i1 %296, label %297, label %302, !dbg !717 | |
| 297: ; preds = %L467.i | |
| %298 = call i32 @llvm.nvvm.d2i.hi(double %294) #16, !dbg !717 | |
| %299 = call i32 @llvm.nvvm.d2i.lo(double %294) #16, !dbg !717 | |
| %300 = xor i32 %298, -2147483648, !dbg !717 | |
| %301 = call double @llvm.nvvm.lohi.i2d(i32 %299, i32 %300) #16, !dbg !717 | |
| br label %302, !dbg !717 | |
| 302: ; preds = %297, %L467.i | |
| %t.0.i37.i = phi double [ %301, %297 ], [ %294, %L467.i ], !dbg !717 | |
| %303 = fcmp oeq float %value_phi17.i, 0.000000e+00, !dbg !717 | |
| br i1 %303, label %304, label %308, !dbg !717 | |
| 304: ; preds = %302 | |
| %spec.select = select i1 %292, i32 %289, i32 0, !dbg !717 | |
| %305 = icmp slt i32 %290, 0, !dbg !717 | |
| %306 = or i32 %spec.select, 2146435072, !dbg !717 | |
| %thi.1.i.i = select i1 %305, i32 %306, i32 %spec.select, !dbg !717 | |
| %307 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.1.i.i) #16, !dbg !717 | |
| br label %309, !dbg !717 | |
| 308: ; preds = %302 | |
| %spec.select97 = select i1 %295, double 0xFFF8000000000000, double %t.0.i37.i, !dbg !717 | |
| br label %309, !dbg !717 | |
| 309: ; preds = %308, %304 | |
| %t.2.i.i = phi double [ %307, %304 ], [ %spec.select97, %308 ], !dbg !717 | |
| %310 = fadd double %288, 2.500000e-01, !dbg !717 | |
| %311 = call i32 @llvm.nvvm.d2i.hi(double %310) #16, !dbg !717 | |
| %312 = and i32 %311, 2146435072, !dbg !717 | |
| %313 = icmp eq i32 %312, 2146435072, !dbg !717 | |
| br i1 %313, label %314, label %__nv_pow.exit.i, !dbg !717 | |
| 314: ; preds = %309 | |
| %315 = fcmp ugt double %293, 0x7FF0000000000000, !dbg !717 | |
| br i1 %315, label %__nv_pow.exit.i, label %316, !dbg !717 | |
| 316: ; preds = %314 | |
| %317 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717 | |
| %318 = icmp eq i32 %317, 200, !dbg !717 | |
| br i1 %318, label %.critedge, label %319, !dbg !717 | |
| 319: ; preds = %316 | |
| %320 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717 | |
| %321 = icmp eq i32 %320, 350, !dbg !717 | |
| br i1 %321, label %.critedge, label %322, !dbg !717 | |
| 322: ; preds = %319 | |
| %323 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717 | |
| %324 = icmp eq i32 %323, 370, !dbg !717 | |
| br i1 %324, label %.critedge, label %__nv_isinfd.exit4.i.i, !dbg !717 | |
| __nv_isinfd.exit4.i.i: ; preds = %322 | |
| %325 = call i32 @llvm.nvvm.d2i.lo(double noundef 2.500000e-01) #16, !dbg !717 | |
| %326 = and i32 %290, 2147483647, !dbg !717 | |
| %327 = icmp eq i32 %326, 2146435072, !dbg !717 | |
| %328 = icmp eq i32 %325, 0, !dbg !717 | |
| %329 = and i1 %328, %327, !dbg !717 | |
| br i1 %329, label %330, label %.critedge, !dbg !717 | |
| 330: ; preds = %__nv_isinfd.exit4.i.i | |
| %331 = fcmp ogt double %293, 1.000000e+00, !dbg !717 | |
| %thi.2.i.i = select i1 %331, i32 2146435072, i32 0, !dbg !717 | |
| %332 = icmp slt i32 %290, 0, !dbg !717 | |
| %333 = xor i32 %thi.2.i.i, 2146435072 | |
| %spec.select8 = select i1 %332, i32 %333, i32 %thi.2.i.i, !dbg !717 | |
| %334 = fcmp oeq float %value_phi17.i, -1.000000e+00, !dbg !717 | |
| %thi.4.i.i = select i1 %334, i32 1072693248, i32 %spec.select8, !dbg !717 | |
| %335 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.4.i.i) #16, !dbg !717 | |
| br label %__nv_pow.exit.i, !dbg !717 | |
| .critedge: ; preds = %__nv_isinfd.exit4.i.i, %322, %319, %316 | |
| %336 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717 | |
| %337 = icmp eq i32 %336, 200, !dbg !717 | |
| br i1 %337, label %344, label %338, !dbg !717 | |
| 338: ; preds = %.critedge | |
| %339 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717 | |
| %340 = icmp eq i32 %339, 350, !dbg !717 | |
| br i1 %340, label %344, label %341, !dbg !717 | |
| 341: ; preds = %338 | |
| %342 = call i32 @__nvvm_reflect(i8* noundef getelementptr inbounds ([12 x i8], [12 x i8]* @.str.1, i64 0, i64 0)) #16, !dbg !717 | |
| %343 = icmp eq i32 %342, 370, !dbg !717 | |
| br i1 %343, label %344, label %346, !dbg !717 | |
| 344: ; preds = %341, %338, %.critedge | |
| %345 = fcmp oeq double %293, 0x7FF0000000000000, !dbg !717 | |
| br label %__nv_isinfd.exit.i.i, !dbg !717 | |
| 346: ; preds = %341 | |
| %347 = call i32 @llvm.nvvm.d2i.lo(double %288) #16, !dbg !717 | |
| %348 = and i32 %289, 2147483647, !dbg !717 | |
| %349 = icmp eq i32 %348, 2146435072, !dbg !717 | |
| %350 = icmp eq i32 %347, 0, !dbg !717 | |
| %351 = and i1 %350, %349, !dbg !717 | |
| br label %__nv_isinfd.exit.i.i, !dbg !717 | |
| __nv_isinfd.exit.i.i: ; preds = %346, %344 | |
| %.0.i39.i.in = phi i1 [ %345, %344 ], [ %351, %346 ] | |
| br i1 %.0.i39.i.in, label %352, label %__nv_pow.exit.i, !dbg !717 | |
| 352: ; preds = %__nv_isinfd.exit.i.i | |
| %.inv87 = icmp slt i32 %290, 0, !dbg !717 | |
| %spec.select9 = select i1 %.inv87, i32 0, i32 2146435072, !dbg !717 | |
| %353 = and i32 %290, 2147483647, !dbg !717 | |
| %354 = icmp ne i32 %353, 1071644672, !dbg !717 | |
| %or.cond11 = and i1 %354, %296, !dbg !717 | |
| %355 = or i32 %spec.select9, -2147483648, !dbg !717 | |
| %thi.6.i.i = select i1 %or.cond11, i32 %355, i32 %spec.select9, !dbg !717 | |
| %356 = call double @llvm.nvvm.lohi.i2d(i32 noundef 0, i32 %thi.6.i.i) #16, !dbg !717 | |
| br label %__nv_pow.exit.i, !dbg !717 | |
| __nv_pow.exit.i: ; preds = %352, %__nv_isinfd.exit.i.i, %330, %314, %309 | |
| %t.6.i.i = phi double [ %t.2.i.i, %309 ], [ %335, %330 ], [ %356, %352 ], [ %t.2.i.i, %__nv_isinfd.exit.i.i ], [ %310, %314 ], !dbg !717 | |
| %357 = icmp sgt i64 %.fca.2.0.extract, 0, !dbg !718 | |
| %358 = select i1 %357, i64 %.fca.2.0.extract, i64 0, !dbg !718 | |
| %359 = load i64, i64* %11, align 8, !dbg !728, !tbaa !301 | |
| %360 = load i64, i64* %12, align 8, !dbg !732, !tbaa !301 | |
| %361 = add i64 %360, -1, !dbg !732 | |
| %362 = mul i64 %361, %358, !dbg !735 | |
| %363 = add i64 %362, %359, !dbg !736 | |
| %364 = icmp sgt i64 %.fca.3.extract, 0, !dbg !737 | |
| %365 = select i1 %364, i64 %.fca.3.extract, i64 0, !dbg !737 | |
| %366 = icmp slt i64 %363, 1, !dbg !746 | |
| %367 = icmp sgt i64 %363, %365, !dbg !746 | |
| %368 = or i1 %366, %367, !dbg !748 | |
| br i1 %368, label %L493.i, label %L491.i, !dbg !748 | |
| L491.i: ; preds = %__nv_pow.exit.i | |
| %369 = fcmp oeq float %value_phi17.i, 1.000000e+00, !dbg !717 | |
| %370 = fptrunc double %t.6.i.i to float, !dbg !749 | |
| %371 = select i1 %369, float 1.000000e+00, float %370, !dbg !749 | |
| %372 = add nsw i64 %363, -1, !dbg !751 | |
| %373 = getelementptr inbounds float, float addrspace(1)* %10, i64 %372, !dbg !758 | |
| store float %371, float addrspace(1)* %373, align 4, !dbg !758, !tbaa !435 | |
| br label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !759 | |
| L493.i: ; preds = %__nv_pow.exit.i | |
| %374 = call fastcc nonnull {} addrspace(10)* @julia__throw_boundserror_5571() #15, !dbg !748 | |
| unreachable, !dbg !748 | |
| fail.i: ; preds = %entry | |
| call fastcc void @gpu_report_exception() #14, !dbg !655 | |
| call fastcc void @gpu_signal_exception() #14, !dbg !655 | |
| call void asm sideeffect "exit;", ""() #16, !dbg !655 | |
| unreachable, !dbg !655 | |
| pass.i: ; preds = %entry | |
| %375 = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() #14, !dbg !760, !range !127 | |
| %376 = udiv i64 %9, %.fca.1.0.0.0.0.extract, !dbg !655 | |
| %377 = mul i64 %376, %.fca.1.0.0.0.0.extract, !dbg !765 | |
| %378 = sub i64 %9, %377, !dbg !767 | |
| %379 = zext i32 %375 to i64, !dbg !653 | |
| %380 = lshr i64 %379, 8, !dbg !655 | |
| %381 = and i64 %379, 255, !dbg !767 | |
| %382 = add nuw nsw i64 %381, 1, !dbg !768 | |
| %383 = shl i64 %378, 8, !dbg !769 | |
| %384 = add i64 %382, %383, !dbg !773 | |
| %385 = add nuw nsw i64 %376, 1, !dbg !774 | |
| %386 = add nuw nsw i64 %385, %380, !dbg !773 | |
| %387 = icmp sgt i64 %384, 0, !dbg !777 | |
| %388 = icmp sle i64 %384, %.fca.0.0.0.0.extract, !dbg !777 | |
| %389 = and i1 %387, %388, !dbg !782 | |
| %390 = icmp sle i64 %386, %.fca.0.0.1.0.extract, !dbg !777 | |
| %391 = and i1 %390, %389, !dbg !783 | |
| br i1 %391, label %L302.i, label %julia_gpu_dist_kernel__5526_inner.exit, !dbg !785 | |
| julia_gpu_dist_kernel__5526_inner.exit: ; preds = %pass.i, %L491.i | |
| call void @llvm.lifetime.end.p0i8(i64 noundef 16, i8* noundef nonnull align 8 dereferenceable(16) %_replacementA78) #14, !dbg !786 | |
| br label %invertjulia_gpu_dist_kernel__5526_inner.exit, !dbg !629 | |
| allocsForInversion: ; No predecessors! | |
| %"iv'ac" = alloca i64, align 8 | |
| %_augmented_cache = alloca i64, align 8 | |
| store i64 0, i64* %_augmented_cache, align 8 | |
| %"value_phi13.i'de" = alloca float, align 4 | |
| store float 0.000000e+00, float* %"value_phi13.i'de", align 4 | |
| %"'de" = alloca float, align 4 | |
| store float 0.000000e+00, float* %"'de", align 4 | |
| %"'de80" = alloca float, align 4 | |
| store float 0.000000e+00, float* %"'de80", align 4 | |
| %"'de81" = alloca float, align 4 | |
| store float 0.000000e+00, float* %"'de81", align 4 | |
| %_cache = alloca i64, align 8 | |
| store i64 0, i64* %_cache, align 8 | |
| %.not59_cache = alloca i1*, align 8 | |
| %"'de87" = alloca float, align 4 | |
| store float 0.000000e+00, float* %"'de87", align 4 | |
| invertentry: ; No predecessors! | |
| ret void | |
| invertL302.i: ; preds = %invertL319.i.preheader | |
| %392 = load i64, i64* %_augmented_cache, align 8, !invariant.group !663 | |
| call fastcc void @diffejulia___index_Global_NTuple_5574([2 x i64]* %4, [2 x i64]* %"'ipa79", { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* %6, { [1 x [2 x [1 x i64]]], { [1 x [2 x [1 x i64]]] } } addrspace(11)* %"'ipc", i64 %392), !dbg !658 | |
| br label %invertpass.i | |
| invertL319.i.preheader: ; preds = %invertL319.i | |
| %393 = load i64, i64* %"iv'ac", align 8 | |
| %forfree = load i1*, i1** %.not59_cache, align 1, !dereferenceable !787, !invariant.group !675 | |
| %394 = bitcast i1* %forfree to i8* | |
| tail call void @free(i8* nonnull %394), !dbg !629 | |
| br label %invertL302.i | |
| invertL319.i: ; preds = %invertL340.i | |
| %395 = load float, float* %"value_phi13.i'de", align 4 | |
| store float 0.000000e+00, float* %"value_phi13.i'de", align 4 | |
| %396 = load i64, i64* %"iv'ac", align 8 | |
| %397 = icmp eq i64 %396, 0 | |
| %398 = xor i1 %397, true | |
| %399 = select fast i1 %398, float %395, float 0.000000e+00 | |
| %400 = load float, float* %"'de", align 4 | |
| %401 = fadd fast float %400, %395 | |
| %402 = select fast i1 %397, float %400, float %401 | |
| store float %402, float* %"'de", align 4 | |
| br i1 %397, label %invertL319.i.preheader, label %incinvertL319.i | |
| incinvertL319.i: ; preds = %invertL319.i | |
| %403 = load i64, i64* %"iv'ac", align 8 | |
| %404 = add nsw i64 %403, -1 | |
| store i64 %404, i64* %"iv'ac", align 8 | |
| br label %invert__internal_powf_infinite_cases.exit.i.i | |
| invertL340.i: ; preds = %invertL405.i | |
| br label %invertL319.i | |
| invertL342.i: ; No predecessors! | |
| invertL405.i: ; preds = %invert__nv_fabsf.exit.i.i, %invert | |
| %405 = load float, float* %"'de80", align 4 | |
| %406 = fneg fast float %405 | |
| store float 0.000000e+00, float* %"'de80", align 4 | |
| %407 = load float, float* %"'de81", align 4 | |
| %408 = fadd fast float %407, %406 | |
| store float %408, float* %"'de81", align 4 | |
| %409 = load float, float* %"'de81", align 4 | |
| store float 0.000000e+00, float* %"'de81", align 4 | |
| %410 = load i64, i64* %"iv'ac", align 8 | |
| %411 = load i64, i64* %_cache, align 8, !invariant.group !674 | |
| %iv.next_unwrap = add nuw nsw i64 %410, 1 | |
| %_unwrap = add i64 %411, %iv.next_unwrap | |
| %_unwrap83 = shl i64 %_unwrap, 2 | |
| %_unwrap84 = add i64 %_unwrap83, -4 | |
| %"'ipg_unwrap" = getelementptr i8, i8 addrspace(1)* %".fca.0.extract12'ipev", i64 %_unwrap84 | |
| %"'ipc82_unwrap" = bitcast i8 addrspace(1)* %"'ipg_unwrap" to float addrspace(1)* | |
| %412 = atomicrmw fadd float addrspace(1)* %"'ipc82_unwrap", float %409 monotonic | |
| br label %invertL340.i | |
| invertL407.i: ; No predecessors! | |
| invert: ; preds = %invert__nv_fabsf.exit.i.i | |
| br label %invertL405.i | |
| invert__nv_fabsf.exit.i.i: ; No predecessors! | |
| %413 = load i64, i64* %"iv'ac", align 8 | |
| %.fca.2.0.extract34_unwrap = extractvalue { i8 addrspace(1)*, i64, [2 x i64], i64 } %1, 2, 0 | |
| %.inv_unwrap = icmp sgt i64 %.fca.2.0.extract34_unwrap, 0 | |
| %_unwrap85 = select i1 %.inv_unwrap, i64 %.fca.2.0.extract34_unwrap, i64 0 | |
| %_unwrap86 = add nsw i64 %_unwrap85, -1 | |
| %414 = add nuw i64 %_unwrap86, 1 | |
| %415 = load i1*, i1** %.not59_cache, align 8, !dereferenceable !704, !invariant.group !675 | |
| %416 = getelementptr inbounds i1, i1* %415, i64 %413 | |
| %417 = load i1, i1* %416, align 1, !invariant.group !705 | |
| br i1 %417, label %invertL405.i, label %invert | |
| invert1: ; No predecessors! | |
| %418 = load float, float* %"'de87", align 4 | |
| store float 0.000000e+00, float* %"'de87", align 4 | |
| invert2: ; No predecessors! | |
| invert__nv_fabsf.exit1.i.i: ; No predecessors! | |
| invert3: ; No predecessors! | |
| invert4: ; No predecessors! | |
| invert__internal_fmad.exit.i.i.i.i: ; No predecessors! | |
| invert5: ; No predecessors! | |
| invert6: ; No predecessors! | |
| invert__internal_fmad.exit3.i.i.i.i: ; No predecessors! | |
| invert7: ; No predecessors! | |
| invert8: ; No predecessors! | |
| invert__nv_fmul_rn.exit4.i.i.i.i: ; No predecessors! | |
| invert9: ; No predecessors! | |
| invert10: ; No predecessors! | |
| invert__nv_fmul_rn.exit5.i.i.i.i: ; No predecessors! | |
| invert11: ; No predecessors! | |
| invert12: ; No predecessors! | |
| invert__nv_fmaf_rn.exit.i.i.i.i: ; No predecessors! | |
| invert13: ; No predecessors! | |
| invert14: ; No predecessors! | |
| invert__nv_fmul_rn.exit6.i.i.i.i: ; No predecessors! | |
| invert15: ; No predecessors! | |
| invert16: ; No predecessors! | |
| invert__nv_fmul_rn.exit2.i.i.i.i: ; No predecessors! | |
| invert17: ; No predecessors! | |
| invert18: ; No predecessors! | |
| invert__internal_log_ep.exit.i.i.i: ; No predecessors! | |
| invert19: ; No predecessors! | |
| invert__nv_fabsf.exit.i6.i.i: ; No predecessors! | |
| invert20: ; No predecessors! | |
| invert21: ; No predecessors! | |
| invert__nv_fmul_rn.exit.i.i.i.i: ; No predecessors! | |
| invert22: ; No predecessors! | |
| invert23: ; No predecessors! | |
| invert__nv_fmaf_rn.exit.i3.i.i.i: ; No predecessors! | |
| invert24: ; No predecessors! | |
| invert25: ; No predecessors! | |
| invert__nv_fmaf_rn.exit1.i.i.i.i: ; No predecessors! | |
| invert26: ; No predecessors! | |
| invert27: ; No predecessors! | |
| invert__nv_fmaf_rn.exit2.i.i.i.i: ; No predecessors! | |
| invert28: ; No predecessors! | |
| invert29: ; No predecessors! | |
| invert__nv_fadd_rn.exit.i.i.i.i: ; No predecessors! | |
| invert30: ; No predecessors! | |
| invert31: ; No predecessors! | |
| invert__nv_fadd_rn.exit3.i.i.i.i: ; No predecessors! | |
| invert32: ; No predecessors! | |
| invert33: ; No predecessors! | |
| invert__internal_dsmul.exit.i.i.i: ; No predecessors! | |
| invert34: ; No predecessors! | |
| invert35: ; No predecessors! | |
| invert__nv_fmul_rn.exit.i10.i.i.i: ; No predecessors! | |
| invert36: ; No predecessors! | |
| invert37: ; No predecessors! | |
| invert__nv_fabsf.exit.i.i.i.i: ; No predecessors! | |
| invert38: ; No predecessors! | |
| invert39: ; No predecessors! | |
| invert__internal_fmad.exit4.i.i.i.i: ; No predecessors! | |
| invert40: ; No predecessors! | |
| invert41: ; No predecessors! | |
| invert__internal_expf_kernel.exit.i.i.i: ; No predecessors! | |
| invert42: ; No predecessors! | |
| invert43: ; No predecessors! | |
| invert44: ; No predecessors! | |
| invert__internal_accurate_powf.exit.i.i: ; No predecessors! | |
| invert45: ; No predecessors! | |
| invert46: ; No predecessors! | |
| invert__nv_fabsf.exit.i.i.i: ; No predecessors! | |
| invert47: ; No predecessors! | |
| invert__nv_fabsf.exit1.i.i.i: ; No predecessors! | |
| invert48: ; No predecessors! | |
| invert49: ; No predecessors! | |
| invert50: ; No predecessors! | |
| invert__nv_isnanf.exit.i.i.i: ; No predecessors! | |
| invert51: ; No predecessors! | |
| invert52: ; No predecessors! | |
| invert__nv_isnanf.exit11.i.i.i: ; No predecessors! | |
| invert53: ; No predecessors! | |
| invert54: ; No predecessors! | |
| invert55: ; No predecessors! | |
| invert__nv_isinff.exit8.i.i.i: ; No predecessors! | |
| invert56: ; No predecessors! | |
| invert57: ; No predecessors! | |
| invert58: ; No predecessors! | |
| invert__nv_fabsf.exit5.i.i.i: ; No predecessors! | |
| invert59: ; No predecessors! | |
| invert60: ; No predecessors! | |
| invert61: ; No predecessors! | |
| invert__nv_isinff.exit.i.i.i: ; No predecessors! | |
| invert62: ; No predecessors! | |
| invert__internal_powf_infinite_cases.exit.i.i: ; preds = %incinvertL319.i | |
| invertL467.i.loopexit: ; No predecessors! | |
| invertL467.i: ; No predecessors! | |
| invert63: ; No predecessors! | |
| invert64: ; No predecessors! | |
| invert65: ; No predecessors! | |
| invert66: ; No predecessors! | |
| invert67: ; No predecessors! | |
| invert68: ; No predecessors! | |
| invert69: ; No predecessors! | |
| invert70: ; No predecessors! | |
| invert71: ; No predecessors! | |
| invert__nv_isinfd.exit4.i.i: ; No predecessors! | |
| invert72: ; No predecessors! | |
| invert.critedge: ; No predecessors! | |
| invert73: ; No predecessors! | |
| invert74: ; No predecessors! | |
| invert75: ; No predecessors! | |
| invert76: ; No predecessors! | |
| invert__nv_isinfd.exit.i.i: ; No predecessors! | |
| invert77: ; No predecessors! | |
| invert__nv_pow.exit.i: ; No predecessors! | |
| invertL491.i: ; No predecessors! | |
| invertL493.i: ; No predecessors! | |
| invertfail.i: ; No predecessors! | |
| invertpass.i: ; preds = %invertL302.i | |
| invertjulia_gpu_dist_kernel__5526_inner.exit: ; preds = %julia_gpu_dist_kernel__5526_inner.exit | |
| } | |
| cannot handle (reverse) unknown intrinsic | |
| llvm.nvvm.fabs.ftz.f | |
| %59 = call float @llvm.nvvm.fabs.ftz.f(float %51) #16, !dbg !187 | |
| Stacktrace: | |
| [1] #^ | |
| @ ~/.julia/packages/CUDA/DfvRa/src/device/intrinsics/math.jl:233 | |
| [2] #^ | |
| @ ~/.julia/packages/CUDA/DfvRa/src/device/intrinsics/math.jl:243 | |
| [3] literal_pow | |
| @ ./intfuncs.jl:316 | |
| [4] macro expansion | |
| @ ~/proj/mnistjl/distances.jl:43 | |
| [5] gpu_dist_kernel! | |
| @ ~/.julia/packages/KernelAbstractions/1ZLga/src/macros.jl:80 | |
| [6] gpu_dist_kernel! (repeats 2 times) | |
| @ ./none:0 | |
| Stacktrace: | |
| [1] julia_error(cstr::Cstring, val::Ptr{LLVM.API.LLVMOpaqueValue}, errtype::Enzyme.API.ErrorType, data::Ptr{Nothing}) | |
| @ Enzyme.Compiler ~/.julia/packages/Enzyme/di3zM/src/compiler.jl:2636 | |
| [2] EnzymeCreatePrimalAndGradient(logic::Enzyme.Logic, todiff::LLVM.Function, retType::Enzyme.API.CDIFFE_TYPE, constant_args::Vector{Enzyme.API.CDIFFE_TYPE}, TA::Enzyme.TypeAnalysis, returnValue::Bool, dretUsed::Bool, mode::Enzyme.API.CDerivativeMode, width::Int64, additionalArg::Ptr{Nothing}, typeInfo::Enzyme.FnTypeInfo, uncacheable_args::Vector{Bool}, augmented::Ptr{Nothing}, atomicAdd::Bool) | |
| @ Enzyme.API ~/.julia/packages/Enzyme/di3zM/src/api.jl:111 | |
| [3] enzyme!(job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}}}}, mod::LLVM.Module, primalf::LLVM.Function, adjoint::GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{Const{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}}, Const{CuDeviceMatrix{Float32, 1}}, Duplicated{CuDeviceMatrix{Float32, 1}}, Const{CuDeviceMatrix{Float32, 1}}}}, mode::Enzyme.API.CDerivativeMode, width::Int64, parallel::Bool, actualRetType::Type, dupClosure::Bool, wrap::Bool, modifiedBetween::Bool, returnPrimal::Bool) | |
| @ Enzyme.Compiler ~/.julia/packages/Enzyme/di3zM/src/compiler.jl:3271 | |
| [4] codegen(output::Symbol, job::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}}}}; libraries::Bool, deferred_codegen::Bool, optimize::Bool, ctx::LLVM.Context, strip::Bool, validate::Bool, only_entry::Bool, parent_job::GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}}) | |
| @ Enzyme.Compiler ~/.julia/packages/Enzyme/di3zM/src/compiler.jl:4158 | |
| [5] (::GPUCompiler.var"#114#117"{LLVM.Context, GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}}, GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}}}}})() | |
| @ GPUCompiler ~/.julia/packages/GPUCompiler/jVY4I/src/driver.jl:296 | |
| [6] get!(default::GPUCompiler.var"#114#117"{LLVM.Context, GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}}, GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}}}}}, h::Dict{GPUCompiler.CompilerJob, String}, key::GPUCompiler.CompilerJob{Enzyme.Compiler.EnzymeTarget, Enzyme.Compiler.EnzymeCompilerParams, GPUCompiler.FunctionSpec{typeof(gpu_dist_kernel!), Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, 
Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}, CuDeviceMatrix{Float32, 1}}}}) | |
| @ Base ./dict.jl:464 | |
| [7] macro expansion | |
| @ ~/.julia/packages/GPUCompiler/jVY4I/src/driver.jl:295 [inlined] | |
| [8] emit_llvm(job::GPUCompiler.CompilerJob, method_instance::Any; libraries::Bool, deferred_codegen::Bool, optimize::Bool, cleanup::Bool, only_entry::Bool, ctx::LLVM.Context) | |
| @ GPUCompiler ~/.julia/packages/GPUCompiler/jVY4I/src/utils.jl:64 | |
| [9] cufunction_compile(job::GPUCompiler.CompilerJob, ctx::LLVM.Context) | |
| @ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:353 | |
| [10] #224 | |
| @ ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:347 [inlined] | |
| [11] JuliaContext(f::CUDA.var"#224#225"{GPUCompiler.CompilerJob{GPUCompiler.PTXCompilerTarget, CUDA.CUDACompilerParams, GPUCompiler.FunctionSpec{KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}}}) | |
| @ GPUCompiler ~/.julia/packages/GPUCompiler/jVY4I/src/driver.jl:76 | |
| [12] cufunction_compile(job::GPUCompiler.CompilerJob) | |
| @ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:346 | |
| [13] cached_compilation(cache::Dict{UInt64, Any}, job::GPUCompiler.CompilerJob, compiler::typeof(CUDA.cufunction_compile), linker::typeof(CUDA.cufunction_link)) | |
| @ GPUCompiler ~/.julia/packages/GPUCompiler/jVY4I/src/cache.jl:90 | |
| [14] cufunction(f::KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}; name::Nothing, kwargs::Base.Pairs{Symbol, Union{}, Tuple{}, NamedTuple{(), Tuple{}}}) | |
| @ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:299 | |
| [15] cufunction(f::KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}, tt::Type{Tuple{KernelAbstractions.CompilerMetadata{KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.DynamicCheck, Nothing, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, KernelAbstractions.NDIteration.NDRange{2, KernelAbstractions.NDIteration.DynamicSize, KernelAbstractions.NDIteration.StaticSize{(256, 1)}, CartesianIndices{2, Tuple{Base.OneTo{Int64}, Base.OneTo{Int64}}}, Nothing}}, CuDeviceMatrix{Float32, 1}, Duplicated{CuDeviceMatrix{Float32, 1}}, CuDeviceMatrix{Float32, 1}}}) | |
| @ CUDA ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:293 | |
| [16] macro expansion | |
| @ ~/.julia/packages/CUDA/DfvRa/src/compiler/execution.jl:102 [inlined] | |
| [17] (::KernelAbstractions.Kernel{CUDADevice, KernelAbstractions.NDIteration.StaticSize{(256,)}, KernelAbstractions.NDIteration.DynamicSize, KernelGradients.var"#df#1"{typeof(gpu_dist_kernel!), typeof(gpu_dist_kernel!)}})(::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ::Vararg{Any}; ndrange::Tuple{Int64, Int64}, dependencies::CUDAKernels.CudaEvent, workgroupsize::Nothing, progress::Function) | |
| @ CUDAKernels ~/.julia/packages/CUDAKernels/YWLqR/src/CUDAKernels.jl:273 | |
| [18] Δdist!(x::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, y::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, result::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}, ∂y::CuArray{Float32, 2, CUDA.Mem.DeviceBuffer}) | |
| @ Main ~/proj/mnistjl/distances.jl:62 | |
| [19] top-level scope | |
| @ ./timing.jl:220 | |
| in expression starting at /home/tmb/proj/mnistjl/distances.jl:77 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| [deps] | |
| CUDA = "052768ef-5323-5732-b1bb-66c8b64840ba" | |
| CUDAKernels = "72cfdca4-0801-4ab0-bf6a-d52aa10adc57" | |
| Enzyme = "7da242da-08ed-463a-9acd-ee780be4f1d9" | |
| KernelAbstractions = "63c18a36-062a-441e-b654-da1e3ab1ce7c" | |
| KernelGradients = "e5faadeb-7f6c-408e-9747-a7a26e81c66a" | |
| Test = "8dfed614-e22c-5e08-85e1-65c5234f0b40" | |
| Tullio = "bc48ee85-29a4-5162-ae0b-a64e1601d4bc" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment