Created
July 18, 2018 11:58
-
-
Save haampie/932daac77ed4abb4b9cecb551271d333 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
using LinearAlgebra | |
using LinearAlgebra: givensAlgorithm | |
using IRAM | |
using BenchmarkTools | |
struct Rotation2D{Tc,Ts} | |
c::Tc | |
s::Ts | |
end | |
function get_rotation(p₁, p₂) | |
c, s, nrm = givensAlgorithm(p₁, p₂) | |
Rotation2D(c, s), nrm | |
end | |
function single_shift!(H::AbstractMatrix{Tv}, μ, rotations::Vector{<:Rotation2D}, nc::Int = 50) where {Tv<:Number} | |
m, n = size(H) | |
@inbounds p₁ = H[1, 1] - μ | |
@inbounds p₂ = H[2, 1] | |
G₁, _ = get_rotation(p₁, p₂) | |
left!(G₁, H, 1, 1, 1) | |
@inbounds rotations[1] = G₁ | |
panel_start = 2 | |
panel_end = 2 + nc - 1 | |
@inbounds while panel_start ≤ n | |
# Apply previous rotations from the left. | |
for i = 1 : panel_start - 1 | |
left!(rotations[i], H, i, panel_start, min(panel_end, n)) | |
end | |
# Introduce a new bulge and chase it through this pane. | |
for i = panel_start : min(panel_end, n - 1) | |
# New bulge | |
right!(H, rotations[i-1], i - 1, 1, min(i + 1, m)) | |
# Chase bulge | |
p₁ = H[i+0, i-1] | |
p₂ = H[i+1, i-1] | |
G, nrm = get_rotation(p₁, p₂) | |
H[i+0, i-1] = nrm | |
H[i+1, i-1] = zero(Tv) | |
left!(G, H, i, i, min(panel_end, n)) | |
# Store the rotation. | |
rotations[i] = G | |
end | |
panel_start += nc | |
panel_end += nc | |
end | |
right!(H, rotations[n - 1], n - 1, 1, m) | |
H | |
end | |
@inline function left!(G::Rotation2D, A::AbstractMatrix, row::Int, from::Int, to::Int) | |
@inbounds for j = from:to | |
a₁ = A[row+0,j] | |
a₂ = A[row+1,j] | |
a₁′ = G.c * a₁ + G.s * a₂ | |
a₂′ = -G.s' * a₁ + G.c * a₂ | |
A[row+0,j] = a₁′ | |
A[row+1,j] = a₂′ | |
end | |
A | |
end | |
@inline function right!(A::AbstractMatrix, G::Rotation2D, col::Int, from::Int, to::Int) | |
@inbounds for j = from:to | |
a₁ = A[j,col+0] | |
a₂ = A[j,col+1] | |
a₁′ = a₁ * G.c + a₂ * G.s' | |
a₂′ = a₁ * -G.s + a₂ * G.c | |
A[j,col+0] = a₁′ | |
A[j,col+1] = a₂′ | |
end | |
A | |
end | |
function compare(n = 257, nc = 8) | |
H = randn(n, n) | |
μ = rand(Float64) | |
rotations = Vector{Rotation2D{Float64,Float64}}(undef, n - 1) | |
H1 = copy(H) | |
H2 = copy(H) | |
@time IRAM._single_shift_schur!(H1, 1, n, μ) | |
@time single_shift!(H2, μ, rotations, nc) | |
@show norm(H1 - H2) # should be 0.0 exactly. | |
end | |
function go(ns = round.(Int, 10. * (√√2) .^ (0 : 36))) | |
iram_timings = Float64[] | |
new_timings = Float64[] | |
for n = ns | |
println("Running ", n) | |
H = randn(n, n) | |
μ = rand(Float64) | |
rotations = Vector{Rotation2D{Float64,Float64}}(undef, n - 1) | |
fst = @benchmark IRAM._single_shift_schur!(H1, 1, $n, $μ) setup = (H1 = copy($H)) | |
snd = @benchmark single_shift!(H1, $μ, $rotations, 14) setup = (H1 = copy($H)) | |
push!(iram_timings, time(fst)) | |
push!(new_timings, time(snd)) | |
end | |
# FLOPS | |
flops_iram = 6 .* ns .^ 2 ./ iram_timings | |
flops_new = 6 .* ns .^ 2 ./ new_timings | |
return ns, flops_iram, flops_new | |
end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment