Skip to content

Instantly share code, notes, and snippets.

@ehermes
Created August 10, 2015 18:33
Show Gist options
  • Select an option

  • Save ehermes/f84361958f0b285dfe73 to your computer and use it in GitHub Desktop.

Select an option

Save ehermes/f84361958f0b285dfe73 to your computer and use it in GitHub Desktop.
module d3_fort
use d3params, only: sp, dp, qp, Bohr, Hartree, k1, k2, k3, alp, max_elem, &
max_cn, numcn, cntab, r2r4a, rcova, r0
implicit none
private
public d3_calc
contains
subroutine d3_calc(natoms, atomnumber, cell, xyz, rcutin, rcutcn, s6, s18, &
rs6, rs18, alp6, alp8, pbc, bj, threebody, energy, forces, stress)
use d3params, only: initialize_c6, cross_prod, outer_prod
implicit none
!f2py threadsafe
integer, intent(in) :: natoms, atomnumber(natoms)
real(dp), intent(in) :: cell(3, 3), xyz(3, natoms), rcutin, rcutcn
real(dp), intent(in) :: s6, s18, rs6, rs18, alp6, alp8
logical, intent(in) :: pbc(3), bj, threebody
integer :: a, b, c, i, j, k, ii, jj, kk
integer :: na, nb, nc, ncna, ncnb
integer :: elemab, elemac, elembc
integer :: images_2b(3), images_3b(3)
integer :: nthreads, myid, start, start_3b
integer, external :: OMP_GET_NUM_THREADS, OMP_GET_THREAD_NUM
real(dp) :: rcut
real(dp) :: tvec(3), tvec_3b(3), rcut2, rcutcn2, rcovab
real(dp) :: cn(natoms)
real(dp) :: cnexp, cnab, dcnab(3)
real(dp) :: lij, lijsum, dlij(3, natoms), slij(3, 3)
real(dp) :: xyza(3), xyzb(3), xyzc(3), xyzab(3), xyzac(3), xyzbc(3)
real(dp) :: uxyzab(3), uxyzac(3), uxyzbc(3)
real(dp) :: self, lattlen(3), V
real(dp) :: c8, c9, e6, e8, e9, f6ab(3), f8ab(3)
real(dp) :: a1, a2, dmp
real(dp) :: rab, rab2, rab3, rab6, rab8
real(dp) :: rac, rac2, rac3
real(dp) :: rbc, rbc2, rbc3
real(dp) :: dmp6, dmp8, rav6, rav8
real(dp) :: dmp9, ddmp9, dadmp9(3), dbdmp9(3), dcdmp9(3)
real(dp) :: fa(3), fb(3), fc(3)
real(dp) :: r9, dar9(3), dbr9(3), dcr9(3)
real(dp) :: fa9(3), fb9(3), fc9(3)
real(dp) :: angles, daangles(3), dbangles(3), dcangles(3)
real(dp) :: rav9, darav9(3), dbrav9(3), dcrav9(3)
real(dp) :: c6ab, c6ac, c6bc
real(dp) :: alph, beta, gamm
real(dp) :: daalph(3), dabeta(3), dagamm(3)
real(dp) :: dbalph(3), dbbeta(3), dbgamm(3)
real(dp) :: dcalph(3), dcbeta(3), dcgamm(3)
real(dp) :: sc6ab(3, 3), sc6ac(3, 3), sc6bc(3, 3), sc8(3, 3)
real(dp) :: sc9(3, 3)
real(dp) :: dc6ab(3, natoms), dc6ac(3, natoms), dc6bc(3, natoms)
real(dp) :: dc8(3, natoms), dc9(3, natoms)
real(dp) :: sc_tmp(3, 3), c6abij
real(dp) :: c6(natoms, natoms), dc6(3, natoms, natoms, natoms)
real(dp) :: sc6(3, 3, natoms, natoms)
real(dp) :: r0ab, r0ac, r0bc
logical :: central
real(dp), allocatable :: dcn(:, :, :), scn(:, :, :)
real(dp), allocatable :: c6_ref(:, :, :)
real(qp), allocatable :: cn_t(:, :), dcn_t(:, :, :, :)
real(qp), allocatable :: scn_t(:, :, :, :), e_t(:), f_t(:, :, :)
real(qp), allocatable :: s_t(:, :, :)
real(dp), intent(out) :: energy, forces(3, natoms), stress(3, 3)
if (rcutcn > rcutin) then
rcut = rcutcn
else
rcut = rcutin
endif
if (bj) then
a1 = rs6
a2 = rs18 * Bohr
endif
rcut2 = rcut**2
rcutcn2 = rcutcn**2
do i = 1, 3
lattlen(i) = sqrt(dot_product(cell(:, i), cell(:, i)))
enddo
V = dot_product(cell(:, 1), cross_prod(cell(:, 2), cell(:, 3)))
! Number of atoms within 2body cutoff
images_2b = ceiling(rcut * product(lattlen) / (V * lattlen))
do i = 1, 3
if (.not. pbc(i)) images_2b(i) = 0
enddo
! Number of atoms within 3body cutoff
images_3b = ceiling(rcutcn * product(lattlen) / (V * lattlen))
do i = 1, 3
if (.not. pbc(i)) images_3b(i) = 0
enddo
! Calculate number of 2body and 3body interactions, and calculate
! cn & dcn
allocate(dcn(3, natoms, natoms))
allocate(scn(3, 3, natoms))
cn = 0
dcn = 0
scn = 0
!$OMP PARALLEL default(private) shared(cell, images_3b) &
!$OMP shared(atomnumber, rcutcn2, natoms, xyz) &
!$OMP shared(cn_t, dcn_t, scn_t)
nthreads = OMP_GET_NUM_THREADS()
myid = OMP_GET_THREAD_NUM() + 1
if (myid == 1) then
allocate(cn_t(natoms, nthreads))
allocate(dcn_t(3, natoms, natoms, nthreads))
allocate(scn_t(3, 3, natoms, nthreads))
cn_t = 0
dcn_t = 0
scn_t = 0
endif
!$OMP BARRIER
!$OMP DO schedule(dynamic, natoms) collapse(4)
do a = 1, natoms
do i = -images_3b(1), images_3b(1)
do j = -images_3b(2), images_3b(2)
do k = -images_3b(3), images_3b(3)
xyza = xyz(:, a)
na = atomnumber(a)
if ((i == 0) .and. (j == 0) .and. (k == 0)) then
central = .true.
tvec = 0
start = a + 1
else
central = .false.
tvec = matmul((/i, j, k/), cell)
start = 1
endif
do b = start, natoms
xyzb = xyz(:, b) + tvec
nb = atomnumber(b)
xyzab = xyzb - xyza
rab2 = dot_product(xyzab, xyzab)
if (rab2 < rcutcn2) then
rab = sqrt(rab2)
uxyzab = xyzab / rab
rcovab = rcova(na) + rcova(nb)
cnexp = exp(-k1 * (rcovab / rab - 1.0_dp))
cnab = 1.0_dp / (1.0_dp + cnexp)
dcnab = cnexp * k1 * rcovab * cnab**2 * uxyzab / rab2
cn_t(a, myid) = cn_t(a, myid) + cnab
if (central) cn_t(b, myid) = cn_t(b, myid) + cnab
if (b > a) then
dcn_t(:, b, a, myid) = dcn_t(:, b, a, myid) - dcnab
dcn_t(:, a, a, myid) = dcn_t(:, a, a, myid) + dcnab
dcn_t(:, a, b, myid) = dcn_t(:, a, b, myid) + dcnab
dcn_t(:, b, b, myid) = dcn_t(:, b, b, myid) - dcnab
endif
sc_tmp = spread(xyzab, 1, 3) * spread(dcnab, 2, 3)
scn_t(:, :, a, myid) = scn_t(:, :, a, myid) + sc_tmp
if (central) scn_t(:, :, b, myid) = scn_t(:, :, b, myid) + sc_tmp
endif
enddo
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
cn = sum(cn_t, 2)
dcn = sum(dcn_t, 4)
scn = sum(scn_t, 4)
deallocate(cn_t)
deallocate(dcn_t)
deallocate(scn_t)
allocate(c6_ref(max_cn, max_cn, (max_elem + 1) * max_elem / 2))
call initialize_c6(c6_ref)
!$OMP PARALLEL default(private) shared(natoms, atomnumber, numcn, cn, cntab) &
!$OMP shared(dcn, scn, c6_ref, c6, dc6, sc6)
!$OMP DO schedule(dynamic, natoms) collapse(2)
do a = 1, natoms
do b = 1, natoms
na = atomnumber(a)
nb = atomnumber(b)
ncna = numcn(na)
ncnb = numcn(nb)
if (na < nb) then
elemab = max_elem * (na - 1) - (na * (na - 1))/2 + nb
else
elemab = max_elem * (nb - 1) - (nb * (nb - 1))/2 + na
endif
if ((ncna == 1) .and. (ncnb == 1)) then
c6(b, a) = c6_ref(1, 1, elemab)
dc6(:, :, b, a) = 0
sc6(:, :, b, a) = 0
else
lijsum = 0
dlij = 0
slij = 0
c6ab = 0
dc6ab = 0
sc6ab = 0
do i = 1, ncna
do j = 1, ncnb
if (na < nb) then
c6abij = c6_ref(i, j, elemab)
else
c6abij = c6_ref(j, i, elemab)
endif
lij = exp(-k3 * ((cn(a) - cntab(i, na))**2 + (cn(b) - cntab(j, nb))**2))
dlij = dlij - 2.0_dp * k3 * lij &
* ((cn(a) - cntab(i, na)) * dcn(:, :, a) &
+ (cn(b) - cntab(j, nb)) * dcn(:, :, b))
slij = slij - 2.0_dp * k3 * lij &
* ((cn(a) - cntab(i, na)) * scn(:, :, a) &
+ (cn(b) - cntab(j, nb)) * scn(:, :, b))
lijsum = lijsum + lij
c6ab = c6ab + c6abij * lij
dc6ab = dc6ab - c6abij &
* 2.0_dp * k3 * lij &
* ((cn(a) - cntab(i, na)) * dcn(:, :, a) &
+ (cn(b) - cntab(j, nb)) * dcn(:, :, b))
sc6ab = sc6ab - c6abij &
* 2.0_dp * k3 * lij &
* ((cn(a) - cntab(i, na)) * scn(:, :, a) &
+ (cn(b) - cntab(j, nb)) * scn(:, :, b))
enddo
enddo
c6ab = c6ab / lijsum
c6(b, a) = c6ab
dc6(:, :, b, a) = (dc6ab - c6ab * dlij) / lijsum
sc6(:, :, b, a) = (sc6ab - c6ab * slij) / lijsum
endif
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
deallocate(c6_ref)
deallocate(dcn)
deallocate(scn)
energy = 0
forces = 0
stress = 0
!$OMP PARALLEL default(private) shared(natoms, xyz, atomnumber, cell) &
!$OMP shared(rcut2, rcutcn2, cn, cntab, c6, dc6, sc6) &
!$OMP shared(r2r4a, a1, a2, rs6, alp6, rs18, alp8, energy, forces, stress) &
!$OMP shared(bj, threebody, s6, s18, images_2b, images_3b) &
!$OMP shared(e_t, f_t, s_t)
nthreads = OMP_GET_NUM_THREADS()
myid = OMP_GET_THREAD_NUM() + 1
if (myid == 1) then
allocate(e_t(nthreads))
allocate(f_t(3, natoms, nthreads))
allocate(s_t(3, 3, nthreads))
endif
!$OMP BARRIER
e_t(myid) = 0
f_t(:, :, myid) = 0
s_t(:, :, myid) = 0
!$OMP DO schedule(dynamic,natoms) collapse(4)
do a = 1, natoms
do i = -images_2b(1), images_2b(1)
do j = -images_2b(2), images_2b(2)
do k = -images_2b(3), images_2b(3)
xyza = xyz(:, a)
na = atomnumber(a)
if ((i == 0) .and. (j == 0) .and. (k == 0)) then
tvec = 0
start = a + 1
else
tvec = matmul((/i, j, k/), cell)
start = a
endif
do b = start, natoms
nb = atomnumber(b)
xyzb = xyz(:, b) + tvec
xyzab = xyzb - xyza
rab2 = dot_product(xyzab, xyzab)
if (rab2 > rcut2) cycle
rab = sqrt(rab2)
rab2 = rab**2
rab6 = rab2**3
rab8 = rab6 * rab2
uxyzab = xyzab / rab
if (a == b) then
self = 0.5_dp
else
self = 1.0_dp
endif
if (na < nb) then
elemab = max_elem * (na - 1) - (na * (na - 1))/2 + nb
else
elemab = max_elem * (nb - 1) - (nb * (nb - 1))/2 + na
endif
c6ab = c6(b, a)
dc6ab = dc6(:, :, b, a)
sc6ab = sc6(:, :, b, a)
r0ab = r0(elemab)
e6 = 0
e8 = 0
c8 = 3.0_dp * c6ab * r2r4a(na) * r2r4a(nb)
dc8 = 3.0_dp * dc6ab * r2r4a(na) * r2r4a(nb)
sc8 = 3.0_dp * sc6ab * r2r4a(na) * r2r4a(nb)
if (bj) then
dmp = a1 * sqrt(3.0_dp * r2r4a(na) * r2r4a(nb)) + a2
e6 = -1.0_dp / (rab6 + dmp**6)
f6ab = -6.0_dp * e6**2 * rab2**2 * xyzab
e8 = -1.0_dp / (rab8 + dmp**8)
f8ab = -8.0_dp * e8**2 * rab6 * xyzab
else
rav6 = (rs6 * r0ab / rab)**alp6
dmp6 = 1.0_dp / (1.0_dp + 6.0_dp * rav6)
e6 = -dmp6 / rab6
f6ab = 6.0_dp * (xyzab * e6 / rab2 &
+ dmp6**2 * xyzab * alp6 * rav6 / rab8)
rav8 = (rs18 * r0ab / rab)**alp8
dmp8 = 1.0_dp / (1.0_dp + 6.0_dp * rav8)
e8 = -dmp8 / rab8
f8ab = 8.0_dp * (xyzab * e8 / rab2 &
+ dmp8**2 * xyzab * alp8 * rav8 / (rab2 * rab8))
endif
e6 = e6 * s6 * self
e8 = e8 * s18 * self
f6ab = c6ab * f6ab * s6 * self
f8ab = c8 * f8ab * s18 * self
e_t(myid) = e_t(myid) + c6ab * e6 + c8 * e8
if (a /= b) then
f_t(:, a, myid) = f_t(:, a, myid) - f6ab - f8ab
f_t(:, b, myid) = f_t(:, b, myid) + f6ab + f8ab
endif
f_t(:, :, myid) = f_t(:, :, myid) - dc6ab * e6 - dc8 * e8
s_t(:, :, myid) = s_t(:, :, myid) + sc6ab * e6 + sc8 * e8 &
+ outer_prod(3, xyzab, f6ab + f8ab)
if (threebody .and. (rab2 < rcutcn2)) then
do ii = -images_3b(1), images_3b(1)
do jj = -images_3b(2), images_3b(2)
do kk = -images_3b(3), images_3b(3)
if ((ii == 0) .and. (jj == 0) .and. (kk == 0)) then
central = .true.
tvec_3b = 0
else
central = .false.
tvec_3b = matmul((/ii, jj, kk/), cell)
endif
if ((i == ii) .and. (j == jj) .and. (k == kk)) then
start_3b = b + 1
else
start_3b = b
endif
do c = start_3b, natoms
if ((central) .and. (c == a)) cycle
xyzc = xyz(:, c) + tvec_3b
xyzac = xyzc - xyza
rac2 = dot_product(xyzac, xyzac)
if (rac2 > rcutcn2) cycle
xyzbc = xyzc - xyzb
rbc2 = dot_product(xyzbc, xyzbc)
if (rbc2 > rcutcn2) cycle
rab3 = rab2 * rab
rac = sqrt(rac2)
rac3 = rac2 * rac
rbc = sqrt(rbc2)
rbc3 = rbc2 * rbc
nc = atomnumber(c)
if (na < nc) then
elemac = max_elem * (na - 1) - na * (na - 1)/2 + nc
else
elemac = max_elem * (nc - 1) - nc * (nc - 1)/2 + na
endif
if (nb < nc) then
elembc = max_elem * (nb - 1) - nb * (nb - 1)/2 + nc
else
elembc = max_elem * (nc - 1) - nc * (nc - 1)/2 + nb
endif
r0ac = r0(elemac)
r0bc = r0(elembc)
c6ac = c6(c, a)
dc6ac = dc6(:, :, c, a)
sc6ac = sc6(:, :, c, a)
c6bc = c6(c, b)
dc6bc = dc6(:, :, c, b)
sc6bc = sc6(:, :, c, b)
if ((a == b) .or. (a == c) .or. (b == c)) then
if ((a == b) .and. (a == c)) then
self = 1.0_dp / 6.0_dp
else
self = 1.0_dp / 2.0_dp
endif
else
self = 1.0_dp
endif
alph = dot_product(xyzab, xyzac) / (rab * rac)
beta = -dot_product(xyzab, xyzbc) / (rab * rbc)
gamm = dot_product(xyzac, xyzbc) / (rac * rbc)
! Gradient of alph, beta, gamm. Very complicated...
! Figured this all out using Mathematica and defining
! alph = dot_product(xyzab,xyzac)/(rab * rac), etc.
daalph = alph * (xyzac / rac2 + xyzab / rab2) &
- (xyzac + xyzab) / (rab * rac)
dabeta = xyzbc / (rab * rbc) + xyzab * beta / rab2
dagamm = -xyzbc / (rac * rbc) + xyzac * gamm / rac2
dbalph = xyzac / (rab * rac) - xyzab * alph / rab2
dbbeta = beta * (xyzbc / rbc2 - xyzab / rab2) &
+ (xyzab - xyzbc) / (rab * rbc)
dbgamm = -xyzac / (rac * rbc) + xyzbc * gamm / rbc2
dcalph = xyzab / (rab * rac) - xyzac * alph / rac2
dcbeta = -xyzab / (rab * rbc) - xyzbc * beta / rbc2
dcgamm = -gamm * (xyzac / rac2 + xyzbc / rbc2) &
+ (xyzac + xyzbc) / (rac * rbc)
! I have no idea what 'rav' stands for, but that's what Grimme
! called this variable. Cube root of the product of the
! ratios of r0ab/rab, times 4/3 for some reason. I don't know.
rav9 = (4.0_dp/3.0_dp) * (r0ab * r0ac * r0bc &
/ (rab * rbc * rac))**(1.0_dp/3.0_dp)
darav9 = (rav9/3.0_dp) * (xyzab / rab2 + xyzac / rac2)
dbrav9 = (rav9/3.0_dp) * (-xyzab / rab2 + xyzbc / rbc2)
dcrav9 = -(rav9/3.0_dp) * (xyzac / rac2 + xyzbc / rbc2)
! Three-body term *always* uses "zero" damping, even if
! we are using the BJ version of DFT-D3
dmp9 = 1.0_dp/(1.0_dp + 6.0_dp * rav9**alp8)
ddmp9 = -6.0_dp * alp8 * rav9**(alp8-1) * dmp9**2
! Three-body depends on "average" r^9
r9 = 1.0_dp / (rab3 * rac3 * rbc3)
dar9 = 3.0_dp * r9 * (xyzab / rab2 + xyzac / rac2)
dbr9 = 3.0_dp * r9 * (-xyzab / rab2 + xyzbc / rbc2)
dcr9 = -3.0_dp * r9 * (xyzac / rac2 + xyzbc / rbc2)
! Angle term of the three body energy, and its gradient
angles = 3.0_dp * alph * beta * gamm + 1.0_dp
daangles = 3.0_dp * (daalph * beta * gamm &
+ alph * dabeta * gamm &
+ alph * beta * dagamm)
dbangles = 3.0_dp * (dbalph * beta * gamm &
+ alph * dbbeta * gamm &
+ alph * beta * dbgamm)
dcangles = 3.0_dp * (dcalph * beta * gamm &
+ alph * dcbeta * gamm &
+ alph * beta * dcgamm)
! Damping derivatives
dadmp9 = ddmp9 * darav9
dbdmp9 = ddmp9 * dbrav9
dcdmp9 = ddmp9 * dcrav9
! Three-body energy
e9 = -angles * dmp9 * r9 * self
c9 = -sqrt(c6ab * c6ac * c6bc / Hartree)
! Forces
fa9 = c9 * (-daangles * dmp9 * r9 &
- angles * dadmp9 * r9 &
- angles * dmp9 * dar9) * self
fb9 = c9 * (-dbangles * dmp9 * r9 &
- angles * dbdmp9 * r9 &
- angles * dmp9 * dbr9) * self
fc9 = c9 * (-dcangles * dmp9 * r9 &
- angles * dcdmp9 * r9 &
- angles * dmp9 * dcr9) * self
dc9 = (dc6ab * c6ac * c6bc &
+ c6ab * dc6ac * c6bc &
+ c6ab * c6ac * dc6bc) &
/ (2 * Hartree * c9)
sc9 = (sc6ab * c6ac * c6bc &
+ c6ab * sc6ac * c6bc &
+ c6ab * c6ac * sc6bc) &
/ (2 * Hartree * c9)
e_t(myid) = e_t(myid) + c9 * e9
f_t(:, a, myid) = f_t(:, a, myid) - fa9
f_t(:, b, myid) = f_t(:, b, myid) - fb9
f_t(:, c, myid) = f_t(:, c, myid) - fc9
f_t(:, :, myid) = f_t(:, :, myid) - dc9 * e9
s_t(:, :, myid) = s_t(:, :, myid) + sc9 * e9 &
- spread(xyza, 1, 3) * spread(fa9, 2, 3) &
- spread(xyzb, 1, 3) * spread(fb9, 2, 3) &
- spread(xyzc, 1, 3) * spread(fc9, 2, 3)
enddo
enddo
enddo
enddo
endif
enddo
enddo
enddo
enddo
enddo
!$OMP END DO
!$OMP END PARALLEL
energy = sum(e_t)
forces = sum(f_t, 3)
stress = sum(s_t, 3)
deallocate(e_t)
deallocate(f_t)
deallocate(s_t)
stress = -(stress + transpose(stress)) / (2.0_dp * V)
end subroutine d3_calc
end module d3_fort
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment