Merge pull request #38 from USCCACS/change_neighborlist_dot

Change neighborlist dot
USCCACS · Mar 28, 2019 · f1921e3 · f1921e3
2 parents fbed796 + 5b9b57b
commit f1921e3
Show file tree

Hide file tree

Showing 5 changed files with 59 additions and 47 deletions.
diff --git a/src/init.F90 b/src/init.F90
@@ -160,7 +160,7 @@ SUBROUTINE INITSYSTEM(atype, pos, v, f, q)
 !--- Linked List & Near Neighb Parameters
 call allocatori2d(nbrlist,1,NBUFFER,0,MAXNEIGHBS)
 call allocatori2d(nbrindx,1,NBUFFER,1,MAXNEIGHBS)
-call allocatori2d(nbplist,1,NBUFFER,0,MAXNEIGHBS10)
+call allocatori2d(nbplist,0,MAXNEIGHBS10,1,NBUFFER)
 call allocatori1d(llist,1,NBUFFER)
 call allocatori3d(header,-MAXLAYERS,cc(1)-1+MAXLAYERS,-MAXLAYERS,cc(2)-1+MAXLAYERS,-MAXLAYERS,cc(3)-1+MAXLAYERS)
 call allocatori3d(nacell,-MAXLAYERS,cc(1)-1+MAXLAYERS,-MAXLAYERS,cc(2)-1+MAXLAYERS,-MAXLAYERS,cc(3)-1+MAXLAYERS)

diff --git a/src/main.F90 b/src/main.F90
@@ -433,7 +433,7 @@ subroutine GetNonbondingPairList(pos)
 call system_clock(ti,tk)
 
 ! reset non-bonding pair list
-nbplist(:,0)=0
+nbplist(0,:)=0
 
 !$omp parallel do default(shared),private(c1,c2,c3,c4,c5,c6,i,j,m,n,mn,iid,jid,dr,dr2)
 do c1=0, nbcc(1)-1
@@ -457,8 +457,8 @@ subroutine GetNonbondingPairList(pos)
                dr2 = sum(dr(1:3)*dr(1:3))
 
                if(dr2<=rctap2) then
-                 nbplist(i,0)=nbplist(i,0)+1
-                 nbplist(i,nbplist(i,0))=j
+                 nbplist(0,i)=nbplist(0,i)+1
+                 nbplist(nbplist(0,i),i)=j
                endif
 
             endif

diff --git a/src/pot.F90 b/src/pot.F90
@@ -594,9 +594,9 @@ subroutine Ehb()
 
       if( (jty==2) .and. (BO(0,i,j1)>MINBO0) ) then
 
-          do kk=1, nbplist(i,0)
+          do kk=1, nbplist(0,i)
 
-            k = nbplist(i,kk)
+            k = nbplist(kk,i)
 
             kty = itype(k)
 
@@ -658,7 +658,7 @@ subroutine Ehb()
                endif ! if(rik2<rchb2)
             endif
 
-        enddo !do kk=1, nbplist(i,0)
+        enddo !do kk=1, nbplist(0,i)
 
       endif ! if(BO(0,j,i1)>MINBO0)
    enddo 
@@ -707,8 +707,8 @@ subroutine ENbond()
 
    PE(13) = PE(13) + CEchrge*(chi(ity)*q(i) + 0.5d0*eta(ity)*q(i)**2)
 
-    do j1 = 1, nbplist(i,0) 
-         j = nbplist(i,j1)
+    do j1 = 1, nbplist(0,i) 
+         j = nbplist(j1,i)
 
          jid = gtype(j)
 
@@ -769,7 +769,7 @@ subroutine ENbond()
             endif
          endif
 
-    enddo  !do j1 = 1, nbplist(i,0) 
+    enddo  !do j1 = 1, nbplist(0,i) 
 enddo
 !$omp end do
 
@@ -826,8 +826,8 @@ subroutine ENbond_PQEq()
 
    qic = q(i) + Zpqeq(ity)
 
-   do j1 = 1, nbplist(i,0) 
-         j = nbplist(i,j1)
+   do j1 = 1, nbplist(0,i) 
+         j = nbplist(j1,i)
 
          jid = gtype(j)
 
@@ -911,7 +911,7 @@ subroutine ENbond_PQEq()
 
          endif
 
-    enddo  !do j1 = 1, nbplist(i,0) 
+    enddo  !do j1 = 1, nbplist(0,i) 
 enddo
 !$omp end do
 

diff --git a/src/pqeq.F90 b/src/pqeq.F90
@@ -77,8 +77,8 @@ subroutine PQEq(atype, pos, q)
 
 #ifdef QEQDUMP 
 do i=1, NATOMS
-   do j1=1,nbplist(i,0)
-      j = nbplist(i,j1)
+   do j1=1,nbplist(0,i)
+      j = nbplist(j1,i)
       write(91,'(4i6,4es25.15)') -1, l2g(atype(i)),nint(atype(i)),l2g(atype(j)),hessian(j1,i)
    enddo
 enddo
@@ -207,9 +207,9 @@ subroutine update_shell_positions()
    sforce(i,1:3) = sforce(i,1:3) - Kspqeq(ity)*spos(i,1:3) ! Eq. (37)
    shelli(1:3) = pos(i,1:3) + spos(i,1:3)
 
-   do j1 = 1, nbplist(i,0)
+   do j1 = 1, nbplist(0,i)
 
-      j = nbplist(i,j1)
+      j = nbplist(j1,i)
       jty = nint(atype(j))
 
       qjc = q(j) + Zpqeq(jty)
@@ -276,7 +276,7 @@ subroutine qeq_initialize()
 
 call system_clock(ti,tk)
 
-nbplist(:,0) = 0
+nbplist(0,:) = 0
 
 !$omp parallel do schedule(runtime), default(shared), &
 !$omp private(i,j,ity,jty,n,m,mn,nn,c1,c2,c3,c4,c5,c6,dr,dr2,drtb,itb,inxn,pqeqc,pqeqs,ff)
@@ -309,8 +309,8 @@ subroutine qeq_initialize()
 
 !--- make neighbor-list upto the taper function cutoff
 !$omp atomic
-               nbplist(i,0) = nbplist(i,0) + 1
-               nbplist(i,nbplist(i,0)) = j
+               nbplist(0,i) = nbplist(0,i) + 1
+               nbplist(nbplist(0,i),i) = j
 
 !--- get table index and residual value
                itb = int(dr2*UDRi)
@@ -321,7 +321,7 @@ subroutine qeq_initialize()
                ! contribution from core(i)-core(j)
                call get_coulomb_and_dcoulomb_pqeq(dr,alphacc(ity,jty),pqeqc,inxnpqeq(ity, jty),TBL_Eclmb_pcc,ff)
 
-               hessian(nbplist(i,0),i) = Cclmb0_qeq * pqeqc
+               hessian(nbplist(0,i),i) = Cclmb0_qeq * pqeqc
 
                fpqeq(i) = fpqeq(i) + Cclmb0_qeq * pqeqc * Zpqeq(jty) ! Eq. 30
 
@@ -347,7 +347,7 @@ subroutine qeq_initialize()
 
 !--- for array size stat
 if(mod(nstep,pstep)==0) then
-  nn=maxval(nbplist(1:NATOMS,0))
+  nn=maxval(nbplist(0,1:NATOMS))
   i=nstep/pstep+1
   maxas(i,3)=nn
 endif
@@ -391,8 +391,8 @@ subroutine get_hsh(Est)
 
    Est = Est + chi(ity)*q(i) + 0.5d0*eta_ity*q(i)*q(i)
 
-   do j1 = 1, nbplist(i,0)
-      j = nbplist(i,j1)
+   do j1 = 1, nbplist(0,i)
+      j = nbplist(j1,i)
       jty = nint(atype(j))
 
 !--- for PQEq
@@ -454,8 +454,8 @@ subroutine get_gradient(Gnew)
 
    gssum=0.d0
    gtsum=0.d0
-   do j1=1, nbplist(i,0) 
-      j = nbplist(i,j1)
+   do j1=1, nbplist(0,i) 
+      j = nbplist(j1,i)
       gssum = gssum + hessian(j1,i)*qs(j)
       gtsum = gtsum + hessian(j1,i)*qt(j)
    enddo

diff --git a/src/qeq.F90 b/src/qeq.F90
@@ -25,6 +25,7 @@ subroutine QEq(atype, pos, q)
 real(8) :: ssum, tsum, mu
 real(8) :: qsum, gqsum
 real(8) :: QCopyDr(3)
+real(8) :: hshs_sum,hsht_sum
 
 call system_clock(i1,k1)
 
@@ -73,8 +74,8 @@ subroutine QEq(atype, pos, q)
 
 #ifdef QEQDUMP 
 do i=1, NATOMS
-   do j1=1,nbplist(i,0)
-      j = nbplist(i,j1)
+   do j1=1,nbplist(0,i)
+      j = nbplist(j1,i)
       write(91,'(4i6,4es25.15)') -1, l2g(atype(i)),nint(atype(i)),l2g(atype(j)),hessian(j1,i)
    enddo
 enddo
@@ -100,7 +101,7 @@ subroutine QEq(atype, pos, q)
   call MPI_ALLREDUCE(qsum, gqsum, 1, MPI_DOUBLE_PRECISION, MPI_SUM,  MPI_COMM_WORLD, ierr)
 #endif
 
-  call get_hsh(Est)
+  call get_hsh(Est,hshs_sum,hsht_sum)
 
   call MPI_ALLREDUCE(Est, GEst1, 1, MPI_DOUBLE_PRECISION, MPI_SUM,  MPI_COMM_WORLD, ierr)
 
@@ -114,11 +115,11 @@ subroutine QEq(atype, pos, q)
 
 !--- line minimization factor of <s> vector
   g_h(1) = dot_product(gs(1:NATOMS), hs(1:NATOMS))
-  h_hsh(1) = dot_product(hs(1:NATOMS), hshs(1:NATOMS))
+  h_hsh(1) = hshs_sum
 
 !--- line minimization factor of <t> vector
   g_h(2) = dot_product(gt(1:NATOMS), ht(1:NATOMS))
-  h_hsh(2) = dot_product(ht(1:NATOMS), hsht(1:NATOMS))
+  h_hsh(2) = hsht_sum
 
   buf(1)=g_h(1);   buf(2)=g_h(2)
   buf(3)=h_hsh(1); buf(4)=h_hsh(2)
@@ -195,8 +196,6 @@ subroutine qeq_initialize()
 call system_clock(ti,tk)
 
 
-nbplist(:,0) = 0
-
 !$omp parallel do schedule(runtime), default(shared), &
 !$omp private(i,j,ity,jty,n,m,mn,nn,c1,c2,c3,c4,c5,c6,dr,dr2,drtb,itb,inxn)
 do c1=0, nbcc(1)-1
@@ -207,6 +206,7 @@ subroutine qeq_initialize()
    do m = 1, nbnacell(c1,c2,c3)
 
    ity=nint(atype(i))
+   nbplist(0,i) = 0
 
    do mn = 1, nbnmesh
       c4 = c1 + nbmesh(1,mn)
@@ -226,8 +226,8 @@ subroutine qeq_initialize()
 
 !--- make a neighbor list with cutoff length = 10[A]
 !$omp atomic
-               nbplist(i,0) = nbplist(i,0) + 1
-               nbplist(i,nbplist(i,0)) = j
+               nbplist(0,i) = nbplist(0,i) + 1
+               nbplist(nbplist(0,i),i) = j
 
 !--- get table index and residual value
                itb = int(dr2*UDRi)
@@ -236,22 +236,26 @@ subroutine qeq_initialize()
 
                inxn = inxn2(ity, jty)
 
-               hessian(nbplist(i,0),i) = (1.d0-drtb)*TBL_Eclmb_QEq(itb,inxn) + drtb*TBL_Eclmb_QEq(itb+1,inxn)
+               hessian(nbplist(0,i),i) = (1.d0-drtb)*TBL_Eclmb_QEq(itb,inxn) + drtb*TBL_Eclmb_QEq(itb+1,inxn)
             endif
          endif
 
          j=nbllist(j)
       enddo
    enddo !   do mn = 1, nbnmesh
 
+   if (nbplist(0,i) > MAXNEIGHBS10) then
+      write(6,*) "ERROR: In qeq.F90 inside qeq_initialize, nbplist greater then MAXNEIGHBS10 on mpirank",myid,"with value",nbplist(0,i)
+      call MPI_FINALIZE(ierr)
+   end if
    i=nbllist(i)
    enddo
 enddo; enddo; enddo
 !$omp end parallel do
 
 !--- for array size stat
 if(mod(nstep,pstep)==0) then
-  nn=maxval(nbplist(1:NATOMS,0))
+  nn=maxval(nbplist(0,1:NATOMS))
   i=nstep/pstep+1
   maxas(i,3)=nn
 endif
@@ -262,39 +266,47 @@ subroutine qeq_initialize()
 end subroutine 
 
 !-----------------------------------------------------------------------------------------------------------------------
-subroutine get_hsh(Est)
+subroutine get_hsh(Est,hshs_sum,hsht_sum)
 use atoms; use parameters
 ! This subroutine updates hessian*cg array <hsh> and the electrostatic energy <Est>.  
 !-----------------------------------------------------------------------------------------------------------------------
 implicit none
 real(8),intent(OUT) :: Est
 integer :: i,j,j1, ity
 real(8) :: eta_ity, Est1
+real(8) :: t_hshs,t_hsht
+real(8) :: hshs_sum,hsht_sum
 
 integer :: ti,tj,tk
 call system_clock(ti,tk)
 
 Est = 0.d0
-!$omp parallel do default(shared), schedule(runtime), private(i,j,j1,ity,eta_ity,Est1),reduction(+:Est)
+hshs_sum = 0.d0
+hsht_sum = 0.d0
+
+!$omp parallel do default(shared), schedule(runtime), private(i,j,j1,ity,eta_ity,Est1,t_hshs,t_hsht),reduction(+:Est,hshs_sum,hsht_sum)
 do i=1, NATOMS
    ity = nint(atype(i))
    eta_ity = eta(ity)
 
-   hshs(i) = eta_ity*hs(i)
-   hsht(i) = eta_ity*ht(i)
+   t_hshs = eta_ity*hs(i)
+   t_hsht = eta_ity*ht(i)
 
    Est = Est + chi(ity)*q(i) + 0.5d0*eta_ity*q(i)*q(i)
 
-   do j1 = 1, nbplist(i,0)
-      j = nbplist(i,j1)
-      hshs(i) = hshs(i) + hessian(j1,i)*hs(j)
-      hsht(i) = hsht(i) + hessian(j1,i)*ht(j)
+   do j1 = 1, nbplist(0,i)
+      j = nbplist(j1,i)
+      t_hshs = t_hshs + hessian(j1,i)*hs(j)
+      t_hsht = t_hsht + hessian(j1,i)*ht(j)
 !--- get half of potential energy, then sum it up if atoms are resident.
       Est1 = 0.5d0*hessian(j1,i)*q(i)*q(j)
       Est = Est + Est1
       if(j<=NATOMS) Est = Est + Est1
    enddo
 
+   hshs_sum = hshs_sum + t_hshs*hs(i)
+   hsht_sum = hsht_sum + t_hsht*ht(i)
+
 enddo
 !$omp end parallel do
 
@@ -323,8 +335,8 @@ subroutine get_gradient(Gnew)
 
    gssum=0.d0
    gtsum=0.d0
-   do j1=1, nbplist(i,0) 
-      j = nbplist(i,j1)
+   do j1=1, nbplist(0,i) 
+      j = nbplist(j1,i)
       gssum = gssum + hessian(j1,i)*qs(j)
       gtsum = gtsum + hessian(j1,i)*qt(j)
    enddo