@@ -696,13 +696,14 @@ subroutine fget_global_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas,
696
696
double precision , allocatable , dimension (:,:) :: atomic_distance
697
697
698
698
! Pre-computed terms in the full distance matrix
699
- double precision , allocatable , dimension (:,: ) :: selfl21
699
+ double precision , allocatable , dimension (:) :: selfl21
700
700
701
701
! Pre-computed sine terms
702
702
double precision , allocatable , dimension (:,:,:) :: sin1
703
703
704
704
! Value of PI at full FORTRAN precision.
705
705
double precision , parameter :: pi = 4.0d0 * atan (1.0d0 )
706
+ double precision :: mol_dist
706
707
707
708
r_width2 = r_width** 2
708
709
c_width2 = c_width** 2
@@ -723,14 +724,22 @@ subroutine fget_global_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas,
723
724
enddo
724
725
! $OMP END PARALLEL DO
725
726
726
- allocate (selfl21(nm1, maxval (n1) ))
727
+ allocate (selfl21(nm1))
727
728
728
- ! $OMP PARALLEL DO PRIVATE(ni)
729
+ selfl21 = 0.0d0
730
+
731
+ ! $OMP PARALLEL DO PRIVATE(ni) REDUCTION(+:selfl21)
729
732
do i = 1 , nm1
730
733
ni = n1(i)
731
734
do i_1 = 1 , ni
732
- selfl21(i,i_1) = atomic_distl2(q1(i,i_1,:,:), q1(i,i_1,:,:), n1(i), n1(i), &
733
- & sin1(i,i_1,:), sin1(i,i_1,:), width, cut_distance, r_width, c_width)
735
+ do j_1 = 1 , ni
736
+
737
+ selfl21(i) = selfl21(i) + atomic_distl2(q1(i,i_1,:,:), q1(i,j_1,:,:), n1(i), n1(i), &
738
+ & sin1(i,i_1,:), sin1(i,j_1,:), width, cut_distance, r_width, c_width) &
739
+ & * (r_width2/ (r_width2 + (z1(i,i_1,1 ) - z1(i,j_1,1 ))** 2 ) * &
740
+ & c_width2/ (c_width2 + (z1(i,i_1,2 ) - z1(i,j_1,2 ))** 2 ))
741
+
742
+ enddo
734
743
enddo
735
744
enddo
736
745
! $OMP END PARALLEL DO
@@ -740,10 +749,11 @@ subroutine fget_global_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas,
740
749
kernels(:,:,:) = 0.0d0
741
750
atomic_distance(:,:) = 0.0d0
742
751
743
- ! $OMP PARALLEL DO PRIVATE(l2dist,atomic_distance,ni,nj) schedule(dynamic)
752
+ ! $OMP PARALLEL DO PRIVATE(l2dist,atomic_distance,ni,nj,mol_dist ) schedule(dynamic)
744
753
do j = 1 , nm1
745
754
nj = n1(j)
746
- do i = 1 , j
755
+ do i = 1 , j! nm1
756
+
747
757
ni = n1(i)
748
758
749
759
atomic_distance(:,:) = 0.0d0
@@ -754,17 +764,18 @@ subroutine fget_global_symmetric_kernels_arad(q1, z1, n1, sigmas, nm1, nsigmas,
754
764
l2dist = atomic_distl2(q1(i,i_1,:,:), q1(j,j_1,:,:), n1(i), n1(j), &
755
765
& sin1(i,i_1,:), sin1(j,j_1,:), width, cut_distance, r_width, c_width)
756
766
757
- l2dist = selfl21(i,i_1) + selfl21(j,j_1) - 2.0d0 * l2dist &
758
- & * (r_width2/ (r_width2 + (z1(i,i_1,1 ) - z1(j,j_1,1 ))** 2 ) &
759
- & * c_width2/ (c_width2 + (z1(i,i_1,2 ) - z1(j,j_1,2 ))** 2 ))
767
+ L2dist = l2dist * (r_width2/ (r_width2 + (z1(i,i_1,1 ) - z1(j,j_1,1 ))** 2 ) * &
768
+ & c_width2/ (c_width2 + (z1(i,i_1,2 ) - z1(j,j_1,2 ))** 2 ))
760
769
761
770
atomic_distance(i_1,j_1) = l2dist
762
771
763
772
enddo
764
773
enddo
765
774
775
+ mol_dist = selfl21(i) + selfl21(j) - 2.0d0 * sum (atomic_distance(:ni,:nj))
776
+
766
777
do k = 1 , nsigmas
767
- kernels(k, i, j) = exp (sum (atomic_distance(:ni,:nj)) * inv_sigma2(k))
778
+ kernels(k, i, j) = exp (mol_dist * inv_sigma2(k))
768
779
kernels(k, j, i) = kernels(k, i, j)
769
780
enddo
770
781
@@ -834,8 +845,8 @@ subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, ns
834
845
double precision , allocatable , dimension (:,:) :: atomic_distance
835
846
836
847
! Pre-computed terms in the full distance matrix
837
- double precision , allocatable , dimension (:,: ) :: selfl21
838
- double precision , allocatable , dimension (:,: ) :: selfl22
848
+ double precision , allocatable , dimension (:) :: selfl21
849
+ double precision , allocatable , dimension (:) :: selfl22
839
850
840
851
! Pre-computed sine terms
841
852
double precision , allocatable , dimension (:,:,:) :: sin1
@@ -844,6 +855,8 @@ subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, ns
844
855
! Value of PI at full FORTRAN precision.
845
856
double precision , parameter :: pi = 4.0d0 * atan (1.0d0 )
846
857
858
+ double precision :: mol_dist
859
+
847
860
r_width2 = r_width** 2
848
861
c_width2 = c_width** 2
849
862
@@ -882,25 +895,43 @@ subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, ns
882
895
enddo
883
896
! $OMP END PARALLEL DO
884
897
885
- allocate (selfl21(nm1, maxval (n1) ))
886
- allocate (selfl22(nm2, maxval (n2) ))
898
+ allocate (selfl21(nm1))
899
+ allocate (selfl22(nm2))
887
900
888
- ! $OMP PARALLEL DO PRIVATE(ni)
901
+ selfl21 = 0.0d0
902
+ selfl22 = 0.0d0
903
+
904
+ ! $OMP PARALLEL DO PRIVATE(ni) REDUCTION(+:selfl21)
889
905
do i = 1 , nm1
890
906
ni = n1(i)
891
907
do i_1 = 1 , ni
892
- selfl21(i,i_1) = atomic_distl2(q1(i,i_1,:,:), q1(i,i_1,:,:), n1(i), n1(i), &
893
- & sin1(i,i_1,:), sin1(i,i_1,:), width, cut_distance, r_width, c_width)
908
+ do j_1 = 1 , ni
909
+
910
+ selfl21(i) = selfl21(i) + atomic_distl2(q1(i,i_1,:,:), q1(i,j_1,:,:), n1(i), n1(i), &
911
+ & sin1(i,i_1,:), sin1(i,j_1,:), width, cut_distance, r_width, c_width) &
912
+ & * (r_width2/ (r_width2 + (z1(i,i_1,1 ) - z1(i,j_1,1 ))** 2 ) * &
913
+ & c_width2/ (c_width2 + (z1(i,i_1,2 ) - z1(i,j_1,2 ))** 2 ))
914
+
915
+ enddo
916
+
894
917
enddo
895
918
enddo
896
919
! $OMP END PARALLEL DO
897
920
898
- ! $OMP PARALLEL DO PRIVATE(ni)
921
+ ! $OMP PARALLEL DO PRIVATE(ni) REDUCTION(+:selfl22)
899
922
do i = 1 , nm2
900
923
ni = n2(i)
901
924
do i_1 = 1 , ni
902
- selfl22(i,i_1) = atomic_distl2(q2(i,i_1,:,:), q2(i,i_1,:,:), n2(i), n2(i), &
903
- & sin2(i,i_1,:), sin2(i,i_1,:), width, cut_distance, r_width, c_width)
925
+ do j_1 = 1 , ni
926
+
927
+ selfl22(i) = selfl22(i) + atomic_distl2(q2(i,i_1,:,:), q2(i,j_1,:,:), n2(i), n2(i), &
928
+ & sin2(i,i_1,:), sin2(i,j_1,:), width, cut_distance, r_width, c_width) &
929
+ &* (r_width2/ (r_width2 + (z2(i,i_1,1 ) - z2(i,j_1,1 ))** 2 ) * &
930
+ & c_width2/ (c_width2 + (z2(i,i_1,2 ) - z2(i,j_1,2 ))** 2 ))
931
+
932
+
933
+ enddo
934
+
904
935
enddo
905
936
enddo
906
937
! $OMP END PARALLEL DO
@@ -911,7 +942,9 @@ subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, ns
911
942
kernels(:,:,:) = 0.0d0
912
943
atomic_distance(:,:) = 0.0d0
913
944
914
- ! $OMP PARALLEL DO PRIVATE(l2dist,atomic_distance,ni,nj) schedule(dynamic)
945
+
946
+ ! $OMP PARALLEL DO PRIVATE(l2dist,atomic_distance,ni,nj,mol_dist) schedule(dynamic)
947
+
915
948
do j = 1 , nm2
916
949
nj = n2(j)
917
950
do i = 1 , nm1
@@ -925,17 +958,21 @@ subroutine fget_global_kernels_arad(q1, q2, z1, z2, n1, n2, sigmas, nm1, nm2, ns
925
958
l2dist = atomic_distl2(q1(i,i_1,:,:), q2(j,j_1,:,:), n1(i), n2(j), &
926
959
& sin1(i,i_1,:), sin2(j,j_1,:), width, cut_distance, r_width, c_width)
927
960
928
- l2dist = selfl21(i,i_1) + selfl22(j,j_1) - 2.0d0 * l2dist &
929
- & * (r_width2/ (r_width2 + (z1(i,i_1,1 ) - z2(j,j_1,1 ))** 2 ) * &
930
- & c_width2/ (c_width2 + (z1(i,i_1,2 ) - z2(j,j_1,2 ))** 2 ))
961
+
962
+ L2dist = l2dist * (r_width2/ (r_width2 + (z1(i,i_1,1 ) - z2(j,j_1,1 ))** 2 ) * &
963
+ & c_width2/ (c_width2 + (z1(i,i_1,2 ) - z2(j,j_1,2 ))** 2 ))
964
+
931
965
932
966
atomic_distance(i_1,j_1) = l2dist
933
967
934
968
enddo
935
969
enddo
936
970
971
+ mol_dist = selfl21(i) + selfl22(j) - 2.0d0 * sum (atomic_distance(:ni,:nj))
972
+
937
973
do k = 1 , nsigmas
938
- kernels(k, i, j) = exp (sum (atomic_distance(:ni,:nj)) * inv_sigma2(k))
974
+ kernels(k, i, j) = exp (mol_dist * inv_sigma2(k))
975
+
939
976
enddo
940
977
941
978
enddo
0 commit comments