module fftsub_mod

  use algebra
  use sub_mod
  use mpi_mod

contains

subroutine psik2ukvk( psik, uk, vk, ukopt, vkopt )
!-- converting psi to u and v
  use Math_Const
  use savegloval_define
  implicit none
  complex(kind(0d0)), dimension(:,:), intent(in) :: psik      ! stream function
  complex(kind(0d0)), dimension(size(psik,1),size(psik,2)), intent(inout) :: uk
                                        ! advecting speed of x direction
  complex(kind(0d0)), dimension(size(psik,1),size(psik,2)), intent(inout) :: vk
                                        ! advecting speed of y direction
  complex(kind(0d0)), intent(in), optional :: ukopt  ! 0,0 component of uk
  complex(kind(0d0)), intent(in), optional :: vkopt  ! 0,0 component of uk

  integer :: i, j, ix, jy
  double precision :: lxi, lyi, pi2

  uk=0.0d0
  vk=0.0d0

  if(present(ukopt))then
     uk(1,1)=ukopt
  end if
  if(present(vkopt))then
     vk(1,1)=vkopt
  end if

  ix=size(psik,1)
  jy=size(psik,2)

  lxi=1.0d0/Lx
  lyi=1.0d0/Ly
  pi2=2.0d0*pi_dp

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i)

  do i=2,hxnt+1
! uk=0.0
     vk(i,1)=pi2*img_cdp*dble(i-1)*lxi*psik(i,1)
     vk(ix-i+2,1)=dconjg(vk(i,1))
  end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(j)

  do j=2,hynt+1
     uk(1,j)=-pi2*img_cdp*dble(j-1)*lyi*psik(1,j)
     uk(1,jy-j+2)=pi2*img_cdp*dble(j-1)*lyi*psik(1,jy-j+2)
! vk=0.0
!-- ここでは, ISPACK の仕様により, l 方向は -l について独立
!-- なので, jy-j+2 では l 方向の波数が奇数乗でかかる場合,
!-- (-l) でかける必要がある (偶数乗はかけなくても同じ結果). 
!-- これは他の微分計算でも同じ (ADV_ の非線形や, W_diverge など)
  end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(i,j)

  do j=2,hynt+1
     do i=2,hxnt+1
        uk(i,j)=-pi2*img_cdp*dble(j-1)*lyi*psik(i,j)
        uk(ix-i+2,jy-j+2)=dconjg(uk(i,j))
        uk(i,jy-j+2)=pi2*img_cdp*dble(j-1)*lyi*psik(i,jy-j+2)
        uk(ix-i+2,j)=dconjg(uk(i,jy-j+2))
        vk(i,j)=pi2*img_cdp*dble(i-1)*lxi*psik(i,j)
        vk(ix-i+2,jy-j+2)=dconjg(vk(i,j))
        vk(i,jy-j+2)=pi2*img_cdp*dble(i-1)*lxi*psik(i,jy-j+2)
        vk(ix-i+2,j)=dconjg(vk(i,jy-j+2))
!-- ここでは, ISPACK の仕様により, l 方向は -l について独立
!-- なので, jy-j+2 では l 方向の波数が奇数乗でかかる場合,
!-- (-l) でかける必要がある (偶数乗はかけなくても同じ結果). 
!-- これは他の微分計算でも同じ (ADV_ の非線形や, W_diverge など)
     end do
  end do

!$omp end do
!$omp end parallel

end subroutine psik2ukvk


subroutine zetak2psik( zk, psik, psikopt )
!-- converting zeta to psi using fft with poisson.
  use Math_Const
  use savegloval_define
  implicit none
  complex(kind(0d0)), dimension(:,:), intent(in) :: zk
  complex(kind(0d0)), dimension(size(zk,1),size(zk,2)), intent(inout) :: psik
  complex(kind(0d0)), intent(in), optional :: psikopt   ! 0,0 component of psik
  integer :: i, j, ix, jy
  double precision :: lxi, lyi, pii4

  psik=0.0d0

  if(present(psikopt))then
     psik(1,1)=psikopt
  end if

  ix=size(zk,1)
  jy=size(zk,2)

  lxi=1.0d0/Lx
  lyi=1.0d0/Ly
  pii4=0.25d0/((pi_dp)**2)

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i)

  do i=2,hxnt+1
     psik(i,1)=-pii4*zk(i,1)/((dble(i-1)*lxi)**2)
     psik(ix-i+2,1)=dconjg(psik(i,1))
  end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(j)

  do j=2,hynt+1
     psik(1,j)=-pii4*zk(1,j)/((dble(j-1)*lyi)**2)
     psik(1,jy-j+2)=-pii4*zk(1,jy-j+2)/((dble(j-1)*lyi)**2)
  end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(i,j)

  do j=2,hynt+1
     do i=2,hxnt+1
        psik(i,j)=-pii4*zk(i,j)/((dble(i-1)*lxi)**2+(dble(j-1)*lyi)**2)
        psik(ix-i+2,jy-j+2)=dconjg(psik(i,j))
        psik(i,jy-j+2)=-pii4*zk(i,jy-j+2)/((dble(i-1)*lxi)**2+(dble(j-1)*lyi)**2)
        psik(ix-i+2,j)=dconjg(psik(i,jy-j+2))
     end do
  end do

!$omp end do
!$omp end parallel

end subroutine zetak2psik


subroutine psik2zetak( psik, zk, zkopt )
!-- converting psi to zeta using fft.
  use Math_Const
  use savegloval_define
  implicit none
  complex(kind(0d0)), dimension(:,:), intent(in) :: psik
  complex(kind(0d0)), dimension(size(psik,1),size(psik,2)), intent(inout) :: zk
  complex(kind(0d0)), intent(in), optional :: zkopt   ! 0,0 component of zk
  integer :: i, j, ix, jy
  double precision :: lxi, lyi, pi4

  zk=0.0d0

  if(present(zkopt))then
     zk(1,1)=zkopt
  end if

  ix=size(psik,1)
  jy=size(psik,2)

  lxi=1.0d0/Lx
  lyi=1.0d0/Ly
  pi4=4.0d0*pi_dp*pi_dp

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i)

  do i=2,hxnt+1
     zk(i,1)=-pi4*psik(i,1)*((dble(i-1)*lxi)**2)
     zk(ix-i+2,1)=dconjg(zk(i,1))
  end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(j)

  do j=2,hynt+1
     zk(1,j)=-pi4*psik(1,j)*((dble(j-1)*lyi)**2)
     zk(1,jy-j+2)=-pi4*psik(1,jy-j+2)*((dble(j-1)*lyi)**2)
  end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(i,j)

  do j=2,hynt+1
     do i=2,hxnt+1
        zk(i,j)=-pi4*psik(i,j)*((dble(i-1)*lxi)**2+(dble(j-1)*lyi)**2)
        zk(ix-i+2,jy-j+2)=dconjg(zk(i,j))
        zk(i,jy-j+2)=-pi4*psik(i,jy-j+2)*((dble(i-1)*lxi)**2+(dble(j-1)*lyi)**2)
        zk(ix-i+2,j)=dconjg(zk(i,jy-j+2))
     end do
  end do

!$omp end do
!$omp end parallel

end subroutine psik2zetak


subroutine phys2spec( ival, oval )
!-- convert ival (physical space) to oval (spectral space)
  use savegloval_define
  implicit none
  double precision, intent(in) :: ival(:,:)
  complex(kind(0d0)), intent(inout) :: oval(:,:)
  double precision, dimension(ny,nx) :: tmpr_isp, tmp_work
  double precision :: tmpk_isp(-hynt:hynt,-hxnt:hxnt)

  if(size(ival,1)/=nx)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 1st array in ival is mismatch."
     stop
  end if

  if(size(ival,2)/=ny)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 2nd array in ival is mismatch."
     stop
  end if

  if(size(oval,1)/=kxnt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 1st array in oval is mismatch."
     stop
  end if

  if(size(oval,2)/=kynt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 2nd array in oval is mismatch."
     stop
  end if

  call rearrange_rxy2ryx( nx, ny, ival(1:nx,1:ny), tmpr_isp(1:ny,1:nx) )

  CALL P2G2SA( hynt, hxnt, ny, nx, tmpr_isp(1:ny,1:nx),  &
  &            tmpk_isp(-hynt:hynt,-hxnt:hxnt),  &
  &            tmp_work(1:ny,1:nx), ITJR, TJR, ITIR, TIR )

  call rearrange_ryx2cxy( hxnt, hynt, tmpk_isp(-hynt:hynt,-hxnt:hxnt),  &
  &                       oval(1:kxnt,1:kynt) )

end subroutine phys2spec


subroutine spec2phys( ival, oval )
!-- convert ival (spectral space) to oval (physical space)
  use savegloval_define
  implicit none
  complex(kind(0d0)), intent(in) :: ival(:,:)
  double precision, intent(inout) :: oval(:,:)
  double precision, dimension(ny,nx) :: tmpr_isp, tmp_work
  double precision :: tmpk_isp(-hynt:hynt,-hxnt:hxnt)

  if(size(ival,1)/=kxnt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 1st array in ival is mismatch."
     stop
  end if

  if(size(ival,2)/=kynt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 2nd array in ival is mismatch."
     stop
  end if

  if(size(oval,1)/=nx)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 1st array in oval is mismatch."
     stop
  end if

  if(size(oval,2)/=ny)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 2nd array in oval is mismatch."
     stop
  end if

  call rearrange_cxy2ryx( hxnt, hynt, ival(1:kxnt,1:kynt),  &
  &                       tmpk_isp(-hynt:hynt,-hxnt:hxnt) )

  CALL P2S2GA( hynt, hxnt, ny, nx, tmpk_isp(-hynt:hynt,-hxnt:hxnt),  &
  &            tmpr_isp(1:ny,1:nx),  &
  &            tmp_work(1:ny,1:nx), ITJR, TJR, ITIR, TIR )

  call rearrange_ryx2rxy( nx, ny, tmpr_isp(1:ny,1:nx),  &
  &                       oval(1:nx,1:ny) )

end subroutine spec2phys


subroutine phys2spec_isp( ival_isp, oval )
!-- convert ival (physical space) to oval (spectral space) on jxnt, jynt
  use savegloval_define
  implicit none
  double precision, intent(in) :: ival_isp(:,:)
  complex(kind(0d0)), intent(inout) :: oval(:,:)
  double precision, dimension(jynt,jxnt) :: tmp_work
  double precision :: tmpk_isp(-hynt:hynt,-hxnt:hxnt)

  if(size(ival_isp,1)/=jynt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 1st array in ival_isp is mismatch."
     stop
  end if

  if(size(ival_isp,2)/=jxnt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 2nd array in ival_isp is mismatch."
     stop
  end if

  if(size(oval,1)/=kxnt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 1st array in oval is mismatch."
     stop
  end if

  if(size(oval,2)/=kynt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 2nd array in oval is mismatch."
     stop
  end if

  CALL P2G2SA( hynt, hxnt, jynt, jxnt, ival_isp(1:jynt,1:jxnt),  &
  &            tmpk_isp(-hynt:hynt,-hxnt:hxnt),  &
  &            tmp_work(1:jynt,1:jxnt), ITJJ, TJJ, ITIJ, TIJ )

  call rearrange_ryx2cxy( hxnt, hynt, tmpk_isp(-hynt:hynt,-hxnt:hxnt),  &
  &                       oval(1:kxnt,1:kynt) )

end subroutine phys2spec_isp


subroutine spec2phys_isp( ival, oval_isp )
!-- convert ival (spectral space) to oval_isp (physical space) on jxnt, jynt
  use savegloval_define
  implicit none
  complex(kind(0d0)), intent(in) :: ival(:,:)
  double precision, intent(inout) :: oval_isp(:,:)
  double precision, dimension(jynt,jxnt) :: tmp_work
  double precision :: tmpk_isp(-hynt:hynt,-hxnt:hxnt)

  if(size(ival,1)/=kxnt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 1st array in ival is mismatch."
     stop
  end if

  if(size(ival,2)/=kynt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 2nd array in ival is mismatch."
     stop
  end if

  if(size(oval_isp,1)/=jynt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 1st array in oval_isp is mismatch."
     stop
  end if

  if(size(oval_isp,2)/=jxnt)then
     write(*,*) "*** ERROR (convert_phys2spec) *** : 2nd array in oval_isp is mismatch."
     stop
  end if

  call rearrange_cxy2ryx( hxnt, hynt, ival(1:kxnt,1:kynt),  &
  &                       tmpk_isp(-hynt:hynt,-hxnt:hxnt) )

  CALL P2S2GA( hynt, hxnt, jynt, jxnt, tmpk_isp(-hynt:hynt,-hxnt:hxnt),  &
  &            oval_isp(1:jynt,1:jxnt),  &
  &            tmp_work(1:jynt,1:jxnt), ITJJ, TJJ, ITIJ, TIJ )

end subroutine spec2phys_isp


subroutine W_divergence( uk_mbl, vk_mbl, wk_mbl )
!-- calculating vertical flow based on a mass continuity
  use mpi
  use Math_Const
  use savegloval_define
  implicit none
  complex(kind(0d0)), dimension(kxnt,kynt,0:nzp+1), intent(in) :: uk_mbl
  complex(kind(0d0)), dimension(kxnt,kynt,0:nzp+1), intent(in) :: vk_mbl
  complex(kind(0d0)), dimension(kxnt,kynt,0:nzp+1), intent(inout) :: wk_mbl

  integer :: i, j, k, IERROR
  double precision :: lxi, lyi, pi2
  double precision, dimension(0:nz) :: tmpz
  complex(kind(0d0)), dimension(kxnt,kynt,nz) :: wk_mbl_tot
  complex(kind(0d0)), dimension(kxnt,kynt,0:nz) :: tmpwk_tot
  complex(kind(0d0)), dimension(kxnt,kynt,0:nzp+1) :: tmpwk

  wk_mbl_tot=0.0d0
  tmpwk_tot=0.0d0
  tmpwk=0.0d0
  wk_mbl=0.0d0

  lxi=1.0d0/Lx
  lyi=1.0d0/Ly
  pi2=2.0d0*pi_dp

  tmpz(1:nz)=z(1:nz)
  tmpz(0)=0.0d0

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i,k)

  do k=1,nzp
     do i=2,hxnt+1
        tmpwk(i,1,k)=-img_cdp*pi2*(dble(i-1)*lxi*uk_mbl(i,1,k))
        tmpwk(kxnt-i+2,1,k)=dconjg(tmpwk(i,1,k))
     end do
  end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(j,k)

  do k=1,nzp
     do j=2,hynt+1
        tmpwk(1,j,k)=-img_cdp*pi2*(dble(j-1)*lyi*vk_mbl(1,j,k))
        tmpwk(1,kynt-j+2,k)=img_cdp*pi2*(dble(j-1)*lyi*vk_mbl(1,kynt-j+2,k))
!-- ここでは, ISPACK の仕様により, l 方向は -l について独立
!-- なので, jy-j+2 では l 方向の波数が奇数乗でかかる場合,
!-- (-l) でかける必要がある (偶数乗はかけなくても同じ結果). 
     end do
  end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(i,j,k)

  do k=1,nzp
     do j=2,hynt+1
        do i=2,hxnt+1
           tmpwk(i,j,k)=-img_cdp*pi2  &
  &                     *(dble(i-1)*lxi*uk_mbl(i,j,k)  &
  &                      +dble(j-1)*lyi*vk_mbl(i,j,k))
           tmpwk(kxnt-i+2,kynt-j+2,k)=dconjg(tmpwk(i,j,k))
           tmpwk(i,kynt-j+2,k)=-img_cdp*pi2  &
  &                            *(dble(i-1)*lxi*uk_mbl(i,kynt-j+2,k)  &
  &                             -dble(j-1)*lyi*vk_mbl(i,kynt-j+2,k))
           tmpwk(kxnt-i+2,j,k)=dconjg(tmpwk(i,kynt-j+2,k))
!-- ここでは, ISPACK の仕様により, l 方向は -l について独立
!-- なので, jy-j+2 では l 方向の波数が奇数乗でかかる場合,
!-- (-l) でかける必要がある (偶数乗はかけなくても同じ結果). 
        end do
     end do
  end do

!$omp end do
!$omp end parallel

  call mpi_gather_w( IERROR, tmpwk, tmpwk_tot(1:kxnt,1:kynt,1:nz) )

  if(MY_RANK==0)then  ! only root process

!    !$omp parallel default(shared)
!    !$omp do schedule(runtime) private(i,k)

     do k=1,nz
        do i=2,hxnt+1
           call rectangle_int( tmpz(0:k), tmpwk_tot(i,1,0:k),  &
  &                            tmpz(0), tmpz(k), wk_mbl_tot(i,1,k) )
           wk_mbl_tot(kxnt-i+2,1,k)=dconjg(wk_mbl_tot(i,1,k))
        end do
     end do

!    !$omp end do
!    !$omp barrier
!    !$omp do schedule(runtime) private(j,k)

     do k=1,nz
        do j=2,hynt+1
           call rectangle_int( tmpz(0:k), tmpwk_tot(1,j,0:k),  &
  &                            tmpz(0), tmpz(k), wk_mbl_tot(1,j,k) )
           call rectangle_int( tmpz(0:k), tmpwk_tot(1,kynt-j+2,0:k),  &
  &                            tmpz(0), tmpz(k), wk_mbl_tot(1,kynt-j+2,k) )
        end do
     end do

!    !$omp end do
!    !$omp barrier
!    !$omp do schedule(runtime) private(i,j,k)

     do k=1,nz
        do j=2,hynt+1
           do i=2,hxnt+1
              call rectangle_int( tmpz(0:k), tmpwk_tot(i,j,0:k),  &
  &                               tmpz(0), tmpz(k), wk_mbl_tot(i,j,k) )
              wk_mbl_tot(kxnt-i+2,kynt-j+2,k)=dconjg(wk_mbl_tot(i,j,k))
              call rectangle_int( tmpz(0:k), tmpwk_tot(i,kynt-j+2,0:k),  &
  &                               tmpz(0), tmpz(k), wk_mbl_tot(i,kynt-j+2,k) )
              wk_mbl_tot(kxnt-i+2,j,k)=dconjg(wk_mbl_tot(i,kynt-j+2,k))
           end do
        end do
     end do

!    !$omp end do
!    !$omp end parallel

  end if

  call mpi_scatter_w( IERROR, wk_mbl_tot, wk_mbl )

end subroutine W_divergence


subroutine W_divergence2( uk_mbl, vk_mbl, wk_mbl )
!-- calculating vertical flow based on a mass continuity without gather/scatter
  use mpi
  use Math_Const
  use savegloval_define
  implicit none
  complex(kind(0d0)), dimension(kxnt,kynt,0:nzp+1), intent(in) :: uk_mbl
  complex(kind(0d0)), dimension(kxnt,kynt,0:nzp+1), intent(in) :: vk_mbl
  complex(kind(0d0)), dimension(kxnt,kynt,0:nzp+1), intent(inout) :: wk_mbl

  integer :: i, j, k, m, IERROR
  double precision :: lxi, lyi, pi2
  double precision, dimension(0:nzp) :: tmpz
  complex(kind(0d0)), dimension(kxnt,kynt,0:nzp+1) :: tmpwk

  tmpwk=0.0d0
  wk_mbl=0.0d0

  lxi=1.0d0/Lx
  lyi=1.0d0/Ly
  pi2=2.0d0*pi_dp

  tmpz(0:nzp)=z_pe(0:nzp)

  do k=1,nzp

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i)

     do i=2,hxnt+1
        tmpwk(i,1,k)=-img_cdp*pi2*(dble(i-1)*lxi*uk_mbl(i,1,k))
        tmpwk(kxnt-i+2,1,k)=dconjg(tmpwk(i,1,k))
     end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(j)

     do j=2,hynt+1
        tmpwk(1,j,k)=-img_cdp*pi2*(dble(j-1)*lyi*vk_mbl(1,j,k))
        tmpwk(1,kynt-j+2,k)=img_cdp*pi2*(dble(j-1)*lyi*vk_mbl(1,kynt-j+2,k))
!-- ここでは, ISPACK の仕様により, l 方向は -l について独立
!-- なので, jy-j+2 では l 方向の波数が奇数乗でかかる場合,
!-- (-l) でかける必要がある (偶数乗はかけなくても同じ結果). 
     end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(i,j)

     do j=2,hynt+1
        do i=2,hxnt+1
           tmpwk(i,j,k)=-img_cdp*pi2  &
  &                     *(dble(i-1)*lxi*uk_mbl(i,j,k)  &
  &                      +dble(j-1)*lyi*vk_mbl(i,j,k))
           tmpwk(kxnt-i+2,kynt-j+2,k)=dconjg(tmpwk(i,j,k))
           tmpwk(i,kynt-j+2,k)=-img_cdp*pi2  &
  &                            *(dble(i-1)*lxi*uk_mbl(i,kynt-j+2,k)  &
  &                             -dble(j-1)*lyi*vk_mbl(i,kynt-j+2,k))
           tmpwk(kxnt-i+2,j,k)=dconjg(tmpwk(i,kynt-j+2,k))
!-- ここでは, ISPACK の仕様により, l 方向は -l について独立
!-- なので, jy-j+2 では l 方向の波数が奇数乗でかかる場合,
!-- (-l) でかける必要がある (偶数乗はかけなくても同じ結果). 
        end do
     end do

!$omp end do
!$omp end parallel

  end do

!  call mpi_gather_w( IERROR, tmpwk, tmpwk_tot(1:kxnt,1:kynt,1:nz) )

  do m=0,PETOT-1

     if(MY_RANK==m)then  ! 該当プロセスのみ鉛直積分

     do k=1,nzp

!$omp parallel default(shared)
!$omp do schedule(runtime) private(i)

        do i=2,hxnt+1
           wk_mbl(i,1,k)=wk_mbl(i,1,k-1)  &
  &                    +(tmpwk(i,1,k-1)+tmpwk(i,1,k))*(tmpz(k)-tmpz(k-1))*0.5d0
           wk_mbl(kxnt-i+2,1,k)=dconjg(wk_mbl(i,1,k))
        end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(j)

        do j=2,hynt+1
           wk_mbl(1,j,k)=wk_mbl(1,j,k-1)  &
  &                    +(tmpwk(1,j,k-1)+tmpwk(1,j,k))*(tmpz(k)-tmpz(k-1))*0.5d0
           wk_mbl(1,kynt-j+2,k)=wk_mbl(1,kynt-j+2,k-1)  &
  &                    +(tmpwk(1,kynt-j+2,k-1)+tmpwk(1,kynt-j+2,k))  &
  &                     *(tmpz(k)-tmpz(k-1))*0.5d0
        end do

!$omp end do
!$omp barrier
!$omp do schedule(runtime) private(i,j)

        do j=2,hynt+1
           do i=2,hxnt+1
              wk_mbl(i,j,k)=wk_mbl(i,j,k-1)  &
  &                       +(tmpwk(i,j,k-1)+tmpwk(i,j,k))*(tmpz(k)-tmpz(k-1))*0.5d0
              wk_mbl(kxnt-i+2,kynt-j+2,k)=dconjg(wk_mbl(i,j,k))
              wk_mbl(i,kynt-j+2,k)=wk_mbl(i,kynt-j+2,k-1)  &
  &                       +(tmpwk(i,kynt-j+2,k-1)+tmpwk(i,kynt-j+2,k))  &
  &                        *(tmpz(k)-tmpz(k-1))*0.5d0
              wk_mbl(kxnt-i+2,j,k)=dconjg(wk_mbl(i,kynt-j+2,k))
           end do
        end do

!$omp end do
!$omp end parallel

     end do
     end if

     if(m<PETOT-1)then
        !-- m -> m+1 へ nzp での鉛直流と収束量を渡す.
        call mpi_sendrecvp2p_valcpd( IERROR, m, m+1,  &
  &                                  wk_mbl(1:kxnt,1:kynt,nzp),  &
  &                                  tmpwk(1:kxnt,1:kynt,nzp),  &
  &                                  wk_mbl(1:kxnt,1:kynt,0),  &
  &                                  tmpwk(1:kxnt,1:kynt,0) )

        call MPI_BARRIER( MPI_COMM_WORLD, IERROR )  ! 同期待ち
     end if

  end do

!  call mpi_scatter_w( IERROR, wk_mbl_tot, wk_mbl )

end subroutine W_divergence2


end module fftsub_mod
