module pbl_mym_simeq
  implicit none

  private

  public :: pbl_mym_simeq_tend
  public :: pbl_mym_simeq_cov
  public :: pbl_mym_diff_matcoef

contains
  subroutine pbl_mym_simeq_tend(                                           &
    & coef, dfm, prod, disp_coef, field, &
    & f2h_m, f2h_p, rdz_f, rdz_h, tend)

    use pp_vardef
    use pbl_grid, only: nz
    use pbl_const, only: timestep
    use pbl_implic, only: pbl_implic_solve
    implicit none

    real(r_size), intent(in) :: coef
                   ! factor for the diffusion coefficients to those for
                   ! momentum

    real(r_size), intent(in) :: dfm(nz)
                                ! diffusion coefficients fot momentum
    real(r_size), intent(in) :: prod(nz)
                                ! production term
    real(r_size), intent(in) :: disp_coef(nz)
                                ! coefficients of dissipation term
    real(r_size), intent(in) :: field(nz)
    real(r_size), intent(in) :: f2h_m(nz-1)
    real(r_size), intent(in) :: f2h_p(nz-1)
    real(r_size), intent(in) :: rdz_f(nz)
    real(r_size), intent(in) :: rdz_h(nz-1)

    ! Intent OUT Variables
    real(r_size), intent(out) :: tend(nz) 
                                ! tendency

    ! Local variables
    integer(4) :: kz
                         ! Loop indexes
    real(r_size) :: aa(nz)
    real(r_size) :: bb(nz)
    real(r_size) :: cc(nz)
                         ! coefficients of tri-diagonal equations


    ! Calculate the coefficients of tri-diagonal eqs. due to diffusion
    call pbl_mym_diff_matcoef(coef, dfm, &
      &                       f2h_m, f2h_p, rdz_f, rdz_h, &
      &                       aa, bb, cc)

    kz = 1
    tend(kz) = prod(kz) - disp_coef(kz) * field(kz) &
      &    + bb(kz) * field(kz) + cc(kz) * field(kz + 1)
    aa(kz) = - aa(kz) * timestep
    bb(kz) = 1.0 - bb(kz) * timestep + timestep * disp_coef(kz)
    cc(kz) = - cc(kz) * timestep

    do kz = 2, nz - 1
      tend(kz) = prod(kz) - disp_coef(kz) * field(kz) &
        &   + aa(kz) * field(kz - 1) + bb(kz) * field(kz) &
        &   + cc(kz) * field(kz + 1)

      aa(kz) = - aa(kz) * timestep
      bb(kz) = 1.0 - bb(kz) * timestep + timestep * disp_coef(kz)
      cc(kz) = - cc(kz) * timestep
    end do

    kz = nz
    tend(kz) = prod(kz) - disp_coef(kz) * field(kz) &
      &    + aa(kz) * field(kz - 1) + bb(kz) * field(kz)
    aa(kz) = - aa(kz) * timestep
    bb(kz) = 1.0 - bb(kz) * timestep + timestep * disp_coef(kz)
    cc(kz) = - cc(kz) * timestep

    ! Solve the tri-diagonal equations
    call pbl_implic_solve(aa, bb, cc, tend)
    return
  end subroutine pbl_mym_simeq_tend
  !
  subroutine pbl_mym_simeq_cov(                               &
    qkw, el, dfm,                                           &
    f2h_m, f2h_p, rdz_f, rdz_h,                                   &
    pdt_tsq, pdt_cov, pdt,                                        &
    pdq_qsq, pdq_cov, pdq,                                        &
    pdc_cov, pdc_tsq, pdc_qsq, pdc,                               &
    tsq, qsq, cov,                                                    &
    tend_tsq, tend_qsq, tend_cov)

    use pp_vardef
    use pbl_mym_const
    use pbl_grid, only: nz
    use pbl_const, only: timestep
    
    implicit none

    ! Intent IN Variables
    real(r_size), intent(in) :: qkw(nz)
                                ! sqrt(qke) = sqrt(2TKE)
    real(r_size), intent(in) :: el(nz)
                                ! mixing length
    real(r_size), intent(in) :: dfm(nz)
                                ! diffusion coefficients fot momentum

    real(r_size), intent(in) :: f2h_m(nz-1)
    real(r_size), intent(in) :: f2h_p(nz-1)
    real(r_size), intent(in) :: rdz_f(nz)
    real(r_size), intent(in) :: rdz_h(nz-1)

    real(r_size), intent(in) :: pdt_tsq(nz)
                                ! a linear part to tsq in the production term of tsq
    real(r_size), intent(in) :: pdt_cov(nz)
                                ! a linear part to cov in the production term of tsq
    real(r_size), intent(in) :: pdt(nz)
                                ! production term of tsq at timestep n
    real(r_size), intent(in) :: pdq_qsq(nz)
                                ! a linear part to qsq in the production term of qsq
    real(r_size), intent(in) :: pdq_cov(nz)
                                ! a linear part to cov in the production term of qsq
    real(r_size), intent(in) :: pdq(nz)
                                ! production term of qsq at timestep n
    real(r_size), intent(in) :: pdc_cov(nz)
                                ! a linear part to cov in the production term of cov
    real(r_size), intent(in) :: pdc_tsq(nz)
                                ! a linear part to tsq in the production term of cov
    real(r_size), intent(in) :: pdc_qsq(nz)
                                ! a linear part to qsq in the production term of cov
    real(r_size), intent(in) :: pdc(nz)
                                ! the production term of cov at timestep n

    real(r_size), intent(in) :: tsq(nz)
                              ! Self covariance of liquid potential temperature
                              ! (thetal'**2) 
    real(r_size), intent(in) :: qsq(nz)
                              ! Self covariance of total water
                              ! (qw'**2) 
    real(r_size), intent(in) :: cov(nz)
                              ! Correlation between thetal and qw
                              ! (thetal'qw') 

    ! Intent OUT Variables
    real(r_size), intent(out) :: tend_tsq(nz)
    real(r_size), intent(out) :: tend_qsq(nz)
    real(r_size), intent(out) :: tend_cov(nz)

    ! Local Variables
    integer(4) :: kz
                     ! loop indexes, etc.
    real(r_size) :: elem
                     ! work variables

    real(r_size) :: disp_coef
                     ! coefficients of the prognostic variables in
                     ! dissipation terms

    real(r_size) :: aa(nz)
    real(r_size) :: bb(nz)
    real(r_size) :: cc(nz)
                      ! tri-diagonal matrix elements due to diffusion
    real(r_size) :: qq_tsq(nz)
    real(r_size) :: qq_qsq(nz)
    real(r_size) :: qq_cov(nz)
    real(r_size) :: aa_tsq(nz)
    real(r_size) :: bb_tsq(nz)
    real(r_size) :: cc_tsq(nz)
    real(r_size) :: pp_tc(nz)
    real(r_size) :: aa_qsq(nz)
    real(r_size) :: bb_qsq(nz)
    real(r_size) :: cc_qsq(nz)
    real(r_size) :: pp_qc(nz)
    real(r_size) :: aa_cov(nz)
    real(r_size) :: bb_cov(nz)
    real(r_size) :: cc_cov(nz)
    real(r_size) :: pp_ct(nz)
    real(r_size) :: pp_cq(nz)
                     ! matrix elements (see the documents for details)

    call pbl_mym_diff_matcoef(coef_trbvar_diff, dfm,&
      &                       f2h_m, f2h_p, rdz_f, rdz_h, &
      &                       aa, bb, cc)

    kz = 1

    disp_coef = 2.0 * qkw(kz) / (b2 * el(kz))
    qq_tsq(kz) = - disp_coef * tsq(kz) &
      & + bb(kz) * tsq(kz) + cc(kz) * tsq(kz + 1) &
      & + 2.0 * pdt(kz)

    qq_qsq(kz) = - disp_coef * qsq(kz) &
      & + bb(kz) * qsq(kz) + cc(kz) * qsq(kz + 1) &
      & + 2.0 * pdq(kz)

    qq_cov(kz) = - disp_coef * cov(kz) &
      & + bb(kz) * cov(kz) + cc(kz) * cov(kz + 1) &
      & + 2.0 * pdc(kz)

    elem = 1.0 - bb(kz) * timestep + timestep * disp_coef
    bb_tsq(kz) = elem - 2.0 * pdt_tsq(kz) * timestep
    bb_qsq(kz) = elem - 2.0 * pdq_qsq(kz) * timestep
    bb_cov(kz) = elem - 2.0 * pdc_cov(kz) * timestep

    elem = -aa(kz) * timestep
    aa_tsq(kz) = elem
    aa_qsq(kz) = elem
    aa_cov(kz) = elem

    elem = -cc(kz) * timestep
    cc_tsq(kz) = elem
    cc_qsq(kz) = elem
    cc_cov(kz) = elem

    pp_tc(kz) = - 2.0 * pdt_cov(kz) * timestep
    pp_qc(kz) = - 2.0 * pdq_cov(kz) * timestep
    pp_ct(kz) = - 2.0 * pdc_tsq(kz) * timestep
    pp_cq(kz) = - 2.0 * pdc_qsq(kz) * timestep

    ! set maxtrix elements
    do kz = 2, nz - 1
      disp_coef = 2.0 * qkw(kz) / (b2 * el(kz))

      qq_tsq(kz) = - disp_coef * tsq(kz) &
        & + aa(kz) * tsq(kz - 1) + bb(kz) * tsq(kz) + cc(kz) * tsq(kz + 1) &
        & + 2.0 * pdt(kz)

      qq_qsq(kz) = - disp_coef * qsq(kz) &
        & + aa(kz) * qsq(kz - 1) + bb(kz) * qsq(kz) + cc(kz) * qsq(kz + 1) &
        & + 2.0 * pdq(kz)

      qq_cov(kz) = - disp_coef * cov(kz) &
        & + aa(kz) * cov(kz - 1) + bb(kz) * cov(kz) + cc(kz) * cov(kz + 1) &
        & + 2.0 * pdc(kz)

      elem = 1.0 - bb(kz) * timestep + timestep * disp_coef
      bb_tsq(kz) = elem - 2.0 * pdt_tsq(kz) * timestep
      bb_qsq(kz) = elem - 2.0 * pdq_qsq(kz) * timestep
      bb_cov(kz) = elem - 2.0 * pdc_cov(kz) * timestep


      elem = -aa(kz) * timestep
      aa_tsq(kz) = elem
      aa_qsq(kz) = elem
      aa_cov(kz) = elem


      elem = -cc(kz) * timestep
      cc_tsq(kz) = elem
      cc_qsq(kz) = elem
      cc_cov(kz) = elem

      pp_tc(kz) = - 2.0 * pdt_cov(kz) * timestep
      pp_qc(kz) = - 2.0 * pdq_cov(kz) * timestep
      pp_ct(kz) = - 2.0 * pdc_tsq(kz) * timestep
      pp_cq(kz) = - 2.0 * pdc_qsq(kz) * timestep
    end do

    kz = nz

    disp_coef = 2.0 * qkw(kz) / (b2 * el(kz))

    qq_tsq(kz) = - disp_coef * tsq(kz) &
      & + aa(kz) * tsq(kz - 1) + bb(kz) * tsq(kz)  &
      & + 2.0 * pdt(kz)

    qq_qsq(kz) = - disp_coef * qsq(kz) &
      & + aa(kz) * qsq(kz - 1) + bb(kz) * qsq(kz) &
      & + 2.0 * pdq(kz)

    qq_cov(kz) = - disp_coef * cov(kz) &
      & + aa(kz) * cov(kz - 1) + bb(kz) * cov(kz) &
      & + 2.0 * pdc(kz)


    elem = 1.0 - bb(kz) * timestep + timestep * disp_coef
    bb_tsq(kz) = elem - 2.0 * pdt_tsq(kz) * timestep
    bb_qsq(kz) = elem - 2.0 * pdq_qsq(kz) * timestep
    bb_cov(kz) = elem - 2.0 * pdc_cov(kz) * timestep

    elem = -aa(kz) * timestep
    aa_tsq(kz) = elem
    aa_qsq(kz) = elem
    aa_cov(kz) = elem

    elem = -cc(kz) * timestep
    cc_tsq(kz) = elem
    cc_qsq(kz) = elem
    cc_cov(kz) = elem

    pp_tc(kz) = - 2.0 * pdt_cov(kz) * timestep
    pp_qc(kz) = - 2.0 * pdq_cov(kz) * timestep
    pp_ct(kz) = - 2.0 * pdc_tsq(kz) * timestep
    pp_cq(kz) = - 2.0 * pdc_qsq(kz) * timestep
    

    ! Solve the simultaneous equations for tendency of tsq, qsq and cov
    call pbl_mym_simeq_solve(                                         &
      qq_tsq, qq_qsq, qq_cov,                                         &
      aa_tsq, bb_tsq, cc_tsq, pp_tc,                                  &
      aa_qsq, bb_qsq, cc_qsq, pp_qc,                                  &
      aa_cov, bb_cov, cc_cov, pp_ct, pp_cq,                           &
      tend_tsq, tend_qsq, tend_cov)

    return

  end subroutine pbl_mym_simeq_cov


  subroutine pbl_mym_simeq_solve(                                             &
    qq_tsq_k, qq_qsq_k, qq_cov_k,                                           &
    aa_tsq_k, bb_tsq_k, cc_tsq_k, pp_tc_k,                                    &
    aa_qsq_k, bb_qsq_k, cc_qsq_k, pp_qc_k,                                    &
    aa_cov_k, bb_cov_k, cc_cov_k, pp_ct_k, pp_cq_k,                            &
    tend_tsq_k, tend_qsq_k, tend_cov_k)

    use pp_vardef
    use pbl_grid, only: nz
    implicit none

    real(r_size), intent(inout) :: qq_tsq_k(nz)
    real(r_size), intent(inout) :: qq_qsq_k(nz)
    real(r_size), intent(inout) :: qq_cov_k(nz)
    real(r_size), intent(inout) :: aa_tsq_k(nz)
    real(r_size), intent(inout) :: bb_tsq_k(nz)
    real(r_size), intent(inout) :: cc_tsq_k(nz)
    real(r_size), intent(inout) :: pp_tc_k(nz)
    real(r_size), intent(inout) :: aa_qsq_k(nz)
    real(r_size), intent(inout) :: bb_qsq_k(nz)
    real(r_size), intent(inout) :: cc_qsq_k(nz)
    real(r_size), intent(inout) :: pp_qc_k(nz)
    real(r_size), intent(inout) :: aa_cov_k(nz)
    real(r_size), intent(inout) :: bb_cov_k(nz)
    real(r_size), intent(inout) :: cc_cov_k(nz)
    real(r_size), intent(inout) :: pp_ct_k(nz)
    real(r_size), intent(inout) :: pp_cq_k(nz)

    real(r_size), intent(out) :: tend_tsq_k(nz)
    real(r_size), intent(out) :: tend_qsq_k(nz)
    real(r_size), intent(out) :: tend_cov_k(nz)

    ! Local variables
    integer(4) :: kz
    integer(4) :: endflag

    ! Parameters
    integer(4), parameter :: max_itr    = 500
               ! the maximum iteration number

    real(r_size), parameter :: eps       = 1.0d-15
               ! convergence creteria

    real(r_size), parameter :: tsq_scale = 1.0d0
    real(r_size), parameter :: qsq_scale = 1.0d6
    real(r_size), parameter :: cov_scale = 1.0d3
    real(r_size), parameter :: r_tsq_scale = 1.0 / tsq_scale
    real(r_size), parameter :: r_qsq_scale = 1.0 / qsq_scale
    real(r_size), parameter :: r_cov_scale = 1.0 / cov_scale
    real(r_size), parameter :: tc_scale = tsq_scale * r_cov_scale
    real(r_size), parameter :: qc_scale = qsq_scale * r_cov_scale
    real(r_size), parameter :: ct_scale = cov_scale * r_tsq_scale
    real(r_size), parameter :: cq_scale = cov_scale * r_qsq_scale
                  ! scaling factors for the matrix elements

    do kz = 1, nz
      qq_tsq_k(kz) = qq_tsq_k(kz) * tsq_scale
      qq_qsq_k(kz) = qq_qsq_k(kz) * qsq_scale
      qq_cov_k(kz) = qq_cov_k(kz) * cov_scale

      pp_tc_k(kz)  = pp_tc_k(kz)  * tc_scale

      pp_qc_k(kz)  = pp_qc_k(kz)  * qc_scale

      pp_ct_k(kz)  = pp_ct_k(kz)  * ct_scale
      pp_cq_k(kz)  = pp_cq_k(kz)  * cq_scale
    end do

    call  pbl_mym_simeq_bcgstab(                                    &
      max_itr, eps,                                             &
      qq_tsq_k, qq_qsq_k, qq_cov_k,                             &
      aa_tsq_k, bb_tsq_k, cc_tsq_k, pp_tc_k,                    &
      aa_qsq_k, bb_qsq_k, cc_qsq_k, pp_qc_k,                    &
      aa_cov_k, bb_cov_k, cc_cov_k,                             &
      pp_ct_k, pp_cq_k,                                         &
      tend_tsq_k, tend_qsq_k, tend_cov_k, endflag)

    if (endflag < 0) then
      ! if failed to converge, solve eqs. by LU decomposition
      call pbl_mym_simeq_lud(                                       &
        qq_tsq_k, qq_qsq_k, qq_cov_k,                             &
        aa_tsq_k, bb_tsq_k, cc_tsq_k, pp_tc_k,                    &
        aa_qsq_k, bb_qsq_k, cc_qsq_k, pp_qc_k,                    &
        aa_cov_k, bb_cov_k, cc_cov_k, pp_ct_k, pp_cq_k,           &
        tend_tsq_k, tend_qsq_k, tend_cov_k)
    end if

    ! scale back to the original
    do kz = 1, nz
      tend_tsq_k(kz) = tend_tsq_k(kz) * r_tsq_scale
      tend_qsq_k(kz) = tend_qsq_k(kz) * r_qsq_scale
      tend_cov_k(kz) = tend_cov_k(kz) * r_cov_scale
    end do

    return
  end subroutine pbl_mym_simeq_solve
  
  !

  !
  subroutine pbl_mym_simeq_ilud2(                                           &
    qq_tsq_k, qq_qsq_k, qq_cov_k,                                     &
    aap_tsq_k, r_bbp_tsq_k, ccp_tsq_k,                                &
    ppp_tc_k, pp1_tc_k,  pp2_tc_k,                                    &
    aap_qsq_k, r_bbp_qsq_k, ccp_qsq_k,                                &
    ppp_qc_k, pp1_qc_k,  pp2_qc_k,                                    &
    aap_cov_k, r_bbp_cov_k, ccp_cov_k,                                &
    ppp_ct_k, ppp_cq_k, pp1_ct_k, pp1_cq_k, pp2_ct_k, pp2_cq_k,       &
    tsq_k, qsq_k, cov_k)

    use pp_vardef
    use pbl_grid, only: nz
    implicit none

    ! intent in variables
    real(r_size), intent(in) :: qq_tsq_k(nz)
    real(r_size), intent(in) :: qq_qsq_k(nz)
    real(r_size), intent(in) :: qq_cov_k(nz)
    real(r_size), intent(in) :: aap_tsq_k(nz)
    real(r_size), intent(in) :: r_bbp_tsq_k(nz)
    real(r_size), intent(in) :: ccp_tsq_k(nz)
    real(r_size), intent(in) :: ppp_tc_k(nz)
    real(r_size), intent(in) :: pp1_tc_k(nz)
    real(r_size), intent(in) :: pp2_tc_k(nz)
    real(r_size), intent(in) :: aap_qsq_k(nz)
    real(r_size), intent(in) :: r_bbp_qsq_k(nz)
    real(r_size), intent(in) :: ccp_qsq_k(nz)
    real(r_size), intent(in) :: ppp_qc_k(nz)
    real(r_size), intent(in) :: pp1_qc_k(nz)
    real(r_size), intent(in) :: pp2_qc_k(nz)
    real(r_size), intent(in) :: aap_cov_k(nz)
    real(r_size), intent(in) :: r_bbp_cov_k(nz)
    real(r_size), intent(in) :: ccp_cov_k(nz)
    real(r_size), intent(in) :: ppp_ct_k(nz)
    real(r_size), intent(in) :: ppp_cq_k(nz)
    real(r_size), intent(in) :: pp1_ct_k(nz)
    real(r_size), intent(in) :: pp1_cq_k(nz)
    real(r_size), intent(in) :: pp2_ct_k(nz)
    real(r_size), intent(in) :: pp2_cq_k(nz)
                     ! matrix elements of ILU decomposed matrix

    real(r_size), intent(out) :: tsq_k(nz)
    real(r_size), intent(out) :: qsq_k(nz)
    real(r_size), intent(out) :: cov_k(nz)
                     ! solution vectors

    integer :: kz
                     ! loop indexes


    tsq_k(1) = qq_tsq_k(1) * r_bbp_tsq_k(1)
    qsq_k(1) = qq_qsq_k(1) * r_bbp_qsq_k(1)

    do kz = 2, nz
      tsq_k(kz) = (qq_tsq_k(kz) - aap_tsq_k(kz) * tsq_k(kz - 1))            &
        * r_bbp_tsq_k(kz)
      qsq_k(kz) = (qq_qsq_k(kz) - aap_qsq_k(kz) * qsq_k(kz - 1))            &
        * r_bbp_qsq_k(kz)
    end do


    kz = 1
    cov_k(kz) = (qq_cov_k(kz) - ppp_ct_k(kz) * tsq_k(kz)                    &
      - pp1_ct_k(kz) * tsq_k(kz + 1)                &
      - pp2_ct_k(kz) * tsq_k(kz + 2)                &
      - ppp_cq_k(kz) * qsq_k(kz)                    &
      - pp1_cq_k(kz) * qsq_k(kz + 1)                &
      - pp2_cq_k(kz) * qsq_k(kz + 2))               &
      * r_bbp_cov_k(kz)

    do kz = 2, nz - 2
      cov_k(kz) = (qq_cov_k(kz) - ppp_ct_k(kz) * tsq_k(kz)                  &
        - pp1_ct_k(kz) * tsq_k(kz + 1)              &
        - pp2_ct_k(kz) * tsq_k(kz + 2)              &
        - ppp_cq_k(kz) * qsq_k(kz)                  &
        - pp1_cq_k(kz) * qsq_k(kz + 1)              &
        - pp2_cq_k(kz) * qsq_k(kz + 2)              &
        - aap_cov_k(kz) * cov_k(kz - 1))            &
        * r_bbp_cov_k(kz)
    end do

    kz = nz - 1
    cov_k(kz) = (qq_cov_k(kz) - ppp_ct_k(kz) * tsq_k(kz)                    &
      - pp1_ct_k(kz) * tsq_k(kz + 1)                &
      - ppp_cq_k(kz) * qsq_k(kz)                    &
      - pp1_cq_k(kz) * qsq_k(kz + 1)                &
      - aap_cov_k(kz) * cov_k(kz - 1))              &
      * r_bbp_cov_k(kz)


    kz = nz
    cov_k(kz) = (qq_cov_k(kz) - ppp_ct_k(kz) * tsq_k(kz)                    &
      - ppp_cq_k(kz) * qsq_k(kz)                    &
      - aap_cov_k(kz) * cov_k(kz - 1))              &
      * r_bbp_cov_k(kz)


    do kz = nz - 1, 1, -1
      cov_k(kz) = cov_k(kz)                                               &
        - ccp_cov_k(kz) * cov_k(kz + 1) * r_bbp_cov_k(kz)
    end do

    kz = nz
    qsq_k(kz) = qsq_k(kz) - (ppp_qc_k(kz) * cov_k(kz)                       &
      + pp1_qc_k(kz) * cov_k(kz - 1)                  &
      + pp2_qc_k(kz) * cov_k(kz - 2))                 &
      * r_bbp_qsq_k(kz)
    tsq_k(kz) = tsq_k(kz) - (ppp_tc_k(kz) * cov_k(kz)                       &
      + pp1_tc_k(kz) * cov_k(kz - 1)                  &
      + pp2_tc_k(kz) * cov_k(kz - 2))                 &
      * r_bbp_tsq_k(kz)

    do kz = nz - 1, 3, -1
      qsq_k(kz) = qsq_k(kz)                                               &
        - (ppp_qc_k(kz) * cov_k(kz) + ccp_qsq_k(kz) * qsq_k(kz + 1)     &
        + pp1_qc_k(kz) * cov_k(kz - 1)                             &
        + pp2_qc_k(kz) * cov_k(kz - 2))                            &
        * r_bbp_qsq_k(kz)
      tsq_k(kz) = tsq_k(kz)                                               &
        - (ppp_tc_k(kz) * cov_k(kz) + ccp_tsq_k(kz) * tsq_k(kz + 1)     &
        + pp1_tc_k(kz) * cov_k(kz - 1)                             &
        + pp2_tc_k(kz) * cov_k(kz - 2))                            &
        * r_bbp_tsq_k(kz)
    end do

    kz = 2
    qsq_k(kz) = qsq_k(kz)                                                 &
      - (ppp_qc_k(kz) * cov_k(kz) + ccp_qsq_k(kz) * qsq_k(kz + 1)       &
      + pp1_qc_k(kz) * cov_k(kz - 1))                              &
      * r_bbp_qsq_k(kz)
    tsq_k(kz) = tsq_k(kz)                                                 &
      - (ppp_tc_k(kz) * cov_k(kz) + ccp_tsq_k(kz) * tsq_k(kz + 1)       &
      + pp1_tc_k(kz) * cov_k(kz - 1))                              &
      * r_bbp_tsq_k(kz)


    kz = 1
    qsq_k(kz) = qsq_k(kz)                                                 &
      - (ppp_qc_k(kz) * cov_k(kz) + ccp_qsq_k(kz) * qsq_k(kz + 1))      &
      * r_bbp_qsq_k(kz)
    tsq_k(kz) = tsq_k(kz)                                                 &
      - (ppp_tc_k(kz) * cov_k(kz) + ccp_tsq_k(kz) * tsq_k(kz + 1))      &
      * r_bbp_tsq_k(kz)

    return

  end subroutine pbl_mym_simeq_ilud2
  !
  subroutine pbl_mym_simeq_bcgstab(                                     &
    max_itr, eps,                                                        &
    qq_tsq_k, qq_qsq_k, qq_cov_k,                                        &
    aa_tsq_k, bb_tsq_k, cc_tsq_k, pp_tc_k,                               &
    aa_qsq_k, bb_qsq_k, cc_qsq_k, pp_qc_k,                               &
    aa_cov_k, bb_cov_k, cc_cov_k, pp_ct_k, pp_cq_k,                      &
    tend_tsq_k, tend_qsq_k, tend_cov_k, endflag)

    use pp_vardef
    use pbl_grid, only: nz

    implicit none

    ! Intent IN Variables
    integer(4), intent(in) :: max_itr
                          ! the maximum number of iterations

    real(r_size), intent(in) :: eps
                          ! convergence condition

    real(r_size), intent(in) :: qq_tsq_k(nz)
    real(r_size), intent(in) :: qq_qsq_k(nz)
    real(r_size), intent(in) :: qq_cov_k(nz)
    real(r_size), intent(in) :: aa_tsq_k(nz)
    real(r_size), intent(in) :: bb_tsq_k(nz)
    real(r_size), intent(in) :: cc_tsq_k(nz)
    real(r_size), intent(in) :: pp_tc_k(nz)
    real(r_size), intent(in) :: aa_qsq_k(nz)
    real(r_size), intent(in) :: bb_qsq_k(nz)
    real(r_size), intent(in) :: cc_qsq_k(nz)
    real(r_size), intent(in) :: pp_qc_k(nz)
    real(r_size), intent(in) :: aa_cov_k(nz)
    real(r_size), intent(in) :: bb_cov_k(nz)
    real(r_size), intent(in) :: cc_cov_k(nz)
    real(r_size), intent(in) :: pp_ct_k(nz)
    real(r_size), intent(in) :: pp_cq_k(nz)
                          ! matrix elements

    real(r_size), intent(out) :: tend_tsq_k(nz)
    real(r_size), intent(out) :: tend_qsq_k(nz)
    real(r_size), intent(out) :: tend_cov_k(nz)
              ! solved tendencies of tsq, qsq and cov

    integer(4), intent(out) :: endflag
              ! to indicate if converged
              ! positive means proper solution is obtains.
              ! 0: converged
              ! 1: obtained an exact solution (residual = 0)
              ! -1: max_itr iterations were done, but not converged
              ! -2: solution in iterations becomes unexpectedly large,
              !     so gave up

    ! Local variables
    integer(4) :: kz
    integer(4) :: m
                                ! loop indexes
    integer(4) :: nitr
                                ! a number of iterations

    real(r_size) :: norm
                                ! residual norm
    real(r_size) :: r_qq_norm
                                ! reciprocal of the inirial residual norm
    real(r_size) :: err
                                ! norm * r_qq_norm
    real(r_size) :: bet
                                ! beta
    real(r_size) :: alp_num
                                ! numerator of alpha
    real(r_size) :: alp_den
                                ! denominator of alpha
    real(r_size) :: alp
                                ! alpha
    real(r_size) :: omg_num
                                ! numerator of omega
    real(r_size) :: omg_den
                                ! denominator of omega
    real(r_size) :: omg
                                ! omega
    real(r_size) :: max_val
                                ! maximum value of solutions

    ! Vectors used in the BCG algorithm.
    ! See the document
    real(r_size) :: rvec_tsq(nz)
    real(r_size) :: rvec_qsq(nz)
    real(r_size) :: rvec_cov(nz)
    real(r_size) :: r0vec_tsq(nz)
    real(r_size) :: r0vec_qsq(nz)
    real(r_size) :: r0vec_cov(nz)
    real(r_size) :: pvec_tsq(nz)
    real(r_size) :: pvec_qsq(nz)
    real(r_size) :: pvec_cov(nz)
    real(r_size) :: ppvec_tsq(nz)
    real(r_size) :: ppvec_qsq(nz)
    real(r_size) :: ppvec_cov(nz)
    real(r_size) :: vvec_tsq(nz)
    real(r_size) :: vvec_qsq(nz)
    real(r_size) :: vvec_cov(nz)
    real(r_size) :: svec_tsq(nz)
    real(r_size) :: svec_qsq(nz)
    real(r_size) :: svec_cov(nz)
    real(r_size) :: ssvec_tsq(nz)
    real(r_size) :: ssvec_qsq(nz)
    real(r_size) :: ssvec_cov(nz)
    real(r_size) :: tvec_tsq(nz)
    real(r_size) :: tvec_qsq(nz)
    real(r_size) :: tvec_cov(nz)

    ! elements of ILU(2)
    ! the second dimension corresponds to the fill-in level
    real(r_size) :: aap_tsq_k(nz)
    real(r_size) :: r_bbp_tsq_k(nz)
    real(r_size) :: ccp_tsq_k(nz)
    real(r_size) :: aap_qsq_k(nz)
    real(r_size) :: r_bbp_qsq_k(nz)
    real(r_size) :: ccp_qsq_k(nz)
    real(r_size) :: aap_cov_k(nz)
    real(r_size) :: r_bbp_cov_k(nz)
    real(r_size) :: ccp_cov_k(nz)
    real(r_size) :: ppp_tc_k(nz, 0:2)
    real(r_size) :: ppp_qc_k(nz, 0:2)
    real(r_size) :: ppp_ct_k(nz, 0:2)
    real(r_size) :: ppp_cq_k(nz, 0:2)

    call pbl_mym_simeq_ilud2_decmp(                                      &
      aa_tsq_k, bb_tsq_k, cc_tsq_k, pp_tc_k,                             &
      aa_qsq_k, bb_qsq_k, cc_qsq_k, pp_qc_k,                             &
      aa_cov_k, bb_cov_k, cc_cov_k, pp_ct_k, pp_cq_k,                    &
      aap_tsq_k, r_bbp_tsq_k, ccp_tsq_k,                                 &
      ppp_tc_k(1, 0), ppp_tc_k(1, 1), ppp_tc_k(1, 2),                    &
      aap_qsq_k, r_bbp_qsq_k, ccp_qsq_k,                                 &
      ppp_qc_k(1, 0), ppp_qc_k(1, 1), ppp_qc_k(1, 2),                    &
      aap_cov_k, r_bbp_cov_k, ccp_cov_k,                                 &
      ppp_ct_k(1, 0), ppp_cq_k(1, 0),                                    &
      ppp_ct_k(1, 1), ppp_cq_k(1, 1),                                    &
      ppp_ct_k(1, 2), ppp_cq_k(1, 2))

    r_qq_norm = 0.0
    alp_num = 0.0
    do kz = 1, nz
      ! set the initial values
      tend_tsq_k(kz) = 0.0
      tend_qsq_k(kz) = 0.0
      tend_cov_k(kz) = 0.0

      ! rvec is a residual vector
      rvec_tsq(kz) = qq_tsq_k(kz)
      rvec_qsq(kz) = qq_qsq_k(kz)
      rvec_cov(kz) = qq_cov_k(kz)

      r0vec_tsq(kz) = rvec_tsq(kz)
      r0vec_qsq(kz) = rvec_qsq(kz)
      r0vec_cov(kz) = rvec_cov(kz)

      pvec_tsq(kz) = rvec_tsq(kz)
      pvec_qsq(kz) = rvec_qsq(kz)
      pvec_cov(kz) = rvec_cov(kz)

      alp_num = alp_num + r0vec_tsq(kz) * rvec_tsq(kz)                      &
        + r0vec_qsq(kz) * rvec_qsq(kz)                      &
        + r0vec_cov(kz) * rvec_cov(kz)

      r_qq_norm = r_qq_norm +  qq_tsq_k(kz) * qq_tsq_k(kz)                  &
        +  qq_qsq_k(kz) * qq_qsq_k(kz)                  &
        +  qq_cov_k(kz) * qq_cov_k(kz)

    end do

    if (r_qq_norm == 0.0) then
      r_qq_norm = 0.0
      endflag = 2
      nitr = 0
    else
      r_qq_norm = 1.0 / r_qq_norm
      endflag = -1
      nitr = max_itr
    end if

    do m = 1, nitr
      call pbl_mym_simeq_ilud2(                                         &
        pvec_tsq, pvec_qsq, pvec_cov,                                    &
        aap_tsq_k, r_bbp_tsq_k, ccp_tsq_k,                               &
        ppp_tc_k(1, 0), ppp_tc_k(1, 1), ppp_tc_k(1, 2),                  &
        aap_qsq_k, r_bbp_qsq_k, ccp_qsq_k,                               &
        ppp_qc_k(1, 0), ppp_qc_k(1, 1), ppp_qc_k(1, 2),                  &
        aap_cov_k, r_bbp_cov_k, ccp_cov_k,                               &
        ppp_ct_k(1, 0), ppp_cq_k(1, 0),                                  &
        ppp_ct_k(1, 1), ppp_cq_k(1, 1),                                  &
        ppp_ct_k(1, 2), ppp_cq_k(1, 2),                                  &
        ppvec_tsq, ppvec_qsq, ppvec_cov)

      ! v = A pp
      call pbl_mym_simeq_matrix_prod(                                         &
        aa_tsq_k, bb_tsq_k, cc_tsq_k, pp_tc_k,                           &
        aa_qsq_k, bb_qsq_k, cc_qsq_k, pp_qc_k,                           &
        aa_cov_k, bb_cov_k, cc_cov_k, pp_ct_k, pp_cq_k,                  &
        ppvec_tsq, ppvec_qsq, ppvec_cov,                                 &
        vvec_tsq, vvec_qsq, vvec_cov)


      alp_den = 0.0
      do kz = 1, nz
        alp_den = alp_den + r0vec_tsq(kz) * vvec_tsq(kz)                    &
          + r0vec_qsq(kz) * vvec_qsq(kz)                                   &
          + r0vec_cov(kz) * vvec_cov(kz)
      end do

      if (alp_den == 0.0) then
        endflag = 1
      else
        alp = alp_num / alp_den

        do kz = 1, nz
          svec_tsq(kz) = rvec_tsq(kz) - alp * vvec_tsq(kz)
          svec_qsq(kz) = rvec_qsq(kz) - alp * vvec_qsq(kz)
          svec_cov(kz) = rvec_cov(kz) - alp * vvec_cov(kz)
        end do

        call pbl_mym_simeq_ilud2(                                       &
          svec_tsq, svec_qsq, svec_cov,                                  &
          aap_tsq_k, r_bbp_tsq_k, ccp_tsq_k,                             &
          ppp_tc_k(1, 0), ppp_tc_k(1, 1), ppp_tc_k(1, 2),                &
          aap_qsq_k, r_bbp_qsq_k, ccp_qsq_k,                             &
          ppp_qc_k(1, 0), ppp_qc_k(1, 1), ppp_qc_k(1, 2),                &
          aap_cov_k, r_bbp_cov_k, ccp_cov_k,                             &
          ppp_ct_k(1, 0), ppp_cq_k(1, 0),                                &
          ppp_ct_k(1, 1), ppp_cq_k(1, 1),                                &
          ppp_ct_k(1, 2), ppp_cq_k(1, 2),                                &
          ssvec_tsq, ssvec_qsq, ssvec_cov)

        ! t = A ss
        call pbl_mym_simeq_matrix_prod(                                       &
          aa_tsq_k, bb_tsq_k, cc_tsq_k, pp_tc_k,                         &
          aa_qsq_k, bb_qsq_k, cc_qsq_k, pp_qc_k,                         &
          aa_cov_k, bb_cov_k, cc_cov_k, pp_ct_k, pp_cq_k,                &
          ssvec_tsq, ssvec_qsq, ssvec_cov,                               &
          tvec_tsq, tvec_qsq, tvec_cov)

        omg_num = 0.0
        omg_den = 0.0
        do kz = 1, nz
          omg_num = omg_num + tvec_tsq(kz) * svec_tsq(kz)                   &
            + tvec_qsq(kz) * svec_qsq(kz)                   &
            + tvec_cov(kz) * svec_cov(kz)
          omg_den = omg_den + tvec_tsq(kz) * tvec_tsq(kz)                   &
            + tvec_qsq(kz) * tvec_qsq(kz)                   &
            + tvec_cov(kz) * tvec_cov(kz)
        end do

        omg = omg_num / omg_den

        alp_den = alp_num

        alp_num = 0.0
        norm = 0.0
        max_val = 0.0
        do kz = 1, nz
          tend_tsq_k(kz) = tend_tsq_k(kz) &
            &       + alp * ppvec_tsq(kz) + omg * ssvec_tsq(kz)
          tend_qsq_k(kz) = tend_qsq_k(kz) &
            &       + alp * ppvec_qsq(kz) + omg * ssvec_qsq(kz)
          tend_cov_k(kz) = tend_cov_k(kz) &
            &       + alp * ppvec_cov(kz) + omg * ssvec_cov(kz)
          rvec_tsq(kz) = svec_tsq(kz) - omg * tvec_tsq(kz)
          rvec_qsq(kz) = svec_qsq(kz) - omg * tvec_qsq(kz)
          rvec_cov(kz) = svec_cov(kz) - omg * tvec_cov(kz)

          alp_num = alp_num + r0vec_tsq(kz) * rvec_tsq(kz)                  &
            + r0vec_qsq(kz) * rvec_qsq(kz)                  &
            + r0vec_cov(kz) * rvec_cov(kz)
          norm = norm + rvec_tsq(kz) * rvec_tsq(kz)                         &
            + rvec_qsq(kz) * rvec_qsq(kz)                         &
            + rvec_cov(kz) * rvec_cov(kz)

          max_val = max(max_val, abs(tend_tsq_k(kz)),    &
            abs(tend_qsq_k(kz)),                           &
            abs(tend_cov_k(kz)))
        end do
        err = sqrt(norm * r_qq_norm)

        if (err >= eps .and. m < 30 .and. max_val < 1.0e10) then
          ! continue to the next step
        else if (max_val > 100.0) then
          ! Unexpectedly huge
          endflag = -2
        else if (err < eps) then
          ! Converged
          endflag = 0
        end if
      end if
      if (endflag /= -1) then
        exit
      else
        bet = alp_num * alp / (alp_den * omg)
        do kz = 1, nz
          pvec_tsq(kz) = rvec_tsq(kz)                                       &
            + bet * (pvec_tsq(kz) - omg * vvec_tsq(kz))
          pvec_qsq(kz) = rvec_qsq(kz)                                       &
            + bet * (pvec_qsq(kz) - omg * vvec_qsq(kz))
          pvec_cov(kz) = rvec_cov(kz)                                       &
            + bet * (pvec_cov(kz) - omg * vvec_cov(kz))
        end do
      end if
    end do    ! loop m = 1, max_itr

    return

  end subroutine pbl_mym_simeq_bcgstab
  !
  subroutine pbl_mym_simeq_matrix_prod(                               &
    aa_tsq_k, bb_tsq_k, cc_tsq_k, pp_tc_k,                            &
    aa_qsq_k, bb_qsq_k, cc_qsq_k, pp_qc_k,                            &
    aa_cov_k, bb_cov_k, cc_cov_k, pp_ct_k, pp_cq_k,                   &
    x_tsq_k, x_qsq_k, x_cov_k,                                        &
    y_tsq_k, y_qsq_k, y_cov_k)

    use pp_vardef
    use pbl_grid, only: nz
    implicit none

    real(r_size), intent(in) :: aa_tsq_k(nz)
    real(r_size), intent(in) :: bb_tsq_k(nz)
    real(r_size), intent(in) :: cc_tsq_k(nz)
    real(r_size), intent(in) :: pp_tc_k(nz)
    real(r_size), intent(in) :: aa_qsq_k(nz)
    real(r_size), intent(in) :: bb_qsq_k(nz)
    real(r_size), intent(in) :: cc_qsq_k(nz)
    real(r_size), intent(in) :: pp_qc_k(nz)
    real(r_size), intent(in) :: aa_cov_k(nz)
    real(r_size), intent(in) :: bb_cov_k(nz)
    real(r_size), intent(in) ::  cc_cov_k(nz)
    real(r_size), intent(in) :: pp_ct_k(nz)
    real(r_size), intent(in) :: pp_cq_k(nz)
                                ! vector elements
    real(r_size), intent(in) :: x_tsq_k(nz)
    real(r_size), intent(in) :: x_qsq_k(nz)
    real(r_size), intent(in) :: x_cov_k(nz)

    real(r_size), intent(out) :: y_tsq_k(nz)
    real(r_size), intent(out) :: y_qsq_k(nz)
    real(r_size), intent(out) :: y_cov_k(nz)
                                ! vector elements of products (answers)
    integer(4) :: kz

    ! y = A * x
    kz = 1
    y_tsq_k(kz) =      bb_tsq_k(kz) * x_tsq_k(kz)                            &
      + cc_tsq_k(kz) * x_tsq_k(kz + 1)                        &
      + pp_tc_k(kz) * x_cov_k(kz)

    y_qsq_k(kz) =      bb_qsq_k(kz) * x_qsq_k(kz)                            &
      + cc_qsq_k(kz) * x_qsq_k(kz + 1)                        &
      + pp_qc_k(kz) * x_cov_k(kz)

    y_cov_k(kz) =     bb_cov_k(kz) * x_cov_k(kz)                             &
      + cc_cov_k(kz) * x_cov_k(kz + 1)                        &
      + pp_ct_k(kz) * x_tsq_k(kz)                             &
      + pp_cq_k(kz) * x_qsq_k(kz)

    do kz = 2, nz - 1
      y_tsq_k(kz) = aa_tsq_k(kz) * x_tsq_k(kz - 1)                           &
        + bb_tsq_k(kz) * x_tsq_k(kz)                            &
        + cc_tsq_k(kz) * x_tsq_k(kz + 1)                        &
        + pp_tc_k(kz) * x_cov_k(kz)

      y_qsq_k(kz) = aa_qsq_k(kz) * x_qsq_k(kz - 1)                           &
        + bb_qsq_k(kz) * x_qsq_k(kz)                            &
        + cc_qsq_k(kz) * x_qsq_k(kz + 1)                        &
        + pp_qc_k(kz) * x_cov_k(kz)

      y_cov_k(kz) = aa_cov_k(kz) * x_cov_k(kz - 1)                           &
        + bb_cov_k(kz) * x_cov_k(kz)                            &
        + cc_cov_k(kz) * x_cov_k(kz + 1)                        &
        + pp_ct_k(kz) * x_tsq_k(kz)                             &
        + pp_cq_k(kz) * x_qsq_k(kz)

    end do

    kz = nz
    y_tsq_k(kz) = aa_tsq_k(kz) * x_tsq_k(kz - 1)                             &
      + bb_tsq_k(kz) * x_tsq_k(kz)                            &
      + pp_tc_k(kz) * x_cov_k(kz)

    y_qsq_k(kz) = aa_qsq_k(kz) * x_qsq_k(kz - 1)                             &
      + bb_qsq_k(kz) * x_qsq_k(kz)                            &
      + pp_qc_k(kz) * x_cov_k(kz)

    y_cov_k(kz) = aa_cov_k(kz) * x_cov_k(kz - 1)                             &
      + bb_cov_k(kz) * x_cov_k(kz)                            &
      + pp_ct_k(kz) * x_tsq_k(kz)                             &
      + pp_cq_k(kz) * x_qsq_k(kz)

    return

  end subroutine pbl_mym_simeq_matrix_prod

  !
  subroutine pbl_mym_simeq_ilud2_decmp(                                       &
    aa_tsq_k, bb_tsq_k, cc_tsq_k, pp_tc_k,                            &
    aa_qsq_k, bb_qsq_k, cc_qsq_k, pp_qc_k,                            &
    aa_cov_k, bb_cov_k, cc_cov_k, pp_ct_k, pp_cq_k,                   &
    aap_tsq_k, r_bbp_tsq_k, ccp_tsq_k,                                &
    ppp_tc_k, pp1_tc_k,  pp2_tc_k,                                    &
    aap_qsq_k, r_bbp_qsq_k, ccp_qsq_k,                                &
    ppp_qc_k, pp1_qc_k,  pp2_qc_k,                                    &
    aap_cov_k, r_bbp_cov_k, ccp_cov_k,                                &
    ppp_ct_k, ppp_cq_k, pp1_ct_k, pp1_cq_k, pp2_ct_k, pp2_cq_k)

    use pp_vardef
    use pbl_grid, only: nz
    implicit none

    ! intent in variables
    real(r_size), intent(in) :: aa_tsq_k(nz)
    real(r_size), intent(in) :: bb_tsq_k(nz)
    real(r_size), intent(in) :: cc_tsq_k(nz)
    real(r_size), intent(in) :: pp_tc_k(nz)
    real(r_size), intent(in) :: aa_qsq_k(nz)
    real(r_size), intent(in) :: bb_qsq_k(nz)
    real(r_size), intent(in) :: cc_qsq_k(nz)
    real(r_size), intent(in) :: pp_qc_k(nz)
    real(r_size), intent(in) :: aa_cov_k(nz)
    real(r_size), intent(in) :: bb_cov_k(nz)
    real(r_size), intent(in) :: cc_cov_k(nz)
    real(r_size), intent(in) :: pp_ct_k(nz)
    real(r_size), intent(in) :: pp_cq_k(nz)
                         ! matrix elements

    real(r_size), intent(out) :: aap_tsq_k(nz)
    real(r_size), intent(out) :: r_bbp_tsq_k(nz)
    real(r_size), intent(out) :: ccp_tsq_k(nz)
    real(r_size), intent(out) :: ppp_tc_k(nz)
    real(r_size), intent(out) :: pp1_tc_k(nz)
    real(r_size), intent(out) :: pp2_tc_k(nz)
    real(r_size), intent(out) :: aap_qsq_k(nz)
    real(r_size), intent(out) :: r_bbp_qsq_k(nz)
    real(r_size), intent(out) :: ccp_qsq_k(nz)
    real(r_size), intent(out) :: ppp_qc_k(nz)
    real(r_size), intent(out) :: pp1_qc_k(nz)
    real(r_size), intent(out) :: pp2_qc_k(nz)
    real(r_size), intent(out) :: aap_cov_k(nz)
    real(r_size), intent(out) :: r_bbp_cov_k(nz)
    real(r_size), intent(out) :: ccp_cov_k(nz)
    real(r_size), intent(out) :: ppp_ct_k(nz)
    real(r_size), intent(out) :: ppp_cq_k(nz)
    real(r_size), intent(out) :: pp1_ct_k(nz)
    real(r_size), intent(out) :: pp1_cq_k(nz)
    real(r_size), intent(out) :: pp2_ct_k(nz)
    real(r_size), intent(out) :: pp2_cq_k(nz)
                    ! matrix elements of the ILU decomposed matrix

    integer(4) :: kz
                                       ! loop indexes

    aap_tsq_k(1) = aa_tsq_k(1)
    r_bbp_tsq_k(1) = 1.0 / bb_tsq_k(1)
    ccp_tsq_k(1) = cc_tsq_k(1)

    aap_qsq_k(1) = aa_qsq_k(1)
    r_bbp_qsq_k(1) = 1.0 / bb_qsq_k(1)
    ccp_qsq_k(1) = cc_qsq_k(1)

    ppp_tc_k(1) = pp_tc_k(1)
    ppp_qc_k(1) = pp_qc_k(1)

    pp1_tc_k(1) = 0.0
    pp1_qc_k(1) = 0.0
    pp2_tc_k(1) = 0.0
    pp2_qc_k(1) = 0.0


    do kz = 2, nz
      aap_tsq_k(kz) = aa_tsq_k(kz)
      r_bbp_tsq_k(kz) = 1.0 / (bb_tsq_k(kz)                                 &
        - aap_tsq_k(kz) * ccp_tsq_k(kz - 1) * r_bbp_tsq_k(kz - 1))
      ccp_tsq_k(kz) = cc_tsq_k(kz)

      aap_qsq_k(kz) = aa_qsq_k(kz)
      r_bbp_qsq_k(kz) = 1.0 / (bb_qsq_k(kz)                                 &
        - aap_qsq_k(kz) * ccp_qsq_k(kz - 1) * r_bbp_qsq_k(kz - 1))
      ccp_qsq_k(kz) = cc_qsq_k(kz)

      ppp_tc_k(kz) = pp_tc_k(kz)
      ppp_qc_k(kz) = pp_qc_k(kz)

      pp1_tc_k(kz) = - aap_tsq_k(kz) * r_bbp_tsq_k(kz - 1) * ppp_tc_k(kz - 1)
      pp1_qc_k(kz) = - aap_qsq_k(kz) * r_bbp_qsq_k(kz - 1) * ppp_qc_k(kz - 1)

      pp2_tc_k(kz) = - aap_tsq_k(kz) * r_bbp_tsq_k(kz - 1) * pp1_tc_k(kz - 1)
      pp2_qc_k(kz) = - aap_qsq_k(kz) * r_bbp_qsq_k(kz - 1) * pp1_qc_k(kz - 1)

    end do

    kz = 1

    ppp_ct_k(kz) = pp_ct_k(kz)
    ppp_cq_k(kz) = pp_cq_k(kz)

    pp1_ct_k(kz) = -ppp_ct_k(kz) * r_bbp_tsq_k(kz) * ccp_tsq_k(kz)
    pp1_cq_k(kz) = -ppp_cq_k(kz) * r_bbp_qsq_k(kz) * ccp_qsq_k(kz)

    pp2_ct_k(kz) = - pp1_ct_k(kz) * r_bbp_tsq_k(kz + 1) * ccp_tsq_k(kz + 1)
    pp2_cq_k(kz) = - pp1_cq_k(kz) * r_bbp_qsq_k(kz + 1) * ccp_qsq_k(kz + 1)

    aap_cov_k(kz) = 0.0

    r_bbp_cov_k(kz) = 1.0 / (                                              &
      bb_cov_k(kz)                                           &
      - ppp_ct_k(kz) * r_bbp_tsq_k(kz) * ppp_tc_k(kz)          &
      - pp1_ct_k(kz) * r_bbp_tsq_k(kz + 1) * pp1_tc_k(kz + 1)  &
      - pp2_ct_k(kz) * r_bbp_tsq_k(kz + 2) * pp2_tc_k(kz + 2)  &
      - ppp_cq_k(kz) * r_bbp_qsq_k(kz) * ppp_qc_k(kz)          &
      - pp1_cq_k(kz) * r_bbp_qsq_k(kz + 1) * pp1_qc_k(kz + 1)  &
      - pp2_cq_k(kz) * r_bbp_qsq_k(kz + 2) * pp2_qc_k(kz + 2))

    ccp_cov_k(kz) = cc_cov_k(kz)                                            &
      - pp1_ct_k(kz) * r_bbp_tsq_k(kz + 1) * ppp_tc_k(kz + 1)   &
      - pp2_ct_k(kz) * r_bbp_tsq_k(kz + 2) * pp1_tc_k(kz + 2)   &
      - pp1_cq_k(kz) * r_bbp_qsq_k(kz + 1) * ppp_qc_k(kz + 1)   &
      - pp2_cq_k(kz) * r_bbp_qsq_k(kz + 2) * pp1_qc_k(kz + 2)

    do kz = 2, nz - 2
      ppp_ct_k(kz) = pp_ct_k(kz)
      ppp_cq_k(kz) = pp_cq_k(kz)

      pp1_ct_k(kz) = -ppp_ct_k(kz) * r_bbp_tsq_k(kz) * ccp_tsq_k(kz)
      pp1_cq_k(kz) = -ppp_cq_k(kz) * r_bbp_qsq_k(kz) * ccp_qsq_k(kz)

      pp2_ct_k(kz) = - pp1_ct_k(kz) * r_bbp_tsq_k(kz + 1) * ccp_tsq_k(kz + 1)
      pp2_cq_k(kz) = - pp1_cq_k(kz) * r_bbp_qsq_k(kz + 1) * ccp_qsq_k(kz + 1)

      aap_cov_k(kz) = aa_cov_k(kz)                                          &
        - ppp_ct_k(kz) * r_bbp_tsq_k(kz) * pp1_tc_k(kz)         &
        - pp1_ct_k(kz) * r_bbp_tsq_k(kz + 1) * pp2_tc_k(kz + 1) &
        - ppp_cq_k(kz) * r_bbp_qsq_k(kz) * pp1_qc_k(kz)         &
        - pp1_cq_k(kz) * r_bbp_qsq_k(kz + 1) * pp2_qc_k(kz + 1)

      r_bbp_cov_k(kz) = 1.0 / (                                            &
        bb_cov_k(kz)                                            &
        - ppp_ct_k(kz) * r_bbp_tsq_k(kz) * ppp_tc_k(kz)           &
        - pp1_ct_k(kz) * r_bbp_tsq_k(kz + 1) * pp1_tc_k(kz + 1)   &
        - pp2_ct_k(kz) * r_bbp_tsq_k(kz + 2) * pp2_tc_k(kz + 2)   &
        - ppp_cq_k(kz) * r_bbp_qsq_k(kz) * ppp_qc_k(kz)           &
        - pp1_cq_k(kz) * r_bbp_qsq_k(kz + 1) * pp1_qc_k(kz + 1)   &
        - pp2_cq_k(kz) * r_bbp_qsq_k(kz + 2) * pp2_qc_k(kz + 2)   &
        - aap_cov_k(kz) * r_bbp_cov_k(kz - 1) * ccp_cov_k(kz - 1))

      ccp_cov_k(kz) = cc_cov_k(kz)                                          &
        - pp1_ct_k(kz) * r_bbp_tsq_k(kz + 1) * ppp_tc_k(kz + 1) &
        - pp2_ct_k(kz) * r_bbp_tsq_k(kz + 2) * pp1_tc_k(kz + 2) &
        - pp1_cq_k(kz) * r_bbp_qsq_k(kz + 1) * ppp_qc_k(kz + 1) &
        - pp2_cq_k(kz) * r_bbp_qsq_k(kz + 2) * pp1_qc_k(kz + 2)
    end do

    kz = nz - 1

    ppp_ct_k(kz) = pp_ct_k(kz)
    ppp_cq_k(kz) = pp_cq_k(kz)

    pp1_ct_k(kz) = -ppp_ct_k(kz) * r_bbp_tsq_k(kz) * ccp_tsq_k(kz)
    pp1_cq_k(kz) = -ppp_cq_k(kz) * r_bbp_qsq_k(kz) * ccp_qsq_k(kz)
    pp2_ct_k(kz) = 0.0
    pp2_cq_k(kz) = 0.0

    aap_cov_k(kz) = aa_cov_k(kz)                                            &
      - ppp_ct_k(kz) * r_bbp_tsq_k(kz) * pp1_tc_k(kz)           &
      - pp1_ct_k(kz) * r_bbp_tsq_k(kz + 1) * pp2_tc_k(kz + 1)   &
      - ppp_cq_k(kz) * r_bbp_qsq_k(kz) * pp1_qc_k(kz)           &
      - pp1_cq_k(kz) * r_bbp_qsq_k(kz + 1) * pp2_qc_k(kz + 1)

    r_bbp_cov_k(kz) = 1.0 / (                                              &
      bb_cov_k(kz)                                            &
      - ppp_ct_k(kz) * r_bbp_tsq_k(kz) * ppp_tc_k(kz)           &
      - pp1_ct_k(kz) * r_bbp_tsq_k(kz + 1) * pp1_tc_k(kz + 1)   &
      - ppp_cq_k(kz) * r_bbp_qsq_k(kz) * ppp_qc_k(kz)           &
      - pp1_cq_k(kz) * r_bbp_qsq_k(kz + 1) * pp1_qc_k(kz + 1)   &
      - aap_cov_k(kz) * r_bbp_cov_k(kz - 1) * ccp_cov_k(kz - 1))

    ccp_cov_k(kz) = cc_cov_k(kz)                                            &
      - pp1_ct_k(kz) * r_bbp_tsq_k(kz + 1) * ppp_tc_k(kz + 1)   &
      - pp1_cq_k(kz) * r_bbp_qsq_k(kz + 1) * ppp_qc_k(kz + 1)


    kz = nz

    ppp_ct_k(kz) = pp_ct_k(kz)
    ppp_cq_k(kz) = pp_cq_k(kz)

    pp1_ct_k(kz) = 0.0
    pp1_cq_k(kz) = 0.0
    pp2_ct_k(kz) = 0.0
    pp2_cq_k(kz) = 0.0

    aap_cov_k(kz) = aa_cov_k(kz)                                            &
      - ppp_ct_k(kz) * r_bbp_tsq_k(kz) * pp1_tc_k(kz)           &
      - ppp_cq_k(kz) * r_bbp_qsq_k(kz) * pp1_qc_k(kz)
    r_bbp_cov_k(kz) = 1.0 / (                                              &
      bb_cov_k(kz)                                            &
      - ppp_ct_k(kz) * r_bbp_tsq_k(kz) * ppp_tc_k(kz)           &
      - ppp_cq_k(kz) * r_bbp_qsq_k(kz) * ppp_qc_k(kz)           &
      - aap_cov_k(kz) * r_bbp_cov_k(kz - 1) * ccp_cov_k(kz - 1))
    ccp_cov_k(kz) = 0.0

    return


  end subroutine pbl_mym_simeq_ilud2_decmp
!
  subroutine pbl_mym_simeq_lud(                                         &
    qq_tsq_k, qq_qsq_k, qq_cov_k,                                     &
    aa_tsq_k, bb_tsq_k, cc_tsq_k, pp_tc_k,                            &
    aa_qsq_k, bb_qsq_k, cc_qsq_k, pp_qc_k,                            &
    aa_cov_k, bb_cov_k, cc_cov_k, pp_ct_k, pp_cq_k,                   &
    tsq_k, qsq_k, cov_k)

    use pp_vardef
    use pbl_grid, only: nz
    implicit none

    ! intent in variables
    real(r_size), intent(in) :: qq_tsq_k(nz)
    real(r_size), intent(in) :: qq_qsq_k(nz)
    real(r_size), intent(in) :: qq_cov_k(nz)
    real(r_size), intent(in) :: aa_tsq_k(nz)
    real(r_size), intent(in) :: bb_tsq_k(nz)
    real(r_size), intent(in) :: cc_tsq_k(nz)
    real(r_size), intent(in) :: pp_tc_k(nz)
    real(r_size), intent(in) :: aa_qsq_k(nz)
    real(r_size), intent(in) :: bb_qsq_k(nz)
    real(r_size), intent(in) :: cc_qsq_k(nz)
    real(r_size), intent(in) :: pp_qc_k(nz)
    real(r_size), intent(in) :: aa_cov_k(nz)
    real(r_size), intent(in) :: bb_cov_k(nz)
    real(r_size), intent(in) :: cc_cov_k(nz)
    real(r_size), intent(in) :: pp_ct_k(nz)
    real(r_size), intent(in) :: pp_cq_k(nz)
                                   ! matrix elements

    real(r_size), intent(out) :: tsq_k(nz)
    real(r_size), intent(out) :: qsq_k(nz)
    real(r_size), intent(out) :: cov_k(nz)
                                  ! solved tsq, qsq and cov

    integer(4) :: kz
    integer(4) :: kk
    integer(4) :: l
    integer(4) :: m
    integer(4) :: n
                                ! loop indexes
    integer(4) :: kpiv
                                ! index of a pivot

    real(r_size) :: wk
                                ! a work variable

    real(r_size) ::  amat(3 * nz, 3 * nz)
                                ! coefficient matrix
    real(r_size) :: bvec(3 * nz)
                                ! vector in the right hand side



    amat(:, :) = 0.0

    do kz = 1, nz
      amat(kz, kz) = bb_tsq_k(kz)
      amat(nz + kz, nz + kz) = bb_qsq_k(kz)
      amat(2 * nz + kz, 2 * nz + kz) = bb_cov_k(kz)
      bvec(kz)    = qq_tsq_k(kz)
      bvec(nz + kz) = qq_qsq_k(kz)
      bvec(2 * nz + kz) = qq_cov_k(kz)
    end do

    do kz = 2, nz
      amat(kz, kz-1) = aa_tsq_k(kz)
      amat(nz + kz, nz + kz - 1) = aa_qsq_k(kz)
      amat(2 * nz + kz, 2 * nz + kz - 1) = aa_cov_k(kz)
    end do

    do kz = 1, nz - 1
      amat(kz, kz+1) = cc_tsq_k(kz)
      amat(nz + kz, nz + kz + 1) = cc_qsq_k(kz)
      amat(2 * nz + kz, 2 * nz + kz + 1) = cc_cov_k(kz)
    end do

    do kz = 1, nz
      amat(kz, 2 * nz + kz) = pp_tc_k(kz)
      amat(nz + kz, 2 * nz + kz) = pp_qc_k(kz)
      amat(2 * nz + kz, kz) = pp_ct_k(kz)
      amat(2 * nz + kz, nz + kz) = pp_cq_k(kz)
    end do


    n = 3 * nz
    ! main part
    do kk = 1, n
      kpiv = kk
      wk  = abs(amat(kk, kk))
      do l = kk + 1, n
        if(abs(amat(l, kk)) > wk) then
          kpiv = l
          wk  = abs(amat(l, kk))
        end if
      end do

      if(kpiv /= kk) then
        do m = 1, n
          wk       = amat(kk, m)
          amat(kk, m)    = amat(kpiv, m)
          amat(kpiv, m) = wk
        end do
        wk   = bvec(kk)
        bvec(kk) = bvec(kpiv)
        bvec(kpiv) = wk
      end if

      amat(kk, kk) = 1.0 / amat(kk, kk)

      do l = kk + 1, n
        amat(l, kk) = amat(l, kk) * amat(kk, kk)
      end do


      do m = kk + 1, n
        do l = kk + 1, n
          amat(l, m) = amat(l, m) - amat(kk, m) * amat(l, kk)
        end do
      end do
    end do  ! loop kk = 1, n

    do m = 1, n - 1
      do l = m + 1, n
        bvec(l) = bvec(l) - bvec(m) * amat(l, m)
      end do
    end do

    do m = n, 1, -1
      bvec(m) = bvec(m) * amat(m, m)
      do l = 1, m - 1
        bvec(l) = bvec(l) - amat(l, m) * bvec(m)
      end do
    end do

    do kz = 1, nz
      tsq_k(kz) = bvec(kz)
      qsq_k(kz) = bvec(nz + kz)
      cov_k(kz) = bvec(2 * nz + kz)
    end do
    return

  end subroutine pbl_mym_simeq_lud
!
  subroutine pbl_mym_diff_matcoef(coef, &
    &  dfm, f2h_m, f2h_p, rdz_f, rdz_h, &
    &  aa, bb, cc)
    use pp_vardef
    use pbl_grid, only: nz
    implicit none

    real(r_size), intent(in) :: coef
    real(r_size), intent(in) :: dfm(nz)
    real(r_size), intent(in) :: f2h_m(nz-1)
    real(r_size), intent(in) :: f2h_p(nz-1)
    real(r_size), intent(in) :: rdz_f(nz)
    real(r_size), intent(in) :: rdz_h(nz-1)

    real(r_size), intent(out) :: aa(nz)
    real(r_size), intent(out) :: bb(nz)
    real(r_size), intent(out) :: cc(nz)


    integer(4) :: kz
    real(r_size) :: km_m1, km_p1

    ! Calculate aa, bb, cc
    kz = 1
    km_p1 = coef * ( &
      & f2h_p(kz) * dfm(kz + 1) + f2h_m(kz) * dfm(kz))

    cc(kz) = km_p1 * rdz_f(kz) * rdz_h(kz)
    aa(kz) = 0.0
    bb(kz) = - aa(kz) - cc(kz)

    do kz = 2, nz - 1
      km_p1 = coef * ( &
        & f2h_p(kz) * dfm(kz + 1) + f2h_m(kz) * dfm(kz))
      km_m1 = coef * ( &
        & f2h_p(kz - 1) * dfm(kz) + f2h_m(kz - 1) * dfm(kz - 1))

      cc(kz) = km_p1 * rdz_f(kz) * rdz_h(kz)
      aa(kz) = km_m1 * rdz_f(kz) * rdz_h(kz - 1)
      bb(kz) = -aa(kz) - cc(kz)
    end do

    kz = nz
    km_m1 = coef * ( &
      & f2h_p(kz - 1) * dfm(kz) + f2h_m(kz - 1) * dfm(kz - 1))
    km_p1 = 0.0

    cc(kz) = 0.0
    aa(kz) = km_m1 * rdz_f(kz) * rdz_h(kz - 1)
    bb(kz) = -aa(kz) - cc(kz)
    return

  end subroutine pbl_mym_diff_matcoef

end module pbl_mym_simeq




