########################################################################
# ISPACK FORTRAN SUBROUTINE LIBRARY FOR SCIENTIFIC COMPUTING
# Copyright (C) 1998--2016 Keiichi Ishioka <ishioka@gfd-dennou.org>
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License as published by the Free Software Foundation; either
# version 2.1 of the License, or (at your option) any later version.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# 
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301 USA.
########################################################################
.text
.globl lvogzs_
.globl _lvogzs_
#-----------------------------------------------------------------------
# void lvogzs_(const int *JB, const double *AC, double *SD, double *Q)
#
# Syntax: AT&T (GAS), x86-64 with AVX-512F.  All four arguments are
# addresses and arrive in rdi, rsi, rdx, rcx.
#
#   rdi  JB  address of a 32-bit block count
#   rsi  AC  address of 4 doubles (labelled AC5..AC8 by the original
#            author); each is broadcast to all 8 lanes
#   rdx  SD  address of 4 doubles, updated in place
#   rcx  Q   address of JB blocks of 320 bytes each; accessed with
#            aligned 512-bit moves, so it must be 64-byte aligned
#
# Layout of one 320-byte block of Q (rows of 8 doubles = 64 bytes):
#   X2 at +0, Q1 at +64, Q2 at +128, G1R at +192, G2R at +256
#
# Work per block, lane by lane (Q1 and Q2 on the right-hand sides are
# the values loaded at the top of the iteration):
#   Q1new = Q2    * (AC[0]*X2 + AC[1]) + Q1     stored back to Q1
#   Q2new = Q1new * (AC[2]*X2 + AC[3]) + Q2     stored back to Q2
#   zmm0 += G2R*Q1      zmm1 += G1R*Q1
#   zmm2 += G2R*Q2      zmm3 += G1R*Q2
# After the loop: SD[k] += sum of the 8 lanes of zmm<k>, k = 0..3.
#
# When JB <= 0 the routine returns at once and touches neither SD
# nor Q.
#
# Clobbers: rax, rdi, r8, flags, zmm0-zmm15.  The x87 stack is used
# for the lane sums and is left balanced.  Leaf routine: the 64 bytes
# directly below rsp serve as scratch without moving rsp.
#-----------------------------------------------------------------------
lvogzs_:
_lvogzs_:
	movl	(%rdi), %edi		# edi = JB; the 32-bit write clears the upper half of rdi
	testl	%edi, %edi
	jle	Llvogzs_ret		# JB <= 0: the bottom-tested loop below would never hit its end pointer

	shlq	$6, %rdi		# rdi = JB*64
	leaq	(%rdi,%rdi,4), %r8	# r8  = JB*64*5 = JB*320 bytes
	addq	%rcx, %r8		# r8  = address one past the last block of Q

	vzeroall			# accumulators zmm0-zmm3 start at zero
	vbroadcastsd	  (%rsi), %zmm8		# AC[0] in every lane
	vbroadcastsd	 8(%rsi), %zmm9		# AC[1]
	vbroadcastsd	16(%rsi), %zmm10	# AC[2]
	vbroadcastsd	24(%rsi), %zmm11	# AC[3]

	movq	%rcx, %rax		# rax = address of the current block

Llvogzs_loop:
	vmovapd	   (%rax), %zmm14	# X2
	vmovapd	%zmm14, %zmm15		# second copy of X2
	vmovapd	 64(%rax), %zmm12	# Q1 as loaded (kept for the sums below)
	vmovapd	128(%rax), %zmm13	# Q2 as loaded (kept for the sums below)

	vfmadd213pd	%zmm9, %zmm8, %zmm15	# zmm15 = AC[0]*X2 + AC[1]
	vfmadd213pd	%zmm12, %zmm13, %zmm15	# zmm15 = Q2*zmm15 + Q1   (Q1new)
	vmovapd	%zmm15, 64(%rax)		# store Q1new

	vfmadd213pd	%zmm11, %zmm10, %zmm14	# zmm14 = AC[2]*X2 + AC[3]
	vfmadd213pd	%zmm13, %zmm15, %zmm14	# zmm14 = Q1new*zmm14 + Q2 (Q2new)
	vmovapd	%zmm14, 128(%rax)		# store Q2new

	vmovapd	192(%rax), %zmm14		# G1R
	vfmadd231pd	%zmm12, %zmm14, %zmm1	# zmm1 += G1R*Q1
	vfmadd231pd	%zmm13, %zmm14, %zmm3	# zmm3 += G1R*Q2

	vmovapd	256(%rax), %zmm14		# G2R
	vfmadd231pd	%zmm12, %zmm14, %zmm0	# zmm0 += G2R*Q1
	vfmadd231pd	%zmm13, %zmm14, %zmm2	# zmm2 += G2R*Q2

	addq	$320, %rax		# advance to the next block
	cmpq	%rax, %r8
	jne	Llvogzs_loop

	# Horizontal sums.  Each accumulator is spilled to the 64 bytes
	# below rsp and its lanes are added one by one, lowest lane first,
	# to SD[k] on the x87 stack.  The order and the use of x87 are kept
	# exactly as in the original so the rounding of SD is unchanged.

	vmovupd	%zmm0, -64(%rsp)	# SD[0] += lanes of zmm0
	fldl	(%rdx)
	faddl	-64(%rsp)
	faddl	-56(%rsp)
	faddl	-48(%rsp)
	faddl	-40(%rsp)
	faddl	-32(%rsp)
	faddl	-24(%rsp)
	faddl	-16(%rsp)
	faddl	-8(%rsp)
	fstpl	(%rdx)

	vmovupd	%zmm1, -64(%rsp)	# SD[1] += lanes of zmm1
	fldl	8(%rdx)
	faddl	-64(%rsp)
	faddl	-56(%rsp)
	faddl	-48(%rsp)
	faddl	-40(%rsp)
	faddl	-32(%rsp)
	faddl	-24(%rsp)
	faddl	-16(%rsp)
	faddl	-8(%rsp)
	fstpl	8(%rdx)

	vmovupd	%zmm2, -64(%rsp)	# SD[2] += lanes of zmm2
	fldl	16(%rdx)
	faddl	-64(%rsp)
	faddl	-56(%rsp)
	faddl	-48(%rsp)
	faddl	-40(%rsp)
	faddl	-32(%rsp)
	faddl	-24(%rsp)
	faddl	-16(%rsp)
	faddl	-8(%rsp)
	fstpl	16(%rdx)

	vmovupd	%zmm3, -64(%rsp)	# SD[3] += lanes of zmm3
	fldl	24(%rdx)
	faddl	-64(%rsp)
	faddl	-56(%rsp)
	faddl	-48(%rsp)
	faddl	-40(%rsp)
	faddl	-32(%rsp)
	faddl	-24(%rsp)
	faddl	-16(%rsp)
	faddl	-8(%rsp)
	fstpl	24(%rdx)

	vzeroupper			# clear upper vector state before going back to the caller
Llvogzs_ret:
	ret
