/* ---------------------------------------------------------------------
 *
 * -- Automatically Tuned Linear Algebra Software (ATLAS)
 *    (C) Copyright 1999 All Rights Reserved
 *
 * -- ATLAS routine -- Version 2.0 -- December 25, 1999
 *
 * -- Suggestions,  comments,  bugs reports should be sent to the follo-
 *    wing e-mail address: atlas@cs.utk.edu
 *
 *  Author         : Antoine P. Petitet
 * University of Tennessee - Innovative Computing Laboratory
 * Knoxville TN, 37996-1301, USA.
 *
 * ---------------------------------------------------------------------
 *
 * -- Copyright notice and Licensing terms:
 *
 * Redistribution  and  use in  source and binary forms, with or without
 * modification, are  permitted provided  that the following  conditions
 * are met:
 *
 * 1) Redistributions  of  source  code  must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2) Redistributions in binary form must reproduce  the above copyright
 *    notice,  this list of  conditions and the  following disclaimer in
 *    the documentation and/or other materials provided with the distri-
 *    bution.
 * 3) All advertising materials mentioning features or use of this soft-
 *    ware must display the folowing acknowledgement:
 *    This product includes software developed by the ATLAS group of the
 *    University of Tennesee, Knoxville and its contributors.
 * 4) The names of the  University of Tennessee,  Knoxville,  the  ATLAS
 *    group, or the names of its contributors may not be used to endorse
 *    or  promote products derived  from  this software without specific
 *    prior written permission.
 *
 * -- Disclaimer:
 *
 * The University of Tennessee, Knoxville,  the ATLAS group,  or the au-
 * thors make  no representations about the suitability of this software
 * for any purpose.  This software is provided ``as is'' without express
 * or implied warranty.
 *
 * ---------------------------------------------------------------------
 */
/*
 * Include files
 */
#include "atlas_refmisc.h"
#include "atlas_reflevel2.h"

void ATL_zreftbmv
(
   const enum ATLAS_UPLO      UPLO,
   const enum ATLAS_TRANS     TRANS,
   const enum ATLAS_DIAG      DIAG,
   const int                  N,
   const int                  K,
   const double               * A,
   const int                  LDA,
   double                     * X,
   const int                  INCX
)
{
/*
 * Purpose
 * =======
 *
 * ATL_zreftbmv overwrites the complex n-vector x with one of
 *
 *    x := A * x,   or   x := conjg( A  ) * x,   or
 *
 *    x := A'* x,   or   x := conjg( A' ) * x,
 *
 * where  A  is an  n by n  upper or lower triangular band matrix having
 * ( k + 1 ) diagonals, with either a unit or a non-unit diagonal.
 *
 * Arguments
 * =========
 *
 * UPLO    (input)                       const enum ATLAS_UPLO
 *         Selects the triangle held in A:
 *
 *             UPLO = AtlasUpper   A is an upper triangular matrix.
 *
 *             UPLO = AtlasLower   A is a lower triangular matrix.
 *
 *         Any value other than AtlasUpper is handled as AtlasLower.
 *         Unchanged on exit.
 *
 * TRANS   (input)                       const enum ATLAS_TRANS
 *         Selects the operation:
 *
 *            TRANS = AtlasNoTrans     x := A *x,
 *
 *            TRANS = AtlasConj        x := conjg( A ) * x,
 *
 *            TRANS = AtlasTrans       x := A'*x,
 *
 *            TRANS = AtlasConjTrans   x := conjg( A' )*x.
 *
 *         Any value other than the first three is handled as
 *         AtlasConjTrans. Unchanged on exit.
 *
 * DIAG    (input)                       const enum ATLAS_DIAG
 *         Selects how the diagonal of A is treated:
 *
 *            DIAG = AtlasUnit       the diagonal is taken to be all
 *                                   ones and is never read from A,
 *
 *            DIAG = AtlasNonUnit    the diagonal is read from A.
 *
 *         Any value other than AtlasNonUnit is handled as AtlasUnit.
 *         Unchanged on exit.
 *
 * N       (input)                       const int
 *         Order of the matrix A. N must be at least zero; the routine
 *         returns immediately when N is zero. Unchanged on exit.
 *
 * K       (input)                       const int
 *         Number of super-diagonals ( UPLO = AtlasUpper ) or of sub-
 *         diagonals ( UPLO = AtlasLower ) of A.  K must satisfy 0 <= K.
 *         Unchanged on exit.
 *
 * A       (input)                       const double *
 *         Points to an array of at least LDA * n * sizeof( double[2] )
 *         bytes; every complex entry occupies two consecutive doubles
 *         ( real part first ).
 *
 *         With UPLO = AtlasUpper, the leading (k + 1) by n part of the
 *         array holds the upper triangular band column by column, the
 *         main diagonal being stored in row k, the first super-diagonal
 *         in row k-1 starting at position 1, and so on.  The top left
 *         k by k triangle of the array is not referenced.  A full
 *         matrix can be packed into this layout with:
 *
 *            for( j = 0; j < n; j++ )
 *            {
 *               m  = k - j;
 *               for( i = ( m < 0 ? -m : 0 ); i <= j; i++ )
 *               {
 *                  a[((m+i+j*LDA)<<1)+0] = real( matrix( i, j ) );
 *                  a[((m+i+j*LDA)<<1)+1] = imag( matrix( i, j ) );
 *               }
 *            }
 *
 *         With UPLO = AtlasLower, the leading (k + 1) by n part of the
 *         array holds the lower triangular band column by column, the
 *         main diagonal being stored in row 0, the first sub-diagonal
 *         in row 1 starting at position 0, and so on.  The bottom right
 *         k by k triangle of the array is not referenced.  A full
 *         complex matrix can be packed into this layout with:
 *
 *            for( j = 0; j < n; j++ )
 *            {
 *               i1 = ( n > j + k + 1 ? j + k + 1 : n );
 *               for( i = j; i < i1; i++ )
 *               {
 *                  a[((i-j+j*LDA)<<1)+0] = real( matrix( i, j ) );
 *                  a[((i-j+j*LDA)<<1)+1] = imag( matrix( i, j ) );
 *               }
 *            }
 *
 *         Unchanged on exit.
 *
 * LDA     (input)                       const int
 *         Leading dimension of A, counted in complex entries.  LDA must
 *         be at least k + 1. Unchanged on exit.
 *
 * X       (input/output)                double *
 *         Points to the first entry to be accessed of an incremented
 *         array of at least
 *            ( 1 + ( n - 1 ) * abs( INCX ) ) * sizeof( double[2] )
 *         bytes holding the vector x.  On exit, X is overwritten with
 *         the transformed vector x.
 *
 * INCX    (input)                       const int
 *         Increment, in complex entries, between consecutive elements
 *         of X.  INCX must not be zero. Unchanged on exit.
 *
 * Notes
 * =====
 *
 * The complex arithmetic is done through the Mset, Mmla, Mmul and Mdscl
 * macros, which are not defined in this file.  The comments below assume
 * Mset( a, b ) is  b = a,  Mmla( a, b, c ) is  c += a * b,  Mmul( a, b,
 * c ) is  c = a * b  and  Mdscl( a, c ) is  c = a * c  -- TODO confirm
 * against the header that provides them.
 *
 * ---------------------------------------------------------------------
 */
/*
 * .. Local Variables ..
 */
   const int                  a_step = LDA << 1, x_step = 2 * INCX;
   const int                  is_upper = ( UPLO == AtlasUpper );
   const int                  non_unit = ( DIAG == AtlasNonUnit );
   int                        a_col, a_pos, col, conjugate, first, last,
                              row, transpose, x_col, x_lead = 0, x_row;
   double                     aij_im, aij_re, t_im, t_re;
/* ..
 * .. Executable Statements ..
 *
 */
   if( N == 0 ) return;
/*
 * Reduce the four TRANS cases to two independent flags; the conjugation
 * is applied when a coefficient of A is loaded.
 */
   if(      TRANS == AtlasNoTrans ) { transpose = 0; conjugate = 0; }
   else if( TRANS == AtlasConj    ) { transpose = 0; conjugate = 1; }
   else if( TRANS == AtlasTrans   ) { transpose = 1; conjugate = 0; }
   else                             { transpose = 1; conjugate = 1; }

   if( !transpose )
   {
      if( is_upper )
      {
/*
 * Columns are swept left to right:  x(j) is saved before it is scaled,
 * and only entries above row j, already final, receive contributions.
 * x_lead follows the offset in X of the first row inside the band.
 */
         for( col = 0, a_col = 0, x_col = x_lead; col < N;
              col++, a_col += a_step, x_col += x_step )
         {
            Mset( X[x_col], X[x_col+1], t_re, t_im );
            first = ( col - K > 0 ? col - K : 0 );
            a_pos = ( ( K - col + first ) << 1 ) + a_col;
            x_row = x_lead;
            for( row = first; row < col;
                 row++, a_pos += 2, x_row += x_step )
            {
               aij_re = A[a_pos];
               aij_im = ( conjugate ? -A[a_pos+1] : A[a_pos+1] );
               Mmla( aij_re, aij_im, t_re, t_im, X[x_row], X[x_row+1] );
            }
            if( non_unit )
            {
/*
 * a_pos now designates the diagonal entry of the current column.
 */
               aij_re = A[a_pos];
               aij_im = ( conjugate ? -A[a_pos+1] : A[a_pos+1] );
               Mmul( aij_re, aij_im, t_re, t_im, X[x_col], X[x_col+1] );
            }
            if( col >= K ) { x_lead += x_step; }
         }
      }
      else
      {
/*
 * Columns are swept right to left:  x(j) is saved and scaled first, then
 * its saved value is propagated to the entries below row j.
 */
         for( col = N-1, a_col = (N-1)*a_step, x_col = (N-1)*x_step;
              col >= 0; col--, a_col -= a_step, x_col -= x_step )
         {
            Mset( X[x_col], X[x_col+1], t_re, t_im );
            if( non_unit )
            {
               aij_re = A[a_col];
               aij_im = ( conjugate ? -A[a_col+1] : A[a_col+1] );
               Mdscl( aij_re, aij_im, X[x_col], X[x_col+1] );
            }
            last  = ( N - 1 > col + K ? col + K : N - 1 );
            a_pos = 2 + a_col;
            x_row = x_col + x_step;
            for( row = col + 1; row <= last;
                 row++, a_pos += 2, x_row += x_step )
            {
               aij_re = A[a_pos];
               aij_im = ( conjugate ? -A[a_pos+1] : A[a_pos+1] );
               Mmla( aij_re, aij_im, t_re, t_im, X[x_row], X[x_row+1] );
            }
         }
      }
   }
   else
   {
      if( is_upper )
      {
/*
 * Columns are swept right to left:  the new x(j) is a dot product of
 * column j of the band with entries of x at rows <= j, none of which has
 * been overwritten yet.
 */
         for( col = N-1, a_col = (N-1)*a_step, x_col = (N-1)*x_step;
              col >= 0; col--, a_col -= a_step, x_col -= x_step )
         {
            Mset( ATL_dZERO, ATL_dZERO, t_re, t_im );
            first = ( col - K > 0 ? col - K : 0 );
            a_pos = ( ( K - col + first ) << 1 ) + a_col;
            x_row = first * x_step;
            for( row = first; row < col;
                 row++, a_pos += 2, x_row += x_step )
            {
               aij_re = A[a_pos];
               aij_im = ( conjugate ? -A[a_pos+1] : A[a_pos+1] );
               Mmla( aij_re, aij_im, X[x_row], X[x_row+1], t_re, t_im );
            }
            if( non_unit )
            {
               aij_re = A[a_pos];
               aij_im = ( conjugate ? -A[a_pos+1] : A[a_pos+1] );
               Mmla( aij_re, aij_im, X[x_col], X[x_col+1], t_re, t_im );
            }
            else
            {
/*
 * Unit diagonal: x(j) itself is the diagonal contribution.
 */
               Mset( t_re + X[x_col], t_im + X[x_col+1], t_re, t_im );
            }
            Mset( t_re, t_im, X[x_col], X[x_col+1] );
         }
      }
      else
      {
/*
 * Columns are swept left to right:  the new x(j) is a dot product of
 * column j of the band with entries of x at rows >= j, none of which has
 * been overwritten yet.
 */
         for( col = 0, a_col = 0, x_col = 0; col < N;
              col++, a_col += a_step, x_col += x_step )
         {
            if( non_unit )
            {
               aij_re = A[a_col];
               aij_im = ( conjugate ? -A[a_col+1] : A[a_col+1] );
               Mmul( aij_re, aij_im, X[x_col], X[x_col+1], t_re, t_im );
            }
            else
            {
               Mset( X[x_col], X[x_col+1], t_re, t_im );
            }
            last  = ( N - 1 > col + K ? col + K : N - 1 );
            a_pos = 2 + a_col;
            x_row = x_col + x_step;
            for( row = col + 1; row <= last;
                 row++, a_pos += 2, x_row += x_step )
            {
               aij_re = A[a_pos];
               aij_im = ( conjugate ? -A[a_pos+1] : A[a_pos+1] );
               Mmla( aij_re, aij_im, X[x_row], X[x_row+1], t_re, t_im );
            }
            Mset( t_re, t_im, X[x_col], X[x_col+1] );
         }
      }
   }
/*
 * End of ATL_zreftbmv
 */
}
