#include <time.h>

#include <math.h>
#include "eno.h"

#include "eno.xtn"
#include "fluids.xtn"

extern void construct_a();
extern void zap_v();
extern void assign_v();
extern void add_v();
extern void resid_v();
extern void gs_iterate();
extern void coarse_v();
extern void fine_v();
extern void initgrids_v();

/* coarse grid storage below */
double uval[imax][jmax],fval[imax][jmax],
       b00[imax][jmax],b01[imax][jmax],b02[imax][jmax],
       b10[imax][jmax],b11[imax][jmax],b12[imax][jmax],
       b20[imax][jmax],b21[imax][jmax],b22[imax][jmax];


double alphaij[imax][jmax],ap[imax][jmax],
       r1[imax][jmax],z1[imax][jmax],
       r2[imax][jmax],p[imax][jmax];

logical use_multigrid,mg_alone;
int km,kmx,kmy;

void copy_vector(source,dest)
double source[imax][jmax],dest[imax][jmax];
{
int ix,iy;

  for_all_points_int
   dest[ix][iy]=source[ix][iy];
}

double inner_prod(z1,r1)
double z1[imax][jmax],r1[imax][jmax];
{
int ix,iy;
double sum_val;
  
  sum_val = 0.0;
  for_all_points_int
    sum_val += z1[ix][iy]*r1[ix][iy];
  return sum_val;
}

void saxpy(dest,a1,beta,b1)
double dest[imax][jmax],a1[imax][jmax],
       beta,b1[imax][jmax];
{
int ix,iy;

  for_all_points_int
   dest[ix][iy] = a1[ix][iy]+beta*b1[ix][iy];
}

 
/*
  compute ap = Ap

  remember, aij is symmetric.
  WE COMPUTE POINTS FOR AIJ(X,Y) where x,y is out of domain for periodic.
*/
void a_prod(p,ap)
double p[imax][jmax],ap[imax][jmax];
{
int ix,iy;

  for_all_points
   if (! point1_ok)
    p[ix][iy]=gst2(p,ix,iy);

  for_all_points_int
    {
     ap[ix][iy]=a11[ix][iy]*p[ix][iy];

     ap[ix][iy]+=a12[ix][iy-1]*p[ix][iy-1];
     ap[ix][iy]+=a21[ix-1][iy]*p[ix-1][iy];
     ap[ix][iy]+=a12[ix][iy]*p[ix][iy+1];
     ap[ix][iy]+=a21[ix][iy]*p[ix+1][iy];

     ap[ix][iy]+=a22[ix-1][iy-1]*p[ix-1][iy-1];
     ap[ix][iy]+=a02[ix+1][iy-1]*p[ix+1][iy-1];
     ap[ix][iy]+=a22[ix][iy]*p[ix+1][iy+1];
     ap[ix][iy]+=a02[ix][iy]*p[ix-1][iy+1];
    }    /* looping with ix,iy */
}

/*
   solve z1 = minv(r1)
   m = LL(t)   LLt is an approximate LU factorization determined by the
            alphaij's + pij,mij,dij,eij.
*/

void relax_fine(uval,fval,fval_coarse,k,num_gs)
double uval[imax][jmax],fval[imax][jmax],
       fval_coarse[imax][jmax];
int k,num_gs;
{
int l;
double error_inf;

 error_inf=1.0;
 for (l=1;l<=num_gs;l++)
  gs_iterate(k,fval,uval);
 resid_v(k,uval,fval,yy,&error_inf);

 if (k<km)
  coarse_v(k,yy,fval_coarse);
 else
  {
   while (error_inf>tolerance_matrix)
    {
     gs_iterate(k,fval,uval);
     l++;
     resid_v(k,uval,fval,yy,&error_inf);
    }
  }   /* solve exactly on the coarsest level */
}   /* relax_fine */
 
void coarse_correct(uval,uval_coarse,fval,k,num_gs)
double uval[imax][jmax],fval[imax][jmax],
       uval_coarse[imax][jmax];
int k,num_gs;
{
int l;

 fine_v(k+1,uval_coarse,yy);
 add_v(k,uval,yy);
 for (l=1;l<=num_gs;l++)
  gs_iterate(k,fval,uval);
}  /* coarse_correct */

/* WARNING, periodicity in y not implemented yet */ 
void m_solve(r1,z1)
double r1[imax][jmax],z1[imax][jmax];
{
int ix,iy;
int k,k_cycle,it_max;

  if (! use_multigrid)
  {
  for_all_points
   if (! point1_ok)
    {
     /*   We use different bc for s_t than for s so we have to
        BE CAREFUL */

     if (iy<iymin)
      yy[ix][iy]=bottom_stream_value_global;
     else if (iy>iymax)
      yy[ix][iy]=top_stream_value_global;
     else
      yy[ix][iy]=0.0;  /* gst2 not called here */
    }

  /* compute Ly = r1 */
  for_all_y_int
   {
    for_all_x_int
    {
     yy[ix][iy]=r1[ix][iy];
     yy[ix][iy]-=a10[ix][iy]*yy[ix][iy-1];
     yy[ix][iy]-=a01[ix][iy]*yy[ix-1][iy];
     yy[ix][iy]-=a00[ix][iy]*yy[ix-1][iy-1];
     yy[ix][iy]-=a20[ix][iy]*yy[ix+1][iy-1];
    }
    if (mx_type==PERIODIC)
     yy[ixmax][iy]-=a20[ixmax][iy]*yy[ixmin][iy-1];
   }

  for_all_points_int
   yy[ix][iy] *= alphaij[ix][iy];

  for_all_points
   if (! point1_ok)
    {
     if (iy<iymin)
      z1[ix][iy]=bottom_stream_value_global;
     else if (iy>iymax)
      z1[ix][iy]=top_stream_value_global;
     else
      z1[ix][iy]=0.0;  /* gst2 not called here */
    }

  /* compute Uz1 = y */
  for (iy=iymax;iy>=iymin;iy--)
   {
    for (ix=ixmax;ix>=ixmin;ix--)
    {
     z1[ix][iy]=yy[ix][iy];
     z1[ix][iy]-=a10[ix][iy+1]*z1[ix][iy+1];
     z1[ix][iy]-=a01[ix+1][iy]*z1[ix+1][iy];
     z1[ix][iy]-=a00[ix+1][iy+1]*z1[ix+1][iy+1];
     z1[ix][iy]-=a20[ix-1][iy+1]*z1[ix-1][iy+1];
    }
    if (mx_type==PERIODIC)
     z1[ixmin][iy]-=a20[ixmax][iy+1]*z1[ixmax][iy+1];
   }   /* Uz1 = y */
  } /* not use_multigrid */
  else
  {
#define gs_max 2
#define relax_step(kk,steps) if (kk==1) relax_fine(z1,r1,fval,kk,steps); \
  else relax_fine(uval,fval,fval,kk,steps)
#define coarse_step(kk) if (kk==1) coarse_correct(z1,uval,r1,kk,gs_max);  \
  else coarse_correct(uval,uval,fval,kk,gs_max)

  if (! mg_alone)
   zap_v(z1,1);
  for (k=2;k<=km;k++)
   zap_v(uval,k);

  for (k=1;k<=km;k++)
   relax_step(k,gs_max);
  for (k=km-1;k>=1;k--)
   coarse_step(k); 
  }   /* multigrid preconditioner */
    
}   /* m_solve */
              
/* 
   compute Ax = b 
   
   source_term - b
   result_term - x

   result_term is initialized BEFORE routine is called.

   x coordinant range : 0..mx-1
   y   "          "   : 0..my-1

   A is n1n2Xn1n2 matrix
   n1 = mx-1
   n2 = my-1
   FOR FREE_BDRY, n1=mx   n2=my+1

   A is symmetric positive definite

   aij,bij,alphaij,ss 
    are n1n2X1 vectors.
   alphaij is scratch variable used by routine.  (0..n1n2-1)
   r1,r2,z1,z2,ap,p,y   "                 "

   NOTE : lower diagonal entries must be initialized for
          cholesky stuff to work.

   Routine uses an IC preconditioned conjugate gradiant method.

   m = LL(t)  where l=0 when a=0
   m=a when a != 0

   
   PCG algorithm taken from p.374 of Golub and Van Loan.
   A SPD is preconditioned by M which could be written as c*c.

   p. 376 gives algorithm for computing ILU for SYMMETRIC matrix.   
*/
void invert_matrix(tolerance,iterations,ss,use_corner_difference)
double tolerance;
int *iterations;
double ss[imax][jmax];
logical use_corner_difference;
{
int ix,iy;
int k,temp_mx,temp_my;
double temp_prod,temp_prod_hld,norm_r1,beta,aa,
       truncation,avg_error,hld,temp,milu;
clock_t t0,t1;
logical periodicx;

if (use_debug)
  printf("bc top_stream,bot_stream %10.5f %10.5f \n",
   top_stream_value_global,bottom_stream_value_global);

  t0=clock();

  temp_mx=2;
  kmx=1;
  while (temp_mx<mx)
   { temp_mx*=2;kmx++; }
  temp_my=2;
  kmy=1;
  while (temp_my<my)
   { temp_my*=2;kmy++; }

  if ((my==temp_my)&&(mx==temp_mx))
   use_multigrid=TRUE;
  else 
   use_multigrid=FALSE;

use_multigrid=FALSE;

mg_alone=TRUE;
if (! use_multigrid)
 mg_alone=FALSE;

milu = 1.0-1.0/(min(mx,my));
/*
milu=0.0;
*/
if (use_debug)
{
  if (use_multigrid)
   printf("using multigrid kmx,kmy %5d%5d\n",kmx,kmy);
  else
   printf("using iccf pcg milu=%10.5f\n",milu);
}

  if (mg_alone)
   printf("using multigrid w/o cg scheme\n");

  if (use_multigrid)
   initgrids_v();
  else
  {
  for_all_points_int
   alphaij[ix][iy]=a11[ix][iy];

  /* everything must be in ORDER : k>j>i */
  for_all_points_int     /* iy MUST be in outer loop */
    {
     periodicx=((ix==ixmin)&&(mx_type==PERIODIC));

     /* ix+1,iy */
     if (ix<ixmax)
      {
       hld=a01[ix+1][iy];
       a01[ix+1][iy]/=alphaij[ix][iy];
       alphaij[ix+1][iy]-=hld*a01[ix+1][iy];

       /* ixmax,iy ix+1,iy */
       if (periodicx)
        {
         temp=milu*a01[ix+1][iy]*a01[ix][iy];
         alphaij[ix+1][iy]-=temp;
         alphaij[ixmax][iy]-=temp;
        }
       /* ix-1,iy+1 ix+1,iy */
       if ((ix>ixmin)&&(iy<iymax))
        {
         temp=milu*a01[ix+1][iy]*a20[ix-1][iy+1];
         alphaij[ix+1][iy]-=temp;
         alphaij[ix-1][iy+1]-=temp;
        }
       /* ix,iy+1   ix+1,iy */
       if (iy<iymax)
        {
         a20[ix][iy+1]-=a01[ix+1][iy]*a10[ix][iy+1];
         /* ix+1,iy+1 ix+1,iy */
         a10[ix+1][iy+1]-=a01[ix+1][iy]*a00[ix+1][iy+1];
        }
       /* ixmax,iy+1 ix+1,iy */
       if ((periodicx)&&(iy<iymax))
        {
         temp=milu*a01[ix+1][iy]*a20[ixmax][iy+1];
         alphaij[ix+1][iy]-=temp;
         alphaij[ixmax][iy+1]-=temp;
        }
     
      }  /* ix<ixmax */
   
     /* ixmax,iy  */
     if (periodicx)
      {
       hld=a01[ix][iy];
       a01[ix][iy]/=alphaij[ix][iy];
       alphaij[ix][iy]-=hld*a01[ix][iy];
       /* ix,iy+1   ixmax,iy */
       if (iy<iymax)
        {
         a00[ix][iy+1]-=a01[ix][iy]*a10[ix][iy+1];
         /* ix+1,iy+1  ixmax,iy */
         temp=milu*a01[ix][iy]*a00[ix+1][iy+1];
         alphaij[ix+1][iy+1]-=temp;
         alphaij[ixmax][iy]-=temp;
         /* ixmax,iy+1 ixmax,iy */
         a10[ixmax][iy+1]-=a01[ix][iy]*a20[ixmax][iy+1];
        }
      }     /* periodic and ix==ixmin */
         
         
     /* ix-1,iy+1 */
     if ((ix>ixmin)&&(iy<iymax))
      {
       hld=a20[ix-1][iy+1];
       a20[ix-1][iy+1]/=alphaij[ix][iy];
       alphaij[ix-1][iy+1]-=hld*a20[ix-1][iy+1];
       
       /* ix,iy+1  ix-1,iy+1 */
       a01[ix][iy+1]-=a20[ix-1][iy+1]*a10[ix][iy+1];
       /* ix+1,iy+1 ix-1,iy+1 */
       if (ix<ixmax)
        {
         temp=milu*a20[ix-1][iy+1]*a00[ix+1][iy+1];
         alphaij[ix-1][iy+1]-=temp;
         alphaij[ix+1][iy+1]-=temp;
        }
      }

     /* ix,iy+1 */
     if (iy<iymax)
      {
       hld=a10[ix][iy+1];
       a10[ix][iy+1]/=alphaij[ix][iy];
       alphaij[ix][iy+1]-=hld*a10[ix][iy+1]; 
       /* ix+1,iy+1 ix,iy+1 */
       if (ix<ixmax)
         a01[ix+1][iy+1]-=a10[ix][iy+1]*a00[ix+1][iy+1];
       /* ixmax,iy+1   ix,iy+1 */
       if (periodicx)
        a01[ix][iy+1]-=a10[ix][iy+1]*a20[ixmax][iy+1];
      }

     /* ix+1,iy+1 */
     if ((ix<ixmax)&&(iy<iymax))
      {
       hld=a00[ix+1][iy+1];
       a00[ix+1][iy+1]/=alphaij[ix][iy];
       alphaij[ix+1][iy+1]-=hld*a00[ix+1][iy+1];
       /* ixmax,iy+1  ix+1,iy+1 */
       if (periodicx)
        {
         temp=milu*a00[ix+1][iy+1]*a20[ixmax][iy+1];
         alphaij[ix+1][iy+1]-=temp;
         alphaij[ixmax][iy+1]-=temp;
        }
      }

     /*ixmax,iy+1 */
     if ((periodicx) && (iy<iymax))
      {
       hld=a20[ixmax][iy+1];
       a20[ixmax][iy+1]/=alphaij[ix][iy];
       alphaij[ix][iy]-=hld*a20[ixmax][iy+1];
      }     /* periodic and ix==ixmin */
    }    /* looping  ilu - cholesky */

   for_all_points_int
    alphaij[ix][iy]=1.0/alphaij[ix][iy];
  }  /* ! use_multigrid */
          
  norm_r1 = 0.0;
  a_prod(ss,ap);   /* ss contains the initial guess */

  for_all_points_int
    { 
     r1[ix][iy]=bij[ix][iy]-ap[ix][iy];
     norm_r1=abs_max(r1[ix][iy],norm_r1);
    }

  for (k = 0;
       ((k < mx*my) && (norm_r1 > tolerance));
       k++)
    {
     /* 
     printf("iterations,residual : %5d %10.4f\n",k,norm_r1);
     */
     if (mg_alone)
     {
       m_solve(bij,ss);
       norm_r1 = 0.0;
       avg_error=0.0;
       a_prod(ss,ap);   /* ss contains the initial guess */

       for_all_points_int
        {
         r1[ix][iy]=bij[ix][iy]-ap[ix][iy];
         norm_r1=abs_max(r1[ix][iy],norm_r1);
        }
     }
     else
     {     /* below for non-multigrid */

     if (k == 0)
       {
        m_solve(r1,z1); 
        /* z1 = minv*r1 */
        copy_vector(z1,p);
        temp_prod = inner_prod(z1,r1);
       }
     else
       {
        temp_prod_hld=inner_prod(z1,r2);
        /* z1 = minv*r1 */
        m_solve(r1,z1); 
        temp_prod = inner_prod(z1,r1);
        beta = temp_prod/temp_prod_hld;
        for_all_points_int
          p[ix][iy]=z1[ix][iy]+beta*p[ix][iy];
       }

     a_prod(p,ap);
     aa = temp_prod/inner_prod(p,ap);

     avg_error = 0.0;
     for_all_points_int
       {
        truncation = aa*p[ix][iy];
        ss[ix][iy] += truncation;
        avg_error += fabs(truncation);
       }

     avg_error /= mx*my;
     

     copy_vector(r1,r2);

     norm_r1 = 0.0;       /* L infinity norm */
     for_all_points_int
       {
        r1[ix][iy] += -aa*ap[ix][iy];
        norm_r1 = abs_max(r1[ix][iy],norm_r1);
       }
     }  /* above if mg_alone=FALSE */
/*
     printf("it,avg_err,normr1 %5d%10.5f %10.5f\n",k,avg_error,norm_r1);
*/
    }   /* iteration */

  *iterations = k;
  t1=clock();
if (use_debug)
  printf("it,clock %4d%10ld",k,t1-t0);
}   /* procedure invert_matrix */ 
