static char rcsid[] = "$Id: parmin.c,v 1.3 1997/07/18 03:02:36 dhb Exp $";

/*
** $Log: parmin.c,v $
** Revision 1.3  1997/07/18 03:02:36  dhb
** Fix for getopt problem; getopt(), optopt and optind are now
** G_getopt(), G_optopt and G_optind.
**
** Revision 1.2  1993/02/24 21:15:47  dhb
** 1.4 to 2.0 command argument changes.
**
** Revision 1.1  1992/12/11  19:06:03  dhb
** Initial revision
**
*/

/* By : Upinder S. Bhalla, 1992, Caltech */
#include <stdio.h>
#include <math.h>
#include "olf_ext.h"

float *vector();
void free_vector();
int find_nmin();
void splint();
float spbrent();
float bsplint();
float bsplinebrent();

#define NDIMS 20
#define TOLERANCE 0.0002
#define DX 0.002
#define RATIO 5.0
#define EPS 1.0e-8

#define ITMAX 200

#define NONE 0
#define FRESH 1
#define GRAD1LO 2
#define GRAD1HI 3
#define GRAD2LO 4
#define GRAD2HI 5
#define LINMIN 6
#define PARMIN 7
#define RANGE_DEFAULT 2.5
#define MIN_NSAMPLES 16

/*
**
** The parmin routine does some severe approximations. It uses a
** fixed number of sample points along the minimization vector,
** and uses a spline fit for estimating the minimum. This method
** is justfiable only in some special situations.
** 
** 1 : if the function has a lot of small local minima, it is very
** easy for the standard linmin to get stuck. Splinmin can avoid
** this in two ways : using bezier (smoothing splines) and by using
** an asymmetric parabola estimation of slope for the gradient, so
** we are sampling at two length scales.
**
** 2 : If we are using parallel machines to do the minimization,
** the spline fit approach allows parallelization of an otherwise
** serial algorithm. All the derivatives are evaluated in one swell
** foop by calculating the 2N function values on separate nodes.
** Then all the spline sample points are evaluated in parallel, and
** if N is large enough they could be done on the same 2N nodes.
** This means that the entire minimization takes only 2*ITER cycles,
** where the number of iterations is typically 5 to 10.
** 
** Note : the number of sample points for the spline should be at
** least 16 in order to cover the space sufficiently. The 'samples'
** variable sets this. When doing the calculation on a parallel machine,
** the samples variable should be 16 or the number of nodes, whichever
** is larger. The most efficient use of the machine is when the number
** of nodes is 2N+1 where N is the number of parameters to be varied.
** This comes from the gradient calculations for each of the dimensions,
** each of which use 2 points, plus the extra point for the actual
** point for which the gradient is being calculated.
** Unfortunately this leaves us with an odd number of nodes, which
** is usually not practical for allocation reasons. The occasional
** idle node should rarely be a problem, however.
**
** The function returns 0 when done, 1 when it is feeding out
** vectors for evaluation and 2 when it needs the parallel
** evaluated output.
**
** An example of its use would be something like ....
while (1)
	ret=parmin()
	if (ret==1)
		farm out simulations
		save results in appropriate array
		nothing needed for input
	end
	if (ret==2)
		send filled results array in, instead of vector.
	end
	if (ret==0)
		finish off. The vector array now has the best vector.
		break
	end
end

**
*/

int do_parmin(argc,argv)
	int argc;
	char	**argv;
{
	static float *p;
	float retmatch;
	static float *match;
	int iter,i,ndim;
	struct table_type	*table,*samples;
	static int ret = 0;
	int	exp_flag=0,init_flag=0,cspline_flag=0;
	float tolerance;
	float dx=DX,ratio=RATIO;
	int niter = 0;
	int nsamples = MIN_NSAMPLES;
	int	status;

	tolerance = TOLERANCE;

/* Handle io options */
	initopt(argc, argv, "ndim table-element nsamples samples -exponential -initialize -tolerance t -cspline -dx dx -ratio r");
	while ((status = G_getopt(argc, argv)) == 1)
	  {
	    if (strcmp(G_optopt, "-exponential") == 0)
		exp_flag=1;
	    else if (strcmp(G_optopt, "-initialize") == 0)
		init_flag=1;
	    else if (strcmp(G_optopt, "-cspline") == 0)
		cspline_flag=1;
	    else if (strcmp(G_optopt, "-tolerance") == 0)
		tolerance=Atof(optargv[1]);
	    else if (strcmp(G_optopt, "-niterations") == 0)
		niter=atoi(optargv[1]);
	    else if (strcmp(G_optopt, "-dx") == 0)
		dx=Atof(optargv[1]);
	    else if (strcmp(G_optopt, "-ratio") == 0)
		ratio=Atof(optargv[1]);
	  }

	if (status < 0) {
		printoptusage(argc, argv);
		return(-1);
	}

	ndim=atoi(optargv[1]);
	if (ndim <=0) {
		printf("Error : ndim = %d must be > 0\n",ndim);
		return(-1);
	}
	nsamples=atoi(optargv[3]);
	if (nsamples < 16 || nsamples < (ndim*2+1)) {
		printf("Error : nsamples = %d should be >= 16 and >= %d\n",
			nsamples,ndim*2+1);
		return(-1);
	}
	table=(struct table_type *) GetElement(optargv[2]);
	if (!table) {
		printf("Error : table element %s not found\n",optargv[2]);
		return(-1);
	}
	if (strcmp(table->object->name,"table") !=0) {
		printf("Error : element %s is not of type table\n",optargv[2]);
		return(-1);
	}
	if (!table->alloced) {
		printf("Error : table %s is not allocated\n",optargv[2]);
		return(-1);
	}
	if (table->table->xdivs < ndim) {
		printf("Error : table %s is size %d, less than ndim=%d\n",
			optargv[2],table->table->xdivs,ndim);
		return(-1);
	}

	if (ret==2) {
		samples=(struct table_type *) GetElement(optargv[4]);
		if (!samples) {
			printf("Error : table element %s not found\n",optargv[4]);
			return(-1);
		}
		if (strcmp(samples->object->name,"table") !=0) {
			printf("Error : element %s is not of type table\n",optargv[4]);
			return(-1);
		}
		if (!samples->alloced) {
			printf("Error : table %s is not allocated\n",optargv[4]);
			return(-1);
		}
		if (samples->table->xdivs < ndim) {
			printf("Error : table %s is size %d, less than nsamples=%d\n",
				optargv[4],samples->table->xdivs,nsamples);
			return(-1);
		}
	}


	/* Moved out of if statement because of possible pointer 
	** errors in sim, leaking over to here */
	if (ret==0) {
		p=vector(1,ndim); /* Stupid num rec convention */
		match=(float *)calloc(nsamples+4,sizeof(float));
		if (init_flag) {
			if (exp_flag) {
				for(i=1;i<=ndim;i++) p[i]=log(table->table->table[i-1]);
			} else  {
				for(i=1;i<=ndim;i++) p[i]=table->table->table[i-1];
			}
		} else {
			for(i=1;i<=ndim;i++) p[i]=0.0;
		}
	}
	if (ret==2) { /* loading in the returned values for matches */
		for(i=0;i<nsamples;i++) match[i]=samples->table->table[i];
	}
/* Do minimization */
	ret=find_parmin(p,ndim,nsamples,match,tolerance,dx,ratio,cspline_flag,
		&retmatch,niter);
	if (exp_flag)
		for(i=1;i<=ndim;i++) table->table->table[i-1]=exp(p[i]);
	else 
		for(i=1;i<=ndim;i++) table->table->table[i-1]=p[i];
	table->output=retmatch;
	if (ret==0) {
		free_vector(p,1,ndim);
		free(match);
	}
	return(ret);
}


/*
** Match 0 holds value for point for which gradient is being
** calculated 
*/
int find_parmin(p,ndim,nsamples,match,tolerance,dx,ratio,cspline_flag,retmatch,
	niter)
	float *p;
	int ndim,nsamples;
	float *match;
	float tolerance;
	float dx,ratio;
	int cspline_flag;
	float *retmatch;
	int niter;
{
static float *ptemp,*xi,fret;
static int state=FRESH,laststate=NONE;
static float *g,*h;
static int i,j,k;
static float fp;
static float temp;
static int its;
static float gg,gam,dgg;
static float range,center;
static int nloops=0;
int		grad_ok;

	/* finish up what was left over to do from last time */
	switch(laststate) {
		case FRESH :
			fp=match[0];
			break;
		case GRAD1LO :
		case GRAD2LO :
			p[i]=temp; /* restore previous value */
			/* xi[i]= (fp-match)*ratio*ratio -fp; */
			break;
		case GRAD1HI :
			p[i]=temp;
			/*
			xi[i]+=match;
			xi[i]/=ratio*(ratio+1.0)*dx;
			*/

			i++;
			if (state==PARMIN) { /* initializing for minimization */
				/* Calculate all the gradients */
				fp=match[0];
				grad_ok=0;
				for(j=1;j<=ndim;j++) {
					xi[j]=((fp-match[2*j-1])*ratio*ratio-fp+match[2*j])/
						(ratio*(ratio+1.0)*dx);
					if (xi[j]!=0.0) grad_ok=1;
				}
				if (!grad_ok) {
					printf("Error in gradient evaluation : all zero\n");
					return(2);
				}
				for(j=1;j<=ndim;j++){
					g[j]= -xi[j];
					xi[j]=h[j]=g[j];
				}
				its=1;
				/* since p is being used for reporting params to the
				** simulation func */
				for(k=1;k<=ndim;k++) ptemp[k]=p[k];
				/* match=fp; */
				range=RANGE_DEFAULT;
				center=0.0;
			}
			break;
		case GRAD2HI :
			p[i]=temp;
			i++;
			if (state==PARMIN) { /* initializing for minimization */
				/* Calculate all the gradients */
				fp=match[0];
				for(j=1;j<=ndim;j++) {
					xi[j]=((fp-match[2*j-1])*ratio*ratio-fp+match[2*j])/
						(ratio*(ratio+1.0)*dx);
				}

		        dgg=gg=0.0;
        		for (j=1;j<=ndim;j++) {
            		gg += g[j]*g[j];
/* 			        dgg += xi[j]*xi[j];   */
            		dgg += (xi[j]+g[j])*xi[j];
        		}
        		if (gg == 0.0) {
            		/* do cleanup stuff here */
					free_vector(ptemp,1,ndim);
					free_vector(g,1,ndim);
					free_vector(h,1,ndim);
					free_vector(xi,1,ndim);
            		return(1);
        		}
        		gam=dgg/gg;
        		for (j=1;j<=ndim;j++) {
            		g[j] = -xi[j];
            		xi[j]=h[j]=g[j]+gam*h[j];
        		}
				its++;
				if (its>ITMAX) {
					printf("Error : too many iterations in GRAD2HI\n");
					return(-1);
				}
				/* since p is being used for reporting params to the
				** simulation func */
				for(k=1;k<=ndim;k++) ptemp[k]=p[k];
				/* match=fp; */
				range=RANGE_DEFAULT;
				center=0.0;
			}
			break;
		case PARMIN :
			if (state==GRAD2LO) {
				/* Set the value of the returned function call at p */
				fp=match[0];
			}
			break;
		default :
			break;
	}
	laststate=state;

	switch(state) {
		case FRESH :
			/* Initialization */
			ptemp=vector(1,ndim); /* Stupid num rec convention */
			for(i=1;i<=ndim;i++)
				ptemp[i]=0.0;
			g=vector(1,ndim);
			h=vector(1,ndim);
			xi=vector(1,ndim);
			i=1;
			state=GRAD1LO;
			break;
		case GRAD1LO :
			/* requests the function value at small basis vector
			** offsets in order to calculate gradients */
			temp=p[i];
			p[i]=temp-dx;
			state=GRAD1HI; /* evaluate lo end of gradient */
			break;
		case GRAD1HI :
			p[i]=temp+ratio*dx;
			if (i>=ndim) { /* This is the last eval in GRAD1HI */
				state=PARMIN;
				return(2);
			} else
				state=GRAD1LO; /* evaluate hi end of gradient */
			break;
		case PARMIN :
			j=parmin(ptemp,xi,ndim,nsamples,match,p,&range,&center,tolerance,
				cspline_flag);
			*retmatch=fret=match[0];
			if (j==3) return(2);
			if (j>=1) { /* finished minimization */
				/* The pt to be evaluated is now ptemp */
				/* Since we could not predict what the last eval
				** in MIN would be, we have to set off the next
				** state from here itself. It is just a function
				** evaluation. The following state will be gradient
				** calculation */
				for(k=1;k<=ndim;k++) p[k]=ptemp[k];
				if (niter>0) {
					if (nloops>=niter) {
						free_vector(ptemp,1,ndim);
						free_vector(g,1,ndim);
						free_vector(h,1,ndim);
						free_vector(xi,1,ndim);
						nloops=0;
						return(0); /* finally done */
					} else {
						nloops++;
					}
				} else {
					if (j==2 && 2.0*fabs(fret-fp) <=
						tolerance*(fabs(fret)+fabs(fp)+EPS)) {
						/* free stuff here */
						free_vector(ptemp,1,ndim);
						free_vector(g,1,ndim);
						free_vector(h,1,ndim);
						free_vector(xi,1,ndim);
						return(0); /* finally done */
					}
				}
				/* Since we could not predict what the last eval
				** in MIN would be, we have to set off the next
				** state from here itself. It is just a function
				** evaluation. The following state will be gradient
				** calculation */

				/* if it got here, then it did find a line min, but
				** not the final answer. So we go and do another 
				** grad evaluation */
				i=1;
				state=GRAD2LO;
				return(1);
			}
			break;
		case GRAD2LO :
			/* requests the function value at small basis vector
			** offsets in order to calculate gradients */
			temp=p[i];
			p[i]=temp-dx;
			state=GRAD2HI; /* evaluate lo end of gradient */
			break;
		case GRAD2HI :
			p[i]=temp+ratio*dx;
			if (i>=ndim) { /* This is the last eval in GRAD1HI */
				state=PARMIN;
				return(2);
			} else
				state=GRAD2LO; /* evaluate hi end of gradient */
			break;
		default :
			printf("Error : state should never be here \n");
			break;
	}
	return(1);
}

#undef NDIMS 
#undef TOLERANCE
#undef DX
#undef EPS

#undef ITMAX

#undef NONE
#undef FRESH
#undef GRAD1LO
#undef GRAD1HI
#undef GRAD2LO
#undef GRAD2HI
#undef LINMIN
#undef PARMIN
#undef RANGE_DEFAULT

#define MIN_INIT 1
#define MIN_BRAK 2
#define MIN_BRENT 3
#define MIN_SCAN 4
#define MIN_FIND 5

/* Should always be an odd number */
#define MAX_X_SAMPLE_PTS 17

static int ncom=0; /* defining declarations */
static float *pcom=0,*xicom=0;
float Spbrent();

static void setp(p,x)
	float *p;
	float x;
{
	int i;

	for(i=1;i<=ncom;i++) p[i]=pcom[i]+x*xicom[i];
}


/* Match is now an array of size nmatch */
/* this routine does some ugly juggling in order to use the 
** already-calculated evaluation point as the center sample point
** (nmatch/2+1) along the vector. This gives us an extra sample
** point for free.
*/
int parmin(p,xi,n,nmatch,match,peval,range,center,tolerance,cspline_flag)
	float *p,*xi;
	int n,nmatch;
	float *match,*peval,*range,*center;
	float tolerance;
	int cspline_flag;
{
	static int state=MIN_INIT;
	static float xx,xmin,fx,fb,fa,bx,ax;
	static float *x,*y,*y2;
	int		i,j;
	static int scan_count;
	static float  *x_sample_pts;
	int converge_flag=0;
	int besti;
	float ymin;
	float xiscale;
	FILE	*fp,*fopen();
	float temp;

	switch (state) {
		case MIN_INIT :
			x=vector(1,nmatch+1);
			y=vector(1,nmatch+1);
			y2=vector(1,nmatch+1);
			x_sample_pts=vector(1,nmatch+2);
			pcom=vector(1,n);
			xicom=vector(1,n);
			ncom=n;

			if (cspline_flag) {
				temp=0.01;
				xiscale=exp(log(100.0)/(float)(nmatch/2));
				for(i=1;i<=(nmatch/2);i++) {
					x_sample_pts[nmatch/2 - i + 1]= -temp;
					x_sample_pts[nmatch/2 + i + 1]= temp;
					temp*=xiscale;
				}
				x_sample_pts[nmatch/2+1]=0.0;
			} else {
				/* range of -1 to +1 */
				temp=2.0/(float)(nmatch);
				for(i=1;i<=nmatch/2;i++) {
					j=nmatch/2+1-i;
					x_sample_pts[j]=-(float)(i) * temp;
				}
				for(i=nmatch/2+2;i<=nmatch+1;i++) {
					j=i-nmatch/2-1;
					x_sample_pts[i]=(float)(j) * temp;
				}
			}

			/* Enforce xi to be a unit vector, since we want to
			** avoid local minima */

			xiscale=0.0;
			for (j=1;j<=n;j++) {
				xiscale += xi[j] * xi[j];
			}
			if (xiscale > 0.0)
				xiscale=sqrt(xiscale);
			else 
				xiscale=1.0;

			for (j=1;j<=n;j++) {
				pcom[j]=p[j];
				xicom[j]=xi[j]/xiscale;
			}
			scan_count=1;
			/* Set it to the value at the initial p vector */
			y[1+nmatch/2]= match[0];
			/* Set up the x array */
			for (i=1;i<=nmatch+1;i++)
				x[i]= *center + *range * x_sample_pts[i];
			setp(peval,x[scan_count]);

			state=MIN_SCAN;
			break;
		case MIN_SCAN :	
			/* y[scan_count]= *match; */
			scan_count++;
			/* Skip if this is the middle pt */
			if (scan_count==1+nmatch/2)
				scan_count++;
			setp(peval,x[scan_count]);
			if (scan_count>=nmatch+1) {
				state=MIN_FIND;
				return(3);
			}
			break;
		case MIN_FIND :
			for(i=1;i<=nmatch/2;i++) y[i]=match[i-1];
			for(i=nmatch/2+2;i<=nmatch+1;i++) y[i]=match[i-2];
			fp=fopen("foo","a");
			fprintf(fp,"\n/newplot\n");
			for(i=1;i<=nmatch+1;i++) {
				fprintf(fp,"%f	%f\n",x[i],y[i]);
			}
			fclose(fp);
			ymin=y[1];
			besti=1;
			for(i=2;i<=nmatch+1;i++) {
				if (ymin>y[i]) {
					ymin=y[i];
					besti=i;
				}
			}
			if (besti<=1 && y[1]==y[2]) besti=2;
			if (besti>=nmatch+1 &&
				y[nmatch+1]==y[nmatch]) besti=nmatch;
			if (besti==1) { /* min was lower than the sampled range*/
			/* shift the sample range left by half, and double it */
				*center-= *range;
				*range *= 2;
			/* Set the new middle pt to the current low point */
				y[1+nmatch/2]=y[1];
			/* set the x array to the new scale */
				for (i=1;i<=nmatch+1;i++)
					x[i]= *center + *range * x_sample_pts[i];
			/* do another set of samples, alas */
				scan_count=1;
				setp(peval,x[scan_count]);
				state=MIN_SCAN;
			} else if (besti>=nmatch+1) {
			/* max was larger than the sampled range*/
			/* shift the sample range right by half, and double it */
				*center+= *range;
				*range *= 2;
			/* Set the new middle pt to the current high point */
				y[1+nmatch/2]=y[nmatch+1];
			/* set the x array to the new scale */
				for (i=1;i<=nmatch+1;i++)
					x[i]= *center + *range * x_sample_pts[i];
			/* do another set of samples, alas */
				scan_count=1;
				setp(peval,x[scan_count]);
				state=MIN_SCAN;
			} else {
			/* The best point is somewhere in here */
				if (cspline_flag) {
					spline(x,y,nmatch+1,1e30,1e30,y2);
					ax= x[besti-1];
					xx= x[besti];
					bx= x[besti+1];
					/* find the minimum */
					match[0]=
						spbrent(ax,xx,bx,tolerance,&xmin,x,y,y2,
						nmatch+1);
					/* set the p vector to return */
					setp(p,xmin);
				} else { /* the default b-spline */
					ax= x[besti-1];
					xx= x[besti];
					bx= x[besti+1];
					match[0]=
					bsplinebrent(ax,xx,bx,tolerance,&xmin,x,y,
						nmatch+1);
					if (xmin < tolerance)
						converge_flag=1;
					if (match[0] > y[besti]) 
						match[0]=y[besti];
					/* set the p vector to return */
					setp(p,xmin);
				}

				/* clean up */
				free_vector(x,1,nmatch+1);
				free_vector(y,1,nmatch+1);
				free_vector(y2,1,nmatch+1);
				free_vector(x_sample_pts,1,nmatch+2);
				free_vector(pcom,1,n);
				free_vector(xicom,1,n);

				state=MIN_INIT;
				if (converge_flag)
					return(2);
				else
					return(1);
			}
		break;
	}
	return(0);
}

#undef MIN_INIT
#undef MIN_BRAK
#undef MIN_BRENT
#undef MIN_SCAN 
#undef MIN_FIND
#undef MAX_X_SAMPLE_PTS 
