/* Copyright (C) 1991-2000 Simon N. Wood  snw@st-and.ac.uk

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License   
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
(www.gnu.org/copyleft/gpl.html)

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
USA.
*/


/***************************************************************************/
/* This is a new dde solver, that attempts to get around the problem of    */
/* interpolating lagged variables at a lower order than the integration    */
/* scheme. The methods borrow heavily from:                                */
/* [1] C.A.H. Paul 1992 Appl.Numer.Math. 9:403-414                         */
/* [2] D.J. Higham 1993 Appl.Numer.Math. 12:315-330                        */
/* [3] Press et al. 1992 Numerical recipes in C C.U.P. chap. 16            */
/* The integration scheme used is an emdedded RK23 pair due to Fehlberg    */
/* reported in:                                                            */
/* [4]E.Hairer, S.P.Norsett & G.Wanner 1987, Solving Ordinary differential */
/*                   Equations I. springer-Verlag Berlin. p170 RKF2(3)B    */
/* How to derive better schemes is described in:                           */
/* [5] J.C. Butcher 1987 The Numerical Analysis of Ordinary Differential   */
/* Equations. John Wiley & sons, Chichester.                               */
/* Interpolation of lagged variables is by cubic hermite polynomials. This */
/* necessitates storing values and gradients of lagged variables. Some     */
/* models have to be re-cast a bit to do this, but you can always make up  */
/* a lagged quantity from the lagged state variables and the gradients of  */
/* the latter are obviously easy to find. [2] Shows why this effort is     */
/* required.                                                               */
/* Lags of less than the timestep are dealt with by extrapolating the      */
/* cubic polynomial for the last stored step, beyond that time step [1].   */
/* Switches are also dealt with. This means that lagged variables are      */
/* stored twice when a switch occurs: once before and once after. However, */
/* switch tracking is not carried out, so the step length may at times     */
/* reduce as switches in lagged variables are crossed, yielding derivative */
/* discontinuities.                                                        */
/* Stepping logic is from [3].                         		           */
/* Note that the code has no scope for specifying initial histories. This  */
/* could be changed, but initial history problems are seriously unpleasant.*/
/* NOTE: code not yet optimised for minimum no. of gradient evaluations    */
/***************************************************************************/




#include <stdlib.h>
#include <stdio.h>
#include <math.h>

#include "ddeq.h"
#define ERRCON 1.89e-4
#define MINSTEPFUDGE 0.0  /* this number is used to set the minimum time step
			                    permissable when solving the model (it is actually
                             a multiplier for the initial timestep passed to the
			                    integration routine). Needing to set it to a
                             non-zero value probably indicates a problem with
                             the continuity of the model, and will lead to
                             inaccuracies in the model solution*/
#define MAXSTEP 100.0     /* sets the maximum permissable time step - this can
                             be needed in order to avoid stepping right through
                             model fetures that have a relatively short
                             timescale. A more efficient alternative to using
                             this is to define switches that are thrown during
                             the `feature' (and not reset)- but this is not
                             always possible. The maximum timestep is set to the
                             initial timestep dt multiplied by MAXSTEP */
double dbg0,dbg1;
#define SWITCHES

/* The following macros are for inline calculation of cubic hermite
   polynomials (HERMITE) and their gradients (GHERMITE)
   The horrible variable names are to (hopefully) ensure that the names don't
   clash with anything else - find and replace with something nicer if you
   need to check the macro.
   Tested 4/10/95 (HERMITE), 20/11/95 (GHERMITE) 
   Modified 8/1/96 - left to right evaluation sequence IS NOT standard!! */


#define HERMITE(res,x0,x1,y0,y1,g0,g1,x) \
{ HeRmItE_xx0=x-x0;HeRmItE_xx1=x-x1;HeRmItE_xx12=HeRmItE_xx1*HeRmItE_xx1;\
	HeRmItE_xx02=HeRmItE_xx0*HeRmItE_xx0;\
	if (HeRmItE_h=x1-x0)\
  res=(( (g0)*HeRmItE_xx0*HeRmItE_xx12 + (g1)*HeRmItE_xx1*HeRmItE_xx02 \
  +((y0)*(2.0*HeRmItE_xx0+(HeRmItE_h))*HeRmItE_xx12- \
    (y1)*(2.0*HeRmItE_xx1-HeRmItE_h)*HeRmItE_xx02)/HeRmItE_h)/ \
    (HeRmItE_h*HeRmItE_h)); else res=y0;}


double HeRmItE_h,HeRmItE_xx1,HeRmItE_xx12,HeRmItE_xx02,HeRmItE_xx0;


#define GHERMITE(res,x0,x1,y0,y1,g0,g1,x) \
{ HeRmItE_xx0=x-x0;HeRmItE_xx1=x-x1;HeRmItE_xx12=HeRmItE_xx1*HeRmItE_xx1;\
  HeRmItE_xx02=HeRmItE_xx0*HeRmItE_xx0;\
if (HeRmItE_h=x1-x0)\
   res=(( (g0)*(HeRmItE_xx12+2.0*HeRmItE_xx0*HeRmItE_xx1) \
  + (g1)*(HeRmItE_xx02+2.0*HeRmItE_xx0*HeRmItE_xx1) \
  + ((y0)*2.0*HeRmItE_xx1*(2.0*HeRmItE_xx0+HeRmItE_h + HeRmItE_xx1) \
  - (y1)*2.0*HeRmItE_xx0*(2.0*HeRmItE_xx1-HeRmItE_h + HeRmItE_xx0))/HeRmItE_h )\
    / (HeRmItE_h*HeRmItE_h) ); else res=g0;}

/***************** end of definitions for hermite macros *******************/

void ErrorMessage(char *msg,int fatal);

/***************************************************************************/
/*                       Global variables                                  */
/***************************************************************************/

void (*poutput)(double *,double, void *, int)=output;
double (*ppastvalue)(int,double,int)=realpastvalue,
       (*ppastgradient)(int,double,int)=realpastgradient;
histype history;
long accepted=0L,rejected=0L;

/***************************************************************************/
/*             Routines that are not problem specific                      */
/***************************************************************************/

//int is_model_discrete()
// indicates to DDEfit that a full continuous model is being used

//{ return(0);
//}

void rk23(state,newstate,g,newg,error,coeff,ns,time,dt)
double *state,*newstate,*g,*newg,*error,*coeff,time,dt;int ns;

/* Takes a single integration step from time to time+dt using a 3rd order
   embedded Runge-Kutta Fehlberg method:
   E.Hairer, S.P.Norsett & G.Wanner 1987, Solving Ordinary differential
   Equations I. springer-Verlag Berlin. p170 RKF2(3)B
   The routine returns an estimated error vector for adaptive timestepping.
   The gradient of the state variables is to be given in function grad().
   The routine uses the lower order scheme for updating,
   fortunately Fehlberg optimised the coefficients for the lower order
   scheme..... 4/10/95.
   NOTE: not yet optimised for minimum gradient evaluations - see original
   table of coeffs. Partially optimised 9/10/95 Only valid for ci=b4i!
   Takes gradient at time in g, puts gradient at time+dt in newg - these can
   be the same pointer/array */


{ static int first=1,oldns=-1;
  static double *k1,*k2,*k3,*k4,
	 bct1,bct2,bct3,  /* variables to save 3*ns multiplications */
  /* Embedded RKF table - coded this way to save addressing time */
  a2=0.25,  a3=27.0/40.0,
  b21= 0.25,
  b31=-189.0/800.0,  b32= 729.0/800.0,
  b41= 214.0/891.0,  b42= 1.0/33.0,     b43= 650.0/891.0,
/*   c1= 214.0/891.0,   c2= 1.0/33.0,      c3= 650.0/891.0,*/
  cc1= 533.0/2106.0, cc3= 800.0/1053.0, cc4=-1.0/78.0;
  int i;
  if ((first)||(oldns!=ns))
  { if (!first)
    { free(k2);free(k3);free(k4);}
    oldns=ns;first=0;
    if (ns>0)
    { k2=(double *)calloc((size_t)ns,sizeof(double));
      k3=(double *)calloc((size_t)ns,sizeof(double));
      k4=(double *)calloc((size_t)ns,sizeof(double));
    }
  }
  k1=g;
  bct1=b21*dt;
  for (i=0;i<ns;i++) newstate[i]=state[i]+k1[i]*bct1;
  grad(k2,newstate,coeff,time+dt*a2);
  bct1=b31*dt;bct2=b32*dt;
  for (i=0;i<ns;i++) newstate[i]=state[i]+k1[i]*bct1+k2[i]*bct2;
  grad(k3,newstate,coeff,time+dt*a3);
  bct1=b41*dt;bct2=b42*dt;bct3=b43*dt;
  for (i=0;i<ns;i++)
  newstate[i]=state[i]+k1[i]*bct1+k2[i]*bct2+k3[i]*bct3;

  grad(k4,newstate,coeff,time+dt);
  bct1=cc1*dt;bct2=cc3*dt;bct3=cc4*dt;
  for (i=0;i<ns;i++)
  { newg[i]=k4[i];
    error[i]=state[i]+bct1*k1[i]+bct2*k3[i]+bct3*k4[i]-newstate[i];
  }
}


void inithisbuff(nhv,histsize,nlag)
int nhv,nlag;long histsize;

/* sets up the global structure "history" and
   sets the global long integer history.offset to zero
   4/10/95, if its been called before it clears up the old version first 23/11/95
   The history buffer can operate in two modes fast and slower. The fast mode
   has double the storage requirement of the slowmode. To set mode see dde.h.
   In slow mode the buffer stores values and gradients at a series of points for
   each history variable. Hermite basis functions are used to construct the
   interpolant from these - but this requires quite alot of flops. In the fast
   mode the buffer has storage for the polynomial representation of the same
   interpolant - once these are calculated - evaluation of the interpolant is
   much faster: but their calculation is not so cheap - only those actually
   required are calculated - if cdset is set to 1 then the coefficients for the
   corresponding interval already exist.
   */

{ static int oldnhv=0;
  int i;
  history.fast=FASTHISTORY;
  for (i=0;i<oldnhv;i++)
  { free(history.buff[i]);
    free(history.lagmarker[i]);
    free(history.gbuff[i]);
    if (history.fast)
    { free(history.cbuff[i]);
      free(history.dbuff[i]);
      free(history.cdset[i]);
    }
  }
  if (oldnhv) /* then further cleaning is required */
  { free(history.lagmarker);
    free(history.clock);
    free(history.buff);
    free(history.gbuff);
    if (history.fast)
    { free(history.cbuff);
      free(history.dbuff);
      free(history.cdset);
    }
  }
  oldnhv=nhv;
  if (!nhv) return;
  history.no=(long)nhv;
  history.size=histsize;
  history.lagmarker=(long **)calloc(nhv,sizeof(long *));
  for (i=0;i<nhv;i++)
  history.lagmarker[i]=(long *)calloc(nlag,sizeof(long));
  history.clock=(double *)calloc(history.size,sizeof(double));
  history.buff=(double **)calloc(nhv,sizeof(double *));
  for (i=0;i<nhv;i++)
  history.buff[i]=(double *)calloc((size_t)history.size,sizeof(double));
  if (history.fast) /* need to set up space for storing coeffs for fast evaluation */
  { history.cbuff=(double **)calloc(nhv,sizeof(double *));
    for (i=0;i<nhv;i++)
    history.cbuff[i]=(double *)calloc(history.size,sizeof(double));
    history.dbuff=(double **)calloc(nhv,sizeof(double *));
    for (i=0;i<nhv;i++)
    history.dbuff[i]=(double *)calloc(history.size,sizeof(double));
    history.cdset=(char **)calloc(nhv,sizeof(char *));
    for (i=0;i<nhv;i++)
    history.cdset[i]=(char *)calloc(history.size,sizeof(char));
  }

  history.gbuff=(double **)calloc(nhv,sizeof(double *));
  for (i=0;i<nhv;i++)
  history.gbuff[i]=(double *)calloc(history.size,sizeof(double));
  if (!history.gbuff[nhv-1])
  { ErrorMessage("Not enough memory for history variables.",1);
  }
  history.offset=0L;
}

void updatehistory(g,s,c,t)
double *g,*s,*c,t;

/* updates the history record by calling the storehistory() moving the
   offset and updating and recording the time 4/10/95*/

{ static int first=1, oldhno=-1L;
  static double *his,*ghis;
  int i;
  if (! history.no) return;
  if ((first)||(oldhno!=history.no))
  { if (!first) { free(his);free(ghis);}
    first=0;his=(double *)calloc(history.no,sizeof(double));
    ghis=(double *)calloc(history.no,sizeof(double));
    oldhno=history.no;
    history.first_time=t;
    history.offset=-1;
  }
  storehistory(his,ghis,g,s,c,t);
  history.last_time=t;
  history.offset++;if (history.offset==history.size) history.offset=0L;
  history.clock[history.offset]=t;
  for (i=0;i<history.no;i++)
  { history.buff[i][history.offset]=his[i];
    history.gbuff[i][history.offset]=ghis[i];
    if (history.fast)
    history.cdset[i][history.offset]=0;
  }
}

double realpastgradient(i,t,markno)
int i,markno;double t;
/* Interogates the history ringbuffers. Note that there is a fair amount of
   assignment of one variable to another at the start: this is to save
   on address calculation and speed up the routine. 4/10/95 (copy from
   pastvalue 20/11/95)*/

{ long k1,k,offset,offsetplus,size;
  double res,*y,*g,*x,x0,x1,h,y1y0,*c,*d,g0,g1,y0;
  char *cdset,*estr;
  y=history.buff[i];g=history.gbuff[i];
  if (history.fast)
  { c= history.cbuff[i]; d= history.dbuff[i]; cdset= history.cdset[i];}  x=history.clock; /*local pointers improve address efficiency*/
  offset=history.offset;size=history.size;
  offsetplus=offset+1L; if (offsetplus==size) offsetplus=0L;
  k=history.lagmarker[i][markno];
  k1=k+1L;if ((k1>=size)||(k1<0L)) k1=0L;
  while ((x[k1]<t)&&(k1!=offset))
  { k1++;if (k1==size) k1=0L;}
  if (k1==0L) k=size-1L; else k=k1-1L;
  while ((x[k]>t)&&(k!=offsetplus))
  { if (k==0L) k=size-1L; else k--;}
  k1=k+1L;if (k1==size) k1=0L;
  if (t<x[k])
  { estr=(char *)calloc(200,sizeof(char));
    sprintf(estr,"Lag for variable %d too large at %g\n",i,history.last_time-t);
    ErrorMessage(estr,1);
  }
  x0=x[k];x1=x[k1];
#ifdef SWITCHES  /* some code for not extrapolating through a switch */
  if ((t>x[k1])&&(x[k]==x[k1])) /* then its extrapolation through a switch */
  res=g[k1];     /* so use linear extrapolation just this once 20/11/95*/
  else
#endif
  if (history.fast)
  { t-=x0;
    if (!cdset[k])   /* set coeffs for rapid calculation */
    { y0=y[k];
      h=1.0/(x1-x0);y1y0=y[k1]-y0;
      g+=k; g0= *g; g++; g1= *g;c+=k;d+=k;
      *c = h*(3*h*y1y0-2*g0-g1);
      *d =h*h*(g0+g1-2*h*y1y0);
      cdset[k]=1;
      res= g0 + 2*t*( *c + 3* *d*t);
    } else
    res=g[k] + 2*t*(c[k] + 3* d[k]*t);
  } else  GHERMITE(res,x0,x1,y[k],y[k1],g[k],g[k1],t);    /* 20/11/95*/
  history.lagmarker[i][markno]=k;
  return(res);
}


double realpastvalue(i,t,markno)
int i,markno;double t;
/* Interogates the history ringbuffers. Note that there is a fair amount of
   assignment of one variable to another at the start: this is to save
   on address calculation and speed up the routine. 4/10/95*/

{ long k1,k,offset,offsetplus,size;
  char *cdset,*estr;
  double res,*y,*g,*x,x0,x1,h,y1y0,*c,*d,g0,g1,y0;
  y=history.buff[i];g=history.gbuff[i];
  if (history.fast)
  { c= history.cbuff[i]; d= history.dbuff[i]; cdset= history.cdset[i];}
  x=history.clock; /*local pointers improve address efficiency*/
  offset=history.offset;size=history.size;
  offsetplus=offset+1L; if (offsetplus==size) offsetplus=0L;
  k=history.lagmarker[i][markno];
  k1=k+1L;if ((k1>=size)||(k1<0L)) k1=0L;
  while ((x[k1]<t)&&(k1!=offset))
  { k1++;if (k1==size) k1=0L;}
  if (k1==0L) k=size-1L; else k=k1-1L;
  while ((x[k]>t)&&(k!=offsetplus))
  { if (k==0L) k=size-1L; else k--;}
  k1=k+1L;if (k1==size) k1=0L;
  if (t<x[k])
  { estr=(char *)calloc(200,sizeof(char));
    sprintf(estr,"Lag for variable %d too large at %g\n",i,history.last_time-t);
    ErrorMessage(estr,1);
  }
  x0=x[k];x1=x[k1];
#ifdef SWITCHES  /* some code for not extrapolating through a switch */
  if ((t>x1)&&(x0==x1)) /* then its extrapolation through a switch */
  res=y[k1]+(t-x1)*g[k1]; /* so use linear extrapolation just this once */
  else
#endif
  if (history.fast)
  { t-=x0;
    if (!cdset[k])   /* set coeffs for rapid calculation */
    { y0=y[k];h=x1-x0;
      if (h>0.0)
      { h=1.0/h;y1y0=y[k1]-y0;
	g+=k; g0= *g; g++; g1= *g;c+=k;d+=k;
	*c = h*(3*h*y1y0-2*g0-g1);
	*d =h*h*(g0+g1-2*h*y1y0);
      } else
      { c+=k;d+=k;*c=0.0;*d=0.0;}
      cdset[k]=1;
      res=y0+t*(g0 + t*( *c + *d*t));
    } else
    res=y[k] + t*(g[k] + t*(c[k] + d[k]*t));
  }
  else
  HERMITE(res,x0,x1,y[k],y[k1],g[k],g[k1],t);
  history.lagmarker[i][markno]=k;
  return(res);
}


double zeropos(x1,x2,x3,s1,s2,s3)
double x1,x2,x3,s1,s2,s3;
/* finds the root in [x1,x3] of a quadratic passing through the (xi,si)s
   it is assumed that s3<s1*/


{ double z,y,zpy,a,b,c,d,a1,b1,c1,p;
  int ok=1;
  static int first=1;
  static double udge;
  if (first)
  { first=0;
    udge=1.00000001;
  }
  z=x3-x2;y=x2-x1;zpy=z+y;
  a1=a=s2;c1=c=(z*s1+y*s3-zpy*s2)/(zpy*z*y);b1=b=(s2-s1)/y+c*y;
  d=b*b-4.0*a*c;c*=2.0;
  p= -a/b; /* linear only approximation - in case c numerically zero */
  if (c==0.0) a=p;
  else
  { if (d>=0.0)
    { d=sqrt(d);a=(-b+d)/c;b=(-b-d)/c;
      if ((b>=-y)&&(b<=z)) a=b; else
      if ((a<-y)||(a>z)) ok=0;
    }
    if ((d<0.0)||(!ok))
    { if (-s3<s1) a=z; else a=-y;}
    z=a1+a*b1+a*a*c1;
    d=a1+p*b1+p*p*c1;
    if (fabs(z)>fabs(d)) a=p; /* check that linear interpolation is not better */
  }
  a+=x2;
  if (a>x3) a=x3;
  if (a<=x1)
  { if (a==0.0) a=udge-1.0; else if (a<0.0) a/=udge; else a*=udge;}
  return(a);
}



double istep(sw0,newsws,s0,news,g,newg,c,err,t0,t1,nsw,ns,flickedswitch)
double *sw0,*newsws,*s0,*news,*g,*newg,*c,*err,t0,t1;
int nsw,ns,*flickedswitch;

/* executes RK23 step to next switch or target depending on which comes first
   If step is to the first switch then the number of that switch is returned
   in flickedswitch, but map() is not called.
   Returns how far it got. 5/10/95. g is assumed to contain the gradient at
   t0, it will contain the gradient at time t1 on exit. This improves
   efficiency by making use of info. calculated in the previous step of rk23.
   Note that it is necessary to try both linear and quadratic approximations,
   in case curvature is really zero.....
*/

{ static int first=1,nsold,nswold,*flicked;
  static double *err1,*s1,*s2,*sw1,*sw2;
  int k,i,switches=0;
  double zp,dt,sp2,sp1,minp,ds,udge;
  if ((first)||(ns!=nsold)||(nsw!=nswold))
  { if (!first)
    { free(err1);free(flicked);free(sw1);free(s1);free(sw2);free(s2);}
    first=0;
    sw1=(double *)calloc(nsw,sizeof(double));
    sw2=(double *)calloc(nsw,sizeof(double));
    s1=(double *)calloc(ns,sizeof(double));
    s2=(double *)calloc(ns,sizeof(double));
    err1=(double *)calloc(ns,sizeof(double));
    flicked=(int *)calloc(nsw,sizeof(int));
    nsold=ns;nswold=nsw;
  }
  dt=t1-t0;
  rk23(s0,news,g,newg,err,c,ns,t0,dt);
  if (nsw) switchfunctions(newsws,news,c,t1);
  for (i=0;i<nsw;i++)                 /* are there any switches */
  if ((sw0[i]>0.0)&&(newsws[i]<=0.0))
  { flicked[switches]=i;switches++;}

  if (!switches)   /* No switches so its an ordinary step */
  { *flickedswitch=-1;
    return(t1);
  }

  /* Logic for stepping to first switch */
  sp1=t0+dt*0.5;
  for (k=0;k<200;k++) /* if k gets to 100 routine fails */
  { rk23(s0,s1,g,newg,err,c,ns,t0,sp1-t0); /* step to approx. 1st switch position */
    switchfunctions(sw1,s1,c,sp1);

    switches=0;
    for (i=0;i<nsw;i++)     /* are there any switches ? MACRO after debug*/
    if ((sw0[i]>0.0)&&(sw1[i]<=0.0))
    { flicked[switches]=i;switches++;}

    if ((k)&&(switches==1))  /* MACRO after debug */
    { *flickedswitch=flicked[0];
      for (i=0;i<ns;i++) news[i]=s1[i];
      for (i=0;i<nsw;i++) newsws[i]=sw1[i];
      return(sp1);
    }

    rk23(s1,s2,newg,newg,err1,c,ns,sp1,t1-sp1);/* step to end of interval */
    switchfunctions(sw2,s2,c,t1);

    for (i=0;i<nsw;i++)     /* are there any switches ? MACRO after debug*/
    if ((sw1[i]>0.0)&&(sw2[i]<=0.0))
    { flicked[switches]=i;switches++;}

    if (!switches)  /* MACRO after debug */
    { *flickedswitch=-1;
      for (i=0;i<ns;i++)
      { news[i]=s2[i]; err[i]=sqrt(err[i]*err[i]+err1[i]*err1[i]);}
      for (i=0;i<nsw;i++) newsws[i]=sw2[i];
      return(t1);
    }

    /* having got this far switch positions must be estimated */

    /* locate the first switch */
    if (k==100)
    k=101;

    sp2=t1;minp=t1;
    for (i=0;i<switches;i++)
    { if ((t0==t1)||(sp1==t1)||(t0==sp1)) zp=t1; else
      zp=zeropos(t0,sp1,t1,sw0[flicked[i]],sw1[flicked[i]],sw2[flicked[i]]);
      if (zp<minp) { sp2=minp;minp=zp;}
    }
    udge=0.0000000001;
    sp1=minp;ds=sp2-sp1;
    if (ds>0.0)  /* ensuring that switch actually gets flicked, eventually */
    do { sp1+=udge*ds;udge*=10.0;} while (sp1==minp);
  }
  ErrorMessage("\nProblem with switch logic\n",1);return(-1.0);
}




void dde(s,c,t0,t1,dt,eps,dout,ns,nsw,nhv,hbsize,nlag,reset,step)
double *s,      /* State variables */
       *c,      /* coefficients */
       t0,t1,   /* start and stop times */
       *dt,     /* pointer to initial timestep (returns final step - which
		             is step that would have been used if t1 not reached!)
                   when reset=1 this is used to set the maximum and
                   minimum steps using macro definitions at head of file.*/
       eps,     /* fractional tolerance for adaptive stepping */
       dout;    /* interval for output via user routine output(). Every
		   time a step passes 1 or more times of the form t0+i*dout
		   output() is called once. Hence output is only roughly
		   regular. dout=0.0 for no output. */
long hbsize;    /* The number of elements to store in the history buffer */
int nsw,        /* number of switches */
    ns,         /* number of state variables */
    nhv,        /* number of lagged variables */
    reset,      /* set to 0 not to reset, to 1 to reset */
    nlag,       /* number of place markers per history variable */
    step;       /* 0 - adaptive stepping; 1 - record adaptive; 2 - playback
		   adaptive; 3 - free record memory and use adaptive;
		   4 - fixed step */
/* Note that the routine can store and replay a series of steps (the integration
   mesh used). To do this call the routine with step=1. The mesh will be stored
   for as long as step=1. When replaying, always call with the same set of
   start and end times (in the same sequence) used for recording */


{ double D,Da,errmax,rerr,target,t,ti,
	 *err,*newsws,*sws,*news,*newg,*dum,*sp,*nswp,*swp,*nsp,*e0,*scale;
  static double *g,mindt,maxdt,**recdt,
                pshrink = -0.333333333333333,
                pgrow   = -0.333333333333333,
                safety  = 0.9,
                errcon;
  static long no_dt=0L,max_no_dt=0L,dt_off=0L,dt_arr=0L,rp_arr,rp_off;
			  /* storing information on mesh recording memory */
  static int first=1;
  static char *errs;
  long i,iout=1L;
  int swi,adaptive=0;
  if (first) errcon=pow(5.0/safety,1.0/pgrow);
  nswp=newsws=(double *)calloc((size_t)nsw,sizeof(double));
  swp=sws=(double *)calloc((size_t)nsw,sizeof(double));
  nsp=news=(double *)calloc((size_t)ns,sizeof(double));
  newg=(double *)calloc((size_t)ns,sizeof(double));
  err=(double *)calloc((size_t)ns,sizeof(double));
  e0=(double *)calloc((size_t)ns,sizeof(double));
  scale=(double *)calloc((size_t)ns,sizeof(double));
  statescale(scale);
  if (nsw) switchfunctions(sws,s,c,t0);
  if ((step==1)&&(no_dt==0L)) /* initialising memory for recording mesh */
  { recdt=(double **)calloc(1000,sizeof(double *));max_no_dt=1000L;
    recdt[0]=(double *)calloc(1000,sizeof(double));no_dt=1L;
  }
  if ((step==3)&&(no_dt>0L)) /* free memory for recording mesh */
  { for (i=0;i<no_dt;i++) free(recdt[i]);free(recdt);max_no_dt=0L;no_dt=0L;
    dt_off=0L;
  }
  if (step==3) step=0;
  if ((step==0)||(step==1)) adaptive=1;
  if (reset)
  { if (first) first=0;
    else free(g);
    mindt=(*dt)*MINSTEPFUDGE;
    maxdt=(*dt)*MAXSTEP;
    g=(double *)calloc((size_t)ns,sizeof(double));
    inithisbuff(nhv,hbsize,nlag);
    grad(g,s,c,t0);
    updatehistory(g,s,c,t0);
    for (i=0;i<ns;i++)
    if ((*dt)*fabs(g[i])>0.1*fabs(s[i])&&(s[i]!=0.0))
    (*dt)=0.1*fabs(s[i])/fabs(g[i]);
    if (step==1)
    { for (i=dt_arr+1;i<no_dt;i++) free(recdt[i]); /* freeing memory unused by end of last run */
      no_dt=dt_arr+1;      /* resetting record of memory allocated */
      dt_arr=0L;dt_off=0L;  /* resetting recording offsets */
    }
    if (step==2)
    { rp_off=0L;rp_arr=0L;} /* setting replay offsets */
  }
  target=ti=t=t0;
  sp=s;
  D= (*dt);
  if (dout) poutput(s,t0,(void *) NULL,0);
  while (t0<t1)
  { if (step==2) /* replaying stored mesh */
    { if (t==target)   /* checking that switch has not split recorded step */
      { target=recdt[rp_arr][rp_off];
	     rp_off++; if (rp_off==1000L) {rp_off=0L;rp_arr++;}
      }
    } else
    if (step!=2) target=t0+D;  /* Not replaying */
    if (target>t1) { target=t1;}
    t=istep(sws,newsws,s,news,g,newg,c,err,t0,target,nsw,ns,&swi);
    errmax=0.0;
    if ((adaptive)&&(D>mindt))
    { /* calculate the maximum tolerable errors for each variable */
      for (i=0;i<ns;i++) e0[i]=eps*(fabs(s[i])+fabs((t-t0)*(g[i]+newg[i])*0.5)+scale[i]);
      for (i=0;i<ns;i++) /* now find the error that is the highest proportion of e0 */
      { if (err[i]<1e-150&&err[i]>-1e-150) rerr=0.0;else
        rerr=err[i]/e0[i];
        rerr=fabs(rerr);
        if (rerr>errmax) errmax=rerr;
      }
    }
    if (errmax<1.0) /* then all errors were less than their permitted maximum */
    { accepted++;
      if (step==1)  /* record steps */
      { if (dt_off==1000L)
	     { dt_off=0L;dt_arr++;
	       if (dt_arr==no_dt)       /* have to allocate some more memory */
	       { if (no_dt==max_no_dt)  /* need to expand memory table */
	         { max_no_dt+=1000L;
	           recdt=(double **)realloc(recdt,(size_t)(sizeof(double *)*max_no_dt));
	         }
	         recdt[dt_arr]=(double *)calloc(1000,sizeof(double));
	         no_dt++;
	       }
	     }
	     recdt[dt_arr][dt_off]=t;dt_off++;
      }

      dum=s;s=news;news=dum;dum=sws;sws=newsws;newsws=dum;
      dum=g;g=newg;newg=dum;
      updatehistory(g,s,c,t);
      if (dout)   /* outputting results */
      if ( t > ti+iout*dout )
      { poutput(s,t,(void *)NULL,0);
	     while (ti+iout*dout<=t) iout++;
      }
      t0=t;
      if ( swi > -1 )
      { if (dout) poutput(s,t,(void *)NULL,0);
        map(s,c,t,swi);
        if (dout) poutput(s,t,(void *)NULL,0);
        grad(g,s,c,t);updatehistory(g,s,c,t);
      } else   /* increase stepsize */
      { if ((adaptive)&&(t<t1))
        D= (errmax > errcon ? safety*D*pow(errmax,pgrow) : 5.0*D);
        if (D>maxdt) D=maxdt;
       /* for (i=0;i<ns;i++) stability check - not needed
        if (fabs(g[i])*D>fabs(s[i]))
        D=fabs(s[i])/fabs(g[i]);*/
      }
    } else  /* the error was too large and the step must be scaled */
    { if (errmax>100.0) errmax=100.0;
      Da=t-t0; /* Step actually achieved */
      rejected++;
      /* Shrink D from Da */
#ifndef STEPOFF
      D=safety*Da*pow(errmax,pshrink);
      D=( D < 0.1*Da ? 0.1*Da : D );
      if (D < 1e-16*(*dt))
      { errs=(char *)calloc(200,sizeof(char));
        sprintf(errs,"Stepsize reduced to %g at time %g.",D,t);
        ErrorMessage(errs,0);
      }
#endif
      t=t0;
    }
  }
  (*dt)=D;
  if (dout) poutput(s,t1,(void *)NULL,0);
  for (i=0;i<ns;i++) sp[i]=s[i]; /* copying results to correct address */
  free(swp);free(nswp);free(nsp);free(err);free(e0);free(newg);free(scale);
}


