/* Copyright (C) 1991-2000 Simon N. Wood  snw@st-and.ac.uk

This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License   
as published by the Free Software Foundation; either version 2
of the License, or (at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
(www.gnu.org/copyleft/gpl.html)

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307,
USA.*/

/* Routines for quadratic programming and other constrained optimization. */


#include <stdlib.h>
#include <stdio.h>
#include <math.h>
#include <string.h>
#include "matrix.h"
#include "qp.h"
#include "gcv.h"
qpoutdatatype qpoutdata;
#define DELMAX 35L

void ErrorMessage(char *msg,int fatal);

#define max(a,b)    (((a) > (b)) ? (a) : (b))
#define min(a,b)    (((a) < (b)) ? (a) : (b))

#define round(a) ((a)-floor(a) <0.5 ? (int)floor(a):(int) floor(a)+1)




matrix addconQT(Q,T,a,u) matrix *Q,T,a,*u;

/* A constraint, a (a row vector), is added to the QT factorization of
   the working set. T must have been initialised square, and then had T.r
   set to correct length. */

{ long q,i,j;
  double la,ra=0.0,*cV,*bV,*T1V;
  matrix b,c;
  c=initmat(Q->r,1L);b=initmat(Q->r,1L);(*u)=initmat(Q->r,1L);
  for (i=0;i<c.r;i++) for (j=0;j<a.c;j++) c.V[i]+=a.V[j]*Q->M[j][i];
  la=dot(c,c);
  cV=c.V;bV=b.V;
  q=T.c-T.r-1;
  if (q!=0L)
  { for (i=q+1;i<a.c;i++) { ra+=cV[i]*cV[i];bV[i]=cV[i];}
    if ((la-ra)<0.0)
    { ErrorMessage("ERROR in addconQT.",1);}
    else
    bV[q]=sqrt(la-ra);
    if (cV[q]>0.0) bV[q]= -bV[q];
    householder(u,c,b,q);
    Hmult((*Q),(*u));
  } else
  for (i=0;i<a.c;i++) bV[i]=cV[i];
  T1V=T.M[T.r];T.r++;
  for (j=0;j<T.c;j++) T1V[j]=bV[j];
  freemat(b);
  freemat(c);
  return(T);
}

void GivensAddconQT(matrix *Q,matrix *T,matrix *a,matrix *s,matrix *c)

/* A constraint, a (a row vector), is added to the QT factorization of
   the working set, the QT factorisation is updated in a manner that
   allows easy update of the choleski factors of Z'GZ (R). The Givens
   transformations used to do this are stored in s and c. there are s.r
   of them and when applied from the right they zero successive elements
   of a by rotation into the next element.
   T must have been initialised square (and then had T.r reset)
   s and c need T->c-T->r-1 rows to store the Givens rotations and must be
   initialized outside the routine.
   */

{ long q,i,j;
  double Qi,r,cc,ss,*bV,*sV,*cV,**QM,*QV,bb,bb1;
  matrix b;
  b.V=T->M[T->r]; b.r=Q->r;b.c=1L;
  for (i=0;i<T->c;i++) b.V[i]=0.0;
  for (i=0;i<b.r;i++) for (j=0;j<Q->r;j++) b.V[i]+=Q->M[j][i]*a->V[j];
  /* now calculate a series of Givens rotations that will rotate the null basis
     so that it is orthogonal to new constraint a */
  bV=b.V;cV=c->V;sV=s->V;QM=Q->M;
  q=T->c-T->r-1; /* number of Givens transformations needed */
  for (i=0;i<q;i++)
  { /* first calculate the Givens transformation */
    bb=bV[i];bb1=bV[i+1];
    r=bb*bb+bb1*bb1;r=sqrt(r);
    if (r==0.0) { ss=sV[i]=0.0;cc=cV[i]=1.0;} else
    { ss=sV[i]=bb/r;cc=cV[i]= -bb1/r;
      bV[i]=0.0; /* non-essential */
      bV[i+1]=r;
    }
    /* now apply it to Q */
    for (j=0;j<Q->r;j++)
    { QV=QM[j];
      Qi=QV[i];
      QV[i]=cc*Qi + ss*QV[i+1];
      QV[i+1]=ss*Qi - cc*QV[i+1];
    }
  }
  T->r++;
}



matrix delconQT(Q,T,sth) matrix Q,T;long sth;

/* the sth constraint is deleted from the QT factorization of the working */
/* set where s>=0.                                                        */

{ long i,j,colj,coli,k,Tr,Tc,Qr,T1r,T1c;
  double r,s,c,xi,xj,**TM,**QM,**T1M,*TV,*QV,*T1V;
  Tr=T.r;TM=T.M;QM=Q.M;Tc=T.c;Qr=Q.r;
  for (i=sth+1;i<Tr;i++)
  { coli=Tc-i-1;colj=Tc-i;
    xi=TM[i][coli];xj=TM[i][colj];
    r=sqrt(xi*xi+xj*xj);
    s=xi/r;c=xj/r;
    for (j=i;j<Tr;j++)
    { TV=TM[j];
      xi=TV[coli];
      TV[coli]= -c*xi+s*TV[colj];
      TV[colj]=s*xi+c*TV[colj];
    }
    for (j=0;j<Qr;j++)
    { QV=QM[j];
      xi=QV[coli];
      QV[coli]= -c*xi+s*QV[colj];
      QV[colj]=s*xi+c*QV[colj];
    }
  }
  T.r--;T1M=T.M;T1r=T.r;T1c=T.c;
  for (k=0;k<T1r;k++)
  { T1V=TM[k];TV=TM[k];
    for (j=0;j<T1c-k-1;j++) T1V[j]=0.0;
    for (j=T1c-k-1;j<T1c;j++)
    if (k<sth) T1V[j]=TV[j];
    else T1V[j]=TM[k+1][j];
  }
  return(T);
}


void getgk(gk,G,cT,y) matrix gk,G,cT,y;

/* Gets the current gradient, gk, of the objective given the current */
/* position y, from gk=Gy+cT.                                        */

{ long i;
  matmult(gk,G,y,0,0);
  for (i=0;i<gk.r;i++) gk.V[i]+=cT.V[i];
}




long lagrange(gk,Q,T,G,cT,y,I,delog,Af,getg)
matrix gk,Q,T,G,cT,y,I,delog,Af;int getg;

/*                                                        T           */
/* uses the AcQ=[0,T] factorisation of Ac to solve min||Ac l-gk|| and */
/* thereby find the lagrange multipliers l of the active constraints. */

{ long n,i,j,r,c,ni,mini= -1,Tr,Qc,rn;
  matrix l,g;
  double minl=TOL,eek,*gV,**QM,*gkV,**TM,*IV,*lV;
  Tr=T.r;Qc=Q.c;
  g=initmat(Q.r,1L);l=initmat(Tr,1L);
  if (getg) getgk(gk,G,cT,y);
  gkV=gk.V;gV=g.V;QM=Q.M;IV=I.V;TM=T.M;lV=l.V;
  n=Q.r-Tr;
  for (i=0;i<Tr;i++)
  { gV[i]=0.0;ni=n+i;
    for (j=0;j<Qc;j++) gV[i]+=QM[j][ni]*gkV[j];
  }
  for (r=0;r<Tr;r++)
  { c=T.r-r-1;eek=0.0;rn=r+n;
    for (i=c+1;i<Tr;i++) eek+=TM[i][rn]*lV[i];
    if (TM[c][rn]==0.0)
    {
#ifdef QP_TEXT_OUT
      printf("*");
#endif
      if ((long)delog.M[(long)IV[c]]<(DELMAX))
      return(c);
      else
      lV[c]=0.0;
    } /* delete degenerate constraint */
    else { lV[c]=(gV[r]-eek)/TM[c][rn];}
  }
  for (i=Af.r;i<Tr;i++)  // note that only the inequality constraints are tested
  if (lV[i]<minl)
  if (((long)delog.V[(long)IV[i]])<DELMAX)
  { minl=lV[i];mini=i;} else
  { mini*=1;} // debugging stop point
  freemat(l);freemat(g);
  return(mini);
}



void updateZGZ(ZGZ,a) matrix *ZGZ,a;

/* adds final row and final col given by a to ZGZ on constraint deletion */

{ long i,k;
  double **ZGZM,*aV;
  ZGZM=ZGZ->M;aV=a.V;
  k=ZGZ->r;
  for (i=0;i<k+1;i++) ZGZM[k][i]=ZGZM[i][k]=aV[i];
  ZGZ->c++;ZGZ->r++;
}

void deleteconstraint(ZGZ,gk,pk,R,G,cT,Q,Ac,I,T,y,s,tk,getg,newR)
matrix *ZGZ,gk,pk,*R,G,cT,Q,*Ac,I,*T,y;long s,*tk;int getg,newR;

/* The sth constraint is deleted from Ac, Q, T and tk are updated
   ZGZ, R, and pk are updated if newR is non-zero
   gk is obtained from getgk() if newR and getg are non-zero
   ZGZ is updated iff ZGZ->r!=0.
*/

{ long n,i,j,Qr;
  matrix  Atemp,z,a,b,pz;
  double tot,gamma,beta,**RM,*pzV,**QM,*zV,*aV,*pkV,*bV,**AcM,**AtempM,*IV;
  (*T)=delconQT(Q,(*T),s);
  n=Q.c-(*tk);(*tk)--;
  QM=Q.M;Qr=Q.r;pkV=pk.V;
  if ((getg)&&(!newR))
  { ErrorMessage("Bad call to deleteconstraint().",1);}
  if (newR)
  { z=initmat(Q.r,1L);a=initmat(Q.r,1L);b=initmat(Q.r,1L);pz=initmat(n+1,1L);
    aV=a.V;bV=b.V;zV=z.V;
    for (i=0;i<Q.r;i++) zV[i]=QM[i][n];
    matmult(b,G,z,0,0);
    for (i=0;i<n;i++)
    { for (j=0;j<Qr;j++) aV[i]+=QM[j][i]*bV[j];
    }
    aV[n]=dot(z,b);
    if (ZGZ->r) updateZGZ(ZGZ,a);
    (*R)=choleskiupdate((*R),a);
    freemat(a);freemat(b);
  }
  Atemp=(*Ac);(*Ac)=initmat((*Ac).r-1,(*Ac).c);
  AcM=Ac->M;AtempM=Atemp.M;IV=I.V;
  for (i=0;i<s;i++) for (j=0;j<(*Ac).c;j++)
  AcM[i][j]=AtempM[i][j];
  for (i=s+1;i<Atemp.r;i++)
  { IV[i-1]=IV[i];
    for (j=0;j<Atemp.c;j++)
    AcM[i-1][j]=AtempM[i][j];
  }
  if (newR)
  { RM=R->M;pzV=pz.V;
    if (getg) getgk(gk,G,cT,y);
    gamma=dot(gk,z);
    beta= -gamma/RM[n][n];
    for (i=n;i>=0;i--)
    { tot=0.0;
      for (j=i+1;j<n+1;j++) tot+=RM[j][i]*pzV[j];
    /* Recall that R is lower triangular */
      if (i==n) pzV[i]=beta/RM[i][i];
      else pzV[i]= -tot/RM[i][i];
    }
    for (i=0;i<Q.r;i++)
    { pkV[i]=0.0;
      for (j=0;j<n+1;j++) pkV[i]+=QM[i][j]*pzV[j];
    }
    freemat(z);freemat(pz);
  }
  freemat(Atemp);
}


void searchdirection(gk,pk,R,G,Q,cT,y,tk,getg)
matrix gk,pk,R,G,Q,cT,y;long tk;int getg;

/*          T      T                                                        */
/* Solves RR pz= -Z gk for pz given the gradient vector gk and the choleski */
/* factor of the projected Hessian R. The search direction is pk=Zpz.      */
/* getg is set to 1 if getgk() is to be called and to 0 if gk is supplied */

{ matrix x,pz;
  long i,j,n,Gr;
  double **QM,*gkV,*xV,*pkV,*pzV;
  if (getg) getgk(gk,G,cT,y);
  n=gk.r-tk;
  Gr=G.r;QM=Q.M;
  x=initmat(n,1L);pz=initmat(n,1L);
  xV=x.V;pzV=pz.V;gkV=gk.V;pkV=pk.V;
  for (i=0;i<n;i++)
  { /*xV[i]=0.0;*/
    for (j=0;j<Gr;j++) xV[i]-=QM[j][i]*gkV[j];
  }
  choleskisolve(R,pz,x);
  for (i=0;i<Q.r;i++)
  { pkV[i]=0.0;
    for (j=0;j<n;j++) pkV[i]+=QM[i][j]*pzV[j];
  }
  freemat(x);freemat(pz);
}


int isitdegen(Q,A,tk)
matrix Q,A;long tk;

/* A service routine for step() it tests to see if A is degenerate w.r.t. the
   currently active constraints */

{ matrix b;
  int a;
  double enb,ena;
  b=initmat(A.r,A.c);
  matmult(b,A,Q,0,0);
  b.c -= tk;
  enb=enorm(b);ena=enorm(A);
  if (enb< b.c*(TOL)*ena) /* testing to see if the first b.c - tk cols
  of b are zero indicating that A is a linear combination of the active cons.*/
  a=1;else a=0;freemat(b);

  return(a);
}


long step(coninfile,confn,Q,Ain,Af,I,pk,y,b,tk)
int coninfile;char *confn;matrix Q,Ain,Af,I,pk,y,b;long tk;

/* Given the search direction pk and the current position y, the distance to */
/* each constraint along pk is found. If any of these steps is less than pk  */
/* then the shortest step is taken to give the new y and the index of the new*/
/* active constraint returned, otherwise a step of pk is taken to the minimum*/
/* in the current null space of constraints and -1 is returned. The routine  */
/* also checks that constraints are not 'crept past' through rounding error  */
/* and that any new constraints are independent of the old ones. Confused?   */

{ long i,j,imin,r,col;
  int degen;
  matrix A,zap,y1,c,d;
  double Ay1,ay,apk,alphamin,alpha,reltol,
	 *y1V,*AV,*yV,*pkV; /* dummy pointers to speed matrix comps */
  FILE *in;
  char *errs;
  reltol=TOL*enorm(y);
  if (coninfile)
  { in=fopen(confn,"rb");
    if (in==NULL)
    { errs=(char *)calloc(200,sizeof(char));
      sprintf(errs,"%s not found, nothing read ! ",confn);
      ErrorMessage(errs,1);
    }
    fread(&r,sizeof(long),1,in);
    fread(&col,sizeof(long),1,in);
    if ((col!=y.r)||(r!=b.r))
    { fclose(in);
      ErrorMessage("Constraint matrix file incompatible with knot vector.",1);
      return(0);
    }
  } else
  { r=Ain.r;col=Ain.c;}
  zap=initmat(r,1L);y1=initmat(y.r,1L);
  for (i=0;i<y.r;i++) y1.V[i]=y.V[i]+pk.V[i];
  y1V=y1.V;yV=y.V;pkV=pk.V;
  for (i=0;i<r;i++) zap.V[i]=0.0;
  for (i=Af.r;i<tk;i++)
  { zap.V[(long)I.V[i]]=1.0;
  }
  c=initmat(col,1L);d=initmat(col,1L);
  if (coninfile) A=initmat(1L,col);
  else { A.c=col;A.r=1L;A.vec=1;}
  alphamin=1.0;imin= -1;
  for (i=0;i<r;i++)
  { if (coninfile) fsaferead(A.M[0],sizeof(double),A.c,in);
    else { A.M=Ain.M+i;AV=A.V=A.M[0];}
    if (zap.V[i]==0.0)
    { Ay1=0.0;
      for (j=0;j<A.c;j++) Ay1+=AV[j]*y1V[j]; /*checks that a step to */
      if ((b.V[i]-Ay1)>0.0/*reltol*/) /* the min. in the null space would violate cons.*/
      { ay=0.0;apk=0.0;
	     for (j=0;j<A.c;j++)
	     { ay+=AV[j]*yV[j];
	       apk+=AV[j]*pkV[j];
	     }
	     if (fabs(apk)>0.0)
	     { alpha=(b.V[i]-ay)/apk;
          if (alpha<alphamin)
	       { degen=0;
	         if (alpha<=0.0) degen=isitdegen(Q,A,tk);
            if (!degen)   /* this is an attempt to avoid degenerate vertices */
	         { alphamin=max(0.0,alpha);imin=i;
	           for (j=0;j<y.r;j++) y1V[j]=yV[j]+alphamin*pkV[j]; /* 2/2/97 - avoids distance calc for all that would violate full step */
	         }
          }
        }
      }
    }
  }
#ifdef HOGWASH
  if (imin > -1)   /* This prevents rounding error induced reduction of the*/
  zap.M[imin]=1.0; /* step length (alpha) to zero, in the next bit of code,*/
  if (coninfile) fseek(in,2L*(long)sizeof(long),0);
  for (i=0;i<y.r;i++) y1.M[i][0]=y.M[i][0]+alphamin*pk.M[i][0];
  for (i=0;i<r;i++)
  { if (coninfile) fsaferead(A.M,sizeof(double),A.c,in);
    else { A.M=Ain.M+i;A.V=A.M[0];}
    if (zap.M[i][0]==0.0)
    { Ay1=0.0;
      for (j=0;j<A.c;j++) Ay1+=A.M[0][j]*y1.M[j][0];
      if ((b.M[i][0]-Ay1)>reltol)
      { alphamin=0.0;imin=i;
  	/* resets step if constraint violation */
      }                 /*       has crept in                  */
    }
  }
#endif
  for (i=0;i<y.r;i++) yV[i]+=alphamin*pkV[i];
  freemat(zap);freemat(y1);freemat(c);freemat(d);
  if (coninfile)
  { freemat(A);
    fclose(in);
  }
  return(imin);
}



void QFHmult(A,u) matrix A,u;

/* forms (I-uu')A(I-uu')=A-Auu'-(Auu')'+uu'Auu' and returns it in A */

{ matrix Au;
  double uAu=0,a,**AM,*AuV,*uV,*temp;
  long i,j,Ac;
  Ac=A.c;AM=A.M;uV=u.V;
  Au=initmat(Ac,1L);
  AuV=Au.V;
  for (i=0;i<A.r;i++)
  { /*Au.V[i]=0.0;*/
    temp=AM[i];
    for (j=0;j<Ac;j++) AuV[i]+=temp[j]*uV[j];
  }
  for (i=0;i<A.r;i++) for (j=0;j<Ac;j++)
  { a=AuV[i]*uV[j];
    AM[i][j] -= a;
    AM[j][i] -= a;
  }
  for (i=0;i<Au.r;i++) uAu+=uV[i]*AuV[i];
  for (i=0;i<Au.r;i++) AuV[i]=uV[i]*uAu;
  for (i=0;i<A.r;i++) for (j=0;j<Ac;j++)
  AM[i][j] += uV[i]*AuV[j];
  freemat(Au);
}






void getR(ZGZ,R,Q,G,tk,Greset,u) matrix *ZGZ,*R,Q,G,u;long tk;int Greset;

/* REDUNDANT: Could be removed!!!*/

/*        T                                                                */
/* Forms Z GZ and then its choleski factor R. Z is made up of the first    */
/* Q.c-tk cols of Q.                                                       */

{ long i,j,n,k,Bc,ZGZc,Gc;
  matrix B;
  double **ZGZM,**QM,**BM,**GM;
  n=Q.c-tk;
  if (R->M!=NULL) freemat((*R));(*R)=initmat(n,n);
  Gc=G.c;ZGZM=ZGZ->M;QM=Q.M;GM=G.M;
  if (n!=0)
  { if (Greset)  /* G has changed so Z'GZ must be completely reformed*/
    { B=initmat(G.r,n);ZGZc=ZGZ->r=ZGZ->c=n;Bc=B.c;
      BM=B.M;
      for (i=0;i<B.r;i++) for (j=0;j<Bc;j++)
      for (k=0;k<Gc;k++) BM[i][j]+=GM[i][k]*QM[k][j];
      for (i=0;i<ZGZ->r;i++) for (j=i;j<ZGZc;j++)
      { ZGZM[i][j]=0.0;
	     for (k=0;k<Gc;k++) ZGZM[i][j]+=QM[k][i]*BM[k][j];
	     ZGZM[j][i]=ZGZM[i][j];
      }
      freemat(B);
    }
    else   /* easy update using Householder matrices (I + uu') */
    { QFHmult((*ZGZ),u);
      ZGZ->r--;ZGZ->c--;
    }
    choleski((*ZGZ),(*R),0,0);
  } else
  { ZGZ->r--;ZGZ->c--;
  }
}



void getAc(coninfile,confn,Ain,Af,Ac,I,tk)
int coninfile;char *confn;matrix Ain,Af,*Ac,I;long tk;

/* Given the matrix I indexing the active constraints of A, the active  */
/* constraint matrix Ac is produced.                                    */

{ long i,j,k,r,c;
  matrix A;
  FILE *in;
  char *errs;
  if (coninfile)
  { in=fopen(confn,"rb");
    if (in==NULL)
    { errs=(char *)calloc(200,sizeof(char));
      sprintf(errs,"%s not found, nothing read ! ",confn);ErrorMessage(errs,1);
    }
    fread(&r,sizeof(long),1,in);
    fread(&c,sizeof(long),1,in);
    A=initmat(1L,c);
  } else
  { r=Ain.r;c=Ain.c;A.r=1L;A.c=c;A.vec=1;}
  if (Ac->M!=NULL) freemat((*Ac));
  (*Ac)=initmat(tk,c);
  for (i=0;i<Af.r;i++) for (j=0;j<Af.c;j++)  /* include fixed cons */
  Ac->M[i][j]=Af.M[i][j];
  for (i=Af.r;i<tk;i++)
  { k=(long)I.V[i];
    if (coninfile)
    { fseek(in,(k*c*sizeof(double)+2*sizeof(long)),0);
      fsaferead(A.M[0],sizeof(double),A.c,in);
    } else { A.M=Ain.M+k;A.V=A.M[0];}
    for (j=0;j<A.c;j++) Ac->M[i][j]=A.V[j];
  }
  if (coninfile)
  { fclose(in);
    freemat(A);
  }
}

void Rupdate(ZGZ,R,s,c) matrix *ZGZ,*R,s,c;

/* Updates ZGZ (iff ZGZ->r!=0) and R using Givens rotations calculated in
   GivensAddconQT for updating the null space after constraint addition. */

{ long i,j;
  double x0,x1,ss,cc,r,**RM,*RM0,*RM1,**ZGZM;
  /* Update the choleski factor R first */
  RM=R->M;
  for (i=0;i<s.r;i++)
  { RM0=RM[i];RM1=RM[i+1];
    ss=s.V[i];cc=c.V[i];
    for (j=0;j<i+2;j++) /* apply Givens to the left */
    { x0=RM0[j];x1=RM1[j];
      RM0[j]=cc*x0+ss*x1;
      RM1[j]=ss*x0-cc*x1;
    }
    /* now remove the single extra element above the leading diagonal
       with a Givens rotation from the right */
    x0=RM[i][i];x1=RM[i][i+1];
    r=x0*x0+x1*x1;r=sqrt(r); /* elements on ld always +ve */
    cc=x0/r;ss=x1/r;
    RM[i][i]=r;RM[i][i+1]=0.0;
    for (j=i+1;j<R->r;j++) /* apply to rest of matrix */
    { RM0=RM[j];x0=RM0[i];x1=RM0[i+1];
      RM0[i]=cc*x0+ss*x1;
      RM0[i+1]=ss*x0 -cc*x1;
    }
  }
  R->r--;R->c--;
  /* now update ZGZ. There is an argument for not updating this matrix,
     since it isn't needed for the Q.P. It is used for cross validation
     - but only for full cross validation - not the approximate version,
     and it can be obtained in 0(n^3) ops from R anyway - only updated
     if ZGZ->r!=0 */
  if (!ZGZ->r) return;
  ZGZM=ZGZ->M;
  for (i=0;i<s.r;i++)
  { cc=c.V[i];ss=s.V[i];
    RM0=ZGZM[i];RM1=ZGZM[i+1];
    for (j=0;j<ZGZ->c;j++) /* Givens from left */
    { x0=RM0[j];x1=RM1[j];
      RM0[j]=cc*x0+ss*x1;
      RM1[j]=ss*x0-cc*x1;
    }
    for (j=0;j<ZGZ->r;j++) /* Givens from right */
    { RM0=ZGZM[j];x0=RM0[i];x1=RM0[i+1];
      RM0[i]=cc*x0+ss*x1;
      RM0[i+1]=ss*x0-cc*x1;
    }
  }
  ZGZ->r--;ZGZ->c--;
}


void addconstraint(coninfile,confn,ZGZ,Ain,Af,Ac,I,R,Q,T,s,tk,newR)
int coninfile;char *confn;matrix *ZGZ,Ain,Af,*Ac,I,*R,*Q,*T;long s,*tk;
int newR;

/* Adds a constraint to the working set Ac and performs updates of the
   factorisation matrices Q and T before obtaining R afresh,if newR
   non-zero. When adding a constraint, update is done using Givens rotations,
   in such a way that R can be easily updated. */

{ matrix a,Gs,Gc;
  long r,c,q;
  FILE *in;
  char *errs;
  if (coninfile)
  { in=fopen(confn,"rb");
    if (in==NULL)
    { errs=(char *)calloc(200,sizeof(char));
      sprintf(errs,"%s not found, nothing read!",confn);
      ErrorMessage(errs,1);
    }
    fread(&r,sizeof(long),1,in);
    fread(&c,sizeof(long),1,in);
    fseek(in,(s*c*sizeof(double)+2*sizeof(long)),0);
    a=initmat(c,1L);
    fsaferead(a.M[0],sizeof(double),c,in);
  } else
  { c=Ain.c;r=Ain.r;a.c=c;a.r=1L;a.vec=1;a.M=Ain.M+s;a.V=a.M[0];}
  I.V[*tk]=(double)s;(*tk)++;
  getAc(coninfile,confn,Ain,Af,Ac,I,*tk);
  q=T->c-T->r-1; /* number of Givens transformations needed */
  Gc=initmat(q,1L);Gs=initmat(q,1L); /* storage for Givens coefficients */
  GivensAddconQT(Q,T,&a,&Gs,&Gc);
  if (newR) Rupdate(ZGZ,R,Gs,Gc);
  freemat(Gc);freemat(Gs);
  if (coninfile)
  { freemat(a);
    fclose(in);
  }
}


long feasibility(coninfile,name,Ain,x,b,maxviol)
int coninfile;char *name;matrix Ain,x,b;double *maxviol;

/* checks that x satisfies Ax>=b. */

{ long i,j,k=1L,r,c;
  double z,fetol;
  matrix A;
  FILE *in;
  char *errs;
  *maxviol=0.0;
  fetol=TOL*enorm(x);
  if (coninfile)
  { in=fopen(name,"rb");
    if (in==NULL)
    { errs=(char *)calloc(200,sizeof(char));
      sprintf(errs,"%s not found, nothing read ! ",name);ErrorMessage(errs,1);
    }
    fread(&r,sizeof(long),1,in);
    fread(&c,sizeof(long),1,in);
    if ((c!=x.r)||(r!=b.r))
    { fclose(in);
      ErrorMessage("Constraint matrix file incompatible with parameter vector.",1);
      return(0);
    }
    A=initmat(1L,x.r);
  } else
  { A.r=1L;A.c=x.r;c=Ain.c;r=Ain.r;A.vec=1;} /* A used as dummy row vector */
  for (i=0;i<r;i++)
  { z=0.0;
    if (coninfile) fsaferead(A.M[0],sizeof(double),A.c,in);
    else { A.M=Ain.M+i;A.V=A.M[0];}
    for (j=0;j<c;j++) z+=A.V[j]*x.V[j];
    if ((b.V[i]-z)>0.0)
    { k=0L;if ((b.V[i]-z)> *maxviol) *maxviol=b.V[i]-z;}
  }
  if (coninfile) { fclose(in);freemat(A);}
  return(k);
}



double objective(F,f,J,da,a,prob_dat,G,Y,W,S,Q,gk,ro,lam,tk,getGJ,minimum)
int (*F)(matrix,matrix,matrix,matrix,matrix,void *,int,double);
matrix f,J,da,a,G,Y,W,S,Q,gk;double ro,lam;void *prob_dat;int getGJ,*minimum;long tk;

/* This function is called by NonLinLS() to get the value of the objective
   function, the local gradient, the Jacobian, J etc. if getGJ!=0 it also
   tests for a minimum returning minimum=0 if there isn't one and minimum=1
   if there is. 
   
   * A returned *minimum value of -2 indicates a user termination (signalled by F 
     returning a non-zero value).
   
   * Pass *minimum in as -1 to get an evaluation at 10 times regular accuracy 
     (if F() supports this). This feature will not work with getGJ==1.

   * The value returned is:

     [(Y-f)'W(Y-f) + n*ro*a'Sa]*0.5

   where n is the number of datapoints, and the 0.5 entirely escapes me.
   F() is the function that is called to run the model being fitted in order
       to produce the fitted values, f , that should approximate the data, Y.
       F() also returns a jacobian, J, when getGJ is set to a non-zero value.
   a   is the vector of parameters to be obtained by fitting.
   da  is the vector of finite difference intervals used to obtain J
       (these are obtained automatically in F())
   G returned with J'WJ+ro*Y.r*S+ lam*I
   gk returned with gradient of objective w.r.t. a
   tk is number of active constraints
   prob_dat is a pointer used to pass in a problem specific structure containing
            information needed by the particular application. Typically this is 
            caste to the correct type within F(). 
   
   Q, da, G and J are only used if getGJ=1;

*/



{ matrix rss,e,JW,Sa,D,resp,resb,ap,temp,pgf,pgb,WDp,WDb;
  double res=0.0,tol_fac;
  long i,j,k;
  e=initmat(Y.r,1L);
  D=initmat(J.r,J.c*2);
  if (*minimum==-1) tol_fac=0.1; else tol_fac=1.0; *minimum=0;
  if (getGJ>=1) tol_fac=1.0; 
  i=(*F)(f,J,D,a,da,prob_dat,getGJ,tol_fac); // if F() returns 1 then user has halted it
  if (i==1)
  { *minimum=-2; 
    freemat(e);
    freemat(D);
    return(1e300);
  }
  matrixintegritycheck();
  for (i=0;i<Y.r;i++) e.V[i]=f.V[i]-Y.V[i];
  if (getGJ&&(getGJ!=2))
  { resp=initmat(a.r,1L);resb=initmat(a.r,1L);
    JW=initmat(J.c,J.r);
    for (i=0;i<J.c;i++) for (j=0;j<J.r;j++)
    { D.M[j][i] -= Y.V[j];D.M[j][i+a.r] -= Y.V[j];}
    if (W.c==1L) /* diagonal weight matrix */
    for (i=0;i<J.c;i++) for (j=0;j<J.r;j++)
    { JW.M[i][j]=J.M[j][i]*W.V[j];  /* forming J'W */
      resp.V[i]+=W.V[j]*D.M[j][i]*D.M[j][i];   /* getting Forward RSS term */
      resb.V[i]+=W.V[j]*D.M[j][i+a.r]*D.M[j][i+a.r];
    } else /* full W matrix, so computations a bit more involved */
    { WDp=initmat(W.r,1L);WDb=initmat(W.r,1L);
      matmult(JW,J,W,1,0);
      for (k=0;k<J.c;k++) /* loop through parameters */
      { for (i=0;i<W.r;i++) /* loop through all samples/model populations */
   	  { WDp.V[i]=0.0;WDb.V[i]=0.0;
	       for (j=0;j<W.r;j++) /* getting W multiplied by resid. for fd on param k*/
	       { WDp.V[i]+=W.M[i][j]*D.M[j][k];
	         WDb.V[i]+=W.M[i][j]*D.M[j][k+a.r];
	       }
	     }
	     for (i=0;i<W.r;i++)
	     { resp.V[k]+=D.M[i][k]*WDp.V[i];
	       resp.V[k]+=D.M[i][k+a.r]*WDb.V[i];
	     }
      }
      freemat(WDp);freemat(WDb);
    }
    if (S.r)   /* Only include smoothness if it's there ! */
    { Sa=initmat(1L,1L);ap=initmat(a.r,1L);
      for (j=0;j<a.r;j++) ap.V[j]=a.V[j];
      for (j=0;j<J.c;j++)
      { ap.V[j]+=da.V[j];
	     multi(3,Sa,ap,S,ap,1,0,0);
	     ap.V[j]-=da.V[j];
	     resp.V[j]+=Y.r*ro*Sa.M[0][0];
      }
      for (j=0;j<J.c;j++)
      { ap.V[j]-=da.V[j];
	     multi(3,Sa,ap,S,ap,1,0,0);
	     ap.V[j]+=da.V[j];
	     resb.V[j]+=Y.r*ro*Sa.M[0][0];
      }
      freemat(ap);freemat(Sa);
    }
    for (j=0;j<J.c;j++) { resp.V[j]/=2.0;resb.V[j]/=2.0;}
    matmult(gk,JW,e,0,0);     /* this estimate gets worse as no. of datapoints up */
    if (S.r)    /* Only include smoothness if it's there ! */
    { Sa=initmat(a.r,1L);
      matmult(Sa,S,a,0,0);
      mad(gk,gk,Sa,1.0,Y.r*ro);
      freemat(Sa);
    }
    matmult(G,JW,J,0,0);
    temp=initmat(J.c,J.c);
    
    freemat(temp);
    if (S.r&&ro>0.0) mad(G,G,S,1.0,Y.r*ro);   /* Only include smoothness if its there ! */
    for (i=0;i<S.r;i++) G.M[i][i]+=lam;
    freemat(JW);
  }  // end of Jacobian calculation
  // now calculate value of objective function....
  if (W.c==1L) /* then W is diagonal */
  for (i=0;i<Y.r;i++)  /* the RSS term */
  { if (!getGJ&&e.V[i]*sqrt(W.V[i])>1e150/Y.r)
    { if (res<1e300) res=1e300;break;} /* avoids overflow on silly steps */
    res+=W.V[i]*e.V[i]*e.V[i];
  }
  else    /* W is full matrix */
  { rss=initmat(1L,1L);
    multi(3,rss,e,W,e,1,0,0);
    res=rss.V[0];
    freemat(rss);
  }
  freemat(e);
  if (S.r)    /* Only include smoothness if it's there ! */
  { e=initmat(1L,1L);
    multi(3,e,a,S,a,1,0,0);
    res+=Y.r*ro*e.M[0][0];freemat(e);
  }
  res/=2.0; // the objective is finished....
  if (getGJ==2)
  { qpoutdata.obj=res;
  }
  if ((getGJ)&&(getGJ!=2)) /* time to directly FD the objective and perform minimum test */
  { *minimum=1;
    for (i=0;i<a.r;i++)
    { gk.V[i]=(resp.V[i]-resb.V[i])/(2.0*da.V[i]);
      resp.V[i]=(resp.V[i]-res)/da.V[i];
      resb.V[i]=(res-resb.V[i])/da.V[i];
      /* resp and resp, now transformed into forward and backward grad vectors */
    }

    pgf=getprojectedgradient(resp,Q,tk);
    pgb=getprojectedgradient(resb,Q,tk);
    /* the gradients have now been projected into the null space Z */
    for (i=0;i<pgf.r;i++)
    if ( pgf.V[i]*pgb.V[i] > 0.0 ) *minimum=0; /* because gradients were of same sign */
    freemat(resp);freemat(resb);freemat(pgf);freemat(pgb);
  }
  freemat(D);
  return(res);
}


void LSQPaddcon(matrix *Ain,matrix *Q,matrix *T,matrix *Rf,matrix *Py,matrix *PX,
                matrix *s,matrix *c,int sth)
/* Adds the sth row of Ain to the avtive set, updates Q and T using a sequence
   of T->c-T->r-1 Givens rotations from the right, coefficients of which are
   stored in s and c. The ith rotation acts on elements (i,i+1) (i=0,1,...).
   Updates the upper triangular (lower left 0) matrix Rf = PXQ, by applying the
   above Givens rotations from the right (updating Q) which introduces elements
   on the sub diagonal of Rf; these subdiaogonal elements are then zeroed using
   Givens rotations from the left, by way of updating P. Hence Py and PX can
   be updated at the same time. */

{ matrix a;
  double RfMji,*RfV,*RfV1,ss,cc,r,x1,x2;
  int i,j,k;
  a.V=Ain->M[sth];a.r=Ain->c;a.c=1L; // vector containing sth constraint
  s->r=T->c-T->r-1;  // number of Givens rotations about to be returned
  // Update Q and T and return Givens rotations required to do so ....
  GivensAddconQT(Q,T,&a,s,c);
  // Now apply the rotations from the right to Rf....
  for (i=0;i<s->r;i++)
  { cc=c->V[i];ss=s->V[i];
    k=i+2;if (k>Rf->r) k--;
    for (j=0;j<k;j++)
    { RfV=Rf->M[j];
      RfMji=RfV[i];
      RfV[i]=cc*RfMji+ss*RfV[i+1];
      RfV[i+1]=ss*RfMji - cc*RfV[i+1];
    }
  }
  /* Now zero the subdiagonal elements that have just been introduced, and apply
     the Givens rotations from the left, used to do this, to Py and PX */
  for (i=0;i<s->r;i++) // work through the extra subdiagonal elements
  { // this will act on rows i and i+1, zeroing i+1,i - work out coefficients
    RfV=Rf->M[i];RfV1=Rf->M[i+1];
    x1=RfV[i];x2=RfV1[i];
    r=sqrt(x1*x1+x2*x2);ss=x2/r;cc=x1/r;
    Rf->M[i][i]=r;Rf->M[i+1][i]=0.0;
    for (j=i+1;j<Rf->c;j++) // apply rotation along the rows
    { x1=RfV[j];x2=RfV1[j];
      RfV[j]=cc*x1+ss*x2;
      RfV1[j]=ss*x1-cc*x2;
    }
    // Apply this rotation to  Py
    x1=Py->V[i];x2=Py->V[i+1];
    Py->V[i]=cc*x1+ss*x2;
    Py->V[i+1]=ss*x1-cc*x2;
    // and apply the same rotation to PX
    for (j=0;j<PX->c;j++) // work along the rows
    { x1=PX->M[i][j];x2=PX->M[i+1][j];
      PX->M[i][j]=cc*x1+ss*x2;
      PX->M[i+1][j]=ss*x1-cc*x2;
    }
  }
}


int LSQPstep(int *ignore,matrix *Ain,matrix *b,matrix *p1,matrix *p,matrix *pk)

/* This is the stepping routine for the constrained least squares fitting
   routine. It should be faster than step, but more or less does the same thing.
   The return value is -1 for a minimum, otherwise the row of Ain containing the
   constraint to add is returned.
   ignore[i] should be set to 1 to ignore row i of Ain, to 0 to include it.

   Starting from p a step is taken to p+pk, if this would violate any
   constraints in the working set, then a step is taken from p along pk, to the
   closest constraint. The constraints are Ain p >= b.

   On exit: p1 contains the new parameter vector; the return value is -1 for a
            minimum, otherwise the constraint that needs to be added (i.e. the
            row of Ain)
*/
{ double Ap1,ap,apk,alpha,alphamin,*AV,*pV,*p1V,*pkV;
  int imin,i,j;
  alphamin=1.0;imin= -1;
  p1V=p1->V;pV=p->V;pkV=pk->V;
  for (i=0;i<p->r;i++) p1V[i]=pV[i]+pkV[i]; // step all the way to minimum
  for (i=0;i<Ain->r;i++)         // work through the constraints
  { AV=Ain->M[i];
    if (!ignore[i])     // skip any already in working set
    { Ap1=0.0;
      for (j=0;j<Ain->c;j++) Ap1+=AV[j]*p1V[j]; // form  A p1 = A(p+pk)
      if ((b->V[i]-Ap1)>0.0) // does p+pk violate the ith constraint?
      { ap=0.0;apk=0.0;        // working out quantities needed to find distance to constraint from p
	     for (j=0;j<Ain->c;j++)
	     { ap+=AV[j]*pV[j];
	       apk+=AV[j]*pkV[j];
	     }
	     if (fabs(apk)>0.0)
	     { alpha=(b->V[i]-ap)/apk; // p + alpha*pk is on the ith constraint
          if (alpha<alphamin)    // if this is the closest constraint to p, record the fact
	       { alphamin=max(0.0,alpha);imin=i;
            for (j=0;j<p->r;j++) p1V[j]=pV[j]+alphamin*pkV[j]; /* 2/2/97 - avoids distance calc for all that would violate full step */
          }
        }
      }
    }
  }
  return(imin);

}


void LSQPdelcon(matrix *Q,matrix *T,matrix *Rf,matrix *Py,matrix *PX,int sth)

/* This routine deletes row s from the active set matrix, A, say, where
   AQ=[0,T] and T is reverse lower triangular (upper left is zero). It updates
   Q and T using Givens rotations from the right. These rotations induce
   subdiagonal elements in Rf=PXQ from column Rf->c-T->r to column Rf->c-s+2,
   where T->r is the number of active constraints before deletion.
   Note however that the Givens rotations that update Q and T, have to be
   applied in an order that works back through the columns of Rf=PXQ - this has
   the potential to produce a triangular block of elements below the diagonal,
   if they are all applied before applying the update rotations for P. Hence the
   appropriate thing to do is to apply each rotation from the left to Rf, as it
   is obtained and then work out the Givens rotation from the left that will
   immediately zero the unwanted subdiagonal element - this being an update of
   P, which should immediately be applied to PX and Py.
*/

{ int i,j,colj,coli,k,Tr,Tc,Qr,T1r,T1c;
  double r,s,c,xi,xj,**TM,**QM,**T1M,*TV,*QV,*T1V,*RfV,*RfV1;
  Tr=T->r;TM=T->M;QM=Q->M;Tc=T->c;Qr=Q->r;
  for (i=sth+1;i<Tr;i++)   // work down the rows from the deletion point (row not removed yet)
  { coli=Tc-i-1;colj=Tc-i;    // coli is zeroed - colj=coli+1
    xi=TM[i][coli];xj=TM[i][colj];
    r=xi*xi+xj*xj;
    r=sqrt(r);
    s=xi/r;c=xj/r;         // Givens coefficients
    for (j=i;j<Tr;j++)     // Apply rotation to T
    { TV=TM[j];
      xi=TV[coli];
      TV[coli]= -c*xi+s*TV[colj];
      TV[colj]=s*xi+c*TV[colj];
    }
    for (j=0;j<Qr;j++)   // Apply rotation to Q
    { QV=QM[j];
      xi=QV[coli];
      QV[coli]= -c*xi+s*QV[colj];
      QV[colj]=s*xi+c*QV[colj];
    }
    // Now the awkward bit - the rotation must be applied to Rf=PXQ
    for (j=0;j<=colj;j++) // working down to the diagonal (and just below!)
    { RfV=Rf->M[j];       // row to apply rotation to
      xi=RfV[coli];
      RfV[coli]= -c*xi+s*RfV[colj];
      RfV[colj]=s*xi+c*RfV[colj];
    } // There is now an unwanted element at row colj, column coli
    // Calculate a rotation from the right that will zero the extra element
    xi=Rf->M[coli][coli];xj=Rf->M[colj][coli]; // xj to be zeroed
    r=sqrt(xi*xi+xj*xj);
    s=xj/r;c=xi/r;         // Givens coefficients to zero xj into xi
    Rf->M[coli][coli]=r;Rf->M[colj][coli]=0.0;
    // Now apply to rest of row from column colj (column coli already done)
    RfV=Rf->M[coli];RfV1=Rf->M[colj];
    for (j=colj;j<Rf->c;j++)
    { xi=RfV[j];xj=RfV1[j];
      RfV[j]=c*xi+s*xj;
      RfV1[j]=s*xi-c*xj;
    }
    // And apply this rotation from the right to Py and PX
    // Apply this rotation to  Py
    xi=Py->V[coli];xj=Py->V[colj];
    Py->V[coli]=c*xi+s*xj;
    Py->V[colj]=s*xi-c*xj;
    // and apply the same rotation to PX
    for (j=0;j<PX->c;j++) // work along the rows
    { xi=PX->M[coli][j];xj=PX->M[colj][j];
      PX->M[coli][j]=c*xi+s*xj;
      PX->M[colj][j]=s*xi-c*xj;
    }
  }
  // Now actually remove the extra row from T - this could be done awefully efficiently
  // by shuffling the pointers to rows, but it would probably end in tears, so I haven't
  T->r--;T1M=T->M;T1r=T->r;T1c=T->c;
  for (k=0;k<T1r;k++)
  { T1V=TM[k];TV=TM[k];
    for (j=0;j<T1c-k-1;j++) T1V[j]=0.0;
    for (j=T1c-k-1;j<T1c;j++)
    if (k<sth) T1V[j]=TV[j];
    else T1V[j]=TM[k+1][j];
  }
}



int LSQPlagrange(matrix *X,matrix *Q,matrix *T,matrix *p,matrix *Xy,matrix *p1,
                 matrix *y1,int *fixed, int fixed_cons)

/* This routine attempts to find the lagrange multipliers associated with the
   currently active constraints (assuming that we're at a minimum in the current
   null space). If the Active constraint matrix is A then Ap=b where b is a set
   of constants. Furthermore AQ=[0,T] where T is reverse lower triangular
   (zero at upper left). The Lagrange multipliers, l, should satisfy: A'l=g
   where g is the gradient of the quadratic form at p, i.e. X'Xp-X'y.
   (Unfortunately I can't figure out a way of avoiding explicit formation of
   X'Xp....) So, l'A=g' => l'[0,T]=g'Q, and to find l, solve l'T=x, where x is
   the last tk=T->r rows of g'Q - this also yields the minimum of ||A'l-g||,
   which is appropriate.

   Note that T passed to the routine actually contains [0,T] and the first
   fixed_cons rows of T relate to the fixed constraints (if any).

   p1 and y1 are workspace matrices of length p->r and X->r respectively

   The routine returns -1 if there are no -ve multiplier estimates, otherwise it
   returns the index of *Inequlity* constraint with the most negative one.

   fixed[i] is set to 1 if the corresponding inequlity constraint is to be
   left in the active set regardless of lagrange multiplier - this is part
   of a strategy to avoid repeatedly deleting constraints wrongly.
*/

{ int i,j,tk;
  double x;
  tk=T->r;
  vmult(X,p,y1,0);  // form y1= Xp
  vmult(X,y1,p1,1); // form p1 = X'Xp
  for (i=0;i<p1->r;i++) p1->V[i]+= -Xy->V[i]; // form p1 = g = X'Xp - X'y
  // now create the last tk=T->r elements of g'Q and store in y1
  for (i=0;i<tk;i++)
  { y1->V[i]=0.0;
    for (j=0;j<Q->r;j++) y1->V[i]+=p1->V[j]*Q->M[j][Q->c-tk+i];
  }
  // Now solve l'T=g'Q (where first tk rows of y1 contain g'Q)....
  for (i=tk-1;i>=fixed_cons;i--) // work down through the the lagrange multipliers
  { x=0.0;for (j=i+1;j<tk;j++) x+=p1->V[j]*T->M[j][T->c-i-1];
    if (T->M[i][T->c-i-1]!=0.0) p1->V[i]=(y1->V[tk-i-1]-x)/T->M[i][T->c-i-1];else p1->V[i]=0.0;
  }
  // Now look for the most negative multiplier for an inequlity constraint
  x=0.0;j=-1;
  for (i=fixed_cons;i<tk;i++)
  if ((!fixed[i-fixed_cons])&&(p1->V[i]<x)) { j=i;x=p1->V[i];}
//  if (j==-1) if (p1->V[i]<x) { j=i;x=p1->V[i];} // only delete last constraint added if it has only -ve multiplier
  if (j!=-1) j -= fixed_cons;
  return(j); // returns index of inequality constraint to delete
}


/***************************************************************************/
/* Main Public Routines.                                                   */
/***************************************************************************/

void QPCLS(matrix *Z,matrix *X, matrix *p, matrix *y,matrix *Ain,matrix *b,matrix *Af,int *active)

/* This routine aims to fit linearly constrained least squares problems of the
   form:
           min ||Xp-y||^2    subject to Ain p>=b and Af p = constant

   *without* forming X'X directly.
   By suitable redefinition of X and y it's easy to perform weighted and/or
   penalized regressions using this routine......

   The routine uses working matrices T, Q, Rf, PX
   and working vectors Py, Xy, pz, pk, Pd
   In addition the routine creates workspace for the various service routines
   called by it, in order to avoid excessive memory allocation and deallocation.

   The Algorithm is as follows...
   1. Form the QT factorisation of Af: Af Q = [0,T] T reverse lower triangular
      (i.e top left 0). Q contains column bases for the null and range spaces of
      Af: Q=[Z,Y]. Apply Q to X to get XQ(=[XZ,XY]). Form Q explicitly to give
      ready access to the null space basis Z.
   2. Perform QR decomposition: XQ = P'Rf where P is orthogonal and Rf is upper
      triangular (lower left 0). Hence Rf= PXQ=[PXZ,PXY], as required.
      Apply P to y to get Py. Apply P to X to get PX.
   3. Form Pd = Py-PXp, and solve: minimise || R pz - Pd ||^2, where R is the
      first p->r-tk-Af->r rows and columns of Rf. Solution occurs when R pz=x and
      x is the first p->r - tk - Af->r rows of Pd. (Note that Gill et al. get
      the sign wrong for Pd.)
   4. Evaluate pk=Z pz, and step along it to minimum (goto 6.) or constraint.
   5. Add constraint to working set: update QT factorisation; update Rf; update
      Py and PX. Return to 3.
   6. Evaluate Lagrange multipliers l where Ac'l=g and g=X'Xp-X'y - Ac is the
      active constraint matrix. Clearly g involves X'X, which is unfortunate,
      but I can't figure out a way around it - however, it is only the signs of
      l that matter, so hopefully this is not critical. If multipliers are all
      +ve goto 8. otherwise proceed....
   7. Delete the constraint with the most -ve multiplier, updating Q, T, Rf, Py
      and PX at the same time. Return to 3.
   8. Convergence! A minimum has been achieved. Free the workspace matrices and
      vectors and the indexing arrays, obtain Z, and return.


   On exit active[] contains the number of active inequlity constraints in active[0], 
   and the row number of these constraints in Ain in the remaining elements of
   active[], active must be initialized to length p.r+1 on entry.

   See documentation in service routines:
   LSQPlagrange(); LSQPaddcon(); LSQPdelcon(); (above)
   Rsolv() (in matrix.c)
   for further details on steps 6, 5, 7 and 3.
   The approach is taken from Gill, Murray and Wright (1981) Practical
   Optimization page 180-181 Section 5.3.3. (But note wrong signs on p181
   first display equation and definition of d_k)

   Routine has been tested against less numerically stable alternative using QP().

   20/11/99

*/

{ matrix Q,T,Rf,PX,Py,a,P,p1,s,c,Xy,y1,u,Pd,pz,pk;
  int k,i,j,tk,*I,*ignore,iter=0,*fixed,*delog,maxdel=100;
  double x;
  I=(int *)calloc((size_t) p->r,sizeof(int)); // I[i] is the row of Ain containing ith active constraint
  fixed=(int *)calloc((size_t) p->r,sizeof(int)); // fixed[i] is set to 1 when the corresponding inequlity constraint is to be left in regardless of l.m. estimate
  ignore=(int *)calloc((size_t) Ain->r,sizeof(int)); // ignore[i] is 1 if ith row of Ain is in active set, 0 otherwise
  delog=(int *)calloc((size_t) Ain->r,sizeof(int)); // counts up number of times a constraint is deleted
  p1=initmat(p->r,1L);    // a working space vector for stepping & lagrange
  y1=initmat(y->r,1L);    // a work space vector for lagrange
  s=initmat(p->r,1L);c=initmat(p->r,1L); // working space vectors for Givens rotation
  Xy=initmat(p->r,1L);     // vector storing X'y for use in lagrange multiplier calculation
  vmult(X,y,&Xy,1);      // form X'y
  Rf=initmat(X->r,X->c);  // Rf=PXQ, where P and Q are orthogonal
  mcopy(X,&Rf);          // initialize Rf while P and Q are identity matrices
  T=initmat(p->r,p->r);   // initialised to max possible size
  Q=initmat(p->r,p->r);   // required for access to Z for null space to full space transform
  // initialize Q, T and Rf using fixed constraints (if any) ....
  for (i=0;i<p->r;i++) for (j=0;j<p->r;j++) Q.M[i][j]=0.0;
  for (i=0;i<p->r;i++) Q.M[i][i]=1.0;
  T.r=0L;a.r=1L;a.c=Af->c;
  for (i=0;i<Af->r;i++)
  { a.V=Af->M[i];
    T=addconQT(&Q,T,a,&u); // adding constraint from Af to working set
    Hmult(Rf,u);           // updating Rf (=XQ, at present)
    freemat(u);            // freeing u created by addconQT()
  }
  // Now Form Rf, proper. i.e. PXQ, using QR factorization
  P=initmat(Rf.c,Rf.r);
  QR(&P,&Rf);   // Rf now contains Rf=PXQ   (on entry it contained XQ)
  Py=initmat(y->r,1L);mcopy(y,&Py);
  OrthoMult(&P,&Py,0,(int)P.r,0,1,1); // Form Py
  PX=initmat(X->r,X->c);mcopy(X,&PX);
  OrthoMult(&P,&PX,0,(int)P.r,0,1,1); // Form PX
  freemat(P); // no longer needed
  P=initmat(b->r,1L); // used solely for feasibility checking
  Pd=initmat(y->r,1L);pz=initmat(p->r,1L);pk=initmat(p->r,1L);
  tk=0;             // The number of inequality constraints currently active
  printf("\nLSQ");
  while(1)
  { iter++;
    // Form Pd=Py-PXp and minimize ||R pz - Pd||
    vmult(&PX,p,&Pd,0); // Pd = PXp
    for (i=0;i<Pd.r;i++) Pd.V[i] = Py.V[i]-Pd.V[i]; // Pd=P(y-Xp)
    Rf.c=Rf.r=p->r-tk-Af->r; // Restrict attention to QR factor of PXZ
    for (i=0;i<Rf.c;i++) if (Rf.M[i][i]==0.0) ErrorMessage("QPCLS - Rank deficiency in model",1);
    Rsolv(&Rf,&pz,&Pd);  // solve R pz= Pd for pz - search direction in null space
    Rf.r=X->r;Rf.c=X->c; // Restore Rf
    pz.r=p->r-tk-Af->r;
    // Find pk = Z pz, the search direction
    for (i=0;i<pk.r;i++)
    { pk.V[i]=0.0; for (j=0;j<pz.r;j++) pk.V[i]+=Q.M[i][j]*pz.V[j];}
    // Take a step from p along pk to minimum or a constraint ...
    k=LSQPstep(ignore,Ain,b,&p1,p,&pk);   // s is the constraint to include or -1
    mcopy(&p1,p); // updating the parameter vector
    if (k>-1) // add a constraint to the working set and update Rf, Py and PX
    { I[tk]=k;ignore[k]=1; // keeping track of what's in working set
      LSQPaddcon(Ain,&Q,&T,&Rf,&Py,&PX,&s,&c,k);tk++;
      if (delog[k]>maxdel)
      fixed[tk-1]=1;
      printf("+");
    } else   // it's a minimum - check lagrange multipliers
    { k=LSQPlagrange(X,&Q,&T,p,&Xy,&p1,&y1,fixed,(int)Af->r);
      if (k>-1) // then a constraint must be deleted
      { LSQPdelcon(&Q,&T,&Rf,&Py,&PX,k+(int)Af->r);  // the Af.r added to k ensures that correct row of T deleted
        printf("-");
        // update the fixed constraint list
        { for (i=k;i<tk-1;i++)
          fixed[i]=fixed[i+1];
        }
        tk--;
        if (k>-1) // updating indexing arrays
        { ignore[I[k]]=0;
          delog[I[k]]++;
          for (i=k;i<tk;i++) I[i]=I[i+1];
        }
      } else  // routine has arrived at a minimum
      { // feasibility check.....
        matmult(P,*Ain,*p,0,0);
        x=0.0;for (i=0;i<c.r;i++) if (P.V[i]-b->V[i]<x) x=P.V[i]-b->V[i];
        printf("P\n Worst feasibility violation %g",x);
        // create Z - this version is a full null space matrix, rather than sequence of rotations
        *Z=Q; Z->c -= tk;
        // copy active constraint information to active 
        active[0]=tk;
        for (i=0;i<tk;i++) active[i+1]=I[i]; 
        // free memory
        freemat(T);freemat(Rf);freemat(PX);freemat(Py);freemat(p1);freemat(y1);
        freemat(s);freemat(c);freemat(Xy);freemat(Pd);freemat(pz);freemat(pk);
        free(I);free(ignore);freemat(P);free(fixed);
        // return
        return;
      }
    }
  }
}


void PCLS(matrix *X,matrix *p,matrix *y,matrix *w,matrix *Ain,matrix *b,
          matrix *Af,matrix *H,matrix *S,int *off,double *theta,int m,int *active)

/* Routine for Penalized Constrained Least Squares problems.
   PCLS() is an interface routine for QPCLS for solving the general problem class:

             minimise    ||W^0.5(Xp-y)||^2 + p'Bp
             subject to   Ain p >=b  &  Af p = "a constant vector"

   ...where B is a sum of m S[i] matrices multiplied by smoothing parameters
   theta[i]. The S[i]'s may be smaller than B (p->r by p->r) so S[i] is
   added to B starting at row and column off[i]. B must be non-negative
   definite, which means that the S[k]'s must be. W is the diagnoal matrix
   having w on the leading diagonal. In many applications the ith element of w
   will be the reciprocal of the variance associated with the ith element of i.

   The routine uses the fact that the problem can be re-written as....

     minimise || Fp - z ||^2 Subject to Ain p >= b Af p = constant

   ... where F = [ X'W^0.5, B^0.5']'  and z = [y'W^0.5, 0]'. This rewrite is
   performed and then QPCLS is called to obtain the solution.

   If H->r==y->r on entry, then an influence (or "hat") matrix is returned in H.
   At present the calculation of H is inefficient and none too stable.

   On exit active[] contains a list of the active inequlity constraints in elements 
   1->active[0]. This array should be initialized to length p.r+1 on entry.

   20/11/99

*/

{ int i,j,k;
  matrix z,F,W,Z,B,C;
  double x,xx;
  // form transformed data vector z
  z=initmat(y->r+p->r,1L);W=initmat(w->r,1L);
  for (i=0;i<y->r;i++) { W.V[i]=sqrt(w->V[i]);z.V[i]=W.V[i]*y->V[i];}
  // form transformed design matrix X
  F=initmat(z.r,p->r);
  // first put in W^0.5X
  for (i=0;i<X->r;i++) for (j=0;j<X->c;j++) F.M[i][j]=W.V[i]*X->M[i][j];
  // add up the Penalties
  B=initmat(p->r,p->r);
  for (k=0;k<m;k++) for (i=0;i<S[k].r;i++) for (j=0;j<S[k].c;j++)
  B.M[i+off[k]][j+off[k]]=theta[k]*S[k].M[i][j];
  // and find a square root of B.....
  root(&B,&C,0.0);

  // copy C' into the last p->r rows of F
  for (i=0;i<C.r;i++) for (j=0;j<C.c;j++) F.M[j+X->r][i]=C.M[i][j];
  freemat(B);freemat(C);
  printf("\ncond(F)=%g",condition(F));
  // Which means that the problem is now in a form where QPCLS can solve it....
  QPCLS(&Z,&F,p,&z,Ain,b,Af,active); // note that at present Z is full not HH
  if (H->r==y->r) // then calculate the influence matrix XZ(Z'F'FZ)^{-1}Z'X'W
  { freemat(W);W=initmat(Z.c,Z.c);
    multi(4,W,Z,F,F,Z,1,1,0,0);invert(&W); // Wildly inefficient!!
    multi(5,*H,*X,Z,W,Z,*X,0,0,0,1,1);      // ditto
    for (i=0;i<H->r;i++) for (j=0;j<H->c;j++) H->M[i][j]*=w->V[j];
  }
  // working out value of objective at minimum
  B=initmat(z.r,1L);matmult(B,F,*p,0,0);
  xx=0.0;for (i=0;i<z.r;i++) { x=B.V[i]-z.V[i];xx+=x*x;}
  printf("\nObjective at Minimum = %g\n",xx); freemat(B);
  // freeing storage ....
  freemat(F);freemat(z);freemat(W);freemat(Z);
}


long QP(ZGZ,Z,Af,Ain,b,y,G,cT,confn,conon,reset,coninfile,maxviol)
matrix *ZGZ,*Z,Af,Ain,b,y,G,cT;char *confn;int conon,reset,coninfile;
double  *maxviol;

/****************************************************************************
 Solves the quadratic programming problem:

	    minimise cTy + 0.5y'Gy

	    subject to Ain y >= b  and Af y=b

 using an active set method as described in Gill, Murray and Wright (1981)

 VARIABLES PASSED AS POINTERS

 ZGZ contains the matrix Z'GZ. It is initialised within the routine. Do not
     free between calls. Unlimited modification between calls is allowed.
  Z  will contain the basis for the null space at the solution. Z is created
     by the routine, and re-created on subsequent calls. Do not free between
     calls; alteration is ok. 25/11/97 modified to output Z as householders
     if fullZ=0 below. Set fullZ=1 for old version. Each row of Z contains one
     of the householder rotation vectors required to construct the null space of
     the active constraints.

     NOTE that with efficient Z storage, Z is NOT what was actually used to
     produce ZGZ. If you form Z'GZ explicitly you will not get exactly ZGZ.
     In itself this doesn't matter, but if you use ZGZ in something like
     Z(Z'GZ)^{-1}Z' will NOT get the expected answer! If in
     doubt, reform ZGZ outside this routine - it is only not re-formed here
     for efficiency reasons!

 VARIABLES PASSED BY VALUE

  Af is the matrix for the fixed constraints. If there are no fixed
     constraints then set Af.r=0L.
  Ain is the matrix for inequality constraints. Need not be initialized if
      inequality constraints are to be supplied in a file.
  b  is the vector of inequality constraints
  y  is the vector w.r.t. which minimisation is taking place. It must be
     initialized so that none of the constraints are violated.
  G  is the matrix in the quadratic form to be minimised
  cT is the vector in the quadratic form to be minimised
  confn is the string containing the name of the constraint file if Ain is
	not being used
  conon is set to 1 if the inequality constraints are to be imposed ) if not
  reset is set to 1 when the routine is first called and whenever the old
	null space is to be discarded. It must be set to 1 if the constraint
	matrices have changed or the size of the problem has changed. Set
	to zero to start with the same null space as last time. Set to 2
	if only cT has changed.
  coninfile set to 1 if the inequality constraints are in the file whose
	    name is in confn. Set to 0 if the inequlity constraints are in
	    Ain.
  maxviol returns the maximum constraint violation

  returns -1 on failure through lack of +ve definiteness
******************************************************************************/


{ long i,s,j,constrainedmin=0L;
  double k;
  static int iterate=0,first=1,fullZ=0,
#ifdef QP_TEXT_OUT
  op=1;
#else
  op=0;
#endif
  static matrix Q,T,I,Ac,R;
  static long tk;
  matrix gk,pk,a,u;
  static matrix delog, /* stores number of deletions of a constraint to
			  eliminate cycling */
  ZGZp,Zp;      /* These store old matrices - which are freed on reset */
  if (op) printf("\nQ");

  gk=initmat(G.r,1L);pk=initmat(G.r,1L);
  if (reset==1)
  { tk=Af.r;
    if (!first) { freemat(Q);freemat(T);freemat(I);}
    Q=initmat(G.r,G.c);
    T=initmat(y.r,y.r); T.r=tk;
    I=initmat(y.r,1L);
  }
  if (!first&& reset!=2)
  { freemat(ZGZp);freemat(Ac);freemat(R);}
  if (first)
  { first=0;delog=initmat(b.r,1L);
  } else freemat(Zp);
  for (i=0;i<delog.r;i++) delog.V[i]=0.0;
  if (reset!=2)  /* G has changed */
  { Ac.M=NULL;
    getAc(coninfile,confn,Ain,Af,&Ac,I,tk);
    /* efficient forming of ZGZ */
    ZGZp=(*ZGZ)=initmat(G.r,G.c);
    for (i=0;i<G.r;i++) for (j=0;j<G.c;j++)
    { ZGZ->M[i][j]=G.M[i][j];Q.M[i][j]=0.0;}
    for (i=0;i<Q.r;i++) Q.M[i][i]=1.0;
    T.r=0L;a.r=1L;a.c=Ac.c;
    for (i=0;i<tk;i++)
    { a.V=Ac.M[i];
      T=addconQT(&Q,T,a,&u);
      QFHmult((*ZGZ),u);
      freemat(u);
      ZGZ->r--;ZGZ->c--;
    }
    R=initmat(ZGZ->r,ZGZ->r);
    if (!chol((*ZGZ),R,0,0)) /* failure through non +ve definiteness */
    { freemat(gk);freemat(pk);Zp=(*Z)=initmat(1L,1L);
      return(-1L);
    }
  }

  iterate++;
  if (op) printf(" %d ",iterate);
  while(1)
  { if (constrainedmin)
    { if (!conon) s= -1L;
      else s=lagrange(gk,Q,T,G,cT,y,I,delog,Af,1);
      if (s>=0)
      { k=(long)I.V[s];
	     if (op) {printf("-");fflush(stdout);}
	     deleteconstraint(ZGZ,gk,pk,&R,G,cT,Q,&Ac,I,&T,y,s,&tk,1,1);
	     I.V[tk]=k;/*tk++;*/ /* prevents a step of zero to the sth constraint */
	     s=step(coninfile,confn,Q,Ain,Af,I,pk,y,b,tk);/*tk--;*/
	     if (s>=0) delog.V[s]+=1.0;
      }
      else
      { if (fullZ)
        { Zp=(*Z)=initmat(G.r,G.c-tk);
	       for (i=0;i<Z->r;i++) for (j=0;j<Z->c;j++)
	       Z->M[i][j]=Q.M[i][j];
        } else /* efficient Z storage */
        { Zp=(*Z)=initmat(G.r,G.c);
          u=initmat(Ac.r,Ac.c);
          for (i=0;i<Ac.r;i++) for (j=0;j<Ac.c;j++) u.M[i][j]=Ac.M[i][j];
          QT(*Z,u,0);Z->r=Ac.r;
          freemat(u);
        }
	     /*freemat(Ac);freemat(R); no longer deleted to allow recalc. with new c */
	     freemat(gk);freemat(pk);
	     if (conon)
	     feasibility(coninfile,confn,Ain,y,b,maxviol);
	     if (op) {printf("P");fflush(stdout);}
#ifdef OS2
	     _heapmin();
#endif
        /* The following packs the redundent rows and columns used for ZGZ storage
           with zeros. This can be useful later when obtaining Influence matrices */

	     return(tk-Af.r);
      }
    } else
    { searchdirection(gk,pk,R,G,Q,cT,y,tk,1);
      if (conon) s=step(coninfile,confn,Q,Ain,Af,I,pk,y,b,tk);
      else for (i=0;i<y.r;i++) y.V[i]+=pk.V[i];
      if ((conon)&&(s>=0)) delog.V[s]+=1.0;
    }
    if ((s>=0)&&(conon))
    { if (op) {printf("+");fflush(stdout);}
      addconstraint(coninfile,confn,ZGZ,Ain,Af,&Ac,I,&R,&Q,&T,s,&tk,1);
      constrainedmin=0;
    } else constrainedmin=1;
  }
}




matrix getprojectedgradient(gk,Q,tk) matrix gk,Q;int tk;

/* returns a vector pz containing the gradient gk projected into the current
   null space, whose basis is given by the the first Q.c-tk columns of Q */

{ int i,j;
  matrix pz;
  pz=initmat(Q.c-tk,1L);
  for (i=0;i<Q.c-tk;i++)
  for (j=0;j<Q.r;j++) pz.V[i]+=Q.M[j][i]*gk.V[j]; /* Z'gk */
  return(pz);
}


void NonLindirection(Q,J,W,S,M,f,Y,p,pk,ro,lam,tk,QNcorr)
matrix Q,J,W,S,M,f,Y,p,pk;double ro,lam;long tk;int QNcorr;

/* produces a stabilised version of the search direction for NonLinLS:
   Let e=Y-f-Kp then....
   Algorithm as follows: The search direction is given by the solution of
   Z'(J'WJ+ro*S+M)Zp=Z'J'We - If J is badly conditioned J'WJ is evily
				   conditioned so....
   1) Find K such that J'WK= ro*S - can be done with a sneaky orthogonal
				    factorisation -see notinv() in matrix.c
   2) Find L such that J'WL=M     -      ditto
   3) Now the original system becomes:
      Z'J'W((J+K+L)Zp-e)=0
   4) The system in (2) is compatible and thus soluble by minimisation of
      ||(J+K+L)Zp-e|| for p.... This should be done by another orthogonal
			      factorization - leastsq() in matrix.c
			      OR using truncated SVD svdLS in matrix.c - the latter
			      estimates the rank of (J+K+L)Z and if it is possibly
			      not full rank, seeks the minimum length p.
   5) pk=Zp.
   ... fix applied 12/8/96 to include gradient due to smoothness on rhs of
   original system.
   - The matrix W may either be a vector of weights or a full inverse
     covariance matrix. The routine tests whether W.c=1L to decide.
*/

{ matrix JKZ,JW,B,L,K,e;
  long i,j;
  /* first form B=Y.r*ro*S and J'W - then find K */
  K=initmat(J.r,J.c);e=initmat(Y.r,1L);L=initmat(J.r,J.c);
  if (QNcorr||((ro!=0.0)&&(S.r)))
  { JW=initmat(J.c,J.r);
    if (W.c==1L)
    for (i=0;i<J.c;i++) for (j=0;j<J.r;j++) JW.M[i][j]=J.M[j][i]*W.V[j];
    else matmult(JW,J,W,1,0);
    if (QNcorr) /* get L so that J'WL=M */
    { notinv(JW,L,M);/* have to reform J'W */
      if (W.c==1L)
      for (i=0;i<J.c;i++) for (j=0;j<J.r;j++) JW.M[i][j]=J.M[j][i]*W.V[j];
      else matmult(JW,J,W,1,0);
    }
    if ((ro!=0.0)&&(S.r)) /* don't bother if ro zero */
    { B=initmat(S.r,S.c);
      for (i=0;i<S.r;i++) for (j=0;j<S.c;j++) B.M[i][j]=Y.r*ro*S.M[i][j];
      notinv(JW,K,B);
      freemat(B);
      matmult(e,K,p,0,0);
    }
    freemat(JW);
  }
  for (i=0;i<Y.r;i++) e.V[i]=Y.V[i]-f.V[i]-e.V[i];
  /* Now form (J+K)Z and -f to feed into leastsq() */
  Q.c-=tk; /*can be used as Z*/
  for (i=0;i<J.r;i++) for (j=0;j<J.c;j++) K.M[i][j]+=J.M[i][j]+L.M[i][j];
  JKZ=initmat(K.r,Q.c);
  matmult(JKZ,K,Q,0,0);
  p=initmat(JKZ.c,1L);
  svdLS(JKZ,p,e,1.0e-7);
  matmult(pk,Q,p,0,0);
  Q.c+=tk;
  freemat(JKZ);freemat(p);freemat(K);freemat(e);freemat(L);
}

double brent(F,ap,anew,prob_dat,fdum,J,G,Y,W,S,Q,A,Af,I,bc,pk,gk,ro,tk,gotone,sc,f0,tol)
int (*F)(matrix,matrix,matrix,matrix,matrix,void*,int,double);
matrix ap,anew,fdum,J,G,Y,W,S,Q,A,Af,I,bc,pk,gk;
void *prob_dat;
double ro,f0,tol;
long tk,*sc;
int *gotone;

/* this routine aims to mimise F() along the line of pk from a. 
   prob_dat is a void pointer to be passed to F() which points to a problem specific 
   structure required by F()

   On exit *gotone == 1 to indicate success, 0 to indicate failure and -1 to indicate user 
   termination. User termination is signalled from F() via objective() returning *minimum==-2
   Since this routine allocates no memory exiting on user termination is straightforward.

*/

{ static double xa,xb,xc,fa,fb,fc,a,b,d,etemp,fu,fv,fw,fx,p,q,r,tol1,tol2,u,v,w,x,xm,
	 e=0.0,Cgold=0.3819660,zeps=1.0e-10,z,length=1.0; // static for DEBUG
  long i,s;
  int iter,ok,qq;
  char str[200];
  matrix dum;
  dum.r=0L;dum.c=0L;
/* first deal with possibility that step has zero length */
  length=enorm(pk);
  if (length<1.0e-10*enorm(ap))
  { *sc=-1L;*gotone=0;
    return(f0);
  }
/* the next task is to take a unit step along pk */
  for (i=0;i<ap.r;i++) anew.V[i]=ap.V[i];
  s=step(0,"",Q,A,Af,I,pk,anew,bc,tk); /* takes a step to a+pk or the nearest constraint */
  if (s>=0) for (i=0;i<ap.r;i++) pk.V[i]=anew.V[i]-ap.V[i];  /* adjusts pk to the step actually taken */
  if (enorm(pk)<1.0e-10*enorm(ap))  /* checking that not trying to step straight through con */
  { *sc=s;*gotone=1;
    return(f0);
  }
  fb=objective(F,fdum,J,dum,anew,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,0,&qq);    /* was the step any good? */
  if (qq==-2) { *gotone=-1;return(1e300);} // signal user stop
  if (fb>=f0) /* step was no use so it must be contracted */
  { xb=1.0;xa=0.0;fa=f0;s=-1L;
    while ((fb>=fa)&&(xb>0.00000001))
    { xc=xb;fc=fb;xb/=1.2;
      for (i=0;i<pk.r;i++) { pk.V[i]/=1.2;anew.V[i]=pk.V[i]+ap.V[i];}  /* this step is less than the initial step => can not hit a constraint */
      fb=objective(F,fdum,J,dum,anew,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,0,&qq);
      if (qq==-2) { *gotone=-1;return(1e300);} // signal user stop
    }
    for (i=0;i<pk.r;i++) pk.V[i]*=1.2; /* this is for consistancy - so that pk contains step to point c */
    if (fb<fa) ok=1; else ok=0;  /* signal to proceed to brent search */
  } else  /* fb < f0 so continue downhill until a constraint or bracket minimum */
  { xa=0.0;fa=f0;xc=1.0;fc=fb;ok=1;
    while ((fc<=fb)&&(ok)) /* still heading downhill and not hit constraint */
    { if (s>=0)   /* then still heading downhill when last constraint encountered */
      { xb=0.999*xc+0.001*xa;
	     for (i=0;i<pk.r;i++) anew.V[i]=ap.V[i]+xb*pk.V[i]/xc; /* contracting step slightly hoping for min before constraint */
	     fb=objective(F,fdum,J,dum,anew,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,0,&qq);
         if (qq==-2) { *gotone=-1;return(1e300);} // signal user stop 
	     if ((fc<=fb)||(xc==0.0))
	     { ok=0;   /* signalling that a step to a constraint is required */
	       for (i=0;i<pk.r;i++) anew.V[i]=ap.V[i]+pk.V[i];
        }
      } else      /* no constraints yet, expand the step */
      { xb=xc;fb=fc;xc*=2.0;
	     for (i=0;i<pk.r;i++) { pk.V[i]*=2.0;anew.V[i]=ap.V[i];}
	     s=step(0,"",Q,A,Af,I,pk,anew,bc,tk);
	     if (s>=0)
	     { z=enorm(pk);
	       for (i=0;i<ap.r;i++) pk.V[i]=anew.V[i]-ap.V[i];
	       xc*=enorm(pk)/z;
	     }
	     fc=objective(F,fdum,J,dum,anew,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,0,&qq);
         if (qq==-2) { *gotone=-1;return(1e300);} // signal user stop 
        if (xc<=xb) { fb=fc;xb=xc;} // prevents rounding error invalid triplets
      }
      if (fc<=fb) /* still not a valid triplet */
      { xa=xb;fa=fb;}
    }
  }
  /* by this stage either ok==1 in which case there should be a valid triplet
     and Brents method should be applied, or ok==0: if there s>=0 then step to
     constraint, otherwise, failed to find a minimum */
  if (!ok)  /* returning straight away */
  { if (s>=0)       /* stopped by a constraint */
    { *sc=s;        /* the constraint number */
      *gotone=1;    /* signals a downward step */
      return(fc);   /* returning the new low */
    } else          /* could not reduce the objective */
    { *sc=-1L;        /* no constraint */
      *gotone=0;    /* signals NO downward step */
      return(f0);   /* returning the old low */
    }
  } else   /* line minimisation by Brents method (after Press et al. 1988) */
  { if ((fa<fb)||(fc<fb)||(xa>xb)||(xb>xc))
    { sprintf(str,
"Brent search initial triplet invalid\n\
 fa-fb=%g  fc-fb=%g\n\
 xc-xb=%g  xb-xa=%g",fa-fb,fc-fb,xc-xb,xb-xa);
      ErrorMessage(str,0); // DEBUG: error non-fatal to allow follow up
    }  /* checking that there is a valid triplet with which to start brent */
    x=w=v=xb;
    fw=fv=fx=fb;
    a=xa;b=xc;
    for (iter=1;iter<100;iter++)
    { xm=0.5*(a+b);
      tol2=2.0*(tol1=tol*fabs(x)+zeps);
      if (fabs(x-xm) <= (tol2-0.5*(b-a))) /* testing if finished */
      {
	    /* final return values */
	    if (xc==x) *sc=s; else *sc=-1L;
	    *gotone=1;
	    for (i=0;i<pk.r;i++) anew.V[i]=ap.V[i]+pk.V[i]*x/xc;
	    for (i=0;i<pk.r;i++) pk.V[i]=anew.V[i]-ap.V[i];
	    length=enorm(pk)/length;
	    return(fx);
      }
      if (fabs(e) >tol1)   /* then fit a parabola to the best so far and minimise */
      { r=(x-w)*(fx-fv);
	     q=(x-v)*(fx-fw);
	     p=(x-v)*q-(x-w)*r;
	     q=2.0*(q-r);
	     if (q>0.0) p = -p;
	     q=fabs(q);
	     etemp=e;e=d;
	     /* now test whether or not parabola will do */
	     if (fabs(p) >= fabs(0.5*q*etemp) || p <= q*(a-x) || p >= q*(b-x))
	     d=Cgold*(e=(x >= xm ? a-x : b-x ));    /* golden section */
	     else    /* parabolic */
	     { d=p/q;
	       u=x+d;
	       if (u-a < tol2 || b-u < tol2)
	       { if ((xm-x)>0.0) d=fabs(tol1); else d= -fabs(tol1);}
	      }
      } else
      { d=Cgold*(e=(x>= xm ? a-x : b-x ));
      }
      u=(fabs(d) >= tol1 ? x+d : x+ (d>0.0 ? fabs(tol1) : - fabs(tol1)));
      for (i=0;i<pk.r;i++) anew.V[i]=ap.V[i]+pk.V[i]*u/xc;
      fu=objective(F,fdum,J,dum,anew,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,0,&qq);
      if (qq==-2) { *gotone=-1;return(1e300);} // signal user stop
      if (fu<=fx)
      { if (u>= x) a=x; else b=x;
	     v=w;w=x;x=u;
	     fv=fw;fw=fx;fx=fu;
      } else
      { if (u<x) a=u; else b=u;
	     if (fu <=fw || w==x )
	     { v=w;w=u;fv=fw;fw=fu;}
        else if (fu<= fv || v==x || v==w)
	     { v=u;fv=fu;}
      }
    }
  }
  if (xc==x) *sc=s; else *sc=-1L;
  *gotone=1;
  for (i=0;i<pk.r;i++) anew.V[i]=ap.V[i]+pk.V[i]*x/xc;
  for (i=0;i<pk.r;i++) pk.V[i]=anew.V[i]-ap.V[i];
  return(fx);
}

int NonLinLS(F,J,Z,f,a,prob_dat,W,S,Y,A,Af,b,ro,maxviol,reset)
int (*F)(matrix,matrix,matrix,matrix,matrix,void*,int,double);
matrix J,*Z,f,a,W,S,Y,A,Af,b;double ro,*maxviol;void *prob_dat;
int reset;

/* This routine is intended to:
			       2
   minimise Sum W ( Y - F (a) )  + n*ro*a'Sa
		 i   i   i
   Subject to Aa>=b & Af a = const
   Where F is a non-linear function of the parameter vector a and it is
   possible to calculate:
	_                            _
       | dF /da   dF /da  .  .  .  .  |
       |   0   0    0   1             |
       | dF /da   dF /da  .  .  .  .  |
    J= |   1   0    1   1             |
       |                              |
       |   .        .                 |
       |   .        .                 |
       |_  .        .                _|

  The user must supply the routine
  void F(f,J,a,afix,getJ) matrix f,J,a,afix;int getJ;
  which returns the model values in f and if getJ!=1 the jacobian in J, using
  the parameters in a and afix.

  The Parameters
  F      pointer to function F described above.
  Coa    this is a matrix which will contain the posterior covariance matrix
	 of the parameter vector DIVIDED BY the error variance. It should be
	 initialised as an a.r by a.r matrix.
  J      this is a matrix containing the Jacobian on exit
  Z      pointer to the current null space basis. 
  
  f      vector of model approximations to Y.
  a      vector of variable parameters.
  afix   vector of parameters that aren't being fitted.
  W      weight matrix for the Y's.
  S      quadratic objective matrix (often a smoothness term)
  Y      the data vector.
  A      matrix in the inequality constraints Aa>=b.
  b      see above.
  ro     the 'smoothing parameter' controlling the trade off between the
	 least squares term and the quadratic objective.
  maxviol returns maximum constraint violation
  reset  is no longer used (set to 1 if the constraints have changed or you want to re-start
	        abandoning the old null space.)

  * returns 1 for user termination and 0 otherwise

  * Initializes Z and returns null space basis in it, as a series of Householder rotations 
    from which the null space basis can be efficiently reconstructed. Note that internally
    this routine represetns the null space basis explicitly, i.e. using a matrix the columns
    of which are a basis for the null space of the active constraints.
*/


{ long i,j,s,iter=0L,dels=0,adds=0,lastdeletion=-1L;
  double f0,f1,Tf=1e-6,fmin,fmin0,fmin1,interr,**hisa,*hisf;
  static int iterate=0,hislag=4,hisp;
  int mincount=0,itisamin=0,ok,gotone,maybe,tightfit=0,nosig,nscount=0;
  matrix G,Ac,R,gk,pz,da,dafd,pk,pk0,pk1,anew,anew0,anew1,dum,fdum,amin,M,ZGZ;
  matrix Q,T,I,delog; /* stores number of deletions of a constraint to
				eliminate cycling */
  static long tk=0L;
  dum.M=NULL;dum.r=0L;dum.c=0L;M.r=0L;
  dafd=initmat(a.r,a.c);
  if (tightfit) hislag=10; else hislag=4;
  amin=initmat(a.r,1L);
  if (!feasibility(0,"",A,a,b,maxviol))
  { ErrorMessage("Initial point not feasible.",0);}
  G=initmat(a.r,a.r);gk=initmat(G.r,1L);
  pk=initmat(G.r,1L);pk0=initmat(G.r,1L);pk1=initmat(G.r,1L);
  da=initmat(G.r,1L);fdum=initmat(f.r,f.c);
  anew=initmat(a.r,1L);anew0=initmat(a.r,1L);anew1=initmat(a.r,1L);
  tk=Af.r;

   
  Q=initmat(G.r,G.c);for (i=0;i<Q.r;i++) Q.M[i][i]=1.0;
  T=initmat(a.r,a.r);T.r=tk;
  I=initmat(a.r,1L);
 
  
  delog=initmat(b.r,1L);
  
  for (i=0;i<delog.r;i++) delog.V[i]=0.0;
  Ac.M=NULL;
  getAc(0,"",A,Af,&Ac,I,tk);
  /* if (tk==0)*/
  { for (i=0;i<tk;i++)
    for (j=0;j<a.r;j++) T.M[i][j]=Ac.M[i][j];
    QT(Q,T,1);
  }
  R.M=NULL;
  iterate++;

  fmin=f0=f1=objective(F,f,J,dafd,a,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,1,&itisamin);
  if (itisamin==-2) gotone=-1;
  ZGZ=initmat(a.r-tk,a.r-tk);
  /* initialising ring buffers to store lagged parameter vectors and objectives
     for use in termination criteria */
  hisa=(double **)calloc((size_t)hislag,sizeof(double *));
  for (i=0;i<hislag;i++)
  hisa[i]=(double *)calloc((size_t)a.r,sizeof(double));
  hisf=(double *)calloc((size_t)hislag,sizeof(double));
  hisp=0;
  for (i=0;i<a.r;i++) hisa[hisp][i]=a.V[i];hisf[hisp]=fmin;
  /***************************************************************************/
  /*  Start of fitting .......                                               */
   /***************************************************************************/
  while(itisamin!=-2) // i.e. while user not signalled to stop
  { iter++;
    feasibility(0,"",A,a,b,maxviol);gotone=0;

    /* finding GN direction first */

    NonLindirection(Q,J,W,S,M,f,Y,a,pk0,ro,0.0/*lam*/,tk,0);
    fmin0=brent(F,a,anew0,prob_dat,fdum,J,G,Y,W,S,Q,A,Af,I,b,pk0,gk,ro,tk,&gotone,&s,f0,0.001);
    if (gotone==-1) // user stop
    { break;
    }  
    ok=gotone;

    /* Now try steepest descent */
    pz=getprojectedgradient(gk,Q,tk);
    Q.c-=tk;matmult(pk1,Q,pz,0,0);Q.c+=tk;
    freemat(pz);
    for (i=0;i<pk.r;i++) pk1.V[i]*=-1.0; /* DOWNhill ! */
    fmin1=brent(F,a,anew1,prob_dat,fdum,J,G,Y,W,S,Q,A,Af,I,b,pk1,gk,ro,tk,&gotone,&s,f0,0.001);
    if (gotone==-1) // user stop
    { break;
    }
    if (ok||gotone==1) gotone=1;
    /* Which went lower ? */
    if (fmin0<fmin1) /* GN */
    { fmin=fmin0;for (i=0;i<pk.r;i++) { anew.V[i]=anew0.V[i];pk.V[i]=pk0.V[i];}}
    else    /* Steepest descent */
    { fmin=fmin1;for (i=0;i<pk.r;i++) { anew.V[i]=anew1.V[i];pk.V[i]=pk1.V[i];}}
    /* again if gotone=1 then amin contains best parameters, otherwise a */

    if (!gotone)
    { if (enorm(pk)) s=-1;
      for (i=0;i<a.r;i++) anew.V[i]=a.V[i];
    }

    if ((!gotone)||(fmin-f0<=0.0)||(tk==a.r))
    { if (s>=0)
      { addconstraint(0,"",&ZGZ,A,Af,&Ac,I,&R,&Q,&T,s,&tk,0);
        delog.V[s]+=1.0;adds++;
	     if (lastdeletion==s) /* some evidence of cycling */
	     if (itisamin) delog.V[s]+=DELMAX+200.0; /* cycling at proper minimum */
	     else delog.V[s]+=DELMAX+10.0; /* cycling only on approx min */
      }
      for (i=0;i<a.r;i++)
      { da.V[i]=anew.V[i]-a.V[i];
	     a.V[i]=anew.V[i];
      }
      /* ..... have to get Jacobian NOTE: should not be needed if
	 constraint deleted and (!gotone) */
      f1=objective(F,f,J,dafd,a,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,1,&itisamin);
      if (itisamin==-2) { gotone=-1;break;} // user stop 
     /* because of tolerance proportionality in the solution it is possible to
	 estimate the error in the objective resulting from integration error.
	 This quantity gives a lower bound on the change in objective which can
	 be counted as `improvement'. I have simply divided the tolerance by
	 10 and differenced the objective to get this error - of course a
	 correction of the order of 10% might be applied to this - but the order
	 of magnitude is all that matters here.... */
      itisamin=-1; // signal a 10 times more accurate evaluation
      fmin0=objective(F,f,J,dafd,a,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,0,&itisamin);
      if (itisamin==-2) { gotone=-1;break;} // user stop 
      interr=fabs(f1-fmin0); //.... use it to estimate error in objective
      //if (itisamin) itisamin=0; /*DEBUG ONLY */
      /* is drop significant ? */
      if (f0-f1<interr) { if (s<0) { nosig=1;nscount++;} else nosig=0;}
      else { nosig=0;nscount=0;}

      j=rank(J);

      if (itisamin) itisamin=2;
      /* update history buffers */
      if (s<0)   /* don't include steps to constraints */
      { hisp++; if (hisp==hislag) hisp=0;
	     for (i=0;i<a.r;i++) hisa[hisp][i]=a.V[i];hisf[hisp]=fmin;
      }

      if ((!itisamin)&&(s<0))
      { pz=getprojectedgradient(gk,Q,tk);
      /* Now test for minimum (in current space) */
	     ok=1;
	     for (i=0;i<da.r;i++) if (fabs(da.V[i])>sqrt(Tf)*fabs(a.V[i])) ok=0;
	     if ((ok)&&(f0-f1<Tf*(1.0+fabs(f1)))&&
	        (((!tk)&&(enorm(gk)<=pow(Tf,1.0/3.0)*(1.0+fabs(f1)) ))||
	        ((tk)&&(enorm(pz)<=pow(Tf,1.0/3.0)*enorm(gk)) ) ) )
	     itisamin=3; else itisamin=0;
        freemat(pz);
	     /* criteria based on minimal decrease over last hislag steps */
	     if (!tightfit)
	     { i=hisp+1;if (i==hislag) i=0; /* i is position of earliest a and f in buffers */
	       pz=initmat(a.r,1L);
          for (j=0;j<a.r;j++) pz.V[j]=a.V[j]-hisa[i][j];
	       if ( (enorm(pz)<0.0005*enorm(a)) && (fabs(hisf[i]-fmin)<0.0005*fabs(fmin)) ) /*insufficient progress over hislag steps*/
	       itisamin=4;
	       freemat(pz);
	       if ((!itisamin)&&(s<0))
	       { itisamin=5;
	         for (j=0;j<a.r;j++) if (fabs(dafd.V[j])<fabs(pk.V[j])) itisamin=0;
	       }
	     }

	/* above implements test that if every element of step is smaller than
	   every corresponding finite difference interval, then we may as well
	   stop - requires da to be returned from objective() - if every element
	   of da is zero then the condition is never met. Steps to constraints
	   are not tested, for obvious reasons.
	*/
      }
      if (!gotone) itisamin=1;
      qpoutdata.obj=fmin;qpoutdata.obj_change=f0-fmin;
      qpoutdata.constraints=tk;
      if (((fabs(f1-f0)<1e-11*fabs(f0+f1)))&&(s<0))
      mincount++;
      if (mincount>4) itisamin=6;
      f0=f1;
      /* If convergence is slowing down and there are constraints in the active
	 set, then it is worth getting approximate lagrange multipliers, before
	 full convergence criteria are met, since there is no point polishing
	 a minimum that will change with the deletion of an active constraint -
	 this is what the following code tries to establish - if maybe ends up
	 at 1 then the minimum in the current space is deemed `good enough'
	 to try working out LMs - note that the estimates are only linear and
	 the usual problems of estimating LMs away from the `true' minimum apply */
      if (!itisamin)
      { maybe=1; /* less rigorous test for minimum to allow approx LM estimates */
	     for (i=0;i<da.r;i++) if (fabs(da.V[i])>0.005*fabs(a.V[i])) maybe=0;
	     if ((fabs(f1-f0)>1e-5*fabs(f0+f1))) maybe=0;
	     if (tk==0) maybe=0;
	     if (s>=0) maybe=0;
      }
      if (nscount>3) itisamin=7;
      if ((!gotone)||(itisamin)||(tk==a.r)||(maybe)||nosig)
      { mincount=0;
	     if (itisamin) /* its a bonafide minimum, so unfix constraints that cycled, moved from after deleteconstraint 19/8/96 */
	     for (i=0;i<delog.r;i++)
	     if ((delog.V[i] > DELMAX + 10.0)&&(delog.V[i]< DELMAX +200.0))
	     delog.V[i] -= DELMAX + 10.0;
	     s=lagrange(gk,Q,T,G,dum,a,I,delog,Af,0); /* lo, a minimum */
	     if (s>=0)
	     { /* make a note of the deleted constraint */
	       lastdeletion=(long)round(I.V[round(s)]);dels++;
          deleteconstraint(&ZGZ,gk,pk,&R,G,dum,Q,&Ac,I,&T,a,s,&tk,0,0);
	     } else
	     if (itisamin)  /* full blown minimum, not just a trial of L-Ms */
	     { break; // leave while loop 
	     }
      }
    }
  }
  // Get null space in compact form suitable for efficient calcualtion outside routine
  (*Z)=initmat(Ac.c,Ac.c);
  QT(*Z,Ac,0);Z->r=Ac.r; 
  if (gotone==-1) // then user has signalled stop
  { j=1;
  } else   
  { j=0;
  }
  freemat(ZGZ);freemat(Q);freemat(T);freemat(I);
  freemat(amin);freemat(dafd); 
  freemat(fdum);freemat(delog);
  freemat(anew);freemat(anew0);freemat(anew1);freemat(Ac);freemat(da);
  freemat(gk);freemat(pk);freemat(pk0);freemat(pk1);freemat(G);
  for (i=0;i<hislag;i++) free(hisa[i]);free(hisa);free(hisf);
  return(j);
}

int CQN(F,J,Z,f,a,prob_dat,W,S,Y,A,Af,b,ro,maxviol,reset)
int (*F)(matrix,matrix,matrix,matrix,matrix,void*,int,double);
matrix J,*Z,f,a,W,S,Y,A,Af,b;double ro,*maxviol;void *prob_dat;
int reset;
/* Constrained Quasi-Newton fitter. Operates by maintaining the choleski
   decomposition of the approximate Hessian LL'=Z'BZ. Algorithm as follows:
   Variation 1:
   1. On constraint addition:
      i) Update Lz (the choleski factor of the current Approx Hessian)
	     in the space Z that is not yet orthogonal to the new active constraint.
	     Only do this if +ve definiteness can be maintained.
      ii) Update Lz by constraint addition.
   2. On constraint deletion update Lz by expanding by one row and column
      and adding a one in the last place on the leading diagonal.
   Variation 2, as variation 1 with the additional steps:
   1. On constraint addition update L, the choleski factor of the unconstrained
      Hessian.
   2. On deletion update Lz by augmentation consistent with the unconstrained
      Hessian approximation.

   Variation 1 is all that's implemented so far.......

   * Returns 1 on user termination, 0 otherwise

   * Initialises  Z (which must be freed elsewhere). Internally Z is stored as a 
     full matrix (i.e. its columns are a  basis for the null space of the 
     constraints). BUT on exit Z contains a sequence of Householder rotations for 
     reconstructing this null space efficiently.
*/

{ matrix at,dafd,G,Q,gk0,gk,pk,L,anew1,anew,I,fdum,sBs,u,v,dum,Ac,R,T,delog,pz,ZGZ;
  int i,j,iter,itisamin=0,gotone,decrease=1,notsig=0,nscount=0,trydown,update=1,
      iniJWJrS=0;
  long tk=0L,s=-1L;
  double f0,f1,sp,interr,fmin0;
  char errs[100];
  static int first=1;
  dum.M=NULL;dum.r=0L;dum.c=0L;
  ZGZ=initmat(a.r,a.r);
  G=initmat(a.r,a.r); /* dummy in this analysis */
  v=initmat(a.r,1L);
  /* Initialise constraint index vector etc.... */
  I=initmat(a.r,1L);delog=initmat(A.r,1L);
  gk=initmat(a.r,1L);pk=initmat(a.r,1L);anew=initmat(a.r,1L);
  fdum=initmat(f.r,f.c);gk0=initmat(a.r,1L);anew1=initmat(a.r,1L);
  dafd=initmat(a.r,1L); /* holds finite differences used for gradients */

  pz=initmat(a.r,1L);sBs=initmat(1L,1L); /* used in BFGS update */
  Q=initmat(a.r,a.r); /* initial columns are the basis of the null space */
  for (i=0;i<Q.r;i++) Q.M[i][i]=1.0;
  tk=Af.r;
  /* It's best to start the Quadratic approximation as I + ro * S, in order
     to make best of use of the available information, or to start using
     J'WJ+ro*S - BUT: this can lead to indefinite initial Hessian problems*/
  if (iniJWJrS)
  { f0=objective(F,f,J,dafd,a,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,1,&itisamin);
    if (itisamin==-2) gotone=-1;
  } 
  else
  for (i=0;i<G.r;i++)
  { if (ro>0.0) for (j=0;j<G.c;j++) G.M[i][j]=ro*S.M[i][j];
    G.M[i][i]+=1.0;
  }

  for (i=0;i<G.r;i++) for (j=0;j<G.c;j++) ZGZ.M[i][j]=G.M[i][j];

  // start QP insert to deal with fixed constraints
  Ac.M=NULL;
  getAc(0,"",A,Af,&Ac,I,tk);

  T=initmat(a.r,a.r);T.r=tk;
  T.r=0L;at.r=1L;at.c=Ac.c;
  for (i=0;i<tk;i++)
  { at.V=Ac.M[i];
    T=addconQT(&Q,T,at,&u);
    QFHmult(ZGZ,u);
    freemat(u);
    ZGZ.r--;ZGZ.c--;
  }
  /* initialise vector containing choleski factor of the Approx. Hessian */
  L=initmat(ZGZ.r,ZGZ.r);
  /* choleski factor of the Quadratic approximation matrix */
  choleski(ZGZ,L,0,0);
  u=initmat(a.r,1L);

  // end QP code

  /* check whether initial parameters meet the constraints */
  if (!feasibility(0,"",A,a,b,maxviol))
  { ErrorMessage("Initial point not feasible.",-1);}

  /* set up initial active set - old version before fixed constraints allowed */

  R.M=NULL;/*T=initmat(0L,a.r);*/
  if (gotone!=-2) // then user not stopped
  for (iter=0;iter<1000;iter++)
  { matrixintegritycheck();
    if (decrease) i=1; else i=0;
    // next evaluate the error in the objective
    itisamin=0;
    if (iter>0||iniJWJrS==0)
    f0=objective(F,f,J,dafd,a,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,(int)i,&itisamin);
    if (itisamin==-2) // user stop
    { gotone=-1; 
      break;
    } 
    itisamin=-1; // signal that increased accuracy required....
    fmin0=objective(F,f,J,dafd,a,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,0,&itisamin);
    if (itisamin==-2) // user stop
    { gotone=-1; 
      break;
    } 
    interr=fabs(f0-fmin0); // .... to work out accuracy (usually integration accuracy)
    qpoutdata.obj=f0;
    qpoutdata.constraints=tk;
    if ((iter)&&update) /* BFGS update (in current Null space) */
    { Q.c -= tk; /* treat as Z */
      pz.r -= tk;
      matmult(pz,Q,pk,1,0);
      multi(4,sBs,pz,L,L,pz,1,0,1,0);
      mad(v,gk,gk0,1.0,-1.0);
      u.r -= tk;
      matmult(u,Q,v,1,0); /* u contains projected gradient difference */
      v.r -=tk;
      multi(3,v,L,L,pz,0,1,0);
      sp=dot(u,pz);
      if (sp<=0.0)
      { sprintf(errs,"CQN -ve sp = %g\n",sp);  /* => update not +ve definite */
        // ErrorMessage(errs,0);
      }
      else
      { choleskir1ud(L,u,1.0/sp);
	     choleskir1ud(L,v,-1.0/sBs.V[0]);
      }
      v.r+=tk;u.r+=tk;Q.c+=tk;pz.r+=tk;
    }
    if (s>=0)   /* add an inequality constraint to the working set */
    { addconstraint(0,"",&ZGZ,A,Af,&Ac,I,&L,&Q,&T,s,&tk,1);
    }
    if (itisamin || !decrease || notsig)
    { s=lagrange(gk,Q,T,G,dum,a,I,delog,Af,0);
      if (s>=0)
      { deleteconstraint(&ZGZ,gk,pk,&L,G,dum,Q,&Ac,I,&T,a,s,&tk,0,1);
	     for (i=0;i<L.c-1;i++) L.M[L.r-1][i]=0.0;
	     L.M[i][i]=1.0;  /* update suggested in GMW */
      }
      else
      if (itisamin||!decrease||(nscount>3)) break;
    }
    Q.c -= tk; /* sets Q to Z - the null space column basis  */
    u.r -= tk; /* using this for the projected gradient */

    matmult(u,Q,gk,1,0);
    for (i=0;i<u.r;i++) u.V[i]*=-1;
    v.r -= tk; /* used to store direction in the null space */
    choleskisolve(L,v,u);
    matmult(pk,Q,v,0,0);
    v.r += tk;u.r += tk;Q.c += tk;
    for (i=0;i<gk.r;i++) gk0.V[i]=gk.V[i];
    /* pk now contains the search direction */
    f1=brent(F,a,anew,prob_dat,fdum,J,G,Y,W,S,Q,A,Af,I,b,pk,gk,ro,tk,&gotone,&s,f0,0.001);
    if (gotone==-1) break;
    qpoutdata.obj_change=f0-f1;
    qpoutdata.obj=f1;
    qpoutdata.constraints=tk;
    notsig=0;decrease=0;
    if (interr<fabs(f0+f1)*1e-9) interr=fabs(f0+f1)*1e-9;
    if (f1<f0)
    { if (f0-f1<=interr)
      { update=0;if (s<0) { notsig=1;nscount++;}} /* drop was less than integration error */
      else { nscount=0;update=1;}
      f0=f1;decrease=1;
      trydown=0;
    } else { trydown=1;update=0;}
    /* try steepest descent */
    if (!s&&(notsig||trydown))
    { Q.c -= tk;
      pz.r -=tk;
      matmult(pz,Q,gk,1,0);
      matmult(pk,Q,pz,0,0);
      for (i=0;i<pk.r;i++) pk.V[i]*=-1;
      Q.c +=tk;
      pz.r+=tk;
      f1=brent(F,a,anew1,prob_dat,fdum,J,G,Y,W,S,Q,A,Af,I,b,pk,gk,ro,tk,&gotone,&s,f0,0.001);
      if (gotone==-1) break;
      if (f1<f0)
      { if (f0-f1<=interr) /* drop was less than integration error */
	     { update=0;if (s<0) { notsig=1;nscount++;}}
	     else { nscount=0;notsig=0;update=1;}
	     f0=f1;decrease=1;
	     for (i=0;i<a.r;i++) anew.V[i]=anew1.V[i];
      } else
      update=0;
    }
    if (update)
    { update=0; /* check that step significant relative to f.d.s */
      for (i=0;i<pk.r;i++)
      if (fabs(pk.V[i])>4.0*fabs(dafd.V[i])) { update=1;break;}
    }
    if (s>-1) decrease=1;
    if (decrease)
    for (i=0;i<a.r;i++) a.V[i]=anew.V[i];
  }
  /* load up Z and ZGZ */
  // get Z as sequence of householder rotations....
  QT(Q,Ac,0);Q.r=Ac.r; 
  *Z = Q;
  // this is a final run to get a final J and force output
  if (gotone!=-1)
  { f0=objective(F,f,J,dafd,a,prob_dat,G,Y,W,S,Q,gk,ro,0.0,tk,2,&itisamin);
    if (itisamin==-2) gotone=-1;
  }  

  if (gotone==-1) // user stop
  { j=1;
  } else
  { 
    j=0;
  }
  freemat(ZGZ);
  freemat(G);freemat(v);freemat(u);freemat(pz);freemat(sBs);
  freemat(gk);freemat(pk);freemat(anew);freemat(fdum);freemat(gk0);
  freemat(anew1);freemat(dafd);freemat(L);freemat(I);freemat(delog);
  freemat(T);freemat(Ac);
  return(j);
}





int IRLS(F,J,Z,f,a,prob_dat,W,S,y,A,Af,b,ro,maxviol,reset)
int (*F)(matrix,matrix,matrix,matrix,matrix,void*,int,double);
matrix J,*Z,f,a,W,S,y,A,Af,b;double ro,*maxviol;void *prob_dat;
int reset;

/* Iterative constrained least squares algorithm:
   Aim is to solve

   minimise (y - f(a))'W(y - f(a)) + a'Sa

   Subject to Aa>=b and Af a = const

   where f(a) is some nonlinear function, whose Jacobian can be
   evaluated. Algorithm defines:

   z=y-f+Ja

   where all are evaluated at current a and seeks to minimise

   (z-Ja)'W(z-Ja) +  a'Sa

   subject to the constraints, to find a new vector a. The process is
   iterative.

   * The QP code used is that designed specifically for least squares problems. 

   * On exit the Z contains the null space of the active constraints as a sequanece of 
     Householder rotations stored in Z. Because this routine may have adjusted the step 
     length after the end of the last QP step the active set of constraints is re-checked
     before Z is formed.  
   
   * Returns 1 on user termination, 0 otherwise
    

*/

{ matrix z,dafd,Afp,cT,G,p,Q,gk,anew,fdum,dum,H;
  static matrix ZGZ;  // this is unsatisfactory - need to replace 
  int j,iter,itisamin=0,down,m,off=0,*active;
  long tk,*aliased,n,i,k;
  double f0,f1,fmin=0.0,scale,lam=1.0,xx,yy;
  tk=0L;dum.M=NULL;dum.r=0L;dum.c=0L;
  G=initmat(a.r,a.r);
  Afp=initmat(J.c+Af.r,a.r);       /* Afp allows extra fixed constraints to be added in */
  Afp.r=Af.r;
  for (i=0;i<Af.r;i++) for (j=0;j<Af.c;j++) Afp.M[i][j]=Af.M[i][j];
  anew=initmat(a.r,1L);p=initmat(a.r,1L);
  z=initmat(y.r,1L);gk=initmat(a.r,1L);cT=initmat(1L,a.r);
  Q=initmat(a.r,a.r); /* initial columns are the basis of the null space */
  active=(int *)calloc((size_t)y.r+1,sizeof(int));
  for (i=0;i<Q.r;i++)
  { Q.M[i][i]=1.0;} // required for objective(), but does not contain null space
  fdum=initmat(f.r,f.c);
  dafd=initmat(a.r,1L); /* holds finite differences used for gradients */
  /* initialise vector containing choleski factor of the Approx. Hessian */
  tk=0L;
  for (i=0;i<a.r;i++) anew.V[i]=a.V[i];
  aliased=(long *)calloc((size_t)J.c,sizeof(long));
  down=0;
  for (iter=0;iter<1000;iter++)
  { matrixintegritycheck();
    f0=objective(F,f,J,dafd,anew,prob_dat,G,y,W,S,Q,gk,ro,0.0,tk,0,&itisamin);
    if (itisamin==-2) break;
    if (!iter||fmin>f0)
    { fmin=f0;down++; // next redo f,J,G.....
      f0=objective(F,f,J,dafd,anew,prob_dat,G,y,W,S,Q,gk,ro,0.0,tk,1,&itisamin);
      if (itisamin==-2) break;
    }
    else // try shorter step
    { scale=1.0;
      for (i=0;i<15;i++)
      { scale*=0.8;
	    for (k=0;k<a.r;k++) p.V[k]=a.V[k]+(anew.V[k]-a.V[k])*scale;
	    f0=objective(F,f,J,dafd,p,prob_dat,G,y,W,S,Q,gk,ro,0.0,tk,0,&itisamin);
        if (itisamin==-2) break;
	    if (f0<fmin) { fmin=f0;break;}
      }
      if (fmin==f0)
      { for (i=0;i<a.r;i++) anew.V[i]=p.V[i];
        f0=objective(F,f,J,dafd,anew,prob_dat,G,y,W,S,Q,gk,ro,0.0,tk,1,&itisamin);
        if (itisamin==-2) break; 
      }
      else
      { f0=objective(F,f,J,dafd,a,prob_dat,G,y,W,S,Q,gk,ro,0.0,tk,0,&itisamin);
        if (itisamin==-2) break;
        break; // can't get any lower, J & G ok but above line needed to get f right !
      }
    }
    for (i=0;i<a.r;i++) a.V[i]=anew.V[i];

    n=alias(J,aliased,1e-7);
    if (n>0) /* then some parameters are aliased */
    { Afp.r=Af.r+n;
      for (i=0;i<n;i++)
      { for (k=0;k<Af.c;k++) Afp.M[Af.r+i][k]=0.0;
	     Afp.M[Af.r+i][aliased[i]]=1.0;  /* fix an aliased parameter */
      }
    } else
    Afp.r=Af.r;
    if (iter) qpoutdata.obj_change=f1-f0;
    qpoutdata.obj=f0;
    qpoutdata.constraints=tk;
    f1=f0;
    matmult(z,J,a,0,0);
    for (i=0;i<z.r;i++) z.V[i]=y.V[i]-f.V[i]+z.V[i]; 
    for (i=0;i<a.r;i++) anew.V[i]=a.V[i];

   // PCLS(matrix *X,matrix *p,matrix *y,matrix *w,matrix *Ain,matrix *b,
   //       matrix *Af,matrix *H,matrix *S,int *off,double *theta,int m,int *active);
   H.r=0L; // signal that influence matrixz is not wanted
   if (S.r) m=1; else m=0;
   PCLS(&J,&anew,&z,&W,&A,&b,&Af,&H,&S,&off,&lam,m,active);
   tk=active[0];

  }
  free(aliased);
  // reform Z, first checking that all constraints in active[] are still active
  *Z=initmat(a.r,a.r);
  for (i=0;i<active[0];i++)
  { // check appropriate constraint
    xx=0.0;yy=enorm(a);
    for (j=0;j<a.r;j++) xx+=A.M[active[i+1]][j]*a.V[j];
    xx += -b.V[active[i+1]]; // amount by which constraint is out
    if (xx<yy*1e-10) // then constraint still active, so add to working set
    { for (j=0;j<a.r;j++) Afp.M[Afp.r][j]=A.M[active[i+1]][j];Afp.r++;}
  }
    // perform QT factorization on active constraint matrix to get Z. 
  QT(*Z,Afp,0);Z->r=Afp.r;
  // Final run for final J and forced o/p
  if (itisamin!=-2)
  { f0=objective(F,f,J,dafd,a,prob_dat,G,y,W,S,Q,gk,ro,0.0,tk,2,&itisamin);}
  if (itisamin==-2) // then it's a user stop
  { j=1;
  } else 
  { j=0;
  } 

  freemat(G);freemat(Afp);freemat(anew);freemat(p);freemat(z);freemat(gk);
  freemat(cT);freemat(dafd);freemat(fdum);free(active);
  return(j);
}






void TotalSmooth(matrix *St,matrix *S,long *off,double *theta,long n,int m)
{ int i,l,j;
  for (i=0;i<St->r;i++) for (j=0;j<St->c;j++) St->M[i][j]=0.0;
  for (l=0;l<m;l++) for (i=0;i<S[l].r;i++) for (j=0;j<S[l].c;j++)
  St->M[off[l]+i][off[l]+j]+=S[l].M[i][j]*theta[l]/n;
}

double slowtrace(matrix X,matrix S)

{ int i,j;
  double tr=0.0;
  matrix XX,XXX,XXXX;
  XX=initmat(X.c,X.c);matmult(XX,X,X,1,0);
  for (i=0;i<XX.r;i++) for (j=0;j<XX.c;j++) XX.M[i][j]+=S.M[i][j];
  invert(&XX);
  XXX=initmat(X.r,XX.r);
  matmult(XXX,X,XX,0,0);
  XXXX=initmat(X.r,X.r);
  matmult(XXXX,XXX,X,0,1);
  for (i=0;i<XXXX.r;i++) tr+=XXXX.M[i][i];
  freemat(XX);freemat(XXXX);freemat(XXX);
  return(tr);
}

void optNLLS(int (*F)(matrix,matrix,matrix,matrix,matrix,void*,int, double),
               matrix J,matrix *Z,matrix f,matrix a,
               void *prob_dat,matrix W,matrix *S,matrix y,matrix A,matrix Af,matrix b,
               int *off,double *trace,double *lam,double *lmax,int m, int method)


/* Routine for iterative least squares fitting with multiple smoothing parameter estimation.

   Aim is to minimise:

   ||W^{0.5} (y-f(a)) ||^2 + \sum \lambda_i a' S_i a

   subject to A a >= b A_f a = constant.

   Algorithm defines pseudodata z=y-f+Ja and solves:

   ||W^{0.5} (z-Ja) ||^2 + \sum \lambda_i a' S_i a

   subject to constraints as well as estimating the \lambda_i's. 
   
   The basic loop to iterate is this:

   1. Estimate updated smoothing parameters given current model parameters

   2. Update model parameters:
      i) given old smoothing parameters.
      ii) given new smoothing parameters.
      ... calculating gcv scores for both.

   3. Accept the smoothing parameter, model parameter pair with the lowest gcv score
      if both are acceptable, acceptable means that model parameter update has not
      increased the objective (given corresponding smoothing parameters). 

   4. Terminate if the update doesn't change the smoothing parameters or model parameters.
 

   Notes:     

   *lmax is a vector of maximum values for the smoothing parameters in lam.
    if lmax[i] <=0.0 then no upper bound is put on lam[i]....

   * Returns *trace==-1 to signal a user termination.

   * Before returning the null space of any current constraints is obtained and put in *Z
     as a series of Householder rotations.

   * objective assumes that S_t= \sum \lambda_i S_i / n where n is number of data, but
     gcv routines and PCLS() do not use this factor of n. The s.p.s returned in lam 
     do not assume that the factor of n is used in the calculation of St. 
*/

{ matrix z,dafd,Afp,G,p,Q,gk,anew,fdum,dum,St,at,H,At,a0,a1,J0,J1,fv0,fv1;
  int l,j,iter,itisamin=0,down,bayesian=1,maxiter=1000,*active,*active0,*active1,stop=0;
  long tk=0L,*aliased,n,i,k,*loff;
  double maxviol,tr,f0,f1,fmin=0.0,*theta,*otheta,sig2=-1 /* signals GCV to multismooth */
  ,dr=0.0,xx,yy,f00,f01,f02,f10,f11,f12,cv0,cv1; /* damping ratio for theta update */
  n=y.r;
  dum.M=NULL;dum.r=0L;dum.c=0L;
  G=initmat(a.r,a.r);
  At=initmat(a.r,a.r); // matrix for holding active set for Z calculation
  *Z=initmat(a.r,a.r); // matrix for holding Z - returned by this routine
  Afp=initmat(J.c+Af.r,a.r);       /* Afp allows extra fixed constraints to be added in */
  Afp.r=Af.r;
  for (i=0;i<Af.r;i++) for (j=0;j<Af.c;j++) Afp.M[i][j]=Af.M[i][j];
  anew=initmat(a.r,1L);p=initmat(a.r,1L);
  z=initmat(y.r,1L);gk=initmat(a.r,1L);
  at=initmat(a.r,1L);a0=initmat(a.r,1L);a1=initmat(a.r,1L);
  J0=initmat(J.r,J.c);J1=initmat(J.r,J.c);
  fv0=initmat(f.r,1L);fv1=initmat(f.r,1L);
  Q=initmat(a.r,a.r); /* initial columns are the basis of the null space */
  for (i=0;i<Q.r;i++)
  { Q.M[i][i]=1.0;}
  fdum=initmat(f.r,f.c);
  dafd=initmat(a.r,1L); /* holds finite differences used for gradients */
  St=initmat(a.r,a.r);  /* total smoothness matrix */
  theta=(double *)calloc((size_t)m,sizeof(double));
  otheta=(double *)calloc((size_t)m,sizeof(double));
  loff= (long *)calloc((size_t)m,sizeof(long));
  for (i=0;i<m;i++) loff[i]=(long)off[i];

  active=(int *)calloc((size_t)a.r+1,sizeof(int)); // storage for list of active constraints
  active0=(int *)calloc((size_t)a.r+1,sizeof(int));
  active1=(int *)calloc((size_t)a.r+1,sizeof(int));
  /* get first attempt at relative weights */
  for (l=0;l<m;l++)
  { for (i=0;i<S[l].r;i++) theta[l]+=S[l].M[i][i];
    otheta[l]=theta[l]=1.0/theta[l];
  }
  /* Do fitting step */
  TotalSmooth(&St,S,loff,theta,n,m);
  /* optional QN step.... not fully debugged  */

  if (method!=2) CQN(F,J,Z,f,a,prob_dat,W,St,y,A,Af,b,1.0/*ro*/,&maxviol,1);
  /* end of optional QN step */

  tk=0L;for (i=0;i<a.r;i++) anew.V[i]=a.V[i];
  f1=fmin=f0=objective(F,f,J,dafd,anew,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,1,&itisamin);
  down=1;
  aliased=(long *)calloc((size_t)J.c,sizeof(long));
  if (itisamin!=-2) // then user not stopped the fit
  for (iter=0;iter<maxiter;iter++)
  { //for (i=0;i<a.r;i++) a.V[i]=anew.V[i];
    // check parameters for evidence of aliasing
    n=alias(J,aliased,1e-7);
    if (n>0) // then some parameters are aliased
    { Afp.r=Af.r+n;
      for (i=0;i<n;i++)
      { for (k=0;k<Af.c;k++) Afp.M[Af.r+i][k]=0.0;
	     Afp.M[Af.r+i][aliased[i]]=1.0;  /* fix an aliased parameter */
      }
    } else
    Afp.r=Af.r;
     
    // form pseudodata for PCLS......
    matmult(z,J,a,0,0);
    for (i=0;i<z.r;i++) z.V[i]= y.V[i]-f.V[i]+z.V[i];
    
    for (i=0;i<a.r;i++) a0.V[i]=a.V[i]; 
    
    // get objective with a0 and old theta
    
    TotalSmooth(&St,S,loff,otheta,y.r,m);
    f00=objective(F,f,J,dafd,a0,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,0,&itisamin);     
    if (itisamin==-2) break;
    // PCLS(matrix *X,matrix *p,matrix *y,matrix *w,matrix *Ain,matrix *b,
    //       matrix *Af,matrix *H,matrix *S,int *off,double *theta,int m,int *active);      
    H.r=0L; // ensuring H not calculated
   
    PCLS(&J,&a0,&z,&W,&A,&b,&Afp,&H,S,off,otheta,m,active0);
  
    /* try contracting the step to see if min. improves */
    f01=objective(F,f,J,dafd,a0,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,0,&itisamin);
    if (itisamin==-2) break;
    for (i=0;i<20;i++)
    { for (j=0;j<a.r;j++) at.V[j]=a0.V[j]-(a0.V[j]-a.V[j])*0.05*i;
      f02=objective(F,f,J,dafd,at,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,0,&itisamin);
      if (itisamin==-2) break;
      if (f02>f01) break;
      f01=f02;
    }

    if (itisamin==-2) break; // user break - so stop altogether

    if (f02>f01&&i)   // set step to best achieved
    { for (j=0;j<a.r;j++) a0.V[j] -= (a0.V[j]-a.V[j])*0.05*(i-1);
    }  // f01 is best achieved by this iterate
    
    if (f01>f00) { mcopy(&a,&a0);f01=f00;} // no improvement by any step
    
    if (iter) // then try step using new theta estimates
    { for (i=0;i<a.r;i++) a1.V[i]=a.V[i]; 
    
      // get objective with a1 and new theta
    
      TotalSmooth(&St,S,loff,theta,y.r,m);
      f10=objective(F,f,J,dafd,a1,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,0,&itisamin);     
      if (itisamin==-2) break;

      PCLS(&J,&a1,&z,&W,&A,&b,&Afp,&H,S,off,theta,m,active1);
  
      /* try contracting the step to see if min. improves */
      f11=objective(F,f,J,dafd,a1,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,0,&itisamin);
      if (itisamin==-2) break;
      for (i=0;i<20;i++)
      { for (j=0;j<a.r;j++) at.V[j]=a1.V[j]-(a1.V[j]-a.V[j])*0.05*i;
        f12=objective(F,f,J,dafd,at,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,0,&itisamin);
        if (itisamin==-2) break;
        if (f12>f11) break;
        f11=f12;
      }

      if (itisamin==-2) break; // user break - so stop altogether

      if (f12>f11&&i)   // set step to best achieved
      { for (j=0;j<a.r;j++) a1.V[j] -= (a1.V[j]-a.V[j])*0.05*(i-1);
      }  // f01 is best achieved by this iterate
      
      if (f11>f10) { mcopy(&a,&a1);f11=f10;} // no improvement managed

      if (f11<f10) // then objective improved by step
      { // Now get gcv scores for a0, otheta and a1, theta......
        // a0 first......
        TotalSmooth(&St,S,loff,otheta,y.r,m);
        f01=objective(F,fv0,J0,dafd,a0,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,1,&itisamin);
        if (itisamin==-2) break; // user stop
        for (i=0;i<St.r;i++) for (j=0;j<St.c;j++) St.M[i][j]*=y.r; // St from TotalSmooth divided by y.r
        tr=TrInf(&J,Z,&W,&St,1.0);cv0=0.0;
        for (i=0;i<St.r;i++) for (j=0;j<St.c;j++) St.M[i][j]/=y.r;
        for (i=0;i<y.r;i++) cv0+=W.V[i]*(y.V[i]-fv0.V[i])*(y.V[i]-fv0.V[i]);
        cv0/=(y.r-tr)*(y.r-tr);
        // now a1....
        TotalSmooth(&St,S,loff,theta,y.r,m);
        f11=objective(F,fv1,J1,dafd,a1,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,1,&itisamin);
        if (itisamin==-2) break; // user stop
        for (i=0;i<St.r;i++) for (j=0;j<St.c;j++) St.M[i][j]*=y.r; // St from TotalSmooth divided by y.r
        tr=TrInf(&J,Z,&W,&St,1.0);cv1=0.0;
        for (i=0;i<St.r;i++) for (j=0;j<St.c;j++) St.M[i][j]/=y.r;
        for (i=0;i<y.r;i++) cv1+=W.V[i]*(y.V[i]-fv1.V[i])*(y.V[i]-fv1.V[i]);
        cv1/=(y.r-tr)*(y.r-tr); 
        // choose one with lowest gcv score....
        if (cv1<cv0) // accept a1, theta
        { mcopy(&J1,&J);
          mcopy(&a1,&a);
          mcopy(&fv1,&f); 
          for (i=0;i<active1[0]+1;i++) active[i]=active1[i]; 
          for (i=0;i<m;i++) otheta[i]=theta[i]; 
          f0=f10;f1=f11;
        } else // accept a0, otheta
        { mcopy(&J0,&J);
          mcopy(&a0,&a);
          mcopy(&fv0,&f);
          for (i=0;i<active0[0]+1;i++) active[i]=active0[i];
          f0=f00,f1=f01;
          if (f0==f1) 
          { stop=1;
            for (i=0;i<m;i++) theta[i]=otheta[i]; 
          }
        }
      } else
      if (f01<f00) // accept a0, otheta
      { mcopy(&a0,&a);
        f01=objective(F,f,J,dafd,a0,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,1,&itisamin);
        if (itisamin==-2) break; 
        for (i=0;i<active0[0]+1;i++) active[i]=active0[i];
        f0=f00;f1=f01;
      } else // no improvement, stop.
      { 
        f01=objective(F,f,J,dafd,a,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,1,&itisamin);
        if (itisamin==-2) break;
        stop=1;
        for (i=0;i<m;i++) theta[i]=otheta[i];   
        
      } 
    } else // first iteration, just accept a0
    { mcopy(&a0,&a);
      f01=objective(F,f,J,dafd,a0,prob_dat,G,y,W,St,Q,gk,1.0,0.0,tk,1,&itisamin);
      if (itisamin==-2) break;
      for (i=0;i<active0[0]+1;i++) active[i]=active0[i];
      f0=f00;f1=f01;
    }  
    // Now reform Z, checking which if any constraints are needed. 
    { mcopy(&Afp,&At);At.r=Afp.r;

      for (i=0;i<active[0];i++)
      { // check appropriate constraint
        xx=0.0;yy=enorm(a);
        for (j=0;j<a.r;j++) xx+=A.M[active[i+1]][j]*a.V[j];
        xx += -b.V[active[i+1]]; // amount by which constraint is out
        if (xx<yy*1e-10) // then constraint still active, so add to working set
        { for (j=0;j<a.r;j++) At.M[At.r][j]=A.M[active[i+1]][j];At.r++;}
      } 
      QT(*Z,At,0);Z->r=At.r;
     
    }
    // output stuff   
    qpoutdata.obj_change=f0-f1;
    qpoutdata.obj=f1;
    qpoutdata.constraints=Z->r;
    if (stop) break; // convergence has occured, J, Z, f and theta are consistent   
    /* now re-estimate smoothing parameters  */
   
    for (i=0;i<m;i++) theta[i]=otheta[i];
    matmult(z,J,a,0,0);
    for (i=0;i<z.r;i++) z.V[i]= y.V[i]-f.V[i]+z.V[i]; // pseudodata, revised 
    mcopy(&a,&at);
    MultiSmooth(&z,&J,Z,&W,S,&at,theta,loff,m,&sig2);
    /* following implements damping on theta change - new theta is weighted
       average of MultiSmooth estimate and old value... */
    for (i=0;i<m;i++) theta[i]=exp((1.0-dr)*log(theta[i])+dr*log(otheta[i]));
    for (i=0;i<m;i++) if (lmax[i]>0.0&&theta[i]>lmax[i]) theta[i]=lmax[i]; // constrain s.p. to upper limit
 
  } // This is the end of the iterative loop. Z is correct unless iter==maxiter
  if (iter==maxiter) ErrorMessage("Failure to converge in optNLLS().",0);

 
  if (itisamin==-2) // then user stopped
  { *trace=-1.0;                    
    // NOTE: code needed here
  } else
  { for (i=0;i<m;i++) lam[i]=theta[i];
  }
  free(aliased);free(theta);free(otheta);free(active);
  freemat(Afp);freemat(anew);freemat(p);freemat(z);freemat(gk);
  freemat(dafd);freemat(fdum);freemat(At);freemat(fv0);freemat(fv1);
  freemat(a0);freemat(a1);freemat(J0);freemat(J1);free(active0);free(active1);
}



/***************************************************************************/
/* Update and bug fix notes.                                               */
/***************************************************************************/
/*

1. 4/3/00 - Removal of dead and useless routines. Old version of this file
   is now in qp2.c. Also routines now assume ANSI C. Tested with ddefit.
   optNLLS() left in - not sure it's very useful!

2. IRLS should be modified to use new stable QP routine for least squares 
   problems.

3. 24/4/00 - Replaced vector afix, used by many routines to pass information to 
   F() the model function for many routines, to prob_dat, a void pointer which
   can be used to pass a pointer to a problem specific data structure for use
   by F(). This pointer can then be cast to a pointer of the correct type within 
   F(). 

4. 24/4/00 - Assessment of accuracy of objective() modified, to avoid direct 
   manipulation of afix (or its replacement *prob_dat). 

5. 11/5/00 - Stop handling added - if F() returns 1 this signals user stop
   request. Then objective sets *minimum to -2. Brent handles this by setting
   gotone to -1; NonLinLS(), CQN() and IRLS() return 1 on user stop, 0 otherwise
   optNLLS() sets *trace to -1.0 on user stop.

6. 12/5/00 - CQN, NonLinLS() no longer free Z and ZGZ on entry.

7. PROBLEM QP() still uses static internal matrix allocation in an inconvenient 
   way for really modular code - and in a way that will crash DDEfit!!

8. ZGZ no longer returned from OptNLLS, IRLS, CQN, NonLinLS, but IRLS and OptNLLS 
   are not in a satisfactory condition. 

9. Z return MUST be standardized as either full matrix or list of Householder rotations

10. Z returned as Householder rotations from IRLS and Z forced to be consistent with parameter 
    estimates! Same Z scheme used for NonlinLS, CQN and optNLLS

11. optNLLS updated - Z consistent with active constraints on exit, and algorithm tightened 
    up so that smoothing parameter updates are only accepted if they lead to an improved gcv 
    score relative to the old s.p.s  at the next iterative update of the parameters. 

12. 25/6/00 Bug fixed in LSQPlagrange().... array fixed[] was accessed wrongly: specifically 
    I'd used fixed[i + fixed_cons] instead of fixed[i - fixed_cons]. (The fact that this only
    showed up now does suggest that cycling is pretty rare for this sort of problem!)
  
13. 12/7/00 bug fix in NonLinLS() - Z not initialised on user stop and return non-zero
    on normal termination (which spoils b.s. restarting)! - both fixed.
14. 18/7/00 memory leak in NonLinLS() - Q, T and I were not freed on exit.
*/