[ADMB Users] scaling bounded independent variables in random effects models -- application to glmmadmb

Thu Oct 17 15:32:23 PDT 2013

I thought I had done some of this with Hans already, but I found it did 
not work for me using the current
release.  However it is possible and perhaps superior to accomplisth the 
same objective in user space.

first the functions needed.

In the GLOBALS_SECTION  I add

   dvariable mybound(const prevariable& x,double lb,double ub,double sc)
   {
     dvariable sx=x/(sc*(ub-lb));
     return 0.5*(lb+ub) + sin(sx)*0.5*(ub-lb);
   }

   df1b2variable mybound(const df1b2variable& x,double lb,double 
ub,double sc)
   {
     df1b2variable sx=x/(sc*(ub-lb));
     return 0.5*(lb+ub) + sin(sx)*0.5*(ub-lb);
   }

Here lb and ub are the upper and lower bounds and sc is the scaling 
factor (we divide by sc)

Previously log_alpha had been declared as

   init_bounded_number log_alpha(lb,ub,alpha_phase)

This is modified to

  init_number log_alpha(alpha_phase)

In the code log_alpha was used as

     alpha=exp(log_alpha);

This is modified to

     alpha=exp(mybound(lb,ub,sc));

That is all there is to it.  One adjusts the value of sc as required.

I made a few changes to glmmadmb like this and it has imprived the stability
of the code quite a bit at least for the example I was working on.

I have attched a copy of the modified glmadmb.tpl file.

-------------- next part --------------
// 2011-03-24 version from Dave Fournier
// modified 2011-04-27 Hans Skaug, BMB
DATA_SECTION

  init_int n					// Number of observations
  init_int p_y					// Dimension of y(i) (multivariate response)
  init_matrix y(1,n,1,p_y)			// Observation matrix
  init_int p					// Number of fixed effects
  init_matrix X(1,n,1,p)			// Design matrix for fixed effects
  vector predy(1,n)
  vector mytau(1,n)
  init_int M					// Number of RE blocks (crossed terms)
  init_ivector q(1,M)				// Number of levels of the grouping variable per RE block; Can be skipped
  init_ivector m(1,M)				// Number of random effects parameters within each block
  int sum_mq					// sum(m*q), calculated below: should be read from R
  init_int ncolZ				
  init_matrix Z(1,n,1,ncolZ)			// Design matrix for random effects
  init_imatrix I(1,n,1,ncolZ)			// Index vectors into joint RE vector "u" for each
  init_ivector cor_flag(1,M)			// Indicator for whether each RE block should be correlated
  init_ivector cor_block_start(1,M) 		// Not used: remove
  init_ivector cor_block_stop(1,M) 		// Not used: remove
  init_int numb_cor_params			// Total number of correlation parameters to be estimated
  init_int like_type_flag   			// 0 poisson 1 binomial 2 negative binomial 3 Gamma 4 beta 5 gaussian 6 truncated poisson 7 trunc NB 8 logistic 9 betabinomial
  init_int link_type_flag   			// 0 log 1 logit 2 probit 3 inverse 4 cloglog 5 identity
  init_int rlinkflag                            // robust link function?
  init_int no_rand_flag   			// 0 have random effects 1 no random effects
  init_int zi_flag				// Zero inflation (zi) flag: 1=zi, 0=no zi
  // init_int zi_model_flag                        // ZI varies among groups/covariates?
  // init_matrix G(1,n,1,ncolG)                    // Design matrix for zero-inflation (fixed effects)
  // TESTING: remove eventually?
  init_int zi_kluge				// apply zi=0.001?
  init_int nbinom1_flag				// 1=NBinom1, 0=NBinom2
  init_int intermediate_maxfn			// Not used
  init_int has_offset				// Offset in linear predictor: 0=no offset, 1=with offset
  init_vector offset(1,n)			// Offset vector

  // Makes design matrix X orthogonal to improve numeric stability
  matrix rr(1,n,1,6)
  matrix phi(1,p,1,p)
 LOC_CALCS
  int i,j;
  phi.initialize();
  for (i=1;i<=p;i++)
  {
    phi(i,i)=1.0;
  }
  dmatrix trr=trans(rr);
  trr(6).fill_seqadd(1,1);
  rr=trans(trr);

  dmatrix TX(1,p,1,n);
  TX=trans(X);
  for (i=1;i<=p;i++)
  {
    double tmp=norm(TX(i));
    TX(i)/=tmp;
    phi(i)/=tmp;
    for (j=i+1;j<=p;j++)
    {
      double a=TX(j)*TX(i);
      TX(j)-=a*TX(i);
      phi(j)-=a*phi(i);
    }
  }
  X=trans(TX);

  sum_mq = 0;
  for (i=1;i<=M;i++)
    sum_mq += m(i)*q(i);

  ofstream ofs("phi.rep");
  for (i=1; i<=p; i++)
  {
    for (j=1; j<=p; j++)
    {
      ofs << phi(i,j) << " ";
    }
    ofs << endl;
  }
  ofs << endl;

INITIALIZATION_SECTION
 tmpL 1.0
 tmpL1 0.0
 log_alpha 1
 pz .001

PARAMETER_SECTION
 LOC_CALCS

  // BMB: FIXME: do we need this?  formerly disallowed for binomial (was like_type_flag 2);
  //     would be problematic for binary data but otherwise OK.  Should test in R code
  //  if(zi_flag && (like_type_flag>=2))
  // {
  //  cerr << "Zero inflation not allowed for this response type" << endl;
  //  ad_exit(1);
  // }

  // Determines "phases", i.e. when the various parameters  becomes active in the optimization process
  int pctr = 2;		// "Current phase" in the stagewise procedure
  // FIXME: move trunc_poisson, logistic earlier in like_type hierarchy (or add a scale parameter flag vector)
  int alpha_phase = like_type_flag>1 && like_type_flag!=6 ? pctr++ : -1;        // Phase 2 if active
  int zi_phase = zi_flag ? pctr++ : -1;                      			// After alpha
  int rand_phase = no_rand_flag==0 ? pctr++ : -1;    				// SD of RE's
  int cor_phase = (rand_phase>0) && (sum(cor_flag)>0) ? pctr++ : -1 ; 		// Correlations of RE's

  // Count the number of variance/correlation parameters to be estimated
  ivector ncolS(1,M);
  double log_alpha_lowerbound = nbinom1_flag==1 ? 0.001 : -5.0 ;
  ncolS = m;                       	// Uncorrelated random effects
  for (int i=1;i<=M;i++)                // Modifies the correlated ones
    if(cor_flag(i))
      ncolS(i) = m(i)*(m(i)+1)/2;
  int nS = sum(ncolS);             	//  Total number
 END_CALCS

  init_bounded_number pz(.000001,0.999,zi_phase)
  init_vector beta(1,p,1)     	// Fixed effects
  sdreport_vector real_beta(1,p)     
  init_vector tmpL(1,ncolZ,rand_phase)		// Log standard deviations of random effects
  init_bounded_vector tmpL1(1,numb_cor_params,-10,10.5,cor_phase)	// Offdiagonal elements of cholesky-factor of correlation matrix
  init_number log_alpha(alpha_phase)	
  sdreport_number alpha
  sdreport_vector S(1,nS)
  random_effects_vector u(1,sum_mq,rand_phase)    // Pool of random effects 
  objective_function_value g                    	   // Log-likelihood
 LOC_CALCS
  for (int i=I.indexmin() ; i<= I.indexmax(); i++)
  {
    if (min(I(i)) < u.indexmin() || max(I(i)) > u.indexmax())
    {
      cerr << "Bounds error in I for i = " << i << endl
         << " I(" << i << ") = " << I(i) << endl
         <<  "u.indexmin() = " << u.indexmin() << endl
          <<  "u.indexmax() = " << u.indexmax() << endl;
      ad_exit(1);
    }
  }

PRELIMINARY_CALCS_SECTION
  cout << setprecision(4);

PROCEDURE_SECTION
  g=0.0;

  int i;

  if(!no_rand_flag)
    for (i=1;i<=sum_mq;i++)
      n01_prior(u(i));			// u's are N(0,1) distributed

  if (rlinkflag && !last_phase()) 
  {
    betapen(beta);
  }

  for(i=1;i<=n;i++)
    log_lik(i,tmpL,tmpL1,u(I(i)),beta,log_alpha,pz);

  if (sd_phase())
  {
    alpha = exp(mybound(log_alpha,-5.0,5.0,sc));
    real_beta=beta*phi;

    int i,j,i_m;
    int i1=1, i2=1, ii=1;
    for (i_m=1;i_m<=M;i_m++)
    {
      dvar_matrix L(1,m(i_m),1,m(i_m));
      L.initialize();
      dvar_matrix tmpS(1,m(i_m),1,m(i_m));
      tmpS.initialize();

      if(cor_flag(i_m))
      {
        int ii=1;
        L(1,1)=1;
        for (i=1;i<=m(i_m);i++)
        {
          L(i,i)=1;
          for (int j=1;j<i;j++)
            L(i,j)=tmpL1(i2++);
          L(i)(1,i)/=norm(L(i)(1,i));
        }
        for (i=1;i<=m(i_m);i++)
          L(i)*=exp(mybound(tmpL(i1++),-10.0,10.5,sc2));
      }
      else
      {
        for (i=1;i<=m(i_m);i++)
          L(i,i)=exp(mybound(tmpL(i1++),-10.0,10.5,sc2));
      }

      tmpS=L*trans(L);

      for (i=1;i<=m(i_m);i++)
      {
          if(cor_flag(i_m))
            for(j=1;j<i;j++)
              S(ii++) = tmpS(i,j);
          S(ii++) = tmpS(i,i);
      }
    }

  }

SEPARABLE_FUNCTION void kludgepen(const prevariable&  v)
 g +=.5*square(v);

SEPARABLE_FUNCTION void betapen(const dvar_vector&  v)
  g+=0.5*norm2(v);

SEPARABLE_FUNCTION void n01_prior(const prevariable&  u)
 g -= -0.5*log(2.0*M_PI) - 0.5*square(u);

SEPARABLE_FUNCTION void log_lik(int _i,const dvar_vector& tmpL,const dvar_vector& tmpL1,const dvar_vector& _ui, const dvar_vector& beta,const prevariable& log_alpha, const prevariable& pz)

  ADUNCONST(dvar_vector,ui)
  int i,j, i_m, Ni;
  double e1=1e-8; // formerly 1.e-20; current agrees with nbmm.tpl
  double e2=1e-8; // formerly 1.e-20; current agrees with nbmm.tpl

  dvariable alpha = e2+exp(mybound(log_alpha,-5.0,5.0,sc));

  // Construct random effects vector with proper var-covar structure from u
  dvar_vector b(1,ncolZ);

  int i1=1, i2=1;
  for (i_m=1;i_m<=M;i_m++)
  {
    dvar_matrix L(1,m(i_m),1,m(i_m));
    L.initialize();

    if(cor_flag(i_m))
    {
      L(1,1)=1;
      for (i=1;i<=m(i_m);i++)
      {
        L(i,i)=1;
        for (int j=1;j<i;j++)
          L(i,j)=tmpL1(i2++);
        L(i)(1,i)/=norm(L(i)(1,i));
      }
      for (i=1;i<=m(i_m);i++)
         L(i)*=exp(mybound(tmpL(i1++),-10.0,10.5,sc2));
    }
    else
    {
      for (i=1;i<=m(i_m);i++)
        L(i,i)=exp(mybound(tmpL(i1++),-10.0,10.5,sc2));
    }

    int upper = sum(m(1,i_m));
    int lower = upper-m(i_m)+1;

    dvar_vector tmp1(1,m(i_m));

    //tmp1 = ui(lower,upper).shift(1);

    // FIXME: re-introduce rlinkflag here?
    if (initial_params::current_phase < initial_params::max_number_phases-1)
    {
      tmp1 = random_bound(ui(lower,upper).shift(1),5);
    }
    else if 
      (initial_params::current_phase == initial_params::max_number_phases-1)
    {
      tmp1 = random_bound(ui(lower,upper).shift(1),20);
    }
    else
    {

      tmp1 = ui(lower,upper).shift(1);
    }

    tmp1 = L*tmp1;
    b(lower,upper) = tmp1.shift(lower);

  }

  // fudge factors for inverse link
  double eps=1.e-2;
  double eps1=1.e-2;
  double eps2=1.e-4; // works on cloglog test in link.R; FAILS when eps2=1.e-6
  switch (current_phase())
  {
  case 1:
    eps=.01;
    eps1=.05;
    break;
  default:
    eps=.01;
    eps1=.01;
  }

  dvariable eta = X(_i)*beta + Z(_i)*b;

  if(has_offset)
    eta += offset(_i);
  dvariable lambda;
  dvariable fpen=0.0;
  switch(link_type_flag) 
  {
     case 0:    // [robust] log
       if (rlinkflag) {
          lambda = e1+mfexp(eta);
       } else {
          lambda = exp(eta);
       }
       break;
     case 1:    // [robust] logistic
       if (rlinkflag) {
        if (value(eta)<10)
          {
            dvariable eeta=mfexp(eta);
            lambda=eeta/(1.0+eeta);
          }
          else
         {
            dvariable eeta=mfexp(-eta);
            lambda=1.0/(1.0+eeta);
         }
       } else {
          lambda = 1.0/(1.0+exp(-eta));
       }
       if (initial_params::current_phase < initial_params::max_number_phases-1)
       {
         lambda=0.999*lambda+0.0005;
       }
       else if 
         (initial_params::current_phase == initial_params::max_number_phases-1)
       {
         lambda=0.999999*lambda+0.0000005;
       }
       break;
     case 2:   // probit (cum norm)
       lambda = cumd_norm(eta);
       break;
     case 3:   // inverse link
       if (rlinkflag) {
         lambda = 1.0/(eps+posfun(eta-eps,eps1,fpen));
          g+=fpen;
        } else {
         lambda = 1.0/eta;
        }
       break;
     case 4: // cloglog
	 {
       // FIXME: document/clarify epsilon values  Add rlinkflag?
       dvariable eeta;

       if (rlinkflag) {
          eeta = -mfexp(eta);
       } else {
          eeta = -exp(eta);
       }
       const double onesixth=1.0/6.0;
       const double one24=1.0/24.0;
       const double one120=1.0/120.0;
       // safe (1-exp()); tip from http://www.johndcook.com/cpp_expm1.html
       if (fabs(value(eeta))<1e-5) {
	   lambda = -eeta*
             (1.0+eeta*(0.5+eeta*(onesixth+eeta*(one24+eeta*one120))));
       } else {
	   lambda = 1-mfexp(eeta);
       }
       // restrict to (0,1)
      /*
       if (rlinkflag) {
          lambda = posfun(lambda,eps2,fpen);
          lambda = (1.0-posfun(1.0-lambda,eps2,fpen));
       }
      */
       if (rlinkflag && !last_phase()) 
       {
         lambda=0.999999*lambda+0.0000005;
       }
       }
       break;
     case 5: // identity
        lambda = eta;
	break;
     default:
       cerr << "Illegal value for link_type_flag" << endl;
       ad_exit(1);
  }

  dvariable  tau = nbinom1_flag ? alpha : 1.0 + e1 + lambda/alpha ;
  mytau(_i)=value(tau);
  dvariable tmpl; 				// Log likelihood

  int cph=current_phase();

  // FIXME: does having lots of choices (for like_type_flag and link_flag) slow things down?
  // Is there any advantage to doing this stuff in a vectorized way?
  // Is there some other better approach to the per-point switch() ?

  switch(like_type_flag)
  {
    case 0:   // Poisson
      if (cph<2)  // ignore zero-inflation for first phase
      {
        tmpl = -square(log(1.0+y(_i,1))-log(1.0+lambda));
      }
      else if (cph<4)
      {
        tmpl = log(1.e-6+exp(log_density_poisson(y(_i,1),lambda)));
      }
      else if  (cph<5)
      {
        tmpl = log(1.e-9+exp(log_density_poisson(y(_i,1),lambda)));
      }
      else 
      {
        tmpl = log_density_poisson(y(_i,1),lambda);
      }
      break;
    case 1:   // Binomial: y(_i,1)=#successes, y(_i,2)=#failures, 
      if (p_y==1) {
         if (y(_i,1)==1) {
           tmpl = log(e1+lambda); //BMB: bvprobit.tpl uses e1=1e-10 here
         } else {
           tmpl = log(e1+(1.0-lambda));
         }
      }	 else {
         Ni = sum(y(_i));			// Number of trials
         tmpl = log_comb(Ni,y(_i,1)) + y(_i,1)*log(lambda) + (Ni-y(_i,1))*log(1.0-lambda);
      }
      break;
    case 2:   // neg binomial
      predy(_i)=value(lambda);
      if (cph<2)  // would like to use alpha_phase rather than 2 but it's in local_calcs
      {
        tmpl = -square(log(1.0+y(_i,1))-log(1.0+lambda));
      }
      else if (cph<3)  
      {
        tmpl = -(1.0+y(_i,1))*square(log(1.0+y(_i,1))-log(1.0+lambda));
      }
      else if (cph<4)  
      {
        tmpl = log(1.e-6+exp(log_negbinomial_density(y(_i,1),lambda,tau)));
      }
      else 
      {
        tmpl = log_negbinomial_density(y(_i,1),lambda,tau);
      }
      break;
    case 3: // Gamma 
        tmpl = log_gamma_density(y(_i,1),alpha,alpha/lambda);
	break;
    case 4: // Beta 
      // FIXME: "log_beta_density" seems more consistent but changing name
      //       causes problems -- already exists somewhere?
      tmpl = ln_beta_density(y(_i,1),lambda,alpha);
      break;
    case 5: // Gaussian
      tmpl = -0.5*(log(2.0*M_PI))-log(alpha)-0.5*square((y(_i,1)-lambda)/alpha);	
      break; 
    case 6:   // truncated Poisson
      // FIXME: check somewhere (here, or preferably in R code) for trunc poisson + not ZI + 0 in response
      if (value(lambda) > 1.0e-10) {
          tmpl = log_density_poisson(y(_i,1),lambda)-log(1.0-exp(-lambda));
      } else {
          tmpl = log_density_poisson(y(_i,1),lambda)-log(lambda);
      }
      break;
    case 7:  // truncated NB
    // NB(0) = p^alpha = (alpha/(alpha+lambda))^alpha = (1+lambda/alpha)^(-alpha)
    //   -> exp(-lambda) as alpha -> infty
      if (cph<2)  // ignore zero-inflation for first phase
        tmpl = -square(log(1.0+y(_i,1))-log(1.0+lambda));
      else
        tmpl = log_negbinomial_density(y(_i,1),lambda,tau)-log(1.0-pow(1.0+lambda/alpha,-alpha));
      break;
    case 8: // logistic 
	tmpl = -log(alpha) + (y(_i,1)-lambda)/alpha - 2*log(1+exp((y(_i,1)-lambda)/alpha));
      break;
    case 9: // beta-binomial
	Ni = sum(y(_i));
	tmpl = log_comb(Ni,y(_i,1)) + // log(C(Ni,y(_i,1)))
	    gammln(y(_i,2)+alpha*(1-lambda))+gammln(y(_i,1)+alpha*lambda)-gammln(Ni+alpha) + // lbeta(...)
	    -(gammln(alpha*(1-lambda))+gammln(alpha*lambda)-gammln(alpha)); // lbeta(...)
	break;
  default:
      cerr << "Illegal value for like_type_flag" << endl;
      ad_exit(1);
  }

  // Zero inflation part 
  // zi_kluge: apply ZI whether or not zi_flag is true
  if(zi_flag || zi_kluge) 
  {
    if(y(_i,1)==0)
    {
      g -= log(e2+pz+(1.0-pz)*mfexp(tmpl));
    }
    else
    {
      g -= log(e2+(1.0-pz)*mfexp(tmpl));
    }
  } 
  else 
  {
    g -= tmpl;
  }

REPORT_SECTION
  report << beta*phi << endl;
  for (int i=1;i<=n;i++)
  {
    report << i << "   " << y(i,1) << "   " << predy(i)
           << "   " << square(y(i,1)-predy(i))/(predy(i)*mytau(i))<< endl;
  }

TOP_OF_MAIN_SECTION
  arrmblsize=40000000;
  gradient_structure::set_GRADSTACK_BUFFER_SIZE(2000000);
  gradient_structure::set_CMPDIF_BUFFER_SIZE(100000000);
  gradient_structure::set_MAX_NVAR_OFFSET(2000000);

GLOBALS_SECTION

  #include <admodel.h>
  #include <df1b2fun.h>
  void add_slave_suffix(const adstring & s) {}

  const double sc=10.;
  const double sc2=100.;

  ofstream ofs11("b1");
  ofstream ofs12("b2");
  ofstream ofs13("s1");
  ofstream ofs14("s2");

  dvariable betaln(const prevariable& a,const prevariable& b )
  {
    return gammln(a)+gammln(b)-gammln(a+b);
  }

  dvariable mybound(const prevariable& x,double lb,double ub,double sc)
  {
    dvariable sx=x/(sc*(ub-lb));
    return 0.5*(lb+ub) + sin(sx)*0.5*(ub-lb);
  }

  df1b2variable mybound(const df1b2variable& x,double lb,double ub,double sc)
  {
    df1b2variable sx=x/(sc*(ub-lb));
    return 0.5*(lb+ub) + sin(sx)*0.5*(ub-lb);
  }

  dvariable ln_beta_density(double y,const prevariable & mu,
    const prevariable& phi)
  {
    dvariable omega=mu*phi;
    dvariable tau=phi-mu*phi;
    dvariable lb=betaln(omega,tau);
    dvariable d=(omega-1)*log(y)+(tau-1)*log(1.0-y)-lb;
    return d;
  }

  df1b2variable betaln(const df1b2variable& a,const df1b2variable& b )
  {
    return gammln(a)+gammln(b)-gammln(a+b);
  }

  df1b2variable ln_beta_density(double y,const df1b2variable & mu,
    const df1b2variable& phi)
  {
    df1b2variable omega=mu*phi;
    df1b2variable tau=phi-mu*phi;
    df1b2variable lb=betaln(omega,tau);
    df1b2variable d=(omega-1)*log(y)+(tau-1)*log(1.0-y)-lb;
    return d;
  }

  dvariable random_bound(const prevariable& u,double a)
  {
    if (fabs(value(u))<=a)
      return u;
    else if (value(u)>a)
    {
      dvariable y=u-a;
      return a+y/(1+y);
    }
    else if (value(u)<-a)
    {
      dvariable y=-a-u;
      return -a-y/(1+y);
    }
  }
  df1b2variable random_bound(const df1b2variable& u,double a)
  {
    if (fabs(value(u))<=a)
      return u;
    else if (value(u)>a)
    {
      df1b2variable y=u-a;
      return a+y/(1+y);
    }
    else if (value(u)<-a)
    {
      df1b2variable y=-a-u;
      return -a-y/(1+y);
    }
  }
  dvar_vector random_bound(const dvar_vector& v,double a)
  {
    int mmin=v.indexmin();
    int mmax=v.indexmax();
    dvar_vector tmp(mmin,mmax);
    for (int i=mmin;i<=mmax;i++)
    {
      tmp(i)=random_bound(v(i),a);
    }
    return tmp;
  }
  df1b2vector random_bound(const df1b2vector& v,double a)
  {
    int mmin=v.indexmin();
    int mmax=v.indexmax();
    df1b2vector tmp(mmin,mmax);
    for (int i=mmin;i<=mmax;i++)
    {
      tmp(i)=random_bound(v(i),a);
    }
    return tmp;
  }