#*************************************************************************
# R code to fit location normal Dirichlet process mixture model,         *
# written by Thanasis Kottas (Applied Math & Statistics, University      *
# of California, Santa Cruz)                                             *
#*************************************************************************
#
#
#
#---------------------------------------------------------------------------
# Function "mcmc": the main program 
#
#  Required INPUTS:
#    IB = number of initial iterations to be discarded (burn-in)
#    B = posterior sample size 
#    (IB + B = total number of iterations)
#    y = data vector 
#    a.alpha, b.alpha = prior hyperparameters for alpha
#    a.mu, b.mu = prior hyperparameters for mu
#    a.tau2, b.tau2 = prior hyperparameters for tau2
#    a.phi, b.phi = prior hyperparameters for phi
#
#  OUTPUT: files with posterior samples for alpha ("alpha.txt"), 
#  mu ("mu.txt"), tau2 ("tau2.txt"), phi ("phi.txt"), n^{*} ("n_star.txt")
#  and draws from the prior predictive distribution for y.0 
#  ("prior_pred_y.txt"), and the posterior predictive distribution for 
#  theta.0 ("post_pred_theta.txt") and y.0 ("post_pred_y.txt")
#---------------------------------------------------------------------------
#
#
mcmc <- function(IB,B,y,a.alpha,b.alpha,a.mu,b.mu,a.tau2,b.tau2,a.phi,b.phi)
{
#
# output files 
#
  out.f1 = file("alpha.txt")
  out.f2 = file("mu.txt")
  out.f3 = file("tau2.txt")
  out.f4 = file("phi.txt")
  out.f5 = file("post_pred_theta.txt")
  out.f6 = file("post_pred_y.txt")
  out.f7 = file("prior_pred_y.txt")
  out.f8 = file("n_star.txt")
  open(out.f1,"w")
  open(out.f2,"w")
  open(out.f3,"w")
  open(out.f4,"w")
  open(out.f5,"w")
  open(out.f6,"w")
  open(out.f7,"w")
  open(out.f8,"w")
#
# space allocation for the mcmc samples
#
  n <- length(y) 
  theta <- matrix(rep(0,n*(IB+B+1)),nrow=IB+B+1,ncol=n)
  theta[1,] <- rep(0,n)
  theta.star <- list()
  n.star <- rep(0,IB+B+1)
  y.0.post <- rep(0,B+1)
  y.0.prior <- rep(0,B+1)
  theta.0.post <- rep(0,B+1)
  alpha <- rep(0,IB+B+1)
  mu <- rep(0,IB+B+1)
  tau2 <- rep(0,IB+B+1)
  phi <- rep(0,IB+B+1)
#
# starting values for the Gibbs sampler
#
  alpha[1] <- 1.0
  mu[1] <- 0.0
  tau2[1] <- 1.0
  phi[1] <- 1.0
#
#--------------------- Gibbs sampler steps ---------------------------
#
  for(t in 2:(IB+B+1))
  {
    #
    # update the theta_i, i=1,...,n, and 
    # obtain the associated number of clusters, n^{*}, the 
    # cluster values theta_j^{*}, j=1,...,n^{*}, and 
    # vector of configuration indicators w=(w_1,...,w_n) 
    #
      theta[t,] <- sample.theta(theta[t-1,],n,alpha[t-1],mu[t-1],
                                  tau2[t-1],phi[t-1],y)
      theta.star[[t]] <- union(theta[t,],NULL)
      n.star[t] <- length(theta.star[[t]])      
      w <- list()
      for(i in 1:(n.star[t]))
      {
         w[[i]] <- seq(1,n)[theta[t,] == (theta.star[[t]])[i]]
      }
    #
    # update the cluster locations theta_j^{*}, j=1,...,n^{*}
    # 
      theta.star[[t]] <- sample.theta.star(w,n.star[t],mu[t-1],
                                           tau2[t-1],phi[t-1],y)
      for(i in 1:(n.star[t]))
      {
         theta[t,w[[i]]] <- (theta.star[[t]])[i]
      }
    #
    # update hyperparameters alpha, mu, tau2, phi
    #
      alpha[t] <- sample.alpha(alpha[t-1],n,n.star[t],a.alpha,b.alpha)
      mu[t] <- sample.mu(theta.star[[t]],n.star[t],tau2[t-1],a.mu,b.mu)
      tau2[t] <- sample.tau2(theta.star[[t]],n.star[t],mu[t],a.tau2,b.tau2)
      phi[t] <- sample.phi(theta[t,],n,y,a.phi,b.phi)
    #  
    #------------- save output after burn-in ----------------------
    #
      if(t < IB)
      {
        if ( ( t %% 100 ) == 0 ) print(paste("burn-in iteration=",t))
      }else{
        if ( ( t %% 100 ) == 0 ) print(paste("monitoring iteration=",t))
        #
        # posterior predictive draws for theta.0 and y.0
        # prior predictive draw for y.0
        #
        theta.0.post[t] <- post.pred.theta.0(theta.star[[t]],n.star[t],
                                  theta[t,],n,alpha[t],mu[t],tau2[t])
        y.0.post[t] <- post.pred.y.0(theta.star[[t]],n.star[t],theta[t,],
                                      n,alpha[t],mu[t],tau2[t],phi[t])
        y.0.prior[t] <- prior.pred.y.0(a.mu,b.mu,a.tau2,b.tau2,a.phi,b.phi)
        #
        #
        write(alpha[t],file=out.f1,ncolumns=1,append=T)
        write(mu[t],file=out.f2,ncolumns=1,append=T)
        write(tau2[t],file=out.f3,ncolumns=1,append=T)
        write(phi[t],file=out.f4,ncolumns=1,append=T)
        write(theta.0.post[t],file=out.f5,ncolumns=1,append=T) 
        write(y.0.post[t],file=out.f6,ncolumns=1,append=T) 
        write(y.0.prior[t],file=out.f7,ncolumns=1,append=T)       
        write(n.star[t],file=out.f8,ncolumns=1,append=T)    
      }
  }
}
#
#=======================================================================
#                  Functions used in the program 
#=======================================================================
#
#---------------------------------------------------------------------
# Function "sample.theta": updates for all latent mixing parameters 
# theta_i, i=1,...,n (calls function "sample.theta.i")
#   
sample.theta <- function(theta,n,alpha,mu,tau2,phi,y)
{
  theta[1] <- sample.theta.i(theta[2:n],alpha,mu,tau2,phi,y[1])
  for(i in 2:(n-1))
  {
     theta[i] <- sample.theta.i(theta[c(1:(i-1),(i+1):n)],alpha,
                                                mu,tau2,phi,y[i])
  }
  theta[n] <- sample.theta.i(theta[1:(n-1)],alpha,mu,tau2,phi,y[n])
  return(theta)
}
#
#---------------------------------------------------------------------
# Function "sample.theta.i": update for the i-th latent mixing 
# parameter given theta_k, for all k other than i, y_i, and all 
# other hyperparameters
#  
sample.theta.i <- function(theta.wout.i,alpha,mu,tau2,phi,y.i)
{
  th.star.wout.i <- union(theta.wout.i,NULL)
  n.star.wout.i <- length(th.star.wout.i)
  n.j.minus <- apply(matrix(th.star.wout.i,nrow=n.star.wout.i,ncol=1),
                      1,function(a)sum(theta.wout.i == a))
  q.0 <- alpha*dnorm(y.i,mean=mu,sd=sqrt(phi+tau2))
  q.j <- dnorm(y.i,mean=th.star.wout.i,sd=sqrt(phi))
  ccc <- n.j.minus*q.j
  ddd <- q.0 + sum(ccc)
  uu <- runif(1,0,1)
  if(uu < (q.0/ddd))
  {
    mean.h <- ((tau2*y.i)+(phi*mu))/(phi+tau2)
    return(rnorm(1,mean=mean.h,sd=sqrt((phi*tau2)/(phi+tau2)) ))
  }else{
    return(sample(th.star.wout.i,1,replace=TRUE,prob=ccc))
  }
}
#
#---------------------------------------------------------------------
# Function "sample.theta.star": updates for all cluster locations 
# theta_j^{*}, j=1,...,n^{*} 
#
# INPUTS: w = vector of configuration indicators
#         n.star = number of clusters
#         y = data vector 
#         mu,tau2,phi = hyperparameters
#
sample.theta.star <- function(w,n.star,mu,tau2,phi,y)
{
  theta.star <- rep(0,n.star)
  for(i in 1:n.star)
  {
     y.star <- y[w[[i]]]
     n.j <- length(y.star)
     mean.j <- ((mu*phi)+(n.j*mean(y.star)*tau2))/(phi+(n.j*tau2))
     var.j <- (phi*tau2)/(phi+(n.j*tau2))
     theta.star[i] <- rnorm(1,mean=mean.j,sd=sqrt(var.j))
  }
  return(theta.star)
}
#
#---------------------------------------------------------------------
# Function "sample.mu": update for hyperparameter mu
#
sample.mu <- function(theta.star,n.star,tau2,a.mu,b.mu)
{
  mean.mu <- ((tau2*a.mu)+(sum(theta.star)*b.mu))/(tau2+(n.star*b.mu))
  var.mu <- (tau2*b.mu)/(tau2+(n.star*b.mu))
  return(rnorm(1,mean=mean.mu,sd=sqrt(var.mu)))
}
#
#---------------------------------------------------------------------
# Function "sample.tau2": update for hyperparameter tau2
#
sample.tau2 <- function(theta.star,n.star,mu,a.tau2,b.tau2)
{
  aa.tau2 <- a.tau2 + (0.5*n.star)
  bb.tau2 <- b.tau2 + (0.5*sum((theta.star-mu)**2.0))
  return(bb.tau2/rgamma(1,shape=aa.tau2,scale=1.0))
}
#
#---------------------------------------------------------------------
# Function "sample.phi": update for hyperparameter phi
#
sample.phi <- function(theta,n,y,a.phi,b.phi)
{
  aa.phi <- a.phi + (0.5*n)
  bb.phi <- b.phi + (0.5*sum((theta-y)**2.0))
  return(bb.phi/rgamma(1,shape=aa.phi,scale=1.0))
}
#
#---------------------------------------------------------------------
# Function "sample.alpha": update for hyperparameter alpha
# (uses the augmentation method from Escobar and West, 1995, which 
#   is based on an auxiliary beta random variable)
#
sample.alpha <- function(alpha.old,n,n.star,a.alpha,b.alpha)
{
  eta <- rbeta(1,shape1=alpha.old+1.0,shape2=n)
  tt1 <- a.alpha + n.star - 1.0
  tt2 <- b.alpha - log(eta)
  prob <- tt1/((n*tt2) + a.alpha + n.star - 1.0)
  uu <- runif(1,0,1)
  if(uu < prob)
  {
    return(rgamma(1,shape=tt1+1.0,rate=tt2))
  }else{
    return(rgamma(1,shape=tt1,rate=tt2))
  }
} 
#
#
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#       Functions for sampling from prior and posterior 
#                  predictive distributions 
#+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
#
#---------------------------------------------------------------------
# Function "prior.pred.y.0": draw from the prior predictive 
#  distribution for y.0, which is defined as the average of a
#  Normal(y.0|theta,phi) distribution over the prior for phi
#  and theta, where theta|mu,tau2 ~ Normal(mu,tau2) with the 
#  specified priors for mu and tau2
#
#
prior.pred.y.0 <- function(a.mu,b.mu,a.tau2,b.tau2,a.phi,b.phi)
{
  phi <- b.phi/rgamma(1,shape=a.phi,scale=1.0)
  mu <- rnorm(1,mean=a.mu,sd=sqrt(b.mu))
  tau2 <- b.tau2/rgamma(1,shape=a.tau2,scale=1.0)
  theta <- rnorm(1,mean=mu,sd=sqrt(tau2))
  return(rnorm(1,mean=theta,sd=sqrt(phi)))
}
#---------------------------------------------------------------------
# Function "post.pred.theta.0": draw from the posterior predictive 
#  distribution for theta.0 given n^{*}, the cluster values 
#  theta_j^{*}, j=1,...,n^{*}, and hyperparameters alpha,mu,tau2,
#  p(theta.0|n.star,theta.star,alpha,mu,tau2)
#
# 
post.pred.theta.0 <- function(theta.star,n.star,theta,n,alpha,mu,tau2)
{
  uu <- runif(1,0,1)
  if(uu < alpha/(alpha+n))
  {
    return(rnorm(1,mean=mu,sd=sqrt(tau2)))
  }else{
    n.minus <- apply(matrix(theta.star,nrow=n.star,ncol=1),
                     1, function(a) sum(theta == a));
    return(sample(theta.star,1,replace=TRUE,prob=n.minus))    
  }
}
#---------------------------------------------------------------------
# Function "post.pred.y.0": draw from the posterior predictive 
#  distribution for y.0 given n^{*}, the cluster values 
#  theta_j^{*}, j=1,...,n^{*}, and hyperparameters alpha,mu,tau2,phi,
#  p(y.0|n.star,theta.star,alpha,mu,tau2,phi)
#
#Note that the posterior predictive distribution for y.0, p(y.0|data), 
#is the average of p(y.0|n.star,theta.star,alpha,mu,tau2,phi) over
#the posterior of (n.star,w,theta.star,alpha,mu,tau2,phi)
# 
#
post.pred.y.0 <- function(theta.star,n.star,theta,n,alpha,mu,tau2,phi)
{
  uu <- runif(1,0,1)
  if(uu < alpha/(alpha+n))
  {
    return(rnorm(1,mean=mu,sd=sqrt(phi+tau2)))   
  }else{
    n.minus <- apply(matrix(theta.star,nrow=n.star,ncol=1),
                     1, function(a) sum(theta == a));
    theta.0 <- sample(theta.star,1,replace=TRUE,prob=n.minus)
    return(rnorm(1,mean=theta.0,sd=sqrt(phi)))
  }
}