#************************************************************************* # R code to fit location normal Dirichlet process mixture model, * # written by Thanasis Kottas (Applied Math & Statistics, University * # of California, Santa Cruz) * #************************************************************************* # # # #--------------------------------------------------------------------------- # Function "mcmc": the main program # # Required INPUTS: # IB = number of initial iterations to be discarded (burn-in) # B = posterior sample size # (IB + B = total number of iterations) # y = data vector # a.alpha, b.alpha = prior hyperparameters for alpha # a.mu, b.mu = prior hyperparameters for mu # a.tau2, b.tau2 = prior hyperparameters for tau2 # a.phi, b.phi = prior hyperparameters for phi # # OUTPUT: files with posterior samples for alpha ("alpha.txt"), # mu ("mu.txt"), tau2 ("tau2.txt"), phi ("phi.txt"), n^{*} ("n_star.txt") # and draws from the prior predictive distribution for y.0 # ("prior_pred_y.txt"), and the posterior predictive distribution for # theta.0 ("post_pred_theta.txt") and y.0 ("post_pred_y.txt") #--------------------------------------------------------------------------- # # mcmc <- function(IB,B,y,a.alpha,b.alpha,a.mu,b.mu,a.tau2,b.tau2,a.phi,b.phi) { # # output files # out.f1 = file("alpha.txt") out.f2 = file("mu.txt") out.f3 = file("tau2.txt") out.f4 = file("phi.txt") out.f5 = file("post_pred_theta.txt") out.f6 = file("post_pred_y.txt") out.f7 = file("prior_pred_y.txt") out.f8 = file("n_star.txt") open(out.f1,"w") open(out.f2,"w") open(out.f3,"w") open(out.f4,"w") open(out.f5,"w") open(out.f6,"w") open(out.f7,"w") open(out.f8,"w") # # space allocation for the mcmc samples # n <- length(y) theta <- matrix(rep(0,n*(IB+B+1)),nrow=IB+B+1,ncol=n) theta[1,] <- rep(0,n) theta.star <- list() n.star <- rep(0,IB+B+1) y.0.post <- rep(0,B+1) y.0.prior <- rep(0,B+1) theta.0.post <- rep(0,B+1) alpha <- rep(0,IB+B+1) mu <- rep(0,IB+B+1) tau2 <- rep(0,IB+B+1) phi <- rep(0,IB+B+1) # # starting values for the Gibbs sampler # alpha[1] <- 1.0 mu[1] <- 0.0 tau2[1] <- 1.0 phi[1] <- 1.0 # #--------------------- Gibbs sampler steps --------------------------- # for(t in 2:(IB+B+1)) { # # update the theta_i, i=1,...,n, and # obtain the associated number of clusters, n^{*}, the # cluster values theta_j^{*}, j=1,...,n^{*}, and # vector of configuration indicators w=(w_1,...,w_n) # theta[t,] <- sample.theta(theta[t-1,],n,alpha[t-1],mu[t-1], tau2[t-1],phi[t-1],y) theta.star[[t]] <- union(theta[t,],NULL) n.star[t] <- length(theta.star[[t]]) w <- list() for(i in 1:(n.star[t])) { w[[i]] <- seq(1,n)[theta[t,] == (theta.star[[t]])[i]] } # # update the cluster locations theta_j^{*}, j=1,...,n^{*} # theta.star[[t]] <- sample.theta.star(w,n.star[t],mu[t-1], tau2[t-1],phi[t-1],y) for(i in 1:(n.star[t])) { theta[t,w[[i]]] <- (theta.star[[t]])[i] } # # update hyperparameters alpha, mu, tau2, phi # alpha[t] <- sample.alpha(alpha[t-1],n,n.star[t],a.alpha,b.alpha) mu[t] <- sample.mu(theta.star[[t]],n.star[t],tau2[t-1],a.mu,b.mu) tau2[t] <- sample.tau2(theta.star[[t]],n.star[t],mu[t],a.tau2,b.tau2) phi[t] <- sample.phi(theta[t,],n,y,a.phi,b.phi) # #------------- save output after burn-in ---------------------- # if(t < IB) { if ( ( t %% 100 ) == 0 ) print(paste("burn-in iteration=",t)) }else{ if ( ( t %% 100 ) == 0 ) print(paste("monitoring iteration=",t)) # # posterior predictive draws for theta.0 and y.0 # prior predictive draw for y.0 # theta.0.post[t] <- post.pred.theta.0(theta.star[[t]],n.star[t], theta[t,],n,alpha[t],mu[t],tau2[t]) y.0.post[t] <- post.pred.y.0(theta.star[[t]],n.star[t],theta[t,], n,alpha[t],mu[t],tau2[t],phi[t]) y.0.prior[t] <- prior.pred.y.0(a.mu,b.mu,a.tau2,b.tau2,a.phi,b.phi) # # write(alpha[t],file=out.f1,ncolumns=1,append=T) write(mu[t],file=out.f2,ncolumns=1,append=T) write(tau2[t],file=out.f3,ncolumns=1,append=T) write(phi[t],file=out.f4,ncolumns=1,append=T) write(theta.0.post[t],file=out.f5,ncolumns=1,append=T) write(y.0.post[t],file=out.f6,ncolumns=1,append=T) write(y.0.prior[t],file=out.f7,ncolumns=1,append=T) write(n.star[t],file=out.f8,ncolumns=1,append=T) } } } # #======================================================================= # Functions used in the program #======================================================================= # #--------------------------------------------------------------------- # Function "sample.theta": updates for all latent mixing parameters # theta_i, i=1,...,n (calls function "sample.theta.i") # sample.theta <- function(theta,n,alpha,mu,tau2,phi,y) { theta[1] <- sample.theta.i(theta[2:n],alpha,mu,tau2,phi,y[1]) for(i in 2:(n-1)) { theta[i] <- sample.theta.i(theta[c(1:(i-1),(i+1):n)],alpha, mu,tau2,phi,y[i]) } theta[n] <- sample.theta.i(theta[1:(n-1)],alpha,mu,tau2,phi,y[n]) return(theta) } # #--------------------------------------------------------------------- # Function "sample.theta.i": update for the i-th latent mixing # parameter given theta_k, for all k other than i, y_i, and all # other hyperparameters # sample.theta.i <- function(theta.wout.i,alpha,mu,tau2,phi,y.i) { th.star.wout.i <- union(theta.wout.i,NULL) n.star.wout.i <- length(th.star.wout.i) n.j.minus <- apply(matrix(th.star.wout.i,nrow=n.star.wout.i,ncol=1), 1,function(a)sum(theta.wout.i == a)) q.0 <- alpha*dnorm(y.i,mean=mu,sd=sqrt(phi+tau2)) q.j <- dnorm(y.i,mean=th.star.wout.i,sd=sqrt(phi)) ccc <- n.j.minus*q.j ddd <- q.0 + sum(ccc) uu <- runif(1,0,1) if(uu < (q.0/ddd)) { mean.h <- ((tau2*y.i)+(phi*mu))/(phi+tau2) return(rnorm(1,mean=mean.h,sd=sqrt((phi*tau2)/(phi+tau2)) )) }else{ return(sample(th.star.wout.i,1,replace=TRUE,prob=ccc)) } } # #--------------------------------------------------------------------- # Function "sample.theta.star": updates for all cluster locations # theta_j^{*}, j=1,...,n^{*} # # INPUTS: w = vector of configuration indicators # n.star = number of clusters # y = data vector # mu,tau2,phi = hyperparameters # sample.theta.star <- function(w,n.star,mu,tau2,phi,y) { theta.star <- rep(0,n.star) for(i in 1:n.star) { y.star <- y[w[[i]]] n.j <- length(y.star) mean.j <- ((mu*phi)+(n.j*mean(y.star)*tau2))/(phi+(n.j*tau2)) var.j <- (phi*tau2)/(phi+(n.j*tau2)) theta.star[i] <- rnorm(1,mean=mean.j,sd=sqrt(var.j)) } return(theta.star) } # #--------------------------------------------------------------------- # Function "sample.mu": update for hyperparameter mu # sample.mu <- function(theta.star,n.star,tau2,a.mu,b.mu) { mean.mu <- ((tau2*a.mu)+(sum(theta.star)*b.mu))/(tau2+(n.star*b.mu)) var.mu <- (tau2*b.mu)/(tau2+(n.star*b.mu)) return(rnorm(1,mean=mean.mu,sd=sqrt(var.mu))) } # #--------------------------------------------------------------------- # Function "sample.tau2": update for hyperparameter tau2 # sample.tau2 <- function(theta.star,n.star,mu,a.tau2,b.tau2) { aa.tau2 <- a.tau2 + (0.5*n.star) bb.tau2 <- b.tau2 + (0.5*sum((theta.star-mu)**2.0)) return(bb.tau2/rgamma(1,shape=aa.tau2,scale=1.0)) } # #--------------------------------------------------------------------- # Function "sample.phi": update for hyperparameter phi # sample.phi <- function(theta,n,y,a.phi,b.phi) { aa.phi <- a.phi + (0.5*n) bb.phi <- b.phi + (0.5*sum((theta-y)**2.0)) return(bb.phi/rgamma(1,shape=aa.phi,scale=1.0)) } # #--------------------------------------------------------------------- # Function "sample.alpha": update for hyperparameter alpha # (uses the augmentation method from Escobar and West, 1995, which # is based on an auxiliary beta random variable) # sample.alpha <- function(alpha.old,n,n.star,a.alpha,b.alpha) { eta <- rbeta(1,shape1=alpha.old+1.0,shape2=n) tt1 <- a.alpha + n.star - 1.0 tt2 <- b.alpha - log(eta) prob <- tt1/((n*tt2) + a.alpha + n.star - 1.0) uu <- runif(1,0,1) if(uu < prob) { return(rgamma(1,shape=tt1+1.0,rate=tt2)) }else{ return(rgamma(1,shape=tt1,rate=tt2)) } } # # #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # Functions for sampling from prior and posterior # predictive distributions #+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ # #--------------------------------------------------------------------- # Function "prior.pred.y.0": draw from the prior predictive # distribution for y.0, which is defined as the average of a # Normal(y.0|theta,phi) distribution over the prior for phi # and theta, where theta|mu,tau2 ~ Normal(mu,tau2) with the # specified priors for mu and tau2 # # prior.pred.y.0 <- function(a.mu,b.mu,a.tau2,b.tau2,a.phi,b.phi) { phi <- b.phi/rgamma(1,shape=a.phi,scale=1.0) mu <- rnorm(1,mean=a.mu,sd=sqrt(b.mu)) tau2 <- b.tau2/rgamma(1,shape=a.tau2,scale=1.0) theta <- rnorm(1,mean=mu,sd=sqrt(tau2)) return(rnorm(1,mean=theta,sd=sqrt(phi))) } #--------------------------------------------------------------------- # Function "post.pred.theta.0": draw from the posterior predictive # distribution for theta.0 given n^{*}, the cluster values # theta_j^{*}, j=1,...,n^{*}, and hyperparameters alpha,mu,tau2, # p(theta.0|n.star,theta.star,alpha,mu,tau2) # # post.pred.theta.0 <- function(theta.star,n.star,theta,n,alpha,mu,tau2) { uu <- runif(1,0,1) if(uu < alpha/(alpha+n)) { return(rnorm(1,mean=mu,sd=sqrt(tau2))) }else{ n.minus <- apply(matrix(theta.star,nrow=n.star,ncol=1), 1, function(a) sum(theta == a)); return(sample(theta.star,1,replace=TRUE,prob=n.minus)) } } #--------------------------------------------------------------------- # Function "post.pred.y.0": draw from the posterior predictive # distribution for y.0 given n^{*}, the cluster values # theta_j^{*}, j=1,...,n^{*}, and hyperparameters alpha,mu,tau2,phi, # p(y.0|n.star,theta.star,alpha,mu,tau2,phi) # #Note that the posterior predictive distribution for y.0, p(y.0|data), #is the average of p(y.0|n.star,theta.star,alpha,mu,tau2,phi) over #the posterior of (n.star,w,theta.star,alpha,mu,tau2,phi) # # post.pred.y.0 <- function(theta.star,n.star,theta,n,alpha,mu,tau2,phi) { uu <- runif(1,0,1) if(uu < alpha/(alpha+n)) { return(rnorm(1,mean=mu,sd=sqrt(phi+tau2))) }else{ n.minus <- apply(matrix(theta.star,nrow=n.star,ncol=1), 1, function(a) sum(theta == a)); theta.0 <- sample(theta.star,1,replace=TRUE,prob=n.minus) return(rnorm(1,mean=theta.0,sd=sqrt(phi))) } }