Simulation
To simulated a Dirichlet sample quickly we use the method on the Wikipedia page for the Dirichlet distribution.
rdirichlet <- function(N=1,K=c(1,1)) {
# Simulations from the Dirichlet Distribution, according to the method of Wikipedia
lk <- length(K)
sim <- matrix(0,N,lk)
gams <- matrix(0,N,lk)
for (i in 1:lk) {
gams[,i] <- matrix(rgamma(N,K[i]),N,1)
}
gamtotal <- matrix(rowSums(gams),N,lk)
sim <- gams/gamtotal
return(sim)
}
Frequentist Parameter estimation
The simplest method of parameter estimation is the method of moments:
dirichmom <- function(X) {
# Method of Moments estimates of the Dirichlet Distribution
temp <- dim(X); n <- temp[1]; m <- temp[2]
# X <- cbind(X,matrix(1-apply(X,1,sum)))
lpb <- apply(log(X),2,mean)
mom <- apply(X,2,mean)*(mean(X[,1])-mean(X[,1]^2))/(mean(X[,1]^2) - ((mean(X[,1]))^2))
return(matrix(mom))
}
The second approach is the method of maximum likelihood. This method requires the inverse of the digamma function \((\psi^{-1}(y))\).
psiinv <- function(y) {
x <- matrix(0,8,1)
if (y >= -2.22) {
x[1] <- exp(y)+0.5
} else {
x[1] <- -1/(y-digamma(1))
}
for (i in 2:8) {
x[i] <- x[i-1] - ((digamma(x[i-1])-y)/trigamma(x[i-1]))
}
xf <- x[8]
return(xf)
}
dirichML <- function(X,mom=dirichmom(X),st=180) {
# Maximum Likelihood estimates of the Dirichlet Distribution
temp <- dim(X); n <- temp[1]; m <- temp[2]
# X <- cbind(X,matrix(1-apply(X,1,sum)))
lpb <- apply(log(X),2,mean)
a <- matrix(1,st,m)
a[1,] <- t(mom)
for (i in 2:st) {
ai <- sum(a[i-1,])
for (j in 1:m) {
a[i,j] <- psiinv(digamma(ai) + lpb[j])
}
}
ml <- matrix(a[st,],m)
return(ml)
}
Both of the above are adapted from the work of Minka in his Microsoft research reports (the code is mine but the underlying math is not).
Bayesian approach
In order to implement the Bayesian approach we must have a function that returns the log posterior distribution. In this case we accommodate the Jeffreys Prior and MDIP.
lpostdirichlet <- function(X,k,prior='mdi') {
if (any(k<=0)) {
return(-Inf)
} else {
k0 <- sum(k)
if (prior=='jeffreys') {
a <- nrow(X)*(lgamma(k0) - sum(lgamma(k)))
b <- sum(log(trigamma(k))) + log(1 - trigamma(k0)*sum(trigamma(k)^(-1)))
lpost <- a + (0.5*b) + sum((k-1)*colSums(log(X)))
} else {
a <- (nrow(X) + 1)*(lgamma(k0) - sum(lgamma(k)))
b <- (digamma(k)-digamma(k0)) + colSums(log(X))
lpost <- a + sum((k-1)*b)
}
return(lpost)
}
}
Posterior mode estimation
The posterior mode can be used as a parameter estimate, and is also useful for posterior simulation.
We begin by defining a negative log likelihood function and then using the optim function.
nlpost <- function(p,x,prior) { 0-lpostdirichlet(x,p,prior) }
dirichletpostmode <- function(X,prior='mdi') {
n <- nrow(X)
m1 <- ncol(X)
pinit <- dirichmom(X)
MLoutput0 <- optim(pinit,nlpost,x=X,prior=prior,method="L-BFGS-B",lower=rep(0.0001,m1),upper=rep(Inf,m1),hessian=TRUE,control=list(maxit=400))
p <- MLoutput0$par
# Calculate Sigma to use for simulation (small adjustment for stability)
Sigma <- solve(MLoutput0$hessian+diag(rep(0.01,m1)))
return(list(p=p,Sigma=Sigma,n=n,m=m1))
}
Posterior simulation
We use the Metropolis method of simulation for the posterior. Requires the MASS library that comes with R.
library(MASS)
dirichletpostsim <- function(X,nsim=5000,prior='mdi',burnin=100,thin=2) {
postprop <- dirichletpostmode(X,prior)
k1 <- postprop$p
accepted <- 0
tried <- 0
totsim <- nsim*thin + burnin
sims <- matrix(1,totsim,postprop$m)
lp1 <- lpostdirichlet(X,k1,prior)
while (accepted < totsim) {
k2 <- mvrnorm(1,k1,postprop$Sigma)
while (any(k2<=0)) { k2 <- mvrnorm(1,k1,postprop$Sigma); tried <- tried + 1 }
lp2 <- lpostdirichlet(X,k2,prior)
tried <- tried + 1
if (lp2 - lp1 > log(runif(1))) {
accepted <- accepted + 1
k1 <- k2
lp1 <- lp2
sims[accepted,] <- k2
}
}
cat('\n Acceptance Rate: ',(accepted/tried),' \n') # comment for simulation studies!
sims <- sims[(-burnin:-1),]
return(sims[(1:nsim)*thin,])
}
Example
In this example we show how to use the code above. We simulate a sample and then apply the parameter estimation techniques in order.
sampl <- rdirichlet(48,c(4,5,6,7,8))
cat('\nMethod of Moments estimates:\n',dirichmom(sampl))
##
## Method of Moments estimates:
## 6.284775 8.050515 10.04443 11.53222 15.09932
cat('\nMethod of Maximum Likelihood estimates:\n',dirichML(sampl))
##
## Method of Maximum Likelihood estimates:
## 4.053437 4.999222 6.336986 7.107181 9.321331
cat('\nPosterior mode estimates:\n',dirichletpostmode(sampl)$p)
##
## Posterior mode estimates:
## 4.11719 5.082718 6.448414 7.234685 9.495065
cat('\nPosterior estimates with intervals based on simulation:\n')
##
## Posterior estimates with intervals based on simulation:
sims <- dirichletpostsim(sampl)
##
## Acceptance Rate: 0.3177599
resultsmat <- matrix(apply(sims,2,function(x) { c(mean(x),quantile(x,c(0.025,0.5,0.975))) }),4)
rownames(resultsmat) <- c('Posterior Mean','Lower (0.025)','Median (0.5)','Upper (0.975)')
print(resultsmat)
## [,1] [,2] [,3] [,4] [,5]
## Posterior Mean 4.274958 5.278005 6.687980 7.512944 9.875796
## Lower (0.025) 3.375960 4.153065 5.285310 5.966891 7.883528
## Median (0.5) 4.266262 5.246442 6.668680 7.503963 9.854262
## Upper (0.975) 5.275904 6.503832 8.197505 9.170511 12.089308
That’s all for now.