This document walks through example simulation code used to produce the results obtained in ‘Penalized model-based clustering of fMRI’ submitted to the journal Biostatistics. First, we install and load the version of the R package used for the submitted manuscript:
Simulating Data
We simulate an example data set for \(K=100\) total subjects belong to one of \(G=2\) equally sized clusters which share roughly \(20 \%\) of network connections. For each subject, we generate \(n=177\) observations for \(p=10\) variables with approximately \(\rho = 10\%\) of network connections differing from their true cluster-level network.
Tuning Parameter Selection
We now select the optimal sets of tuning parameters for 3 different analysis approaches: our random covariance clustering model (RCCM), a Ward clustering & group graphical lasso (GGL) approach, and a graphical lasso (GLasso) & K-means clustering approach. The scale of which tuning parameters work best for each respective method vary, although in practice we’ve found that the optimal lasso penalty parameter for RCCM, \(\lambda_1\), tends to be \(\approx 100\) times the magnitude of what works well for GGL and GLasso. Similarly, we found that the optimal tuning parameter for RCCM that encourages similarity within clusters, denoted by \(\lambda_2\), tends to be \(\approx 5000\) times the magnitude of what works well for GGL when \(p=10\), but this did vary by the number of variables. In other words, if \(\lambda_1 = 50\) works well for RCCM for a given data set, then \(\lambda_1 = 0.50\) tends to work well for GLasso and GGL, and if \(\lambda_2 = 5000\) works well for RCCM, then \(\lambda_2 = 1\) works fairly well for GGL.
Modified stARS Algorithm
RCCM
# Display help file
?starsRccm
# Grid of tuning parameters to search over
lambdas <- expand.grid(lambda1 = c(1, 5, 15, 25, 35, 40),
lambda2 = c(1000, 3000, 5000), lambda3 = 20)
# Find optimal tuning parameter set using modified stARS with 10 bootstrap samples
optTune <- starsRccm(datf = myData$simDat, lambs = lambdas, G = G, N = 10, method = "RCCM")
# Displaying results
optTune %>%
knitr::kable(digits = 3, row.names = F) %>% kable_styling(bootstrap_options = c("hover", "responsive"))
lambda1
|
lambda2
|
lambda3
|
sparsity
|
25
|
3000
|
20
|
0.833
|
Ward & GGL
lambda1
|
lambda2
|
lambda3
|
sparsity
|
25
|
1000
|
NA
|
0.985
|
GLasso & K-means
lambda1
|
lambda2
|
lambda3
|
sparsity
|
25
|
NA
|
NA
|
0.982
|
Cross-Validation
# Display help file
?cvTune
# Grid of tuning parameters to search over
lambdas <- expand.grid(lambda1 = c(1, 5, 15, 25, 35, 40),
lambda2 = c(1000, 3000, 5000), lambda3 = 20)
# Find optimal tuning parameter set using 5-fold CV
optTuneCV <- cvTune(x = myData$simDat, G = G, lambs = lambdas,
methods = c("RCCM", "GGL", "GLasso"), folds = 5)
# Displaying selecting tuning parameters
optTuneCV %>%
knitr::kable(digits = 3, row.names = F) %>% kable_styling(bootstrap_options = c("hover", "responsive"))
lambda1
|
lambda2
|
lambda3
|
nll
|
method
|
5
|
5000
|
20
|
14115.16
|
GGL
|
15
|
1000
|
20
|
14135.52
|
GLasso
|
15
|
3000
|
20
|
14020.84
|
RCCM
|
Analysis
We then analyze our simulated data set using the optimally selected tuning parameters for each method.
Ward & GGL
# Using Ward & GGL approach
# Function for calculating pair-wise Frob norm differences and then clustering
MatClust <- function(mats, G) {
K <- dim(mats)[3]
combos <- expand.grid(s1 = 1:K, s2 = 1:K)
distMat <- matrix(NA, nrow = K, ncol = K)
for(r in 1:K) {
for(s in 1:K) {
distMat[r, s] <- norm(mats[, , r] - mats[, , s], type = 'F')
}
}
cl0 <- cutree(hclust(d = as.dist(distMat), method = "ward.D"), k = G)
wgk <- matrix(NA, nrow = G, ncol = K)
for(i in 1:G) {
for(j in 1:K) {
wgk[i, j] <- ifelse(cl0[j] == i, 1, 0)
}
}
return(wgk)
}
K <- length(myData$simDat)
Sl <- sapply(myData$simDat, cov, simplify = "array")
gl <- sapply(1:K, simplify = "array", FUN = function(x){glasso::glasso(Sl[, , x], rho = 0.001,
penalize.diagonal = FALSE)$wi})
GGLResults <- lapply(X = c("stARS", "CV"), FUN = function(tune) {
lambda1 <- ifelse(tune == "stARS", optWardggl$lambda1[1],
optTuneCV[which(optTuneCV$method == "GGL"), ]$lambda1)
lambda2 <- ifelse(tune == "stARS", optWardggl$lambda2[1],
optTuneCV[which(optTuneCV$method == "GGL"), ]$lambda2)
# Estimating cluster memberships using Ward clustering based on dissimilarity matrix of Frob norm differences
GGLres <- list()
GGLres$weights <- MatClust(gl, G = G)
# Analyzing using GGL within each estimated cluster
GGLres$res <- unlist(lapply(FUN = function(g) {
prec <- JGL::JGL(Y = myData$simDat[which(as.logical(GGLres$weights[g, ]))], penalty = "group",
penalize.diagonal = FALSE, lambda1 = lambda1 / 100,
lambda2 = lambda2 / 50000, return.whole.theta = TRUE)$theta
return(setNames(prec, c(which(as.logical(GGLres$weights[g, ])))))}, X = 1:G), recursive = F)
GGLzHat <- apply(GGLres$weights, MARGIN = 2, FUN = which.max)
# Estimated group-level network for GGL. Edge present if >= 50% of subjects have it in group
GGLres$Omegags <- array(NA, dim = c(p, p, G))
for(g in 1:G) {
GGLres$Omegags[, , g] <- round(apply(simplify2array(lapply(GGLres$res[which(GGLzHat == g)], FUN = adj)),
MARGIN = 1:2, FUN = mean))
}
return(GGLres)})
GLasso & K-means
# Using GLasso & K-means clustering
GLassoResults <- lapply(X = c("stARS", "CV"), FUN = function(tune) {
GLassores <- list()
lambda1 <- ifelse(tune == "stARS", optGL$lambda1[1],
optTuneCV[which(optTuneCV$method == "RCCM"), ]$lambda1)
GLassores$res <- lapply(myData$simDat, FUN = function(x) {
glasso::glasso(cov(x), rho = lambda1 / 100, penalize.diagonal = FALSE)$wi})
# Creating matrix of vectorized precision matrix estimates
vMat <- do.call(rbind, lapply(X = GLassores$res, FUN = as.numeric))
# Finding estimated cluster memberships using k-means clustering
GLassores$weights <- as.integer(factor(kmeans(x = vMat, centers = G)$cluster, levels = c(1:G)))
return(list(GLassores))})
Results
We summarize the performance of each method using both tuning parameter selection approaches. For evaluating clustering performance, we calculate the the rand index (RI) and adjusted rand index (RIadj). For evaluating performance in terms of edge detection we calculate the true positive rate (TPR), false positive rate (FPR), precision, and F1 scores for the subject-level networks, subscripted with \(k\), and cluster-level networks, subscripted with \(g\).
# Function for summarizing edge detection performances
performanceSummary <- function(Omegaks, Omegags, zs, Omega0ks = myData$Omegaks, Omega0gs = myData$Omega0s, z0s = myData$zgks) {
# Calculating Rand indexes and adjusted rand indexes
RI <- randCalc(zs, z0s)
RIadj <- mclust::adjustedRandIndex(zs, z0s)
# Calculating Precision Matrix Error, True Positive Rate, and False Positive Rates
subjSum <- sum(sapply(1:K, FUN = function(k){sum((adj(Omegaks[, , k]) +
adj(Omega0ks[, , k]))[lower.tri(Omega0ks[, , k], diag = FALSE)] == 2)}))
posskEdges <- sum(sapply(1:K, FUN = function(k){sum(adj(Omega0ks[, , k])[lower.tri(Omega0ks[, , k], diag = FALSE)] == 1)}))
TPRk <- subjSum / posskEdges
subjSum0 <- sum(sapply(1:K, FUN = function(k){sum((-1*adj(Omegaks[, , k]) +
adj(Omega0ks[, , k]))[lower.tri(Omega0ks[, , k], diag = FALSE)] == -1)}))
possk0s <- sum(sapply(1:K, FUN = function(k){sum(adj(Omega0ks[, , k])[lower.tri(Omega0ks[, , k], diag = FALSE)] == 0)}))
FPRk <- subjSum0 / possk0s
PrecisionK <- subjSum / (subjSum + subjSum0)
F1k <- 2*(PrecisionK*TPRk) / (PrecisionK + TPRk)
if(is.null(Omegags) == FALSE) {
grpSum <- sum(sapply(1:G, FUN = function(g){sum((adj(Omegags[, , g]) +
adj(Omega0gs[, , g]))[lower.tri(Omega0gs[, , g], diag = FALSE)] == 2)}))
possgEdges <- sum(sapply(1:G, FUN = function(g){sum(adj(Omega0gs[, , g])[lower.tri(Omega0gs[, , g], diag = FALSE)] == 1)}))
TPRg <- grpSum / possgEdges
grpSum0 <- sum(sapply(1:G, FUN = function(g){sum((-1*adj(Omegags[, , g]) +
adj(Omega0gs[, , g]))[lower.tri(Omega0gs[, , g], diag = FALSE)] == -1)}))
possg0s <- sum(sapply(1:G, FUN = function(g){sum(adj(Omega0gs[, , g])[lower.tri(Omega0gs[, , g], diag = FALSE)] == 0)}))
PrecisionG <- grpSum / (grpSum + grpSum0)
F1g <- 2*(PrecisionG*TPRg) / (PrecisionG + TPRg)
FPRg <- grpSum0 / possg0s
} else {
TPRg <- NA
FPRg <- NA
PrecisionG <- NA
F1g <- NA
}
return(data.frame(RI = RI, RIadj = RIadj, TPRk = TPRk, FPRk = FPRk,
PrecisionK = PrecisionK, F1k = F1k,
TPRg = TPRg, FPRg = FPRg, PrecisionG = PrecisionG, F1g = F1g))
}
stARS
RCCM
RI
|
RIadj
|
TPRk
|
FPRk
|
PrecisionK
|
F1k
|
TPRg
|
FPRg
|
PrecisionG
|
F1g
|
1
|
1
|
0.953
|
0.065
|
0.745
|
0.836
|
1
|
0.12
|
0.625
|
0.769
|
Ward & GGL
RI
|
RIadj
|
TPRk
|
FPRk
|
PrecisionK
|
F1k
|
TPRg
|
FPRg
|
PrecisionG
|
F1g
|
1
|
1
|
0.067
|
0
|
1
|
0.125
|
0
|
0
|
NaN
|
NaN
|
GLasso & K-means
RI
|
RIadj
|
TPRk
|
FPRk
|
PrecisionK
|
F1k
|
TPRg
|
FPRg
|
PrecisionG
|
F1g
|
0.533
|
0.078
|
0.077
|
0
|
1
|
0.142
|
NA
|
NA
|
NA
|
NA
|
5-fold CV
RCCM
RI
|
RIadj
|
TPRk
|
FPRk
|
PrecisionK
|
F1k
|
TPRg
|
FPRg
|
PrecisionG
|
F1g
|
1
|
1
|
1
|
0.461
|
0.302
|
0.464
|
1
|
0.787
|
0.203
|
0.337
|
Ward & GGL
RI
|
RIadj
|
TPRk
|
FPRk
|
PrecisionK
|
F1k
|
TPRg
|
FPRg
|
PrecisionG
|
F1g
|
1
|
1
|
0.8
|
0.465
|
0.256
|
0.388
|
0.933
|
0.333
|
0.359
|
0.519
|
GLasso & K-means
RI
|
RIadj
|
TPRk
|
FPRk
|
PrecisionK
|
F1k
|
TPRg
|
FPRg
|
PrecisionG
|
F1g
|
0.704
|
0.409
|
0.367
|
0.05
|
0.595
|
0.454
|
NA
|
NA
|
NA
|
NA
|