Overview
This document illustrates prediction in a linear mixed model (LMM) for a simple RCBD example:
- 4 genotypes (random)
- 3 blocks (fixed)
- 12 observations
We:
- Read the data and define design matrices
- Estimate variance components from ANOVA (method-of-moments)
- Build Mixed Model Equations (MME)
- Solve for fixed effects and BLUPs
- Compute predicted values and standard errors for genotype means
- Compute reliability
data <- read.csv("../data/example_1.csv") |>
mutate(gen = as.factor(gen), block = as.factor(block))
head(data)
block gen yield
1 1 g1 7.4
2 2 g1 6.5
3 3 g1 5.6
4 1 g2 9.8
5 2 g2 6.8
6 3 g2 6.2
'data.frame': 12 obs. of 3 variables:
$ block: Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
$ gen : Factor w/ 4 levels "g1","g2","g3",..: 1 1 1 2 2 2 3 3 3 4 ...
$ yield: num 7.4 6.5 5.6 9.8 6.8 6.2 7.3 6.1 6.4 9.5 ...
n <- 12
n_blks <- 3
n_gens <- 4
Variance components
mod <- lm(formula = yield ~ 1 + block + gen, data = data)
aov_table <- as.data.frame(anova(mod))
aov_table
Df Sum Sq Mean Sq F value Pr(>F)
block 2 9.78 4.89 12.225 0.007650536
gen 3 6.63 2.21 5.525 0.036730328
Residuals 6 2.40 0.40 NA NA
mse_g <- aov_table["gen", "Mean Sq"]
var_e <- aov_table["Residuals", "Mean Sq"]
var_g <- (mse_g - var_e) / n_blks
var_g
Design matrices and MME
X <- model.matrix(yield ~ 1 + block, data)
Z <- model.matrix(yield ~ -1 + gen, data)
y <- matrix(data[, "yield"]) |> na.omit()
print(X)
(Intercept) block2 block3
1 1 0 0
2 1 1 0
3 1 0 1
4 1 0 0
5 1 1 0
6 1 0 1
7 1 0 0
8 1 1 0
9 1 0 1
10 1 0 0
11 1 1 0
12 1 0 1
attr(,"assign")
[1] 0 1 1
attr(,"contrasts")
attr(,"contrasts")$block
[1] "contr.treatment"
[,1]
[1,] 7.4
[2,] 6.5
[3,] 5.6
[4,] 9.8
[5,] 6.8
[6,] 6.2
[7,] 7.3
[8,] 6.1
[9,] 6.4
[10,] 9.5
[11,] 8.0
[12,] 7.4
G <- diag(x = var_g, nrow = n_gens)
R <- diag(x = var_e, nrow = n)
V <- Z %*% G %*% t(Z) + R
# Mixed Model Equations
C11 <- t(X) %*% chol2inv(chol(R)) %*% X
C12 <- t(X) %*% chol2inv(chol(R)) %*% Z
C21 <- t(Z) %*% chol2inv(chol(R)) %*% X
C22 <- t(Z) %*% chol2inv(chol(R)) %*% Z + solve(G)
# Coefficient matrix (LHS)
C <- as.matrix(
rbind(
cbind(C11, C12),
cbind(C21, C22)
)
)
# RHS
rhs <- rbind(
t(X) %*% solve(R) %*% y,
t(Z) %*% solve(R) %*% y
)
# Solution
C_inv <- chol2inv(chol(C))
rownames(C_inv) <- colnames(C_inv) <- rownames(C)
print(round(C_inv, 4))
(Intercept) block2 block3 geng1 geng2 geng3 geng4
(Intercept) 0.2508 -0.1 -0.1 -0.1508 -0.1508 -0.1508 -0.1508
block2 -0.1000 0.2 0.1 0.0000 0.0000 0.0000 0.0000
block3 -0.1000 0.1 0.2 0.0000 0.0000 0.0000 0.0000
geng1 -0.1508 0.0 0.0 0.2327 0.1235 0.1235 0.1235
geng2 -0.1508 0.0 0.0 0.1235 0.2327 0.1235 0.1235
geng3 -0.1508 0.0 0.0 0.1235 0.1235 0.2327 0.1235
geng4 -0.1508 0.0 0.0 0.1235 0.1235 0.1235 0.2327
[,1]
(Intercept) 8.5000000
block2 -1.6500000
block3 -2.1000000
geng1 -0.6142534
geng2 0.2866516
geng3 -0.5323529
geng4 0.8599548
Reliability random genotypic effect
PEV <- C_inv[4:7, 4:7]
r2_u <- 1 - diag(PEV) / var_g
r2_u
geng1 geng2 geng3 geng4
0.6142534 0.6142534 0.6142534 0.6142534
Linear combination of Fixed and Random Effects
# L
L <- cbind(
matrix(1, nrow = n_gens, ncol = 1), # Intercept
matrix(1 / n_blks, nrow = n_gens, ncol = n_blks - 1), # Average block
diag(n_gens) # Identity for gens
)
L
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] 1 0.3333333 0.3333333 1 0 0 0
[2,] 1 0.3333333 0.3333333 0 1 0 0
[3,] 1 0.3333333 0.3333333 0 0 1 0
[4,] 1 0.3333333 0.3333333 0 0 0 1
# Prediction
pv <- L %*% ans
pv
[,1]
[1,] 6.635747
[2,] 7.536652
[3,] 6.717647
[4,] 8.109955
# Standard Error
sse2 <- L %*% C_inv %*% t(L)
sse2
[,1] [,2] [,3] [,4]
[1,] 0.115233786 0.006033183 0.006033183 0.006033183
[2,] 0.006033183 0.115233786 0.006033183 0.006033183
[3,] 0.006033183 0.006033183 0.115233786 0.006033183
[4,] 0.006033183 0.006033183 0.006033183 0.115233786
std <- sqrt(diag(sse2))
std
[1] 0.339461 0.339461 0.339461 0.339461
data.frame("predicted.values" = pv, std)
predicted.values std
1 6.635747 0.339461
2 7.536652 0.339461
3 6.717647 0.339461
4 8.109955 0.339461
Reliability linear combination
# w = Lb + Mu
var_uhat <- G - PEV
var_beta <- C_inv[1:3, 1:3]
cov_ubeta <- -C_inv[4:7, 1:3]
L <- cbind(
matrix(1, nrow = n_gens, ncol = 1), # Intercept
matrix(1 / n_blks, nrow = n_gens, ncol = n_blks - 1) # Average block
)
M <- diag(n_gens)
var_what <- L %*% var_beta %*% t(L) + M %*% var_uhat %*% t(M)
var_w <- M %*% G %*% t(M)
cov_wwhat <- M %*% cov_ubeta %*% t(L) + M %*% var_uhat %*% t(M)
r2_w <- diag(cov_wwhat)^2 / (diag(var_w) * diag(var_what))
r2_w
[1] 0.8123244 0.8123244 0.8123244 0.8123244
data.frame("predicted.values" = pv, std, r2_w, r2_u) |>
mutate_if(is.numeric, round, 3) |>
gt()
| 6.636 |
0.339 |
0.812 |
0.614 |
| 7.537 |
0.339 |
0.812 |
0.614 |
| 6.718 |
0.339 |
0.812 |
0.614 |
| 8.110 |
0.339 |
0.812 |
0.614 |