Predict LMM

Published

February 17, 2026

Overview

This document illustrates prediction in a linear mixed model (LMM) for a simple RCBD example:

  • 4 genotypes (random)
  • 3 blocks (fixed)
  • 12 observations

We:

  1. Read the data and define design matrices
  2. Estimate variance components from ANOVA (method-of-moments)
  3. Build Mixed Model Equations (MME)
  4. Solve for fixed effects and BLUPs
  5. Compute predicted values and standard errors for genotype means
  6. Compute reliability
data <- read.csv("../data/example_1.csv") |>
  mutate(gen = as.factor(gen), block = as.factor(block))
head(data)
  block gen yield
1     1  g1   7.4
2     2  g1   6.5
3     3  g1   5.6
4     1  g2   9.8
5     2  g2   6.8
6     3  g2   6.2
str(data)
'data.frame':   12 obs. of  3 variables:
 $ block: Factor w/ 3 levels "1","2","3": 1 2 3 1 2 3 1 2 3 1 ...
 $ gen  : Factor w/ 4 levels "g1","g2","g3",..: 1 1 1 2 2 2 3 3 3 4 ...
 $ yield: num  7.4 6.5 5.6 9.8 6.8 6.2 7.3 6.1 6.4 9.5 ...
n <- 12
n_blks <- 3
n_gens <- 4

Variance components

mod <- lm(formula = yield ~ 1 + block + gen, data = data)
aov_table <- as.data.frame(anova(mod))
aov_table
          Df Sum Sq Mean Sq F value      Pr(>F)
block      2   9.78    4.89  12.225 0.007650536
gen        3   6.63    2.21   5.525 0.036730328
Residuals  6   2.40    0.40      NA          NA
mse_g <- aov_table["gen", "Mean Sq"]
var_e <- aov_table["Residuals", "Mean Sq"]
var_g <- (mse_g - var_e) / n_blks
var_g
[1] 0.6033333

Design matrices and MME

X <- model.matrix(yield ~ 1 + block, data)
Z <- model.matrix(yield ~ -1 + gen, data)
y <- matrix(data[, "yield"]) |> na.omit()
print(X)
   (Intercept) block2 block3
1            1      0      0
2            1      1      0
3            1      0      1
4            1      0      0
5            1      1      0
6            1      0      1
7            1      0      0
8            1      1      0
9            1      0      1
10           1      0      0
11           1      1      0
12           1      0      1
attr(,"assign")
[1] 0 1 1
attr(,"contrasts")
attr(,"contrasts")$block
[1] "contr.treatment"
print(y)
      [,1]
 [1,]  7.4
 [2,]  6.5
 [3,]  5.6
 [4,]  9.8
 [5,]  6.8
 [6,]  6.2
 [7,]  7.3
 [8,]  6.1
 [9,]  6.4
[10,]  9.5
[11,]  8.0
[12,]  7.4
G <- diag(x = var_g, nrow = n_gens)
R <- diag(x = var_e, nrow = n)
V <- Z %*% G %*% t(Z) + R

# Mixed Model Equations
C11 <- t(X) %*% chol2inv(chol(R)) %*% X
C12 <- t(X) %*% chol2inv(chol(R)) %*% Z
C21 <- t(Z) %*% chol2inv(chol(R)) %*% X
C22 <- t(Z) %*% chol2inv(chol(R)) %*% Z + solve(G)

# Coefficient matrix (LHS)
C <- as.matrix(
  rbind(
    cbind(C11, C12),
    cbind(C21, C22)
  )
)

# RHS
rhs <- rbind(
  t(X) %*% solve(R) %*% y,
  t(Z) %*% solve(R) %*% y
)

# Solution
C_inv <- chol2inv(chol(C))
rownames(C_inv) <- colnames(C_inv) <- rownames(C)
print(round(C_inv, 4))
            (Intercept) block2 block3   geng1   geng2   geng3   geng4
(Intercept)      0.2508   -0.1   -0.1 -0.1508 -0.1508 -0.1508 -0.1508
block2          -0.1000    0.2    0.1  0.0000  0.0000  0.0000  0.0000
block3          -0.1000    0.1    0.2  0.0000  0.0000  0.0000  0.0000
geng1           -0.1508    0.0    0.0  0.2327  0.1235  0.1235  0.1235
geng2           -0.1508    0.0    0.0  0.1235  0.2327  0.1235  0.1235
geng3           -0.1508    0.0    0.0  0.1235  0.1235  0.2327  0.1235
geng4           -0.1508    0.0    0.0  0.1235  0.1235  0.1235  0.2327
ans <- C_inv %*% rhs
ans
                  [,1]
(Intercept)  8.5000000
block2      -1.6500000
block3      -2.1000000
geng1       -0.6142534
geng2        0.2866516
geng3       -0.5323529
geng4        0.8599548

Reliability random genotypic effect

PEV <- C_inv[4:7, 4:7]
r2_u <- 1 - diag(PEV) / var_g
r2_u
    geng1     geng2     geng3     geng4 
0.6142534 0.6142534 0.6142534 0.6142534 

Linear combination of Fixed and Random Effects

# L
L <- cbind(
  matrix(1, nrow = n_gens, ncol = 1),                   # Intercept
  matrix(1 / n_blks, nrow = n_gens, ncol = n_blks - 1), # Average block
  diag(n_gens)                                          # Identity for gens
)
L
     [,1]      [,2]      [,3] [,4] [,5] [,6] [,7]
[1,]    1 0.3333333 0.3333333    1    0    0    0
[2,]    1 0.3333333 0.3333333    0    1    0    0
[3,]    1 0.3333333 0.3333333    0    0    1    0
[4,]    1 0.3333333 0.3333333    0    0    0    1
# Prediction
pv <- L %*% ans
pv
         [,1]
[1,] 6.635747
[2,] 7.536652
[3,] 6.717647
[4,] 8.109955
# Standard Error
sse2 <- L %*% C_inv %*% t(L)
sse2
            [,1]        [,2]        [,3]        [,4]
[1,] 0.115233786 0.006033183 0.006033183 0.006033183
[2,] 0.006033183 0.115233786 0.006033183 0.006033183
[3,] 0.006033183 0.006033183 0.115233786 0.006033183
[4,] 0.006033183 0.006033183 0.006033183 0.115233786
std <- sqrt(diag(sse2))
std
[1] 0.339461 0.339461 0.339461 0.339461
data.frame("predicted.values" = pv, std)
  predicted.values      std
1         6.635747 0.339461
2         7.536652 0.339461
3         6.717647 0.339461
4         8.109955 0.339461

Reliability linear combination

# w = Lb + Mu
var_uhat <- G - PEV
var_beta <- C_inv[1:3, 1:3]
cov_ubeta <- -C_inv[4:7, 1:3]

L <- cbind(
  matrix(1, nrow = n_gens, ncol = 1),                   # Intercept
  matrix(1 / n_blks, nrow = n_gens, ncol = n_blks - 1)  # Average block
)
M <- diag(n_gens)
var_what <- L %*% var_beta %*% t(L) + M %*% var_uhat %*% t(M)
var_w <- M %*% G %*% t(M)
cov_wwhat <- M %*% cov_ubeta %*% t(L) + M %*% var_uhat %*% t(M)
r2_w <- diag(cov_wwhat)^2 / (diag(var_w) * diag(var_what))
r2_w
[1] 0.8123244 0.8123244 0.8123244 0.8123244
data.frame("predicted.values" = pv, std, r2_w, r2_u) |>
  mutate_if(is.numeric, round, 3) |> 
  gt()
predicted.values std r2_w r2_u
6.636 0.339 0.812 0.614
7.537 0.339 0.812 0.614
6.718 0.339 0.812 0.614
8.110 0.339 0.812 0.614