Data
# RCBD example: 4 gens, 3 blocks, 12 observations
data <- read.csv ("../data/example_1.csv" ) |>
mutate (gen = as.factor (gen), block = as.factor (block))
head (data)
block gen yield
1 1 g1 7.4
2 2 g1 6.5
3 3 g1 5.6
4 1 g2 9.8
5 2 g2 6.8
6 3 g2 6.2
# A numeric covariate derived from blocks (for illustration only)
data$ covariate <- as.numeric (data$ block)
data$ covariate_s <- scale (data$ covariate, scale = FALSE )
Two models: raw vs centered covariate
mod_1 <- lm (formula = yield ~ 1 + covariate + gen, data = data)
mod_2 <- lm (formula = yield ~ 1 + covariate_s + gen, data = data)
tibble (
term = names (coef (mod_1)),
coef_1 = unname (coef (mod_1)), se_1 = sqrt (diag (vcov (mod_1))),
coef_2 = unname (coef (mod_2)), se_2 = sqrt (diag (vcov (mod_2)))
) |>
mutate_if (is.numeric, round, 2 ) |>
gt () |>
fmt_number (columns = c (coef_1, coef_2), decimals = 3 ) |>
tab_header (title = "Coefficient estimates and SEs" )
term
coef_1
se_1
coef_2
se_2
(Intercept)
8.600
0.63
6.500
0.40
covariate
−1.050
0.24
−1.050
0.24
geng2
1.100
0.57
1.100
0.57
geng3
0.100
0.57
0.100
0.57
geng4
1.800
0.57
1.800
0.57
Estimated marginal means (EMMs) for genotypes
Marginal means with model 1
mm_1 <- emmeans (mod_1, ~ gen)
L_1 <- mm_1@ linfct
rownames (L_1) <- paste0 ("gen_" , 1 : 4 )
print (L_1)
(Intercept) covariate geng2 geng3 geng4
gen_1 1 2 0 0 0
gen_2 1 2 1 0 0
gen_3 1 2 0 1 0
gen_4 1 2 0 0 1
Marginal means with model 2
mm_2 <- emmeans (mod_2, ~ gen)
L_2 <- mm_2@ linfct
rownames (L_2) <- paste0 ("gen_" , 1 : 4 )
print (L_2)
(Intercept) covariate_s geng2 geng3 geng4
gen_1 1 0 0 0 0
gen_2 1 0 1 0 0
gen_3 1 0 0 1 0
gen_4 1 0 0 0 1
We can clearly see the differences in the construction of the \(L\) matrix in order to get proper estimations of the marginal means. In mod_1, the covariate column equals the mean of the uncentered covariate (here, 2) for each gen row. In mod_2, the covariate column is 0 because we centered it.
C_1 <- mm_1@ V
print (round (C_1, 2 ))
(Intercept) covariate geng2 geng3 geng4
(Intercept) 0.40 -0.12 -0.16 -0.16 -0.16
covariate -0.12 0.06 0.00 0.00 0.00
geng2 -0.16 0.00 0.32 0.16 0.16
geng3 -0.16 0.00 0.16 0.32 0.16
geng4 -0.16 0.00 0.16 0.16 0.32
C_2 <- mm_2@ V
print (round (C_2, 2 ))
(Intercept) covariate_s geng2 geng3 geng4
(Intercept) 0.16 0.00 -0.16 -0.16 -0.16
covariate_s 0.00 0.06 0.00 0.00 0.00
geng2 -0.16 0.00 0.32 0.16 0.16
geng3 -0.16 0.00 0.16 0.32 0.16
geng4 -0.16 0.00 0.16 0.16 0.32
The variance–covariance matrix of the model coefficients is also influenced by the scaling of the covariate, as we observed earlier through the standard errors. When the covariate is centered, the covariance between the intercept and the covariate becomes zero, reflecting the removal of linear dependence between the intercept and the covariate.
Intercept–covariate
Correlated
Uncorrelated (0)
Covariate–genotypes
0
0
Genotype–genotype
unchanged
unchanged
EMM_1 <- L_1 %*% mm_1@ bhat
var_EMM_1 <- L_1 %*% C_1 %*% t (L_1)
se_EMM_1 <- sqrt (diag (var_EMM_1))
data.frame (
Gen = rownames (EMM_1),
EMM_1 = EMM_1,
var_EMM_1 = diag (var_EMM_1),
se_EMM_1
) |> gt ()
gen_1
6.5
0.16
0.4
gen_2
7.6
0.16
0.4
gen_3
6.6
0.16
0.4
gen_4
8.3
0.16
0.4
EMM_2 <- L_2 %*% mm_2@ bhat
var_EMM_2 <- L_2 %*% C_2 %*% t (L_2)
se_EMM_2 <- sqrt (diag (var_EMM_2))
data.frame (
Gen = rownames (EMM_2),
EMM_2 = EMM_2,
var_EMM_2 = diag (var_EMM_2),
se_EMM_2
) |> gt ()
gen_1
6.5
0.16
0.4
gen_2
7.6
0.16
0.4
gen_3
6.6
0.16
0.4
gen_4
8.3
0.16
0.4
This vignette outlines practical considerations for incorporating covariates in linear models, including the impact of centering on the intercept, implications for standard errors, and consequences for the estimation of marginal means.