In this document, I will demonstrate how to implement the analyses discussed in the lecture using R.
First, we need to load some extra packages.
library(MLmetrics) # For MSEs
library(dplyr) # For data processing
library(magrittr) # For fancy pipes
library(rockchalk) # For interaction probing
Now, we’ll read in and prepare some data.
set.seed(235711)
<- "../../data/"
dataDir
<- readRDS(paste0(dataDir, "mtcars.rds"))
cars <- readRDS(paste0(dataDir, "insurance0.rds"))
ins0 <- readRDS(paste0(dataDir, "insurance1.rds"))
ins1 <- readRDS(paste0(dataDir, "ginzberg.rds"))
ginz <- readRDS(paste0(dataDir, "bfi_scored.rds"))
bfi
## Load the iris data
data(iris)
## Sample 100 rows to unbalance group sizes:
iris %<>% slice_sample(n = 100, replace = TRUE)
Simple Linear Regression
First, we’ll fit a simple linear regression (SLR) model wherein we use the cars data to regress qsec (quarter mile lap time in seconds) onto hp (the horsepower rating of the engine).
## Fit a simple linear regression model:
out1 <- lm(qsec ~ hp, data = cars)
## Summarize the results:
summary(out1)
##
## Call:
## lm(formula = qsec ~ hp, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1766 -0.6975 0.0348 0.6520 4.0972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.556354 0.542424 37.897 < 2e-16 ***
## hp -0.018458 0.003359 -5.495 5.77e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.282 on 30 degrees of freedom
## Multiple R-squared: 0.5016, Adjusted R-squared: 0.485
## F-statistic: 30.19 on 1 and 30 DF, p-value: 5.766e-06
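As a quick sanity check, the SLR estimates have closed-form expressions: the slope is cov(x, y) / var(x), and the intercept follows from the sample means. A minimal sketch (the helper names b0 and b1 are ours):
## Check the OLS estimates by hand:
b1 <- with(cars, cov(hp, qsec) / var(hp))
b0 <- mean(cars$qsec) - b1 * mean(cars$hp)
c(intercept = b0, slope = b1)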
Our fitted lm object is a list containing many elements.
## See what's inside the fitted lm object:
ls(out1)
## [1] "assign" "call" "coefficients" "df.residual"
## [5] "effects" "fitted.values" "model" "qr"
## [9] "rank" "residuals" "terms" "xlevels"
The output of summary() is just another object that we can save to a variable.
s1 <- summary(out1)
ls(s1)
## [1] "adj.r.squared" "aliased" "call" "coefficients"
## [5] "cov.unscaled" "df" "fstatistic" "r.squared"
## [9] "residuals" "sigma" "terms"
s1
##
## Call:
## lm(formula = qsec ~ hp, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1766 -0.6975 0.0348 0.6520 4.0972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.556354 0.542424 37.897 < 2e-16 ***
## hp -0.018458 0.003359 -5.495 5.77e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.282 on 30 degrees of freedom
## Multiple R-squared: 0.5016, Adjusted R-squared: 0.485
## F-statistic: 30.19 on 1 and 30 DF, p-value: 5.766e-06
We can access particular pieces of the model output.
## Access the R^2 slot in the summary object:
s1$r.squared
## [1] 0.5015804
## Extract coefficients:
coef(out1)
## (Intercept) hp
## 20.55635402 -0.01845831
## Extract residuals:
res3 <- resid(out1)
res3
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## -2.06593942 -1.50593942 -0.22973076 0.91406058
## Hornet Sportabout Valiant Duster 360 Merc 240D
## -0.30614897 1.60176901 -0.19406695 0.58806148
## Merc 230 Merc 280 Merc 280C Merc 450SE
## 4.09718587 0.01401867 0.61401867 0.16614260
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 0.36614260 0.76614260 1.20760047 1.23218361
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 1.10905833 0.13189474 -1.07652166 0.54343643
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## 1.24410249 -0.91760683 -0.48760683 -0.62406695
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## -0.27614897 -0.43810526 -2.17664739 -1.57056447
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -1.18335897 -1.82614897 0.22718136 0.05560227
## Extract fitted values (two ways):
yHat1.1 <- fitted(out1)
yHat1.2 <- predict(out1)

yHat1.1
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## 18.52594 18.52594 18.83973 18.52594
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 17.32615 18.61823 16.03407 19.41194
## Merc 230 Merc 280 Merc 280C Merc 450SE
## 18.80281 18.28598 18.28598 17.23386
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 17.23386 17.23386 16.77240 16.58782
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 16.31094 19.33811 19.59652 19.35656
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## 18.76590 17.78761 17.78761 16.03407
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 17.32615 19.33811 18.87665 18.47056
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## 15.68336 17.32615 14.37282 18.54440
yHat1.2
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## 18.52594 18.52594 18.83973 18.52594
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 17.32615 18.61823 16.03407 19.41194
## Merc 230 Merc 280 Merc 280C Merc 450SE
## 18.80281 18.28598 18.28598 17.23386
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 17.23386 17.23386 16.77240 16.58782
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 16.31094 19.33811 19.59652 19.35656
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## 18.76590 17.78761 17.78761 16.03407
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 17.32615 19.33811 18.87665 18.47056
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## 15.68336 17.32615 14.37282 18.54440
## Compare:
yHat1.1 - yHat1.2
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## 3.552714e-15 0.000000e+00 -3.552714e-15 0.000000e+00
## Hornet Sportabout Valiant Duster 360 Merc 240D
## -3.552714e-15 0.000000e+00 0.000000e+00 0.000000e+00
## Merc 230 Merc 280 Merc 280C Merc 450SE
## 0.000000e+00 -3.552714e-15 -3.552714e-15 -3.552714e-15
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## -3.552714e-15 -3.552714e-15 -3.552714e-15 -3.552714e-15
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## -3.552714e-15 0.000000e+00 0.000000e+00 -3.552714e-15
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## -3.552714e-15 0.000000e+00 0.000000e+00 -3.552714e-15
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -1.776357e-15 -3.552714e-15 -1.776357e-15 0.000000e+00
Multiple Linear Regression
Now, we fit a multiple linear regression (MLR) model by
adding wt
(the car’s weight) and carb
(the
number of carburetors) into the model as additional predictors.
## Fit a multiple linear regression model:
out2 <- lm(qsec ~ hp + wt + carb, data = cars)
## Summarize the model:
s2 <- summary(out2)
s2
##
## Call:
## lm(formula = qsec ~ hp + wt + carb, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6403 -0.5012 -0.0832 0.2884 3.7455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.960702 0.672880 28.178 < 2e-16 ***
## hp -0.022747 0.005174 -4.396 0.000144 ***
## wt 0.896234 0.265372 3.377 0.002166 **
## carb -0.234171 0.182798 -1.281 0.210689
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.078 on 28 degrees of freedom
## Multiple R-squared: 0.6713, Adjusted R-squared: 0.6361
## F-statistic: 19.06 on 3 and 28 DF, p-value: 6.203e-07
## Extract R^2:
s2$r.squared
## [1] 0.6712944
## Extract F-stat:
s2$fstatistic
## value numdf dendf
## 19.06087 3.00000 28.00000
## Extract coefficients:
(cf <- coef(out2))
## (Intercept) hp wt carb
## 18.96070158 -0.02274737 0.89623413 -0.23417108
The confint() function computes confidence intervals (CIs) for the coefficients.
## Compute confidence intervals for coefficients:
confint(out2)
## 2.5 % 97.5 %
## (Intercept) 17.58236974 20.33903343
## hp -0.03334596 -0.01214878
## wt 0.35264355 1.43982471
## carb -0.60861505 0.14027290
confint(out2, parm = "hp")
## 2.5 % 97.5 %
## hp -0.03334596 -0.01214878
confint(out2, parm = c("(Intercept)", "wt"), level = 0.99)
## 0.5 % 99.5 %
## (Intercept) 17.1013580 20.820045
## wt 0.1629407 1.629528
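Under the hood, these are Wald intervals: the estimate plus or minus a t critical value times the standard error. A minimal sketch reproducing the default 95% intervals with base R (the helper names est, ses, and crit are ours):
## Reproduce the default 95% CIs by hand:
est  <- coef(out2)
ses  <- out2 %>% vcov() %>% diag() %>% sqrt()
crit <- qt(0.975, df = df.residual(out2))
cbind(est - crit * ses, est + crit * ses)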
We can also compute our own statistics based on the fitted model object.
## Manually compute standard errors and t-statistics:
se <- out2 %>% vcov() %>% diag() %>% sqrt()
t  <- cf / se
s2
##
## Call:
## lm(formula = qsec ~ hp + wt + carb, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6403 -0.5012 -0.0832 0.2884 3.7455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.960702 0.672880 28.178 < 2e-16 ***
## hp -0.022747 0.005174 -4.396 0.000144 ***
## wt 0.896234 0.265372 3.377 0.002166 **
## carb -0.234171 0.182798 -1.281 0.210689
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.078 on 28 degrees of freedom
## Multiple R-squared: 0.6713, Adjusted R-squared: 0.6361
## F-statistic: 19.06 on 3 and 28 DF, p-value: 6.203e-07
t
## (Intercept) hp wt carb
## 28.178437 -4.396422 3.377270 -1.281040
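To finish the job, we can convert our manual t-statistics into two-tailed p-values with pt(), using the model’s residual degrees of freedom (a quick sketch):
## Manually compute two-tailed p-values:
p <- 2 * pt(abs(t), df = df.residual(out2), lower.tail = FALSE)
p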
Model Comparison
Now, we’ll compare the SLR and MLR models.
## Change in R^2:
s2$r.squared - s1$r.squared
## [1] 0.169714
## Significant increase in R^2?
anova(out1, out2)
## Compare MSE values:
MSE(y_pred = predict(out1), y_true = cars$qsec)
## [1] 1.541801
MSE(y_pred = predict(out2), y_true = cars$qsec)
## [1] 1.016811
## Information criteria:
AIC(out1, out2)
BIC(out1, out2)
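For reference, AIC() is just -2 times the log-likelihood plus twice the number of estimated parameters (including the residual variance); lower values suggest better expected out-of-sample performance. A quick sketch of that identity (the helper name ll is ours):
## AIC by hand: -2 * logLik + 2 * df
ll <- logLik(out2)
-2 * as.numeric(ll) + 2 * attr(ll, "df")
AIC(out2)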
If we want to update and rerun an existing lm() model, we can use the update() function to do so without a bunch of copy-pasting.
## The long way:
out1 <- lm(qsec ~ hp, data = mtcars)
out2 <- lm(qsec ~ hp + wt, data = mtcars)
out3 <- lm(qsec ~ hp + wt + gear + carb, data = mtcars)

## The short way:
out2.1 <- update(out1, ". ~ . + wt")
out3.1 <- update(out2.1, ". ~ . + gear + carb")
out3.2 <- update(out1, ". ~ . + wt + gear + carb")
## We can also remove variables:
out1.1 <- update(out2, ". ~ . - wt")
summary(out2)
##
## Call:
## lm(formula = qsec ~ hp + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8283 -0.4055 -0.1464 0.3519 3.7030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.825585 0.671867 28.020 < 2e-16 ***
## hp -0.027310 0.003795 -7.197 6.36e-08 ***
## wt 0.941532 0.265897 3.541 0.00137 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.09 on 29 degrees of freedom
## Multiple R-squared: 0.652, Adjusted R-squared: 0.628
## F-statistic: 27.17 on 2 and 29 DF, p-value: 2.251e-07
summary(out2.1)
##
## Call:
## lm(formula = qsec ~ hp + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8283 -0.4055 -0.1464 0.3519 3.7030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.825585 0.671867 28.020 < 2e-16 ***
## hp -0.027310 0.003795 -7.197 6.36e-08 ***
## wt 0.941532 0.265897 3.541 0.00137 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.09 on 29 degrees of freedom
## Multiple R-squared: 0.652, Adjusted R-squared: 0.628
## F-statistic: 27.17 on 2 and 29 DF, p-value: 2.251e-07
all.equal(out1, out1.1)
## [1] TRUE
all.equal(out2, out2.1)
## [1] TRUE
all.equal(out3, out3.1)
## [1] TRUE
all.equal(out3, out3.2)
## [1] TRUE
## We can rerun the same model on new data:
mtcars2 <- mtcars[1:15, ]

out4 <- update(out3, data = mtcars2)
summary(out3)
##
## Call:
## lm(formula = qsec ~ hp + wt + gear + carb, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7810 -0.5349 -0.1265 0.3214 3.6687
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.143046 2.423001 7.488 4.7e-08 ***
## hp -0.022499 0.005304 -4.242 0.000233 ***
## wt 0.996423 0.392208 2.541 0.017129 *
## gear 0.166134 0.472310 0.352 0.727758
## carb -0.288823 0.242148 -1.193 0.243341
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.095 on 27 degrees of freedom
## Multiple R-squared: 0.6728, Adjusted R-squared: 0.6243
## F-statistic: 13.88 on 4 and 27 DF, p-value: 2.842e-06
summary(out4)
##
## Call:
## lm(formula = qsec ~ hp + wt + gear + carb, data = mtcars2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.39320 -0.65268 -0.07605 0.50496 2.47398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.714599 7.168210 1.076 0.3071
## hp -0.008156 0.014654 -0.557 0.5901
## wt 1.970702 0.681556 2.891 0.0161 *
## gear 2.417899 1.502550 1.609 0.1387
## carb -1.196560 0.483448 -2.475 0.0328 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.109 on 10 degrees of freedom
## Multiple R-squared: 0.7179, Adjusted R-squared: 0.6051
## F-statistic: 6.363 on 4 and 10 DF, p-value: 0.008195
Categorical Predictors
Factor Variables
R represents nominal variables as factors.
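For instance, we can build a small factor from scratch with factor() (a toy example; the vector x is not part of any dataset used here):
## A toy factor built from a character vector:
x <- factor(c("low", "high", "low", "medium"), levels = c("low", "medium", "high"))
x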
## Look at the 'Species' factor:
iris$Species
## [1] versicolor setosa setosa setosa virginica virginica
## [7] setosa virginica versicolor versicolor versicolor virginica
## [13] setosa versicolor setosa versicolor versicolor virginica
## [19] setosa setosa versicolor versicolor versicolor virginica
## [25] versicolor versicolor virginica setosa versicolor setosa
## [31] versicolor virginica setosa virginica virginica virginica
## [37] versicolor versicolor setosa versicolor virginica setosa
## [43] versicolor versicolor virginica setosa virginica setosa
## [49] setosa virginica versicolor versicolor versicolor setosa
## [55] virginica setosa versicolor virginica setosa setosa
## [61] setosa versicolor virginica setosa setosa setosa
## [67] virginica setosa setosa virginica virginica virginica
## [73] setosa virginica virginica versicolor setosa setosa
## [79] versicolor setosa setosa versicolor setosa virginica
## [85] versicolor setosa setosa virginica virginica versicolor
## [91] virginica setosa setosa versicolor versicolor setosa
## [97] virginica setosa versicolor setosa
## Levels: setosa versicolor virginica
is.factor(iris$Species)
## [1] TRUE
str(iris$Species)
## Factor w/ 3 levels "setosa","versicolor",..: 2 1 1 1 3 3 1 3 2 2 ...
summary(iris$Species)
## setosa versicolor virginica
## 39 32 29
Factors have special attributes to allow them to encode grouping information.
attributes(iris$Species)
## $levels
## [1] "setosa" "versicolor" "virginica"
##
## $class
## [1] "factor"
attributes(iris$Petal.Length)
## NULL
attributes(iris)
## $names
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
##
## $class
## [1] "data.frame"
##
## $row.names
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100
## Factors have labeled levels:
levels(iris$Species)
## [1] "setosa" "versicolor" "virginica"
nlevels(iris$Species)
## [1] 3
Although the underlying data are represented numerically, factors are not numeric variables. So, we cannot use a factor as a numeric vector.
mean(iris$Species)
## [1] NA
var(iris$Species)
## Error in var(iris$Species): Calling var(x) on a factor x is defunct.
## Use something like 'all(duplicated(x)[-1L])' to test for a constant vector.
iris$Species - iris$Species
## [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [51] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [76] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
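If we really need the underlying integer codes, we have to extract them explicitly. Note that these codes simply index the factor levels and are rarely meaningful as numbers:
## Explicitly extract the underlying level codes:
head(as.integer(iris$Species))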
Dummy Codes
If we use a factor variable as a predictor in the lm() function, the function will automatically generate a set of dummy codes.
## Use a factor variable as a predictor:
out1 <- lm(Petal.Length ~ Species, data = iris)
summary(out1)
##
## Call:
## lm(formula = Petal.Length ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22759 -0.16206 0.02051 0.22051 0.97241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.37949 0.06712 20.55 <2e-16 ***
## Speciesversicolor 2.77676 0.09997 27.78 <2e-16 ***
## Speciesvirginica 4.34810 0.10277 42.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4191 on 97 degrees of freedom
## Multiple R-squared: 0.9513, Adjusted R-squared: 0.9503
## F-statistic: 947.7 on 2 and 97 DF, p-value: < 2.2e-16
The lm() function only knows how to create these codes because of the factor’s contrasts.
## Check the contrasts:
contrasts(iris$Species)
## versicolor virginica
## setosa 0 0
## versicolor 1 0
## virginica 0 1
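We can see exactly how these contrasts enter the model by inspecting the design matrix that lm() builds internally:
## Peek at the dummy-coded design matrix:
head(model.matrix(~ Species, data = iris))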
If we want to influence the factor’s eventual coding, we need to modify its contrasts.
## Change the reference group:
iris$Species2 <- relevel(iris$Species, ref = "virginica")
levels(iris$Species)
## [1] "setosa" "versicolor" "virginica"
levels(iris$Species2)
## [1] "virginica" "setosa" "versicolor"
## How are the contrasts affected?
contrasts(iris$Species)
## versicolor virginica
## setosa 0 0
## versicolor 1 0
## virginica 0 1
contrasts(iris$Species2)
## setosa versicolor
## virginica 0 0
## setosa 1 0
## versicolor 0 1
## Which carries through to the models:
out2 <- lm(Petal.Length ~ Species2, data = iris)
summary(out1)
##
## Call:
## lm(formula = Petal.Length ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22759 -0.16206 0.02051 0.22051 0.97241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.37949 0.06712 20.55 <2e-16 ***
## Speciesversicolor 2.77676 0.09997 27.78 <2e-16 ***
## Speciesvirginica 4.34810 0.10277 42.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4191 on 97 degrees of freedom
## Multiple R-squared: 0.9513, Adjusted R-squared: 0.9503
## F-statistic: 947.7 on 2 and 97 DF, p-value: < 2.2e-16
summary(out2)
##
## Call:
## lm(formula = Petal.Length ~ Species2, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22759 -0.16206 0.02051 0.22051 0.97241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.72759 0.07783 73.59 <2e-16 ***
## Species2setosa -4.34810 0.10277 -42.31 <2e-16 ***
## Species2versicolor -1.57134 0.10746 -14.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4191 on 97 degrees of freedom
## Multiple R-squared: 0.9513, Adjusted R-squared: 0.9503
## F-statistic: 947.7 on 2 and 97 DF, p-value: < 2.2e-16
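Note that relevel() only changes the reference group. If we wanted a different coding scheme altogether (e.g., effects coding), we could overwrite the contrasts directly. A minimal sketch (Species3 is a scratch copy that we won’t use elsewhere):
## Assign sum-to-zero (effects) codes to a scratch copy of Species:
iris$Species3 <- iris$Species
contrasts(iris$Species3) <- contr.sum(nlevels(iris$Species3))
contrasts(iris$Species3)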
Significance Testing
To test the overall significance of a grouping factor, we generally need to compare a full model that contains the dummy codes representing that factor to a restricted model that excludes the dummy codes.
## Test the partial effect of Species:
out0 <- lm(Petal.Length ~ Petal.Width, data = iris)
out1 <- update(out0, ". ~ . + Species")
(s0 <- summary(out0))
##
## Call:
## lm(formula = Petal.Length ~ Petal.Width, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.11608 -0.27890 -0.00569 0.20170 1.05551
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.92371 0.06945 13.30 <2e-16 ***
## Petal.Width 2.38799 0.05210 45.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.399 on 98 degrees of freedom
## Multiple R-squared: 0.9554, Adjusted R-squared: 0.955
## F-statistic: 2101 on 1 and 98 DF, p-value: < 2.2e-16
(s1 <- summary(out1))
##
## Call:
## lm(formula = Petal.Length ~ Petal.Width + Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84382 -0.16146 -0.00899 0.15701 0.96795
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.08417 0.06356 17.056 < 2e-16 ***
## Petal.Width 1.29409 0.16001 8.087 1.86e-12 ***
## Speciesversicolor 1.36145 0.19139 7.113 2.03e-10 ***
## Speciesvirginica 2.05970 0.29396 7.007 3.36e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3249 on 96 degrees of freedom
## Multiple R-squared: 0.971, Adjusted R-squared: 0.9701
## F-statistic: 1073 on 3 and 96 DF, p-value: < 2.2e-16
## Compute R^2 difference and test its significance:
s1$r.squared - s0$r.squared
## [1] 0.01561812
anova(out0, out1)
For models that contain the grouping factor as the only predictor, we can use an intercept-only model as the restricted model.
## Test the effect of Species:
out0 <- lm(Petal.Length ~ 1, data = iris)
out1 <- update(out0, ". ~ . + Species")
(s0 <- summary(out0))
##
## Call:
## lm(formula = Petal.Length ~ 1, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.529 -2.129 0.471 1.396 3.171
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.529 0.188 18.77 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.88 on 99 degrees of freedom
(s1 <- summary(out1))
##
## Call:
## lm(formula = Petal.Length ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22759 -0.16206 0.02051 0.22051 0.97241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.37949 0.06712 20.55 <2e-16 ***
## Speciesversicolor 2.77676 0.09997 27.78 <2e-16 ***
## Speciesvirginica 4.34810 0.10277 42.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4191 on 97 degrees of freedom
## Multiple R-squared: 0.9513, Adjusted R-squared: 0.9503
## F-statistic: 947.7 on 2 and 97 DF, p-value: < 2.2e-16
anova(out0, out1)
We don’t need to go through all the trouble of fitting an intercept-only model, though.
s1$r.squared - s0$r.squared
## [1] 0.9513143
## An intercept only model has R^2 = 0:
s0$r.squared
## [1] 0
## So, the model comparison above is equivalent to the ordinary F-test from the
## full model:
s1$fstatistic
## value numdf dendf
## 947.6864 2.0000 97.0000
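If we want the corresponding p-value, we can feed the stored F-statistic to pf() (the helper name f is ours):
## Convert the stored F-statistic into a p-value:
f <- s1$fstatistic
pf(f["value"], f["numdf"], f["dendf"], lower.tail = FALSE)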
Prediction
Now, we’ll use the insurance data (ins0, ins1) to explore the process of generating and evaluating predictions.
## Estimate three models:
out1 <- lm(charges ~ age + sex, data = ins0)
out2 <- update(out1, ". ~ . + region + children")
out3 <- update(out2, ". ~ . + bmi + smoker")
## Check that everything worked:
summary(out1)
##
## Call:
## lm(formula = charges ~ age + sex, data = ins0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8860 -6996 -5710 5732 47215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2357.02 1160.32 2.031 0.0425 *
## age 262.93 26.42 9.953 <2e-16 ***
## sexmale 1401.06 742.93 1.886 0.0596 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11740 on 997 degrees of freedom
## Multiple R-squared: 0.0935, Adjusted R-squared: 0.09169
## F-statistic: 51.42 on 2 and 997 DF, p-value: < 2.2e-16
summary(out2)
##
## Call:
## lm(formula = charges ~ age + sex + region + children, data = ins0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9608 -7084 -5495 5159 46840
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1777.62 1370.37 1.297 0.1949
## age 261.45 26.37 9.916 <2e-16 ***
## sexmale 1328.47 741.98 1.790 0.0737 .
## regionnorthwest -531.29 1069.59 -0.497 0.6195
## regionsoutheast 1033.78 1038.16 0.996 0.3196
## regionsouthwest -970.75 1074.93 -0.903 0.3667
## children 698.36 315.17 2.216 0.0269 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11710 on 993 degrees of freedom
## Multiple R-squared: 0.1017, Adjusted R-squared: 0.0963
## F-statistic: 18.74 on 6 and 993 DF, p-value: < 2.2e-16
summary(out3)
##
## Call:
## lm(formula = charges ~ age + sex + region + children + bmi +
## smoker, data = ins0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11643.9 -2961.6 -952.1 1613.0 29932.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12121.96 1139.72 -10.636 < 2e-16 ***
## age 244.95 13.81 17.733 < 2e-16 ***
## sexmale 174.88 386.92 0.452 0.65139
## regionnorthwest -181.38 556.73 -0.326 0.74465
## regionsoutheast -1266.36 555.31 -2.280 0.02279 *
## regionsouthwest -816.41 561.38 -1.454 0.14619
## children 445.95 164.08 2.718 0.00668 **
## bmi 355.46 32.82 10.832 < 2e-16 ***
## smokeryes 24059.56 475.74 50.573 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6094 on 991 degrees of freedom
## Multiple R-squared: 0.7573, Adjusted R-squared: 0.7553
## F-statistic: 386.4 on 8 and 991 DF, p-value: < 2.2e-16
We use the predict() function to generate predictions from a fitted model. As we saw above, if we provide the fitted model object as the only argument to predict(), we simply get the model-implied outcomes (i.e., \(\hat{Y}\)).
## Generate model-implied outcomes (i.e., y-hats):
p1 <- predict(out1)
p2 <- predict(out2)
p3 <- predict(out3)
We can use these \(\hat{Y}\) values to compute mean squared errors (MSEs) with the MLmetrics::MSE() function.
## Generate MSEs:
MSE(y_pred = p1, y_true = ins0$charges)
## [1] 156682904
MSE(y_pred = p2, y_true = ins0$charges)
## [1] 157852019
MSE(y_pred = p3, y_true = ins0$charges)
## [1] 265069105
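Of course, MSE() is nothing more than the mean squared residual, so we could also compute the same quantity by hand:
## MSE by hand for the first model:
mean((ins0$charges - p1)^2)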
We can generate out-of-sample (i.e., true) predictions for new data by supplying a new data frame to the newdata argument in predict().
## Generate out-of-sample (i.e., true) predictions:
p1.2 <- predict(out1, newdata = ins1)
p2.2 <- predict(out2, newdata = ins1)
p3.2 <- predict(out3, newdata = ins1)
## Generate MSEs:
MSE(y_pred = p1.2, y_true = ins1$charges)
## [1] 117084071
MSE(y_pred = p2.2, y_true = ins1$charges)
## [1] 115398905
MSE(y_pred = p3.2, y_true = ins1$charges)
## [1] 36285568
We can compute confidence or prediction intervals for predictions by specifying an appropriate value for the interval argument in predict().
## Get confidence and prediction intervals:
p3.3 <- predict(out3, newdata = ins1, interval = "confidence")
p3.4 <- predict(out3, newdata = ins1, interval = "prediction")
head(p3.3)
## fit lwr upr
## 181 12243.075 11156.2610 13329.889
## 901 8058.695 6957.3947 9159.996
## 32 1641.097 537.1289 2745.066
## 694 2158.776 1089.5937 3227.959
## 79 7416.066 6122.0602 8710.071
## 230 9507.529 8510.2586 10504.800
head(p3.4)
## fit lwr upr
## 181 12243.075 234.7479 24251.40
## 901 8058.695 -3950.9514 20068.34
## 32 1641.097 -10368.7943 13650.99
## 694 2158.776 -9847.9677 14165.52
## 79 7416.066 -4612.7831 19444.91
## 230 9507.529 -2493.0248 21508.08
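As a rough sanity check, the default 95% prediction intervals should cover roughly 95% of the new outcomes, if the model assumptions hold. A quick sketch using the lwr/upr columns returned by predict():
## Empirical coverage of the prediction intervals in the new data:
mean(ins1$charges >= p3.4[, "lwr"] & ins1$charges <= p3.4[, "upr"])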
Moderation
Finally, we’ll consider methods of testing for moderation and probing significant interactions.
Continuous Moderators
We’ll start by using the Ginzberg depression data (ginz) to estimate a moderated regression model with a continuous moderator variable.
## Focal effect:
out0 <- lm(depression ~ fatalism, data = ginz)
summary(out0)
##
## Call:
## lm(formula = depression ~ fatalism, data = ginz)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.69096 -0.25544 -0.03114 0.20540 1.17070
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.34263 0.09408 3.642 0.000479 ***
## fatalism 0.65737 0.08425 7.802 1.97e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3791 on 80 degrees of freedom
## Multiple R-squared: 0.4321, Adjusted R-squared: 0.425
## F-statistic: 60.88 on 1 and 80 DF, p-value: 1.97e-11
## Additive model:
out1 <- lm(depression ~ fatalism + simplicity, data = ginz)
summary(out1)
##
## Call:
## lm(formula = depression ~ fatalism + simplicity, data = ginz)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67818 -0.23270 -0.05276 0.16817 1.10094
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.20269 0.09472 2.140 0.035455 *
## fatalism 0.41777 0.10064 4.151 8.28e-05 ***
## simplicity 0.37953 0.10064 3.771 0.000312 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3512 on 79 degrees of freedom
## Multiple R-squared: 0.5188, Adjusted R-squared: 0.5066
## F-statistic: 42.58 on 2 and 79 DF, p-value: 2.837e-13
## Moderated model:
out2 <- lm(depression ~ fatalism * simplicity, data = ginz)
summary(out2)
##
## Call:
## lm(formula = depression ~ fatalism * simplicity, data = ginz)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55988 -0.20390 -0.03806 0.15617 1.01101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2962 0.1918 -1.544 0.12665
## fatalism 0.8259 0.1685 4.902 5.05e-06 ***
## simplicity 0.9372 0.2121 4.418 3.17e-05 ***
## fatalism:simplicity -0.4039 0.1370 -2.949 0.00421 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3353 on 78 degrees of freedom
## Multiple R-squared: 0.567, Adjusted R-squared: 0.5504
## F-statistic: 34.05 on 3 and 78 DF, p-value: 3.587e-14
Probing the Interaction
Based on the results above, we can conclude that simplistic thinking significantly moderates the effect of fatalism on depression (\(\beta = -0.404\), \(t[78] = -2.95\), \(p = 0.004\)). So, we probably want to probe that interaction to get a better sense of what’s happening.
We can use the rockchalk::plotSlopes() function to generate simple slopes plots.
## First we use 'plotSlopes' to estimate the simple slopes:
plotOut1 <- plotSlopes(out2,
                       plotx = "fatalism",
                       modx = "simplicity",
                       modxVals = "std.dev",
                       plotPoints = TRUE)
## We can also get simple slopes at the quartiles of simplicity's distribution:
plotOut2 <- plotSlopes(out2,
                       plotx = "fatalism",
                       modx = "simplicity",
                       modxVals = "quantile",
                       plotPoints = TRUE)
## Or we can manually pick some values:
range(ginz$simplicity)
## [1] 0.25068 2.85408
plotOut3 <- plotSlopes(out2,
                       plotx = "fatalism",
                       modx = "simplicity",
                       modxVals = seq(0.5, 2.5, 0.5),
                       plotPoints = TRUE)
We can also test the significance of the simple slopes plotted above. Doing so is called a simple slopes analysis, and it is generally a good idea after finding a significant interaction.
We can use the rockchalk::testSlopes() function to easily implement these tests.
## Test the simple slopes via the 'testSlopes' function:
testOut1 <- testSlopes(plotOut1)
## Values of simplicity OUTSIDE this interval:
## lo hi
## 1.476340 4.346491
## cause the slope of (b1 + b2*simplicity)fatalism to be statistically significant
ls(testOut1)
## [1] "hypotests" "jn" "pso"
testOut1$hypotests
testOut2 <- testSlopes(plotOut2)
## Values of simplicity OUTSIDE this interval:
## lo hi
## 1.476340 4.346491
## cause the slope of (b1 + b2*simplicity)fatalism to be statistically significant
testOut2$hypotests
testOut3 <- testSlopes(plotOut3)
## Values of simplicity OUTSIDE this interval:
## lo hi
## 1.476340 4.346491
## cause the slope of (b1 + b2*simplicity)fatalism to be statistically significant
testOut3$hypotests
Binary Categorical Moderators
Of course, we’re not limited to continuous moderators; we can just as easily estimate models with binary moderator variables. Below, we’ll use the bfi data to see if gender (actually, biological sex) moderates the effect of agreeableness on neuroticism.
## Focal effect:
out0 <- lm(neuro ~ agree, data = bfi)
summary(out0)
##
## Call:
## lm(formula = neuro ~ agree, data = bfi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9598 -0.9155 -0.0712 0.8297 3.1683
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.30315 0.11693 36.802 <2e-16 ***
## agree -0.24525 0.02468 -9.938 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.167 on 2798 degrees of freedom
## Multiple R-squared: 0.03409, Adjusted R-squared: 0.03375
## F-statistic: 98.76 on 1 and 2798 DF, p-value: < 2.2e-16
## Additive model:
out1 <- lm(neuro ~ agree + gender, data = bfi)
summary(out1)
##
## Call:
## lm(formula = neuro ~ agree + gender, data = bfi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9032 -0.8859 -0.0635 0.8227 3.2286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.23224 0.11552 36.637 <2e-16 ***
## agree -0.29217 0.02487 -11.750 <2e-16 ***
## genderfemale 0.43061 0.04731 9.102 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.15 on 2797 degrees of freedom
## Multiple R-squared: 0.06188, Adjusted R-squared: 0.06121
## F-statistic: 92.24 on 2 and 2797 DF, p-value: < 2.2e-16
## Moderated model:
out2 <- lm(neuro ~ agree * gender, data = bfi)
summary(out2)
##
## Call:
## lm(formula = neuro ~ agree * gender, data = bfi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0034 -0.8689 -0.0623 0.8080 3.1803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.88591 0.18371 21.152 < 2e-16 ***
## agree -0.21325 0.04097 -5.206 2.07e-07 ***
## genderfemale 0.99651 0.23829 4.182 2.98e-05 ***
## agree:genderfemale -0.12484 0.05152 -2.423 0.0155 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.149 on 2796 degrees of freedom
## Multiple R-squared: 0.06384, Adjusted R-squared: 0.06284
## F-statistic: 63.56 on 3 and 2796 DF, p-value: < 2.2e-16
In the case of categorical moderators, the estimate for the conditional focal effect automatically represents the simple slope in the reference group. So, we don’t need to do anything fancy to probe the interaction.
## Test 'female' simple slope by changing reference group:
bfi %<>% mutate(gender = relevel(gender, ref = "female"))

out2.1 <- lm(neuro ~ agree * gender, data = bfi)
summary(out2.1)
##
## Call:
## lm(formula = neuro ~ agree * gender, data = bfi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0034 -0.8689 -0.0623 0.8080 3.1803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.88242 0.15176 32.171 < 2e-16 ***
## agree -0.33809 0.03125 -10.820 < 2e-16 ***
## gendermale -0.99651 0.23829 -4.182 2.98e-05 ***
## agree:gendermale 0.12484 0.05152 2.423 0.0155 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.149 on 2796 degrees of freedom
## Multiple R-squared: 0.06384, Adjusted R-squared: 0.06284
## F-statistic: 63.56 on 3 and 2796 DF, p-value: < 2.2e-16
If we want to, we can also use rockchalk.
plotSlopes(out2, plotx = "agree", modx = "gender") %>% testSlopes()
## These are the straight-line "simple slopes" of the variable agree
## for the selected moderator values.
## "gender" slope Std. Error t value Pr(>|t|)
## male agree -0.2132496 0.04096616 -5.205507 2.074922e-07
## female agree:genderfemale -0.3380877 0.03124533 -10.820426 9.265872e-27
Nominal Categorical Moderators (G > 2)
Finally, the situation doesn’t really get any more complicated for multinomial moderators.
## Moderated model:
out1 <- lm(Petal.Width ~ Sepal.Width * Species, data = iris)
summary(out1)
##
## Call:
## lm(formula = Petal.Width ~ Sepal.Width * Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40713 -0.06840 -0.01969 0.09589 0.37311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.04539 0.25635 0.177 0.85983
## Sepal.Width 0.05447 0.07597 0.717 0.47517
## Speciesversicolor -0.25807 0.36640 -0.704 0.48295
## Speciesvirginica 0.63112 0.38406 1.643 0.10366
## Sepal.Width:Speciesversicolor 0.48994 0.11954 4.099 8.81e-05 ***
## Sepal.Width:Speciesvirginica 0.38504 0.12138 3.172 0.00204 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1653 on 94 degrees of freedom
## Multiple R-squared: 0.9562, Adjusted R-squared: 0.9539
## F-statistic: 410.3 on 5 and 94 DF, p-value: < 2.2e-16
## Test for significant moderation:
out0 <- lm(Petal.Width ~ Sepal.Width + Species, data = iris)
summary(out0)
##
## Call:
## lm(formula = Petal.Width ~ Sepal.Width + Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43465 -0.09643 0.01322 0.10208 0.41322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.79303 0.18433 -4.302 4.07e-05 ***
## Sepal.Width 0.30426 0.05424 5.609 1.96e-07 ***
## Speciesversicolor 1.25726 0.05187 24.237 < 2e-16 ***
## Speciesvirginica 1.87574 0.04808 39.014 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1799 on 96 degrees of freedom
## Multiple R-squared: 0.947, Adjusted R-squared: 0.9454
## F-statistic: 572.3 on 3 and 96 DF, p-value: < 2.2e-16
anova(out0, out1)
## Test different simple slopes by changing reference group:
iris %<>% mutate(Species = relevel(Species, ref = "virginica"))
out1.1 <- lm(Petal.Width ~ Sepal.Width * Species, data = iris)

iris %<>% mutate(Species = relevel(Species, ref = "versicolor"))
out1.2 <- lm(Petal.Width ~ Sepal.Width * Species, data = iris)
summary(out1)
##
## Call:
## lm(formula = Petal.Width ~ Sepal.Width * Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40713 -0.06840 -0.01969 0.09589 0.37311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.04539 0.25635 0.177 0.85983
## Sepal.Width 0.05447 0.07597 0.717 0.47517
## Speciesversicolor -0.25807 0.36640 -0.704 0.48295
## Speciesvirginica 0.63112 0.38406 1.643 0.10366
## Sepal.Width:Speciesversicolor 0.48994 0.11954 4.099 8.81e-05 ***
## Sepal.Width:Speciesvirginica 0.38504 0.12138 3.172 0.00204 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1653 on 94 degrees of freedom
## Multiple R-squared: 0.9562, Adjusted R-squared: 0.9539
## F-statistic: 410.3 on 5 and 94 DF, p-value: < 2.2e-16
summary(out1.1)
##
## Call:
## lm(formula = Petal.Width ~ Sepal.Width * Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40713 -0.06840 -0.01969 0.09589 0.37311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.67652 0.28599 2.366 0.02006 *
## Sepal.Width 0.43951 0.09467 4.643 1.11e-05 ***
## Speciessetosa -0.63112 0.38406 -1.643 0.10366
## Speciesversicolor -0.88920 0.38771 -2.293 0.02405 *
## Sepal.Width:Speciessetosa -0.38504 0.12138 -3.172 0.00204 **
## Sepal.Width:Speciesversicolor 0.10490 0.13221 0.793 0.42952
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1653 on 94 degrees of freedom
## Multiple R-squared: 0.9562, Adjusted R-squared: 0.9539
## F-statistic: 410.3 on 5 and 94 DF, p-value: < 2.2e-16
summary(out1.2)
##
## Call:
## lm(formula = Petal.Width ~ Sepal.Width * Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40713 -0.06840 -0.01969 0.09589 0.37311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.21268 0.26178 -0.812 0.419
## Sepal.Width 0.54441 0.09229 5.899 5.74e-08 ***
## Speciesvirginica 0.88920 0.38771 2.293 0.024 *
## Speciessetosa 0.25807 0.36640 0.704 0.483
## Sepal.Width:Speciesvirginica -0.10490 0.13221 -0.793 0.430
## Sepal.Width:Speciessetosa -0.48994 0.11954 -4.099 8.81e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1653 on 94 degrees of freedom
## Multiple R-squared: 0.9562, Adjusted R-squared: 0.9539
## F-statistic: 410.3 on 5 and 94 DF, p-value: < 2.2e-16
## Do the same test using 'rockchalk':
plotOut1 <- plotSlopes(model = out1,
                       plotx = "Sepal.Width",
                       modx = "Species",
                       plotPoints = FALSE)

testOut1 <- testSlopes(plotOut1)
## These are the straight-line "simple slopes" of the variable Sepal.Width
## for the selected moderator values.
## "Species" slope Std. Error t value
## setosa Sepal.Width 0.0544667 0.07596798 0.7169693
## versicolor Sepal.Width:Speciesversicolor 0.5444098 0.09229194 5.8987799
## virginica Sepal.Width:Speciesvirginica 0.4395070 0.09466868 4.6425809
## Pr(>|t|)
## setosa 4.751708e-01
## versicolor 5.735785e-08
## virginica 1.114651e-05
testOut1$hypotests
End of Demonstration