In this document, I will demonstrate how to implement the analyses discussed in the lecture using R.
First, we need to load some extra packages.
library(MLmetrics) # For MSEs
library(dplyr) # For data processing
library(magrittr) # For fancy pipes
library(rockchalk) # For interaction probing
Now, we’ll read in and prepare some data.
set.seed(235711)
<- "../../data/"
dataDir
<- readRDS(paste0(dataDir, "mtcars.rds"))
cars <- readRDS(paste0(dataDir, "insurance0.rds"))
ins0 <- readRDS(paste0(dataDir, "insurance1.rds"))
ins1 <- readRDS(paste0(dataDir, "ginzberg.rds"))
ginz <- readRDS(paste0(dataDir, "bfi_scored.rds"))
bfi
## Load the iris data
data(iris)
## Sample 100 rows to unbalance group sizes:
iris %<>% slice_sample(n = 100, replace = TRUE)
Simple Linear Regression
First, we’ll fit a simple linear regression (SLR) model wherein we use the cars data to regress qsec (quarter mile lap time in seconds) onto hp (the horsepower rating of the engine).
## Fit a simple linear regression model:
out1 <- lm(qsec ~ hp, data = cars)
## Summarize the results:
summary(out1)
##
## Call:
## lm(formula = qsec ~ hp, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1766 -0.6975 0.0348 0.6520 4.0972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.556354 0.542424 37.897 < 2e-16 ***
## hp -0.018458 0.003359 -5.495 5.77e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.282 on 30 degrees of freedom
## Multiple R-squared: 0.5016, Adjusted R-squared: 0.485
## F-statistic: 30.19 on 1 and 30 DF, p-value: 5.766e-06
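As a quick sanity check, the SLR estimates have closed-form expressions: the slope is cov(x, y) / var(x), and the intercept follows from the sample means. A minimal sketch (the helper names b0 and b1 are ours):
## Check the OLS estimates by hand:
b1 <- with(cars, cov(hp, qsec) / var(hp))
b0 <- mean(cars$qsec) - b1 * mean(cars$hp)
c(intercept = b0, slope = b1)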
Our fitted lm object is a list containing many elements.
## See what's inside the fitted lm object:
ls(out1)
## [1] "assign" "call" "coefficients" "df.residual"
## [5] "effects" "fitted.values" "model" "qr"
## [9] "rank" "residuals" "terms" "xlevels"
The output of summary() is just another object that we can save to a variable.
s1 <- summary(out1)
ls(s1)
## [1] "adj.r.squared" "aliased" "call" "coefficients"
## [5] "cov.unscaled" "df" "fstatistic" "r.squared"
## [9] "residuals" "sigma" "terms"
s1
##
## Call:
## lm(formula = qsec ~ hp, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1766 -0.6975 0.0348 0.6520 4.0972
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 20.556354 0.542424 37.897 < 2e-16 ***
## hp -0.018458 0.003359 -5.495 5.77e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.282 on 30 degrees of freedom
## Multiple R-squared: 0.5016, Adjusted R-squared: 0.485
## F-statistic: 30.19 on 1 and 30 DF, p-value: 5.766e-06
We can access particular pieces of the model output.
## Access the R^2 slot in the summary object:
s1$r.squared
## [1] 0.5015804
## Extract coefficients:
coef(out1)
## (Intercept) hp
## 20.55635402 -0.01845831
## Extract residuals:
res3 <- resid(out1)
res3
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## -2.06593942 -1.50593942 -0.22973076 0.91406058
## Hornet Sportabout Valiant Duster 360 Merc 240D
## -0.30614897 1.60176901 -0.19406695 0.58806148
## Merc 230 Merc 280 Merc 280C Merc 450SE
## 4.09718587 0.01401867 0.61401867 0.16614260
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 0.36614260 0.76614260 1.20760047 1.23218361
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 1.10905833 0.13189474 -1.07652166 0.54343643
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## 1.24410249 -0.91760683 -0.48760683 -0.62406695
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## -0.27614897 -0.43810526 -2.17664739 -1.57056447
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -1.18335897 -1.82614897 0.22718136 0.05560227
## Extract fitted values (two ways):
yHat1.1 <- fitted(out1)
yHat1.2 <- predict(out1)

yHat1.1
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## 18.52594 18.52594 18.83973 18.52594
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 17.32615 18.61823 16.03407 19.41194
## Merc 230 Merc 280 Merc 280C Merc 450SE
## 18.80281 18.28598 18.28598 17.23386
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 17.23386 17.23386 16.77240 16.58782
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 16.31094 19.33811 19.59652 19.35656
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## 18.76590 17.78761 17.78761 16.03407
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 17.32615 19.33811 18.87665 18.47056
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## 15.68336 17.32615 14.37282 18.54440
yHat1.2
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## 18.52594 18.52594 18.83973 18.52594
## Hornet Sportabout Valiant Duster 360 Merc 240D
## 17.32615 18.61823 16.03407 19.41194
## Merc 230 Merc 280 Merc 280C Merc 450SE
## 18.80281 18.28598 18.28598 17.23386
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## 17.23386 17.23386 16.77240 16.58782
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## 16.31094 19.33811 19.59652 19.35656
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## 18.76590 17.78761 17.78761 16.03407
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## 17.32615 19.33811 18.87665 18.47056
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## 15.68336 17.32615 14.37282 18.54440
## Compare:
yHat1.1 - yHat1.2
## Mazda RX4 Mazda RX4 Wag Datsun 710 Hornet 4 Drive
## 3.552714e-15 0.000000e+00 -3.552714e-15 0.000000e+00
## Hornet Sportabout Valiant Duster 360 Merc 240D
## -3.552714e-15 0.000000e+00 0.000000e+00 0.000000e+00
## Merc 230 Merc 280 Merc 280C Merc 450SE
## 0.000000e+00 -3.552714e-15 -3.552714e-15 -3.552714e-15
## Merc 450SL Merc 450SLC Cadillac Fleetwood Lincoln Continental
## -3.552714e-15 -3.552714e-15 -3.552714e-15 -3.552714e-15
## Chrysler Imperial Fiat 128 Honda Civic Toyota Corolla
## -3.552714e-15 0.000000e+00 0.000000e+00 -3.552714e-15
## Toyota Corona Dodge Challenger AMC Javelin Camaro Z28
## 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00
## Pontiac Firebird Fiat X1-9 Porsche 914-2 Lotus Europa
## -3.552714e-15 0.000000e+00 0.000000e+00 -3.552714e-15
## Ford Pantera L Ferrari Dino Maserati Bora Volvo 142E
## -1.776357e-15 -3.552714e-15 -1.776357e-15 0.000000e+00
Multiple Linear Regression
Now, we fit a multiple linear regression (MLR) model by
adding wt
(the car’s weight) and carb
(the
number of carburetors) into the model as additional predictors.
## Fit a multiple linear regression model:
out2 <- lm(qsec ~ hp + wt + carb, data = cars)
## Summarize the model:
s2 <- summary(out2)
s2
##
## Call:
## lm(formula = qsec ~ hp + wt + carb, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6403 -0.5012 -0.0832 0.2884 3.7455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.960702 0.672880 28.178 < 2e-16 ***
## hp -0.022747 0.005174 -4.396 0.000144 ***
## wt 0.896234 0.265372 3.377 0.002166 **
## carb -0.234171 0.182798 -1.281 0.210689
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.078 on 28 degrees of freedom
## Multiple R-squared: 0.6713, Adjusted R-squared: 0.6361
## F-statistic: 19.06 on 3 and 28 DF, p-value: 6.203e-07
## Extract R^2:
s2$r.squared
## [1] 0.6712944
## Extract F-stat:
s2$fstatistic
## value numdf dendf
## 19.06087 3.00000 28.00000
## Extract coefficients:
(cf <- coef(out2))
## (Intercept) hp wt carb
## 18.96070158 -0.02274737 0.89623413 -0.23417108
The confint() function computes confidence intervals (CIs) for the coefficients.
## Compute confidence intervals for coefficients:
confint(out2)
## 2.5 % 97.5 %
## (Intercept) 17.58236974 20.33903343
## hp -0.03334596 -0.01214878
## wt 0.35264355 1.43982471
## carb -0.60861505 0.14027290
confint(out2, parm = "hp")
## 2.5 % 97.5 %
## hp -0.03334596 -0.01214878
confint(out2, parm = c("(Intercept)", "wt"), level = 0.99)
## 0.5 % 99.5 %
## (Intercept) 17.1013580 20.820045
## wt 0.1629407 1.629528
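Under the hood, these are Wald intervals: the estimate plus or minus a t critical value times the standard error. A minimal sketch reproducing the default 95% intervals with base R (the helper names est, ses, and crit are ours):
## Reproduce the default 95% CIs by hand:
est  <- coef(out2)
ses  <- out2 %>% vcov() %>% diag() %>% sqrt()
crit <- qt(0.975, df = df.residual(out2))
cbind(est - crit * ses, est + crit * ses)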
We can also compute our own statistics based on the fitted model object.
## Manually compute standard errors and t-statistics:
se <- out2 %>% vcov() %>% diag() %>% sqrt()
t  <- cf / se
s2
##
## Call:
## lm(formula = qsec ~ hp + wt + carb, data = cars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.6403 -0.5012 -0.0832 0.2884 3.7455
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.960702 0.672880 28.178 < 2e-16 ***
## hp -0.022747 0.005174 -4.396 0.000144 ***
## wt 0.896234 0.265372 3.377 0.002166 **
## carb -0.234171 0.182798 -1.281 0.210689
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.078 on 28 degrees of freedom
## Multiple R-squared: 0.6713, Adjusted R-squared: 0.6361
## F-statistic: 19.06 on 3 and 28 DF, p-value: 6.203e-07
t
## (Intercept) hp wt carb
## 28.178437 -4.396422 3.377270 -1.281040
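To finish the job, we can convert our manual t-statistics into two-tailed p-values with pt(), using the model’s residual degrees of freedom (a quick sketch):
## Manually compute two-tailed p-values:
p <- 2 * pt(abs(t), df = df.residual(out2), lower.tail = FALSE)
p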
Model Comparison
Now, we’ll compare the SLR and MLR models.
## Change in R^2:
s2$r.squared - s1$r.squared
## [1] 0.169714
## Significant increase in R^2?
anova(out1, out2)
## Compare MSE values:
MSE(y_pred = predict(out1), y_true = cars$qsec)
## [1] 1.541801
MSE(y_pred = predict(out2), y_true = cars$qsec)
## [1] 1.016811
## Information criteria:
AIC(out1, out2)
BIC(out1, out2)
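For reference, AIC() is just -2 times the log-likelihood plus twice the number of estimated parameters (including the residual variance); lower values suggest better expected out-of-sample performance. A quick sketch of that identity (the helper name ll is ours):
## AIC by hand: -2 * logLik + 2 * df
ll <- logLik(out2)
-2 * as.numeric(ll) + 2 * attr(ll, "df")
AIC(out2)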
If we want to update and rerun an existing lm() model, we can use the update() function to do so without a bunch of copy-pasting.
## The long way:
out1 <- lm(qsec ~ hp, data = mtcars)
out2 <- lm(qsec ~ hp + wt, data = mtcars)
out3 <- lm(qsec ~ hp + wt + gear + carb, data = mtcars)

## The short way:
out2.1 <- update(out1, ". ~ . + wt")
out3.1 <- update(out2.1, ". ~ . + gear + carb")
out3.2 <- update(out1, ". ~ . + wt + gear + carb")
## We can also remove variables:
out1.1 <- update(out2, ". ~ . - wt")
summary(out2)
##
## Call:
## lm(formula = qsec ~ hp + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8283 -0.4055 -0.1464 0.3519 3.7030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.825585 0.671867 28.020 < 2e-16 ***
## hp -0.027310 0.003795 -7.197 6.36e-08 ***
## wt 0.941532 0.265897 3.541 0.00137 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.09 on 29 degrees of freedom
## Multiple R-squared: 0.652, Adjusted R-squared: 0.628
## F-statistic: 27.17 on 2 and 29 DF, p-value: 2.251e-07
summary(out2.1)
##
## Call:
## lm(formula = qsec ~ hp + wt, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.8283 -0.4055 -0.1464 0.3519 3.7030
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.825585 0.671867 28.020 < 2e-16 ***
## hp -0.027310 0.003795 -7.197 6.36e-08 ***
## wt 0.941532 0.265897 3.541 0.00137 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.09 on 29 degrees of freedom
## Multiple R-squared: 0.652, Adjusted R-squared: 0.628
## F-statistic: 27.17 on 2 and 29 DF, p-value: 2.251e-07
all.equal(out1, out1.1)
## [1] TRUE
all.equal(out2, out2.1)
## [1] TRUE
all.equal(out3, out3.1)
## [1] TRUE
all.equal(out3, out3.2)
## [1] TRUE
## We can rerun the same model on new data:
mtcars2 <- mtcars[1:15, ]

out4 <- update(out3, data = mtcars2)
summary(out3)
##
## Call:
## lm(formula = qsec ~ hp + wt + gear + carb, data = mtcars)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.7810 -0.5349 -0.1265 0.3214 3.6687
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.143046 2.423001 7.488 4.7e-08 ***
## hp -0.022499 0.005304 -4.242 0.000233 ***
## wt 0.996423 0.392208 2.541 0.017129 *
## gear 0.166134 0.472310 0.352 0.727758
## carb -0.288823 0.242148 -1.193 0.243341
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.095 on 27 degrees of freedom
## Multiple R-squared: 0.6728, Adjusted R-squared: 0.6243
## F-statistic: 13.88 on 4 and 27 DF, p-value: 2.842e-06
summary(out4)
##
## Call:
## lm(formula = qsec ~ hp + wt + gear + carb, data = mtcars2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.39320 -0.65268 -0.07605 0.50496 2.47398
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.714599 7.168210 1.076 0.3071
## hp -0.008156 0.014654 -0.557 0.5901
## wt 1.970702 0.681556 2.891 0.0161 *
## gear 2.417899 1.502550 1.609 0.1387
## carb -1.196560 0.483448 -2.475 0.0328 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.109 on 10 degrees of freedom
## Multiple R-squared: 0.7179, Adjusted R-squared: 0.6051
## F-statistic: 6.363 on 4 and 10 DF, p-value: 0.008195
Categorical Predictors
Factor Variables
R represents nominal variables as factors.
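For instance, we can build a small factor from scratch with factor() (a toy example; the vector x is not part of any dataset used here):
## A toy factor built from a character vector:
x <- factor(c("low", "high", "low", "medium"), levels = c("low", "medium", "high"))
x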
## Look at the 'Species' factor:
iris$Species
## [1] versicolor setosa setosa setosa virginica virginica
## [7] setosa virginica versicolor versicolor versicolor virginica
## [13] setosa versicolor setosa versicolor versicolor virginica
## [19] setosa setosa versicolor versicolor versicolor virginica
## [25] versicolor versicolor virginica setosa versicolor setosa
## [31] versicolor virginica setosa virginica virginica virginica
## [37] versicolor versicolor setosa versicolor virginica setosa
## [43] versicolor versicolor virginica setosa virginica setosa
## [49] setosa virginica versicolor versicolor versicolor setosa
## [55] virginica setosa versicolor virginica setosa setosa
## [61] setosa versicolor virginica setosa setosa setosa
## [67] virginica setosa setosa virginica virginica virginica
## [73] setosa virginica virginica versicolor setosa setosa
## [79] versicolor setosa setosa versicolor setosa virginica
## [85] versicolor setosa setosa virginica virginica versicolor
## [91] virginica setosa setosa versicolor versicolor setosa
## [97] virginica setosa versicolor setosa
## Levels: setosa versicolor virginica
is.factor(iris$Species)
## [1] TRUE
str(iris$Species)
## Factor w/ 3 levels "setosa","versicolor",..: 2 1 1 1 3 3 1 3 2 2 ...
summary(iris$Species)
## setosa versicolor virginica
## 39 32 29
Factors have special attributes to allow them to encode grouping information.
attributes(iris$Species)
## $levels
## [1] "setosa" "versicolor" "virginica"
##
## $class
## [1] "factor"
attributes(iris$Petal.Length)
## NULL
attributes(iris)
## $names
## [1] "Sepal.Length" "Sepal.Width" "Petal.Length" "Petal.Width" "Species"
##
## $class
## [1] "data.frame"
##
## $row.names
## [1] 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## [19] 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## [37] 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54
## [55] 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72
## [73] 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## [91] 91 92 93 94 95 96 97 98 99 100
## Factors have labeled levels:
levels(iris$Species)
## [1] "setosa" "versicolor" "virginica"
nlevels(iris$Species)
## [1] 3
Although the underlying data are represented numerically, factors are not numeric variables. So, we cannot use a factor as a numeric vector.
mean(iris$Species)
## [1] NA
var(iris$Species)
## Error in var(iris$Species): Calling var(x) on a factor x is defunct.
## Use something like 'all(duplicated(x)[-1L])' to test for a constant vector.
iris$Species - iris$Species
## [1] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [26] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [51] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
## [76] NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA NA
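If we really need the underlying integer codes, we have to extract them explicitly. Note that these codes simply index the factor levels and are rarely meaningful as numbers:
## Explicitly extract the underlying level codes:
head(as.integer(iris$Species))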
Dummy Codes
If we use a factor variable as a predictor in the lm() function, the function will automatically generate a set of dummy codes.
## Use a factor variable as a predictor:
out1 <- lm(Petal.Length ~ Species, data = iris)
summary(out1)
##
## Call:
## lm(formula = Petal.Length ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22759 -0.16206 0.02051 0.22051 0.97241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.37949 0.06712 20.55 <2e-16 ***
## Speciesversicolor 2.77676 0.09997 27.78 <2e-16 ***
## Speciesvirginica 4.34810 0.10277 42.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4191 on 97 degrees of freedom
## Multiple R-squared: 0.9513, Adjusted R-squared: 0.9503
## F-statistic: 947.7 on 2 and 97 DF, p-value: < 2.2e-16
The lm() function only knows how to create these codes because of the factor’s contrasts.
## Check the contrasts:
contrasts(iris$Species)
## versicolor virginica
## setosa 0 0
## versicolor 1 0
## virginica 0 1
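We can see exactly how these contrasts enter the model by inspecting the design matrix that lm() builds internally:
## Peek at the dummy-coded design matrix:
head(model.matrix(~ Species, data = iris))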
If we want to influence the factor’s eventual coding, we need to modify its contrasts.
## Change the reference group:
iris$Species2 <- relevel(iris$Species, ref = "virginica")
levels(iris$Species)
## [1] "setosa" "versicolor" "virginica"
levels(iris$Species2)
## [1] "virginica" "setosa" "versicolor"
## How are the contrasts affected?
contrasts(iris$Species)
## versicolor virginica
## setosa 0 0
## versicolor 1 0
## virginica 0 1
contrasts(iris$Species2)
## setosa versicolor
## virginica 0 0
## setosa 1 0
## versicolor 0 1
## Which carries through to the models:
out2 <- lm(Petal.Length ~ Species2, data = iris)
summary(out1)
##
## Call:
## lm(formula = Petal.Length ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22759 -0.16206 0.02051 0.22051 0.97241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.37949 0.06712 20.55 <2e-16 ***
## Speciesversicolor 2.77676 0.09997 27.78 <2e-16 ***
## Speciesvirginica 4.34810 0.10277 42.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4191 on 97 degrees of freedom
## Multiple R-squared: 0.9513, Adjusted R-squared: 0.9503
## F-statistic: 947.7 on 2 and 97 DF, p-value: < 2.2e-16
summary(out2)
##
## Call:
## lm(formula = Petal.Length ~ Species2, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22759 -0.16206 0.02051 0.22051 0.97241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.72759 0.07783 73.59 <2e-16 ***
## Species2setosa -4.34810 0.10277 -42.31 <2e-16 ***
## Species2versicolor -1.57134 0.10746 -14.62 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4191 on 97 degrees of freedom
## Multiple R-squared: 0.9513, Adjusted R-squared: 0.9503
## F-statistic: 947.7 on 2 and 97 DF, p-value: < 2.2e-16
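Note that relevel() only changes the reference group. If we wanted a different coding scheme altogether (e.g., effects coding), we could overwrite the contrasts directly. A minimal sketch (Species3 is a scratch copy that we won’t use elsewhere):
## Assign sum-to-zero (effects) codes to a scratch copy of Species:
iris$Species3 <- iris$Species
contrasts(iris$Species3) <- contr.sum(nlevels(iris$Species3))
contrasts(iris$Species3)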
Significance Testing
To test the overall significance of a grouping factor, we generally need to compare a full model that contains the dummy codes representing that factor to a restricted model that excludes the dummy codes.
## Test the partial effect of Species:
out0 <- lm(Petal.Length ~ Petal.Width, data = iris)
out1 <- update(out0, ". ~ . + Species")
(s0 <- summary(out0))
##
## Call:
## lm(formula = Petal.Length ~ Petal.Width, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.11608 -0.27890 -0.00569 0.20170 1.05551
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.92371 0.06945 13.30 <2e-16 ***
## Petal.Width 2.38799 0.05210 45.83 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.399 on 98 degrees of freedom
## Multiple R-squared: 0.9554, Adjusted R-squared: 0.955
## F-statistic: 2101 on 1 and 98 DF, p-value: < 2.2e-16
(s1 <- summary(out1))
##
## Call:
## lm(formula = Petal.Length ~ Petal.Width + Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.84382 -0.16146 -0.00899 0.15701 0.96795
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.08417 0.06356 17.056 < 2e-16 ***
## Petal.Width 1.29409 0.16001 8.087 1.86e-12 ***
## Speciesversicolor 1.36145 0.19139 7.113 2.03e-10 ***
## Speciesvirginica 2.05970 0.29396 7.007 3.36e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3249 on 96 degrees of freedom
## Multiple R-squared: 0.971, Adjusted R-squared: 0.9701
## F-statistic: 1073 on 3 and 96 DF, p-value: < 2.2e-16
## Compute R^2 difference and test its significance:
s1$r.squared - s0$r.squared
## [1] 0.01561812
anova(out0, out1)
For models that contain the grouping factor as the only predictor, we can use an intercept-only model as the restricted model.
## Test the effect of Species:
out0 <- lm(Petal.Length ~ 1, data = iris)
out1 <- update(out0, ". ~ . + Species")
(s0 <- summary(out0))
##
## Call:
## lm(formula = Petal.Length ~ 1, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.529 -2.129 0.471 1.396 3.171
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.529 0.188 18.77 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.88 on 99 degrees of freedom
(s1 <- summary(out1))
##
## Call:
## lm(formula = Petal.Length ~ Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.22759 -0.16206 0.02051 0.22051 0.97241
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.37949 0.06712 20.55 <2e-16 ***
## Speciesversicolor 2.77676 0.09997 27.78 <2e-16 ***
## Speciesvirginica 4.34810 0.10277 42.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4191 on 97 degrees of freedom
## Multiple R-squared: 0.9513, Adjusted R-squared: 0.9503
## F-statistic: 947.7 on 2 and 97 DF, p-value: < 2.2e-16
anova(out0, out1)
We don’t need to go through all the trouble of fitting an intercept-only model, though.
s1$r.squared - s0$r.squared
## [1] 0.9513143
## An intercept only model has R^2 = 0:
s0$r.squared
## [1] 0
## So, the model comparison above is equivalent to the ordinary F-test from the
## full model:
s1$fstatistic
## value numdf dendf
## 947.6864 2.0000 97.0000
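If we want the corresponding p-value, we can feed the stored F-statistic to pf() (the helper name f is ours):
## Convert the stored F-statistic into a p-value:
f <- s1$fstatistic
pf(f["value"], f["numdf"], f["dendf"], lower.tail = FALSE)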
Prediction
Now, we’ll use the insurance data (ins0, ins1) to explore the process of generating and evaluating predictions.
## Estimate three models:
out1 <- lm(charges ~ age + sex, data = ins0)
out2 <- update(out1, ". ~ . + region + children")
out3 <- update(out2, ". ~ . + bmi + smoker")
## Check that everything worked:
summary(out1)
##
## Call:
## lm(formula = charges ~ age + sex, data = ins0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -8860 -6996 -5710 5732 47215
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2357.02 1160.32 2.031 0.0425 *
## age 262.93 26.42 9.953 <2e-16 ***
## sexmale 1401.06 742.93 1.886 0.0596 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11740 on 997 degrees of freedom
## Multiple R-squared: 0.0935, Adjusted R-squared: 0.09169
## F-statistic: 51.42 on 2 and 997 DF, p-value: < 2.2e-16
summary(out2)
##
## Call:
## lm(formula = charges ~ age + sex + region + children, data = ins0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9608 -7084 -5495 5159 46840
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1777.62 1370.37 1.297 0.1949
## age 261.45 26.37 9.916 <2e-16 ***
## sexmale 1328.47 741.98 1.790 0.0737 .
## regionnorthwest -531.29 1069.59 -0.497 0.6195
## regionsoutheast 1033.78 1038.16 0.996 0.3196
## regionsouthwest -970.75 1074.93 -0.903 0.3667
## children 698.36 315.17 2.216 0.0269 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11710 on 993 degrees of freedom
## Multiple R-squared: 0.1017, Adjusted R-squared: 0.0963
## F-statistic: 18.74 on 6 and 993 DF, p-value: < 2.2e-16
summary(out3)
##
## Call:
## lm(formula = charges ~ age + sex + region + children + bmi +
## smoker, data = ins0)
##
## Residuals:
## Min 1Q Median 3Q Max
## -11643.9 -2961.6 -952.1 1613.0 29932.2
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -12121.96 1139.72 -10.636 < 2e-16 ***
## age 244.95 13.81 17.733 < 2e-16 ***
## sexmale 174.88 386.92 0.452 0.65139
## regionnorthwest -181.38 556.73 -0.326 0.74465
## regionsoutheast -1266.36 555.31 -2.280 0.02279 *
## regionsouthwest -816.41 561.38 -1.454 0.14619
## children 445.95 164.08 2.718 0.00668 **
## bmi 355.46 32.82 10.832 < 2e-16 ***
## smokeryes 24059.56 475.74 50.573 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6094 on 991 degrees of freedom
## Multiple R-squared: 0.7573, Adjusted R-squared: 0.7553
## F-statistic: 386.4 on 8 and 991 DF, p-value: < 2.2e-16
We use the predict() function to generate predictions from a fitted model. As we saw above, if we provide the fitted model object as the only argument to predict(), we simply get the model-implied outcomes (i.e., \(\hat{Y}\)).
## Generate model-implied outcomes (i.e., y-hats):
p1 <- predict(out1)
p2 <- predict(out2)
p3 <- predict(out3)
We can use these \(\hat{Y}\) values to compute mean squared errors (MSEs) with the MLmetrics::MSE() function.
## Generate MSEs:
MSE(y_pred = p1, y_true = ins0$charges)
## [1] 156682904
MSE(y_pred = p2, y_true = ins0$charges)
## [1] 157852019
MSE(y_pred = p3, y_true = ins0$charges)
## [1] 265069105
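Of course, MSE() is nothing more than the mean squared residual, so we could also compute the same quantity by hand:
## MSE by hand for the first model:
mean((ins0$charges - p1)^2)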
We can generate out-of-sample (i.e., true) predictions for new data by supplying a new data frame to the newdata argument in predict().
## Generate out-of-sample (i.e., true) predictions:
p1.2 <- predict(out1, newdata = ins1)
p2.2 <- predict(out2, newdata = ins1)
p3.2 <- predict(out3, newdata = ins1)
## Generate MSEs:
MSE(y_pred = p1.2, y_true = ins1$charges)
## [1] 117084071
MSE(y_pred = p2.2, y_true = ins1$charges)
## [1] 115398905
MSE(y_pred = p3.2, y_true = ins1$charges)
## [1] 36285568
We can compute confidence or prediction intervals for predictions by specifying an appropriate value for the interval argument in predict().
## Get confidence and prediction intervals:
p3.3 <- predict(out3, newdata = ins1, interval = "confidence")
p3.4 <- predict(out3, newdata = ins1, interval = "prediction")
head(p3.3)
## fit lwr upr
## 181 12243.075 11156.2610 13329.889
## 901 8058.695 6957.3947 9159.996
## 32 1641.097 537.1289 2745.066
## 694 2158.776 1089.5937 3227.959
## 79 7416.066 6122.0602 8710.071
## 230 9507.529 8510.2586 10504.800
head(p3.4)
## fit lwr upr
## 181 12243.075 234.7479 24251.40
## 901 8058.695 -3950.9514 20068.34
## 32 1641.097 -10368.7943 13650.99
## 694 2158.776 -9847.9677 14165.52
## 79 7416.066 -4612.7831 19444.91
## 230 9507.529 -2493.0248 21508.08
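As a rough sanity check, the default 95% prediction intervals should cover roughly 95% of the new outcomes, if the model assumptions hold. A quick sketch using the lwr/upr columns returned by predict():
## Empirical coverage of the prediction intervals in the new data:
mean(ins1$charges >= p3.4[, "lwr"] & ins1$charges <= p3.4[, "upr"])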
Moderation
Finally, we’ll consider methods of testing for moderation and probing significant interactions.
Continuous Moderators
We’ll start by using the Ginzberg depression data (ginz) to estimate a moderated regression model with a continuous moderator variable.
## Focal effect:
out0 <- lm(depression ~ fatalism, data = ginz)
summary(out0)
##
## Call:
## lm(formula = depression ~ fatalism, data = ginz)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.69096 -0.25544 -0.03114 0.20540 1.17070
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.34263 0.09408 3.642 0.000479 ***
## fatalism 0.65737 0.08425 7.802 1.97e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3791 on 80 degrees of freedom
## Multiple R-squared: 0.4321, Adjusted R-squared: 0.425
## F-statistic: 60.88 on 1 and 80 DF, p-value: 1.97e-11
## Additive model:
out1 <- lm(depression ~ fatalism + simplicity, data = ginz)
summary(out1)
##
## Call:
## lm(formula = depression ~ fatalism + simplicity, data = ginz)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.67818 -0.23270 -0.05276 0.16817 1.10094
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.20269 0.09472 2.140 0.035455 *
## fatalism 0.41777 0.10064 4.151 8.28e-05 ***
## simplicity 0.37953 0.10064 3.771 0.000312 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3512 on 79 degrees of freedom
## Multiple R-squared: 0.5188, Adjusted R-squared: 0.5066
## F-statistic: 42.58 on 2 and 79 DF, p-value: 2.837e-13
## Moderated model:
out2 <- lm(depression ~ fatalism * simplicity, data = ginz)
summary(out2)
##
## Call:
## lm(formula = depression ~ fatalism * simplicity, data = ginz)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.55988 -0.20390 -0.03806 0.15617 1.01101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.2962 0.1918 -1.544 0.12665
## fatalism 0.8259 0.1685 4.902 5.05e-06 ***
## simplicity 0.9372 0.2121 4.418 3.17e-05 ***
## fatalism:simplicity -0.4039 0.1370 -2.949 0.00421 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3353 on 78 degrees of freedom
## Multiple R-squared: 0.567, Adjusted R-squared: 0.5504
## F-statistic: 34.05 on 3 and 78 DF, p-value: 3.587e-14
Probing the Interaction
Based on the results above, we can conclude that simplistic thinking significantly moderates the effect of fatalism on depression (\(\beta = -0.404\), \(t[78] = -2.95\), \(p = 0.004\)). So, we probably want to probe that interaction to get a better sense of what’s happening.
We can use the rockchalk::plotSlopes() function to generate simple slopes plots.
## First we use 'plotSlopes' to estimate the simple slopes:
plotOut1 <- plotSlopes(out2,
                       plotx = "fatalism",
                       modx = "simplicity",
                       modxVals = "std.dev",
                       plotPoints = TRUE)
## We can also get simple slopes at the quartiles of simplicity's distribution:
plotOut2 <- plotSlopes(out2,
                       plotx = "fatalism",
                       modx = "simplicity",
                       modxVals = "quantile",
                       plotPoints = TRUE)
## Or we can manually pick some values:
range(ginz$simplicity)
## [1] 0.25068 2.85408
plotOut3 <- plotSlopes(out2,
                       plotx = "fatalism",
                       modx = "simplicity",
                       modxVals = seq(0.5, 2.5, 0.5),
                       plotPoints = TRUE)
We can also test the significance of the simple slopes plotted above. Doing so is called a simple slopes analysis, and it is generally a good idea after finding a significant interaction.
We can use the rockchalk::testSlopes() function to easily implement these tests.
## Test the simple slopes via the 'testSlopes' function:
testOut1 <- testSlopes(plotOut1)
## Values of simplicity OUTSIDE this interval:
## lo hi
## 1.476340 4.346491
## cause the slope of (b1 + b2*simplicity)fatalism to be statistically significant
ls(testOut1)
## [1] "hypotests" "jn" "pso"
testOut1$hypotests
testOut2 <- testSlopes(plotOut2)
## Values of simplicity OUTSIDE this interval:
## lo hi
## 1.476340 4.346491
## cause the slope of (b1 + b2*simplicity)fatalism to be statistically significant
testOut2$hypotests
testOut3 <- testSlopes(plotOut3)
## Values of simplicity OUTSIDE this interval:
## lo hi
## 1.476340 4.346491
## cause the slope of (b1 + b2*simplicity)fatalism to be statistically significant
testOut3$hypotests
Binary Categorical Moderators
Of course, we’re not limited to continuous moderators; we can just as easily estimate models with binary moderator variables. Below, we’ll use the bfi data to see if gender (actually, biological sex) moderates the effect of agreeableness on neuroticism.
## Focal effect:
out0 <- lm(neuro ~ agree, data = bfi)
summary(out0)
##
## Call:
## lm(formula = neuro ~ agree, data = bfi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9598 -0.9155 -0.0712 0.8297 3.1683
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.30315 0.11693 36.802 <2e-16 ***
## agree -0.24525 0.02468 -9.938 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.167 on 2798 degrees of freedom
## Multiple R-squared: 0.03409, Adjusted R-squared: 0.03375
## F-statistic: 98.76 on 1 and 2798 DF, p-value: < 2.2e-16
## Additive model:
out1 <- lm(neuro ~ agree + gender, data = bfi)
summary(out1)
##
## Call:
## lm(formula = neuro ~ agree + gender, data = bfi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.9032 -0.8859 -0.0635 0.8227 3.2286
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.23224 0.11552 36.637 <2e-16 ***
## agree -0.29217 0.02487 -11.750 <2e-16 ***
## genderfemale 0.43061 0.04731 9.102 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.15 on 2797 degrees of freedom
## Multiple R-squared: 0.06188, Adjusted R-squared: 0.06121
## F-statistic: 92.24 on 2 and 2797 DF, p-value: < 2.2e-16
## Moderated model:
out2 <- lm(neuro ~ agree * gender, data = bfi)
summary(out2)
##
## Call:
## lm(formula = neuro ~ agree * gender, data = bfi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0034 -0.8689 -0.0623 0.8080 3.1803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3.88591 0.18371 21.152 < 2e-16 ***
## agree -0.21325 0.04097 -5.206 2.07e-07 ***
## genderfemale 0.99651 0.23829 4.182 2.98e-05 ***
## agree:genderfemale -0.12484 0.05152 -2.423 0.0155 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.149 on 2796 degrees of freedom
## Multiple R-squared: 0.06384, Adjusted R-squared: 0.06284
## F-statistic: 63.56 on 3 and 2796 DF, p-value: < 2.2e-16
In the case of categorical moderators, the estimate for the conditional focal effect automatically represents the simple slope in the reference group. So, we don’t need to do anything fancy to probe the interaction.
## Test 'female' simple slope by changing reference group:
bfi %<>% mutate(gender = relevel(gender, ref = "female"))

out2.1 <- lm(neuro ~ agree * gender, data = bfi)
summary(out2.1)
##
## Call:
## lm(formula = neuro ~ agree * gender, data = bfi)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0034 -0.8689 -0.0623 0.8080 3.1803
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.88242 0.15176 32.171 < 2e-16 ***
## agree -0.33809 0.03125 -10.820 < 2e-16 ***
## gendermale -0.99651 0.23829 -4.182 2.98e-05 ***
## agree:gendermale 0.12484 0.05152 2.423 0.0155 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.149 on 2796 degrees of freedom
## Multiple R-squared: 0.06384, Adjusted R-squared: 0.06284
## F-statistic: 63.56 on 3 and 2796 DF, p-value: < 2.2e-16
If we want to, we can also use rockchalk.
plotSlopes(out2, plotx = "agree", modx = "gender") %>% testSlopes()
## These are the straight-line "simple slopes" of the variable agree
## for the selected moderator values.
## "gender" slope Std. Error t value Pr(>|t|)
## male agree -0.2132496 0.04096616 -5.205507 2.074922e-07
## female agree:genderfemale -0.3380877 0.03124533 -10.820426 9.265872e-27
Nominal Categorical Moderators (G > 2)
Finally, the situation doesn’t really get any more complicated for multinomial moderators.
## Moderated model:
out1 <- lm(Petal.Width ~ Sepal.Width * Species, data = iris)
summary(out1)
##
## Call:
## lm(formula = Petal.Width ~ Sepal.Width * Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40713 -0.06840 -0.01969 0.09589 0.37311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.04539 0.25635 0.177 0.85983
## Sepal.Width 0.05447 0.07597 0.717 0.47517
## Speciesversicolor -0.25807 0.36640 -0.704 0.48295
## Speciesvirginica 0.63112 0.38406 1.643 0.10366
## Sepal.Width:Speciesversicolor 0.48994 0.11954 4.099 8.81e-05 ***
## Sepal.Width:Speciesvirginica 0.38504 0.12138 3.172 0.00204 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1653 on 94 degrees of freedom
## Multiple R-squared: 0.9562, Adjusted R-squared: 0.9539
## F-statistic: 410.3 on 5 and 94 DF, p-value: < 2.2e-16
## Test for significant moderation:
out0 <- lm(Petal.Width ~ Sepal.Width + Species, data = iris)
summary(out0)
##
## Call:
## lm(formula = Petal.Width ~ Sepal.Width + Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.43465 -0.09643 0.01322 0.10208 0.41322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.79303 0.18433 -4.302 4.07e-05 ***
## Sepal.Width 0.30426 0.05424 5.609 1.96e-07 ***
## Speciesversicolor 1.25726 0.05187 24.237 < 2e-16 ***
## Speciesvirginica 1.87574 0.04808 39.014 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1799 on 96 degrees of freedom
## Multiple R-squared: 0.947, Adjusted R-squared: 0.9454
## F-statistic: 572.3 on 3 and 96 DF, p-value: < 2.2e-16
anova(out0, out1)
## Test different simple slopes by changing reference group:
iris %<>% mutate(Species = relevel(Species, ref = "virginica"))
out1.1 <- lm(Petal.Width ~ Sepal.Width * Species, data = iris)

iris %<>% mutate(Species = relevel(Species, ref = "versicolor"))
out1.2 <- lm(Petal.Width ~ Sepal.Width * Species, data = iris)
summary(out1)
##
## Call:
## lm(formula = Petal.Width ~ Sepal.Width * Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40713 -0.06840 -0.01969 0.09589 0.37311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.04539 0.25635 0.177 0.85983
## Sepal.Width 0.05447 0.07597 0.717 0.47517
## Speciesversicolor -0.25807 0.36640 -0.704 0.48295
## Speciesvirginica 0.63112 0.38406 1.643 0.10366
## Sepal.Width:Speciesversicolor 0.48994 0.11954 4.099 8.81e-05 ***
## Sepal.Width:Speciesvirginica 0.38504 0.12138 3.172 0.00204 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1653 on 94 degrees of freedom
## Multiple R-squared: 0.9562, Adjusted R-squared: 0.9539
## F-statistic: 410.3 on 5 and 94 DF, p-value: < 2.2e-16
summary(out1.1)
##
## Call:
## lm(formula = Petal.Width ~ Sepal.Width * Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40713 -0.06840 -0.01969 0.09589 0.37311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.67652 0.28599 2.366 0.02006 *
## Sepal.Width 0.43951 0.09467 4.643 1.11e-05 ***
## Speciessetosa -0.63112 0.38406 -1.643 0.10366
## Speciesversicolor -0.88920 0.38771 -2.293 0.02405 *
## Sepal.Width:Speciessetosa -0.38504 0.12138 -3.172 0.00204 **
## Sepal.Width:Speciesversicolor 0.10490 0.13221 0.793 0.42952
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1653 on 94 degrees of freedom
## Multiple R-squared: 0.9562, Adjusted R-squared: 0.9539
## F-statistic: 410.3 on 5 and 94 DF, p-value: < 2.2e-16
summary(out1.2)
##
## Call:
## lm(formula = Petal.Width ~ Sepal.Width * Species, data = iris)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.40713 -0.06840 -0.01969 0.09589 0.37311
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.21268 0.26178 -0.812 0.419
## Sepal.Width 0.54441 0.09229 5.899 5.74e-08 ***
## Speciesvirginica 0.88920 0.38771 2.293 0.024 *
## Speciessetosa 0.25807 0.36640 0.704 0.483
## Sepal.Width:Speciesvirginica -0.10490 0.13221 -0.793 0.430
## Sepal.Width:Speciessetosa -0.48994 0.11954 -4.099 8.81e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1653 on 94 degrees of freedom
## Multiple R-squared: 0.9562, Adjusted R-squared: 0.9539
## F-statistic: 410.3 on 5 and 94 DF, p-value: < 2.2e-16
## Do the same test using 'rockchalk':
plotOut1 <- plotSlopes(model = out1,
                       plotx = "Sepal.Width",
                       modx = "Species",
                       plotPoints = FALSE)

testOut1 <- testSlopes(plotOut1)
## These are the straight-line "simple slopes" of the variable Sepal.Width
## for the selected moderator values.
## "Species" slope Std. Error t value
## setosa Sepal.Width 0.0544667 0.07596798 0.7169693
## versicolor Sepal.Width:Speciesversicolor 0.5444098 0.09229194 5.8987799
## virginica Sepal.Width:Speciesvirginica 0.4395070 0.09466868 4.6425809
## Pr(>|t|)
## setosa 4.751708e-01
## versicolor 5.735785e-08
## virginica 1.114651e-05
testOut1$hypotests
End of Demonstration