# 46 Report

Structure

• Exploratory analysis

• plots
• preliminary results
• interesting structure/features in the data
• outliers
• Model

• Assumptions
• Why this model/ How is this model the best one?
• Consideration: interactions, collinearity, dependence
• Model Fit

• How well does it fit?

• Are the model assumptions met?

• Residual analysis
• Inference/ Prediction

• Are there different way to support your inference?
• Conclusion

• Recommendation

• Limitation of the analysis

• How to correct those in the future

This chapter is based on the jtools package. More information can be found here.

## 46.1 One summary table

Packages for reporting:

Summary Statistics Table:

Regression Table

library(jtools)
data(movies)
fit <- lm(metascore ~ budget + us_gross + year, data = movies)
summ(fit)
 Observations 831 (10 missing obs. deleted) Dependent variable metascore Type OLS linear regression
 F(3,827) 26.23 R² 0.09 Adj. R² 0.08
Est. S.E. t val. p
(Intercept) 52.06 139.67 0.37 0.71
budget -0.00 0.00 -5.89 0.00
us_gross 0.00 0.00 7.61 0.00
year 0.01 0.07 0.08 0.94
Standard errors: OLS
summ(
fit,
scale = TRUE,
vifs = TRUE,
part.corr = TRUE,
confint = TRUE,
pvals = FALSE
) # notice that scale here is TRUE
 Observations 831 (10 missing obs. deleted) Dependent variable metascore Type OLS linear regression
 F(3,827) 26.23 R² 0.09 Adj. R² 0.08
Est. 2.5% 97.5% t val. VIF partial.r part.r
(Intercept) 63.01 61.91 64.11 112.23 NA NA NA
budget -3.78 -5.05 -2.52 -5.89 1.31 -0.20 -0.20
us_gross 5.28 3.92 6.64 7.61 1.52 0.26 0.25
year 0.05 -1.18 1.28 0.08 1.24 0.00 0.00
Standard errors: OLS; Continuous predictors are mean-centered and scaled by 1 s.d. The outcome variable remains in its original units.

#obtain clsuter-robust SE
data("PetersenCL", package = "sandwich")
fit2 <- lm(y ~ x, data = PetersenCL)
summ(fit2, robust = "HC3", cluster = "firm") 
 Observations 5000 Dependent variable y Type OLS linear regression
 F(1,4998) 1310.74 R² 0.21 Adj. R² 0.21
Est. S.E. t val. p
(Intercept) 0.03 0.07 0.44 0.66
x 1.03 0.05 20.36 0.00
Standard errors: Cluster-robust, type = HC3

Model to Equation

# install.packages("equatiomatic") # not available for R 4.2
fit <- lm(metascore ~ budget + us_gross + year, data = movies)
# show the theoretical model
equatiomatic::extract_eq(fit)
# display the actual coefficients
equatiomatic::extract_eq(fit, use_coefs = TRUE)

## 46.2 Model Comparison

fit <- lm(metascore ~ log(budget), data = movies)
fit_b <- lm(metascore ~ log(budget) + log(us_gross), data = movies)
fit_c <- lm(metascore ~ log(budget) + log(us_gross) + runtime, data = movies)
coef_names <- c("Budget" = "log(budget)", "US Gross" = "log(us_gross)",
"Runtime (Hours)" = "runtime", "Constant" = "(Intercept)")
export_summs(fit, fit_b, fit_c, robust = "HC3", coefs = coef_names)
Table 38.1:
Model 1 Model 2 Model 3
Budget -2.43 *** -5.16 *** -6.70 ***
(0.44)    (0.62)    (0.67)
US Gross         3.96 *** 3.85 ***
(0.51)    (0.48)
Runtime (Hours)                 14.29 ***
(1.63)
Constant 105.29 *** 81.84 *** 83.35 ***
(7.65)    (8.66)    (8.82)
N 831        831        831
R2 0.03     0.09     0.17
Standard errors are heteroskedasticity robust. *** p < 0.001; ** p < 0.01; * p < 0.05.

Another package is modelsummary

library(modelsummary)
lm_mod <- lm(mpg ~ wt + hp + cyl, mtcars)
msummary(lm_mod, vcov = c("iid","robust","HC4"))
(1)   (2)   (3)
(Intercept) 38.752 38.752 38.752
(1.787) (2.286) (2.177)
wt −3.167 −3.167 −3.167
(0.741) (0.833) (0.819)
hp −0.018 −0.018 −0.018
(0.012) (0.010) (0.013)
cyl −0.942 −0.942 −0.942
(0.551) (0.573) (0.572)
Num.Obs. 32 32 32
R2 0.843 0.843 0.843
AIC 155.5 155.5 155.5
BIC 162.8 162.8 162.8
Log.Lik. −72.738 −72.738 −72.738
F 50.171 31.065 32.623
RMSE 2.35 2.35 2.35
Std.Errors IID HC3 HC4
modelplot(lm_mod, vcov = c("iid","robust","HC4"))

Another package is stargazer

library("stargazer")
stargazer(attitude)
#>
#> % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
#> % Date and time: Wed, Jul 10, 2024 - 6:20:44 PM
#> \begin{table}[!htbp] \centering
#>   \caption{}
#>   \label{}
#> \begin{tabular}{@{\extracolsep{5pt}}lccccc}
#> \\[-1.8ex]\hline
#> \hline \\[-1.8ex]
#> Statistic & \multicolumn{1}{c}{N} & \multicolumn{1}{c}{Mean} & \multicolumn{1}{c}{St. Dev.} & \multicolumn{1}{c}{Min} & \multicolumn{1}{c}{Max} \\
#> \hline \\[-1.8ex]
#> rating & 30 & 64.633 & 12.173 & 40 & 85 \\
#> complaints & 30 & 66.600 & 13.315 & 37 & 90 \\
#> privileges & 30 & 53.133 & 12.235 & 30 & 83 \\
#> learning & 30 & 56.367 & 11.737 & 34 & 75 \\
#> raises & 30 & 64.633 & 10.397 & 43 & 88 \\
#> critical & 30 & 74.767 & 9.895 & 49 & 92 \\
#> advance & 30 & 42.933 & 10.289 & 25 & 72 \\
#> \hline \\[-1.8ex]
#> \end{tabular}
#> \end{table}
## 2 OLS models
linear.1 <-
lm(rating ~ complaints + privileges + learning + raises + critical,
data = attitude)
linear.2 <-
lm(rating ~ complaints + privileges + learning, data = attitude)
## create an indicator dependent variable, and run a probit model
attitude$high.rating <- (attitude$rating > 70)

probit.model <-
glm(
high.rating ~ learning + critical + advance,
data = attitude,
)
stargazer(linear.1,
linear.2,
probit.model,
title = "Results",
align = TRUE)
#>
#> % Table created by stargazer v.5.2.3 by Marek Hlavac, Social Policy Institute. E-mail: marek.hlavac at gmail.com
#> % Date and time: Wed, Jul 10, 2024 - 6:20:44 PM
#> % Requires LaTeX packages: dcolumn
#> \begin{table}[!htbp] \centering
#>   \caption{Results}
#>   \label{}
#> \begin{tabular}{@{\extracolsep{5pt}}lD{.}{.}{-3} D{.}{.}{-3} D{.}{.}{-3} }
#> \\[-1.8ex]\hline
#> \hline \\[-1.8ex]
#>  & \multicolumn{3}{c}{\textit{Dependent variable:}} \\
#> \cline{2-4}
#> \\[-1.8ex] & \multicolumn{2}{c}{rating} & \multicolumn{1}{c}{high.rating} \\
#> \\[-1.8ex] & \multicolumn{2}{c}{\textit{OLS}} & \multicolumn{1}{c}{\textit{probit}} \\
#> \\[-1.8ex] & \multicolumn{1}{c}{(1)} & \multicolumn{1}{c}{(2)} & \multicolumn{1}{c}{(3)}\\
#> \hline \\[-1.8ex]
#>  complaints & 0.692^{***} & 0.682^{***} &  \\
#>   & (0.149) & (0.129) &  \\
#>   & & & \\
#>  privileges & -0.104 & -0.103 &  \\
#>   & (0.135) & (0.129) &  \\
#>   & & & \\
#>  learning & 0.249 & 0.238^{*} & 0.164^{***} \\
#>   & (0.160) & (0.139) & (0.053) \\
#>   & & & \\
#>  raises & -0.033 &  &  \\
#>   & (0.202) &  &  \\
#>   & & & \\
#>  critical & 0.015 &  & -0.001 \\
#>   & (0.147) &  & (0.044) \\
#>   & & & \\
#>  advance &  &  & -0.062 \\
#>   &  &  & (0.042) \\
#>   & & & \\
#>  Constant & 11.011 & 11.258 & -7.476^{**} \\
#>   & (11.704) & (7.318) & (3.570) \\
#>   & & & \\
#> \hline \\[-1.8ex]
#> Observations & \multicolumn{1}{c}{30} & \multicolumn{1}{c}{30} & \multicolumn{1}{c}{30} \\
#> R$^{2}$ & \multicolumn{1}{c}{0.715} & \multicolumn{1}{c}{0.715} &  \\
#> Adjusted R$^{2}$ & \multicolumn{1}{c}{0.656} & \multicolumn{1}{c}{0.682} &  \\
#> Log Likelihood &  &  & \multicolumn{1}{c}{-9.087} \\
#> Akaike Inf. Crit. &  &  & \multicolumn{1}{c}{26.175} \\
#> Residual Std. Error & \multicolumn{1}{c}{7.139 (df = 24)} & \multicolumn{1}{c}{6.863 (df = 26)} &  \\
#> F Statistic & \multicolumn{1}{c}{12.063$^{***}$ (df = 5; 24)} & \multicolumn{1}{c}{21.743$^{***}$ (df = 3; 26)} &  \\
#> \hline
#> \hline \\[-1.8ex]
#> \textit{Note:}  & \multicolumn{3}{r}{$^{*}$p$<$0.1; $^{**}$p$<$0.05; $^{***}$p$<$0.01} \\
#> \end{tabular}
#> \end{table}
# Latex
stargazer(
linear.1,
linear.2,
probit.model,
title = "Regression Results",
align = TRUE,
dep.var.labels = c("Overall Rating", "High Rating"),
covariate.labels = c(
"Handling of Complaints",
"No Special Privileges",
"Opportunity to Learn",
"Performance-Based Raises",
"Too Critical",
),
omit.stat = c("LL", "ser", "f"),
no.space = TRUE
)
# ASCII text output
stargazer(
linear.1,
linear.2,
type = "text",
title = "Regression Results",
dep.var.labels = c("Overall Rating", "High Rating"),
covariate.labels = c(
"Handling of Complaints",
"No Special Privileges",
"Opportunity to Learn",
"Performance-Based Raises",
"Too Critical",
),
omit.stat = c("LL", "ser", "f"),
ci = TRUE,
ci.level = 0.90,
single.row = TRUE
)
#>
#> Regression Results
#> ========================================================================
#>                                        Dependent variable:
#>                          -----------------------------------------------
#>                                          Overall Rating
#>                                    (1)                     (2)
#> ------------------------------------------------------------------------
#> Handling of Complaints   0.692*** (0.447, 0.937) 0.682*** (0.470, 0.894)
#> No Special Privileges    -0.104 (-0.325, 0.118)  -0.103 (-0.316, 0.109)
#> Opportunity to Learn      0.249 (-0.013, 0.512)   0.238* (0.009, 0.467)
#> Performance-Based Raises -0.033 (-0.366, 0.299)
#> Too Critical              0.015 (-0.227, 0.258)
#> Advancement              11.011 (-8.240, 30.262) 11.258 (-0.779, 23.296)
#> ------------------------------------------------------------------------
#> Observations                       30                      30
#> R2                                0.715                   0.715
#> ========================================================================
#> Note:                                        *p<0.1; **p<0.05; ***p<0.01
stargazer(
linear.1,
linear.2,
probit.model,
title = "Regression Results",
align = TRUE,
dep.var.labels = c("Overall Rating", "High Rating"),
covariate.labels = c(
"Handling of Complaints",
"No Special Privileges",
"Opportunity to Learn",
"Performance-Based Raises",
"Too Critical",
),
omit.stat = c("LL", "ser", "f"),
no.space = TRUE
)

Correlation Table

correlation.matrix <-
cor(attitude[, c("rating", "complaints", "privileges")])
stargazer(correlation.matrix, title = "Correlation Matrix")

## 46.3 Changes in an estimate

coef_names <- coef_names[1:3] # Dropping intercept for plots
plot_summs(fit, fit_b, fit_c, robust = "HC3", coefs = coef_names)
plot_summs(
fit_c,
robust = "HC3",
coefs = coef_names,
plot.distributions = TRUE
)

## 46.4 Standard Errors

sandwich vignette

Type Applicable Usage Reference
const Assume constant variances
HC HC0 vcovCL

Heterogeneity

White’s estimator

All other heterogeneity SE methods are derivatives of this.

HC1 vcovCL

Uses a degrees of freedom-based correction

When the number of clusters is small, HC2 and HC3 are better

HC2 vcovCL

Better with the linear model, but still applicable for Generalized Linear Models

Needs a hat (weighted) matrix

HC3 vcovCL

Better with the linear model, but still applicable for Generalized Linear Models

Needs a hat (weighted) matrix

HC4 vcovHC
HC4m vcovHC
HC5 vcovHC
data(cars)
model <- lm(speed ~ dist, data = cars)
summary(model)
#>
#> Call:
#> lm(formula = speed ~ dist, data = cars)
#>
#> Residuals:
#>     Min      1Q  Median      3Q     Max
#> -7.5293 -2.1550  0.3615  2.4377  6.4179
#>
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)
#> (Intercept)  8.28391    0.87438   9.474 1.44e-12 ***
#> dist         0.16557    0.01749   9.464 1.49e-12 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 3.156 on 48 degrees of freedom
#> Multiple R-squared:  0.6511, Adjusted R-squared:  0.6438
#> F-statistic: 89.57 on 1 and 48 DF,  p-value: 1.49e-12
lmtest::coeftest(model, vcov. = sandwich::vcovHC(model, type = "HC1"))
#>
#> t test of coefficients:
#>
#>             Estimate Std. Error t value  Pr(>|t|)
#> (Intercept) 8.283906   0.891860  9.2883 2.682e-12 ***
#> dist        0.165568   0.019402  8.5335 3.482e-11 ***
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 46.5 Coefficient Uncertainty and Distribution

The ggdist allows us to visualize uncertainty under both frequentist and Bayesian frameworks

library(ggdist)

## 46.6 Descriptive Tables

Export APA theme

data("mtcars")

library(flextable)
theme_apa(flextable(mtcars[1:5,1:5]))

Export to Latex

print(xtable::xtable(mtcars, type = "latex"),
file = file.path(getwd(), "output", "mtcars_xtable.tex"))

# American Economic Review style
stargazer::stargazer(
mtcars,
title = "Testing",
style = "aer",
out = file.path(getwd(), "output", "mtcars_stargazer.tex")
)

# other styles include
# Quarterly Journal of Economics

However, the above codes do not play well with notes. Hence, I create my own custom code that follows the AMA guidelines

ama_tbl <- function(data, caption, label, note, output_path) {
library(tidyverse)
library(xtable)
# Function to determine column alignment
get_column_alignment <- function(data) {
alignment <- c("l", "l")

# Check each column
for (col in seq_len(ncol(data))[-1]) {
if (is.numeric(data[[col]])) {
alignment <- c(alignment, "r")  # Right alignment for numbers
} else {
alignment <- c(alignment, "c")  # Center alignment for other data
}
}

return(alignment)
}

data %>%
# bold + left align first column
rename_with(~paste("\\multicolumn{1}{l}{\\textbf{", ., "}}"), 1) %>%
# bold + center align all other columns
colnames<-(ifelse(colnames(.) != colnames(.)[1],
paste("\\multicolumn{1}{c}{\\textbf{", colnames(.), "}}"),
colnames(.))) %>%

xtable(caption = caption,
label = label,
align = get_column_alignment(data),
auto = TRUE) %>%
print(
include.rownames = FALSE,
caption.placement = "top",

hline.after=c(-1, 0),

# p{0.9\linewidth} sets the width of the column to 90% of the line width, and the @{} removes any extra padding around the cell.

add.to.row = list(pos = list(nrow(data)), # Add at the bottom of the table
command = c(paste0("\\hline \n \\multicolumn{",ncol(data), "}{l} {", "\n \\begin{tabular}{@{}p{0.9\\linewidth}@{}} \n","Note: ", note, "\n \\end{tabular}  } \n"))), # Add your note here

# make sure your heading is untouched (because you manually change it above)
sanitize.colnames.function = identity,

# place a the top of the page
table.placement = "h",

file = output_path
)
}
ama_tbl(
mtcars,
caption     = "This is caption",
label       = "tab:this_is_label",
note        = "this is note",
output_path = file.path(getwd(), "output", "mtcars_custom_ama.tex")
)

## 46.7 Visualizations and Plots

You can customize your plots based on your preferred journals. Here, I am creating a custom setting for the American Marketing Association.

library(ggplot2)

# check available fonts
# windowsFonts()

# for Times New Roman
# names(windowsFonts()[windowsFonts()=="TT Times New Roman"])
# Making a theme
amatheme = theme_bw(base_size = 14, base_family = "serif") + # This is Time New Roman

theme(
# remove major gridlines
panel.grid.major   = element_blank(),

# remove minor gridlines
panel.grid.minor   = element_blank(),

# remove panel border
panel.border       = element_blank(),

line               = element_line(),

# change font
text               = element_text(),

# if you want to remove legend title
# legend.title     = element_blank(),

legend.title       = element_text(size = rel(0.6), face = "bold"),

# change font size of legend
legend.text        = element_text(size = rel(0.6)),

legend.background  = element_rect(color = "black"),

# legend.margin    = margin(t = 5, l = 5, r = 5, b = 5),
# legend.key       = element_rect(color = NA, fill = NA),

# change font size of main title
plot.title         = element_text(
size           = rel(1.2),
face           = "bold",
hjust          = 0.5,
margin         = margin(b = 15)
),

plot.margin        = unit(c(1, 1, 1, 1), "cm"),

# add black line along axes
axis.line          = element_line(colour = "black", linewidth = .8),

axis.ticks         = element_line(),

# axis title
axis.title.x       = element_text(size = rel(1.2), face = "bold"),
axis.title.y       = element_text(size = rel(1.2), face = "bold"),

# axis text size
axis.text.y        = element_text(size = rel(1)),
axis.text.x        = element_text(size = rel(1))
)

Example

library(tidyverse)
library(ggsci)
data("mtcars")
yourplot <- mtcars %>%
select(mpg, cyl, gear) %>%
ggplot(., aes(x = mpg, y = cyl, fill = gear)) +
geom_point() +
labs(title="Some Plot")

yourplot +
amatheme +
# choose different color theme
scale_color_npg() 

yourplot +
amatheme +
scale_color_continuous()

Other pre-specified themes

library(ggthemes)

# Stata theme
yourplot +
theme_stata()

# The economist theme
yourplot +
theme_economist()

yourplot +
theme_economist_white()

# Wall street journal theme
yourplot +
theme_wsj()

# APA theme
yourplot +
jtools::theme_apa(
legend.font.size = 24,
x.font.size = 20,
y.font.size = 20
)