12.10 Lab: R Code
12.10.1 Clustered Standard Errors
Further below we use clustered standard errors for some estimations (clustering according to ethnic group and district). Sometimes this is slightly more complicated in R than in Stata (see Arai 2015; Esarey 2016). Here we use two functions based on Arai (2015) but in future you may rely on the clusterSEs
package by Esarey and Menger (2016) (see ?clusterSEs
for instructions on how to use it).
# One-way cluster function (Arai 2015)
clx <-
function(fm, dfcw, cluster){
library(sandwich)
library(lmtest)
M <- length(unique(cluster))
N <- length(cluster)
dfc <- (M/(M-1))*((N-1)/(N-fm$rank))
u <- apply(estfun(fm),2,
function(x) tapply(x, cluster, sum))
vcovCL <- dfc*sandwich(fm, meat=crossprod(u)/N)*dfcw
coeftest(fm, vcovCL) }
# Two-way cluster function (Arai 2015)
mclx <-
function(fm, dfcw, cluster1, cluster2){
library(sandwich)
library(lmtest)
cluster12 = paste(cluster1,cluster2, sep="")
M1 <- length(unique(cluster1))
M2 <- length(unique(cluster2))
M12 <- length(unique(cluster12))
N <- length(cluster1)
K <- fm$rank
dfc1 <- (M1/(M1-1))*((N-1)/(N-K))
dfc2 <- (M2/(M2-1))*((N-1)/(N-K))
dfc12 <- (M12/(M12-1))*((N-1)/(N-K))
u1 <- apply(estfun(fm), 2,
function(x) tapply(x, cluster1, sum))
u2 <- apply(estfun(fm), 2,
function(x) tapply(x, cluster2, sum))
u12 <- apply(estfun(fm), 2,
function(x) tapply(x, cluster12, sum))
vc1 <- dfc1*sandwich(fm, meat=crossprod(u1)/N )
vc2 <- dfc2*sandwich(fm, meat=crossprod(u2)/N )
vc12 <- dfc12*sandwich(fm, meat=crossprod(u12)/N)
vcovMCL <- (vc1 + vc2 - vc12)*dfcw
coeftest(fm, vcovMCL)}
12.10.2 Load data
# d <- read_dta("www/iv-Nunn_Wantchekon_AER_2011.dta")
d <- read_dta("www/data/iv-Nunn_Wantchekon_AER_2011.dta")
#d <- read_dta("https://docs.google.com/uc?id=0Bwer5wQoreiuNkc1cmg0TjFFR1E&export=download")
12.10.3 Summary stats & graphs
#d <- read_dta("https://docs.google.com/uc?id=0Bwer5wQoreiuNkc1cmg0TjFFR1E&export=download")
stargazer(data.frame(d),
summary.stat = c("min", "max", "mean", "sd"),
type = "html")
Statistic | Min | Max | Mean | St. Dev. |
location_id | 1 | 2,891 | 1,336.274 | 840.300 |
trust_relatives | 0.000 | 3.000 | 2.189 | 0.958 |
trust_neighbors | 0.000 | 3.000 | 1.738 | 1.010 |
intra_group_trust | 0.000 | 3.000 | 1.678 | 1.004 |
inter_group_trust | 0.000 | 3.000 | 1.363 | 0.998 |
trust_local_council | 0.000 | 3.000 | 1.665 | 1.103 |
ln_export_area | 0.000 | 3.656 | 0.535 | 0.944 |
export_area | 0.000 | 37.707 | 2.655 | 7.653 |
export_pop | 0.000 | 4.464 | 0.113 | 0.224 |
ln_export_pop | 0.000 | 1.698 | 0.092 | 0.166 |
age | 18.000 | 130.000 | 36.425 | 14.692 |
age2 | 324.000 | 16,900.000 | 1,542.632 | 1,313.195 |
male | 0 | 1 | 0.500 | 0.500 |
urban_dum | 0 | 1 | 0.366 | 0.482 |
occupation | 0.000 | 995.000 | 15.785 | 76.183 |
religion | 0.000 | 995.000 | 28.531 | 106.188 |
living_conditions | 1.000 | 5.000 | 2.556 | 1.205 |
education | 0.000 | 9.000 | 3.074 | 2.010 |
near_dist | 0.033 | 1,459.088 | 432.716 | 337.115 |
distsea | 1.250 | 1,252.683 | 439.892 | 311.472 |
loc_ln_export_area | 0.000 | 3.739 | 0.457 | 0.878 |
local_council_performance | 1.000 | 4.000 | 2.512 | 0.927 |
council_listen | 0.000 | 3.000 | 1.176 | 1.021 |
corrupt_local_council | 0.000 | 3.000 | 1.279 | 0.902 |
school_present | 0.000 | 1.000 | 0.784 | 0.412 |
electricity_present | 0.000 | 1.000 | 0.527 | 0.499 |
piped_water_present | 0.000 | 1.000 | 0.487 | 0.500 |
sewage_present | 0.000 | 1.000 | 0.227 | 0.419 |
health_clinic_present | 0.000 | 1.000 | 0.471 | 0.499 |
district_ethnic_frac | 0.000 | 0.906 | 0.405 | 0.297 |
frac_ethnicity_in_district | 0.003 | 1.000 | 0.599 | 0.349 |
townvill_nonethnic_mean_exports | 0.000 | 3.656 | 0.386 | 0.708 |
district_nonethnic_mean_exports | 0.000 | 3.656 | 0.365 | 0.649 |
region_nonethnic_mean_exports | 0.000 | 3.656 | 0.426 | 0.671 |
country_nonethnic_mean_exports | 0.000 | 2.885 | 0.469 | 0.642 |
murdock_centr_dist_coast | 1.250 | 1,252.683 | 439.892 | 311.472 |
centroid_lat | -32.739 | 27.817 | -6.867 | 14.566 |
centroid_long | -16.409 | 49.246 | 21.581 | 17.072 |
explorer_contact | 0.000 | 1.000 | 0.439 | 0.496 |
railway_contact | 0.000 | 1.000 | 0.434 | 0.496 |
dist_Saharan_node | 25.420 | 5,221.348 | 2,573.802 | 1,635.097 |
dist_Saharan_line | 113.862 | 5,221.348 | 2,578.978 | 1,627.699 |
malaria_ecology | 0.000 | 34.640 | 11.506 | 9.745 |
v30 | 1.000 | 8.000 | 6.115 | 1.247 |
v33 | 1.000 | 4.000 | 2.918 | 0.916 |
fishing | 2.500 | 60.000 | 8.741 | 7.292 |
exports | 0.000 | 854.958 | 93.169 | 205.281 |
ln_exports | 0.000 | 6.752 | 1.950 | 2.314 |
total_missions_area | 0.000 | 0.003 | 0.0002 | 0.0003 |
ln_init_pop_density | -4.274 | 5.870 | 2.547 | 1.310 |
cities_1400_dum | 0 | 1 | 0.125 | 0.331 |
[1] 21822
Let’s visualize two outcome variables (trust in neighbours and in the local council), the treatment slave exports and the instrument distance to the see.
p1 <- plot_ly(x = d$trust_neighbors, type = "histogram",
name = "Trust neighbours") %>%
layout(xaxis = list(title = "Trust neighbours"),
yaxis = list(title = "Frequency"))
p2 <- plot_ly(x = d$trust_local_council, type = "histogram",
name = "Trust Local Council") %>%
layout(xaxis = list(title = "Trust neighbours"),
yaxis = list(title = "Frequency"))
p3 <- plot_ly(x = d$exports, type = "histogram",
name = "Slave exports") %>%
layout(xaxis = list(title = "Slave exports"),
yaxis = list(title = "Frequency"))
p4 <- plot_ly(x = d$distsea, type = "histogram",
name = "Distance to sea") %>%
layout(xaxis = list(title = "Distance to sea"),
yaxis = list(title = "Frequency"))
subplot(p1,p2,p3,p4, nrows=2, shareX = FALSE, shareY = FALSE,
titleX = T, titleY = T, margin = c(0.1,0.1,0.1,0.1))