13.11 Lab: R Code

13.11.1 Run the following functions

13.11.2 Load data & install packages

  # set.seed(48104) # ?set.seed 
  
# Load the replication data (Stata .dta) and keep only the analysis variables.
# Alternative using a local copy of the file:
#   data_rdd <- foreign::read.dta("www/rdd-fouirnaies_hall_financial_incumbency_advantage.dta")
#
# dplyr::select() renames and subsets in one step (new_name = old_name):
# the score gets an x_ prefix, the outcome a y_ prefix and covariates a cov_
# prefix; state, dist and year keep their original names.
data_rdd <- dplyr::select(
  foreign::read.dta(
    "https://docs.google.com/uc?id=1xWHmST5FYcfLxe9V7Hwqd2LIy_A3ninG&export=download"
  ),
  x_score_victorymargin = rv,
  y_donationshare = dv_money,
  cov_statelevel = statelevel,
  cov_total_race_money = total_race_money,
  cov_total_votes = total_votes,
  cov_dem_inc = dem_inc,
  cov_rep_inc = rep_inc,
  cov_total_group_money = total_group_money,
  state,
  dist,
  year
)

13.11.3 Explore data and subsetting

# names(data_rdd) # Show variables
# View(data_rdd)

# Tabulate the state-level indicator: how many state-level elections are there?
table(data_rdd[["cov_statelevel"]])

0	1
6533	32670

  # Keep only the rows flagged as state-level (cov_statelevel == 1), then sort
  # by state, district, year and score
  data_rdd <- data_rdd %>%
    filter(cov_statelevel == 1) %>%
    arrange(state, dist, year, x_score_victorymargin)

13.11.4 Summary statistics + graphs

Below we inspect the variables. Q: What do the min. and max. values of the variable x_score_victorymargin mean substantively? And what about those of y_donationshare?

# Summarize the score variable (also called the running variable): victory margin
summary(data_rdd[["x_score_victorymargin"]])

Min.	1st Qu.	Median	Mean	3rd Qu.	Max.
-50	-16.05517	0.134985	1.357636	20.07864	50

# Summarize the outcome variable: donation share
summary(data_rdd[["y_donationshare"]])

Min.	1st Qu.	Median	Mean	3rd Qu.	Max.	NA’s
0	6.540446	50.60789	50.78094	96.80258	100	5467

  # Share of donations flowing to the incumbent’s party

# Plot outcome variable vs. score variable.
# ggplot() + geom_point() replaces qplot(), which is deprecated as of
# ggplot2 3.4.0; for an x/y pair qplot() drew the same point layer.
p <- ggplot(data_rdd, aes(x = x_score_victorymargin, y = y_donationshare)) +
  geom_point() +
  xlab("Democratic margin of victory at t") +
  ylab("Democratic share of contributions at t+1")
p

We can do the same with plotly.

# Flag treatment status by which side of the cutoff (0) the score falls on.
# The score is the Democratic margin of victory, so units at or above the
# cutoff are the treated ones (rdrobust likewise treats x >= c as treated);
# the previous labels were inverted.
data_rdd$colour <- ifelse(
  data_rdd$x_score_victorymargin >= 0,
  "Treated",
  "Control"
)

# Interactive scatter plot of outcome vs. score, coloured by treatment status.
# Columns are mapped with formulas (~) so they are looked up in `data`.
plot_ly(
  data = data_rdd,
  type = "scatter",
  mode = "markers",
  x = ~x_score_victorymargin,
  y = ~y_donationshare,
  color = ~colour,
  marker = list(size = 3)
) %>%
  layout(
    xaxis = list(title = "Democratic margin of victory at t", dtick = 25),
    yaxis = list(title = "Democratic share of contributions at t+1")
  )

13.11.5 Continuity-based analysis

13.11.5.1 Density test

First, we can check the density of the running/score variable as a falsification test. If the density (the number of observations) just below the cutoff is considerably different from the density just above it, this would indicate that individuals/politicians/parties in districts are able to manipulate their scores.

Q: What would you say? Does it look as if the density is discontinuous at the cutoff?

# Density (manipulation) test on the score, using jackknife standard errors
summary(
  rddensity(
    X = data_rdd[["x_score_victorymargin"]],
    vce = "jackknife"
  )
)

## 
## RD Manipulation Test using local polynomial density estimation.
## 
## Number of obs =       32670
## Model =               unrestricted
## Kernel =              triangular
## BW method =           comb
## VCE method =          jackknife
## 
## Cutoff c = 0          Left of c           Right of c          
## Number of obs         16281               16389               
## Eff. Number of obs    2770                3176                
## Order est. (p)        2                   2                   
## Order bias (q)        3                   3                   
## BW est. (h)           5.458               6.935               
## 
## Method                T                   P > |T|             
## Robust                -0.9165             0.3594

# Histogram of the score around the cutoff (visual companion to the density test).
# - The fill is mapped from the column directly (no data_rdd$ inside aes()).
# - scale_fill_manual() is used because the mapped aesthetic is `fill`; the
#   previous scale_colour_manual() had no effect on the bars.
# - boundary = 0 aligns bin edges with the cutoff so no bin mixes observations
#   from both sides.
# - The x-axis label names the plotted variable (margin of victory).
p <- ggplot(
  data_rdd,
  aes(x = x_score_victorymargin, fill = factor(x_score_victorymargin > 0))
) +
  geom_histogram(binwidth = 0.5, boundary = 0) +
  xlim(-25, 25) +
  geom_vline(xintercept = 0) +
  xlab("Democratic margin of victory at t") +
  scale_fill_manual(values = c("red", "blue")) +
  theme_bw() +
  theme(legend.position = "none")
p

13.11.5.2 Covariates as outcomes

Next, Skovron and Titiunik (2015, 30) investigate the effect of the treatment on five predetermined covariates (see Table 1 below) but find no treatment effect for any of them. “Near the cutoff, treated units are similar to control units. The idea is simply that, if units lack the ability to precisely manipulate the value of the score they receive, units just above and just below the cutoff should be similar in all those characteristics that could not have been affected by the treatment” (Skovron and Titiunik 2015, 28).

Skovron & Titiunik 2015, Tab. 1, p.31

We can reproduce and graph these results (for Total money and Total money from groups) using the code below.

# Covariate as outcome: total money in the race
summary(
  rdrobust(
    y = data_rdd$cov_total_race_money,
    x = data_rdd$x_score_victorymargin,
    all = TRUE
  )
)

## [1] "Mass points detected in the running variable."
## Call: rdrobust
## 
## Number of Obs.                32670
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.               16281       16389
## Eff. Number of Obs.           5798        4920
## Order est. (p)                   1           1
## Order bias  (q)                  2           2
## BW est. (h)                 11.114      11.114
## BW bias (b)                 17.304      17.304
## rho (h/b)                    0.642       0.642
## Unique Obs.                  11187       10946
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional -7945.069 23075.255    -0.344     0.731[-53171.737 , 37281.599] 
## Bias-Corrected-14849.630 23075.255    -0.644     0.520[-60076.298 , 30377.038] 
##         Robust-14849.630 27623.054    -0.538     0.591[-68989.820 , 39290.561] 
## =============================================================================

  # RD plot of total race money against the score, with the x-axis limited to
  # (-10, 10) and the y-axis to (0, 400000).
  # The label arguments are spelled out in full (x.label / y.label) instead of
  # relying on partial argument matching of x.lab / y.lab.
  rdplot(
    y = data_rdd$cov_total_race_money,
    x = data_rdd$x_score_victorymargin,
    x.lim = c(-10, 10),
    y.lim = c(0, 400000),
    x.label = "Democratic margin of victory at t",
    y.label = "Total money in race at t+1",
    title = ""
  )

# Covariate as outcome: total votes in the race
summary(
  rdrobust(
    y = data_rdd$cov_total_votes,
    x = data_rdd$x_score_victorymargin,
    all = TRUE
  )
)

## [1] "Mass points detected in the running variable."
## Call: rdrobust
## 
## Number of Obs.                32670
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.               16281       16389
## Eff. Number of Obs.           3077        2781
## Order est. (p)                   1           1
## Order bias  (q)                  2           2
## BW est. (h)                  6.024       6.024
## BW bias (b)                 10.430      10.430
## rho (h/b)                    0.578       0.578
## Unique Obs.                  11187       10946
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional -1629.150  1655.583    -0.984     0.325 [-4874.033 , 1615.732]  
## Bias-Corrected -2160.740  1655.583    -1.305     0.192 [-5405.623 , 1084.143]  
##         Robust -2160.740  1921.312    -1.125     0.261 [-5926.442 , 1604.962]  
## =============================================================================

  # RD plot of total votes against the score, with the x-axis limited to
  # (-10, 10) and the y-axis to (0, 50000).
  # The label arguments are spelled out in full (x.label / y.label) instead of
  # relying on partial argument matching of x.lab / y.lab.
  rdplot(
    y = data_rdd$cov_total_votes,
    x = data_rdd$x_score_victorymargin,
    x.lim = c(-10, 10),
    y.lim = c(0, 50000),
    x.label = "Democratic margin of victory at t",
    y.label = "Total votes in race at t+1",
    title = ""
  )

13.11.5.3 Estimation of RD effect

Below we finally estimate the RD effect. Table 2 displays the estimates.

Skovron & Titiunik 2015, Tab. 1, p.31

We can reproduce this as follows relying on the rdrobust function (See ?rdrobust). Before we can fit the local polynomial functions we have to choose a bandwidth, i.e. the width or region of the neighborhood around the cutoff for which we accept the data. This choice involves a bias-variance trade-off. A large bandwidth may result in a large bias if the unknown function differs considerably from the polynomial approximation. At the same time it will decrease the variance of the estimated coefficients because we have more observations (and the other way round). The rdrobust function chooses an optimal bandwidth and produces robust bias-corrected confidence intervals (read more on this in Skovron and Titiunik (2015) and related publications, ?rdrobust).

  # See ?rdrobust for the available options
  set.seed(48104)
  # RD effect for the main outcome variable:
  # local linear polynomial estimation with the optimal bandwidth, cutoff at 0
  fit <- rdrobust(
    y = data_rdd$y_donationshare,
    x = data_rdd$x_score_victorymargin,
    c = 0,
    all = TRUE
  )

## [1] "Mass points detected in the running variable."

  # Print the estimation results; the optimal bandwidth h is ~ 9.27
  # (reported as "BW est. (h)" in the summary, also available via fit$bws)
  summary(fit)

## Call: rdrobust
## 
## Number of Obs.                27203
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.               13292       13911
## Eff. Number of Obs.           3994        3435
## Order est. (p)                   1           1
## Order bias  (q)                  2           2
## BW est. (h)                  9.270       9.270
## BW bias (b)                 19.028      19.028
## rho (h/b)                    0.487       0.487
## Unique Obs.                   9006        9199
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    20.532     1.285    15.984     0.000    [18.014 , 23.049]    
## Bias-Corrected    19.971     1.285    15.547     0.000    [17.453 , 22.489]    
##         Robust    19.971     1.423    14.038     0.000    [17.183 , 22.760]    
## =============================================================================

You can also choose particular bandwidths by specifying the h argument. Fouirnaies/Hall use bandwidths of 1, 2, and 3 percentage points… (below just 1, but try playing around with the values)

# RD effect for the main outcome with a manually chosen bandwidth of h = 1
summary(
  rdrobust(
    y = data_rdd$y_donationshare,
    x = data_rdd$x_score_victorymargin,
    h = 1,
    all = TRUE
  )
)

## [1] "Mass points detected in the running variable."
## Call: rdrobust
## 
## Number of Obs.                27203
## BW type                      Manual
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.               13292       13911
## Eff. Number of Obs.            415         400
## Order est. (p)                   1           1
## Order bias  (q)                  2           2
## BW est. (h)                  1.000       1.000
## BW bias (b)                  1.000       1.000
## rho (h/b)                    1.000       1.000
## Unique Obs.                   9006        9199
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    23.642     3.830     6.173     0.000    [16.136 , 31.148]    
## Bias-Corrected    23.604     3.830     6.164     0.000    [16.098 , 31.110]    
##         Robust    23.604     5.520     4.276     0.000    [12.785 , 34.424]    
## =============================================================================

And we can plot the results. CCT bandwidth in main estimation was about 9 (see above) so we restrict the x axis to (-10,10).

# RD plot of the main outcome with the x-axis limited to (-10, 10).
# The x-axis label is set explicitly, matching the full-range plot of the same
# variables, and y.label is spelled out in full instead of relying on partial
# argument matching of y.lab.
rdplot(
  y = data_rdd$y_donationshare,
  x = data_rdd$x_score_victorymargin,
  x.lim = c(-10, 10),
  x.label = "Democratic margin of victory at t",
  y.label = "Democratic share of contributions at t+1",
  title = "RDD Estimate"
)

Naturally, focusing on that subset of the x-axis does not give us the whole picture. Hence, we also draw a graph that visualizes the whole data range. Q: What do you see here?

# RD plot of the main outcome using the whole data range (no axis limits).
# The label arguments are spelled out in full (x.label / y.label) instead of
# relying on partial argument matching of x.lab / y.lab.
rdplot(
  y = data_rdd$y_donationshare,
  x = data_rdd$x_score_victorymargin,
  x.label = "Democratic margin of victory at t",
  y.label = "Democratic share of contributions at t+1",
  title = "RDD Estimate"
)

Finally, we can (and should) test whether we find treatment effects at placebo cutoffs. There shouldn’t be any.

# Placebo test: move the cutoff to c = 1
# NOTE(review): with a data-driven bandwidth the estimation window around the
# placebo cutoff can include the true cutoff at 0; consider restricting the
# sample to one side of the true cutoff -- to be confirmed.
summary(
  rdrobust(
    y = data_rdd$y_donationshare,
    x = data_rdd$x_score_victorymargin,
    c = 1,
    all = TRUE
  )
)

## [1] "Mass points detected in the running variable."
## Call: rdrobust
## 
## Number of Obs.                27203
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.               13692       13511
## Eff. Number of Obs.           1271        1204
## Order est. (p)                   1           1
## Order bias  (q)                  2           2
## BW est. (h)                  3.125       3.125
## BW bias (b)                  8.042       8.042
## rho (h/b)                    0.389       0.389
## Unique Obs.                   9406        8799
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -1.994     2.257    -0.883     0.377    [-6.418 , 2.430]     
## Bias-Corrected    -3.071     2.257    -1.361     0.174    [-7.495 , 1.352]     
##         Robust    -3.071     2.394    -1.283     0.200    [-7.764 , 1.621]     
## =============================================================================

# Placebo test: move the cutoff to c = -3
summary(
  rdrobust(
    y = data_rdd$y_donationshare,
    x = data_rdd$x_score_victorymargin,
    c = -3,
    all = TRUE
  )
)

## [1] "Mass points detected in the running variable."
## Call: rdrobust
## 
## Number of Obs.                27203
## BW type                       mserd
## Kernel                   Triangular
## VCE method                       NN
## 
## Number of Obs.               12055       15148
## Eff. Number of Obs.           2005        1906
## Order est. (p)                   1           1
## Order bias  (q)                  2           2
## BW est. (h)                  4.668       4.668
## BW bias (b)                  9.363       9.363
## rho (h/b)                    0.499       0.499
## Unique Obs.                   7769       10436
## 
## =============================================================================
##         Method     Coef. Std. Err.         z     P>|z|      [ 95% C.I. ]       
## =============================================================================
##   Conventional    -2.139     1.801    -1.188     0.235    [-5.670 , 1.391]     
## Bias-Corrected    -2.930     1.801    -1.626     0.104    [-6.460 , 0.601]     
##         Robust    -2.930     1.997    -1.467     0.142    [-6.844 , 0.984]     
## =============================================================================

References

Skovron, Christopher, and Rocío Titiunik. 2015. “A Practical Guide to Regression Discontinuity Designs in Political Science.”