Chapter 3 Spatial Analysis
3.1 Data
library(tidyverse); library(sf); library(cowplot); library(magrittr)
list <- read.csv("./_data/us_abbrev.csv", stringsAsFactors = F) %>%
mutate(state = trimws(state),
STATE = trimws(STATE))
nsduh <- read.csv("./_data/NSDUH_final2.csv", stringsAsFactors = F) %>%
dplyr::select(outcome, year_pair, state, estimate) %>%
mutate(outcome = ifelse(outcome=="Needing But Not Receiving Treatment for Substance Use at a Specialty Facility in the Past Year (2015 onward)", "Needing But Not Receiving Treatment for Substance Use at a Specialty Facility in the Past Year (through 2014)", outcome)) %>%
spread(outcome, estimate) %>%
dplyr::rename(heroin = `Heroin Use in the Past Year`,
tx_su = `Needing But Not Receiving Treatment for Substance Use at a Specialty Facility in the Past Year (through 2014)`,
tx_mental = `Received Mental Health Services in the Past Year`,
mental = `Serious Mental Illness in the Past Year`) %>%
dplyr::select(year_pair, state, heroin, tx_su, tx_mental, mental) %>%
mutate(year = as.numeric(str_sub(year_pair,1,4))) %>%
inner_join(list, by = "state")
data <- read.csv("./_data/Cleaned_Final_Dataset_2018_7-2-20.csv", stringsAsFactors = F) %>%
dplyr::rename(County.Code = county_code) %>%
separate(county_name, c("County", "STATE"), sep = ", ") %>%
mutate_all(.funs = ~ifelse(.==-999,NA,.)) %>%
dplyr::select(-c(X, state, #County, STATE,
heroin_deaths, cocaine_deaths, meth_deaths,
#heroin_crude_death_rate, cocaine_crude_death_rate, meth_crude_death_rate,
next_year_synthetic_opioid_death_rate, two_year_out_synthetic_opioid_death_rate)) %>%
inner_join(nsduh, by = c("STATE", "year"))
usa.sf <- st_read("./_data/GIS/tl_2017_us_county/tl_2017_us_county.shp", stringsAsFactors = F) %>%
#filter(STATEFP=="01") %>%
filter(STATEFP!="02", STATEFP!="15", STATEFP!="60", STATEFP!="66", STATEFP!="69", STATEFP!="72", STATEFP!="78") %>%
mutate(County.Code = as.numeric(GEOID),
STATEFP = as.numeric(STATEFP)) %>%
left_join(read_excel("./_data/fp_codes.xlsx") %>%
dplyr::rename(STATEFP = `Numeric code`), by = "STATEFP")
dat.full <- usa.sf %>%
dplyr::select(County.Code, loc) %>%
inner_join(data, by="County.Code")
3.2 Spatial Analysis
library(spdep)
library(tmap)
usa.sp <- as(usa.sf, "Spatial")
map.sf <- usa.sf %>%
dplyr::select(County.Code, loc) %>%
inner_join(data %>% distinct(County.Code), by="County.Code")
nb <- poly2nb(map.sf)
self <- nb2listw(include.self(nb))
breaks <- c(-Inf, -2.58, -1.96, -1.65, 1.65, 1.96, 2.58, Inf)
labels <- c("Cold spot: 99% confidence", "Cold spot: 95% confidence", "Cold spot: 90% confidence", "County Not significant","Hot spot: 90% confidence", "Hot spot: 95% confidence", "Hot spot: 99% confidence")
usa.getis <- dat.full %>%
st_set_geometry(NULL) %>%
nest(-year) %>%
mutate(LISA = map(.x = data, .f = ~localG(.x$synthetic_opioid_crude_death_rate, self)),
clust_LISA = map(.x = LISA, .f = ~cut(.x, include.lowest = TRUE, breaks = breaks, labels = labels))) %>%
unnest() %>%
dplyr::select(County.Code, year, LISA, clust_LISA)
dat.full.getis <- usa.sf %>%
dplyr::select(County.Code, loc) %>%
inner_join(usa.getis, by="County.Code")
(fig2a <- dat.full.getis %>%
ggplot() +
geom_sf(aes(fill=clust_LISA), col = NA) +
geom_sf(data = dat.full.getis %>% summarise(n = n()), fill = NA) +
scale_fill_brewer(name="Cluster", palette = "RdBu", direction=-1) +
facet_wrap(.~year, ncol = 3) +
theme_bw() +
theme(legend.position = c(1, 0),
legend.justification = c(1, 0))
)
ggsave("./_out/fig2a.png", fig2a, height = 8, width = 13)
3.3 Stability
a <- dat.full.getis %>%
st_set_geometry(NULL) %>%
select(-LISA) %>%
mutate(clust = clust_LISA,
clust_LISA = as.numeric(clust_LISA))
# Correlation
library(GGally)
a %>%
spread(year, clust_LISA) %>%
select(-County.Code, -loc) %>%
ggcorr(label = T)
# Kendals W
library(synchrony)
library(purrr)
b <- a %>%
#group_by(County.Code, loc) %>%
nest() %>%
expand_grid(year1 = 2010:2017) %>%
expand_grid(year2 = 2010:2017) %>%
filter(year1 >= year2) %>%
filter(year1 != year2) %>%
mutate(subset = pmap(.l = list(x=data, y=year1, z=year2), .f = function(x, y, z) filter(x, year==y | year==z)),
reshape = map(.x = subset, .f = ~spread(.x, year, clust_LISA) %>% select(-County.Code, -loc)),
w = map(.x = reshape, .f = ~kendall.w(.x, nrands=99, type = 2)), #change to rands=999 for paper
w.d = map(.x = w, .f = ~as.data.frame(as.list(.x)[-6])))
c <- b %>%
select(year1, year2, w.d) %>%
unnest()
# Coldspot
c.b <- a %>%
# ===============
group_by(County.Code, loc) %>%
mutate(i = min(clust_LISA)) %>%
filter(i<4) %>%
ungroup() %>%
# ===============
nest() %>%
expand_grid(year1 = 2010:2017) %>%
expand_grid(year2 = 2010:2017) %>%
filter(year1 >= year2) %>%
filter(year1 != year2) %>%
mutate(subset = pmap(.l = list(x=data, y=year1, z=year2), .f = function(x, y, z) filter(x, year==y | year==z)),
reshape = map(.x = subset, .f = ~spread(.x, year, clust_LISA) %>% select(-County.Code, -loc)),
w = map(.x = reshape, .f = ~kendall.w(.x, nrands=99, type = 2)), #change to rands=999 for paper
w.d = map(.x = w, .f = ~as.data.frame(as.list(.x)[-6])))
c.c <- c.b %>%
select(year1, year2, w.d) %>%
unnest()
### Hotspot
h.b <- a %>%
# ===============
group_by(County.Code, loc) %>%
mutate(i = max(clust_LISA)) %>%
filter(i>4) %>%
ungroup() %>%
# ===============
nest() %>%
expand_grid(year1 = 2010:2017) %>%
expand_grid(year2 = 2010:2017) %>%
filter(year1 >= year2) %>%
filter(year1 != year2) %>%
mutate(subset = pmap(.l = list(x=data, y=year1, z=year2), .f = function(x, y, z) filter(x, year==y | year==z)),
reshape = map(.x = subset, .f = ~spread(.x, year, clust_LISA) %>% select(-County.Code, -loc)),
w = map(.x = reshape, .f = ~kendall.w(.x, nrands=99, type = 2)), #change to rands=999 for paper
w.d = map(.x = w, .f = ~as.data.frame(as.list(.x)[-6])))
h.c <- h.b %>%
select(year1, year2, w.d) %>%
unnest()
# Plots
f1 <- c %>%
ggplot(aes(x = year1, y = year2, fill = w.corrected)) +
geom_tile() +
scale_y_continuous(trans="reverse", breaks = c(2010:2017), expand = c(0,0)) +
scale_x_continuous(breaks = c(2010:2017), expand = c(0,0)) +
scale_fill_viridis_c(direction = -1, lim = c(0,1)) +
coord_equal() +
geom_label(aes(label = round(w.corrected, 2), col = ifelse(w.corrected>.55,"a","b")), label.size = NA) +
scale_color_manual(values = c("white", "black")) +
guides(color="none") +
ggtitle("Overall") +
labs(x = "Clusters at target year", y = "Clusters at starting year") +
theme_minimal()
f2 <- c.c %>%
ggplot(aes(x = year1, y = year2, fill = w.corrected)) +
geom_tile() +
scale_y_continuous(trans="reverse", breaks = c(2010:2017), expand = c(0,0)) +
scale_x_continuous(breaks = c(2010:2017), expand = c(0,0)) +
scale_fill_viridis_c(direction = -1, lim = c(0,1)) +
coord_equal() +
geom_label(aes(label = round(w.corrected, 2), col = ifelse(w.corrected>.55,"a","b")), label.size = NA) +
scale_color_manual(values = c("black")) +
guides(color="none") +
ggtitle("Coldspots") +
labs(x = "Clusters at target year", y = "Clusters at starting year") +
theme_minimal()
f3 <- h.c %>%
ggplot(aes(x = year1, y = year2, fill = w.corrected)) +
geom_tile() +
scale_y_continuous(trans="reverse", breaks = c(2010:2017), expand = c(0,0)) +
scale_x_continuous(breaks = c(2010:2017), expand = c(0,0)) +
scale_fill_viridis_c(direction = -1, lim = c(0,1)) +
coord_equal() +
geom_label(aes(label = round(w.corrected, 2), col = ifelse(w.corrected>.55,"a","b")), label.size = NA) +
scale_color_manual(values = c("white", "black")) +
guides(color="none") +
ggtitle("Hotspots") +
labs(x = "Clusters at target year", y = "Clusters at starting year") +
theme_minimal()
library(cowplot)
fig3 <- plot_grid(f1, plot_grid(f2,f3, ncol = 1), ncol = 2)
ggsave("./_out/fig3.png", fig3, height = 8, width = 13)
3.4 Stable clusters
library(ggthemes)
sa <- a %>%
spread(year, clust_LISA) %>%
mutate(pattern = paste0(`2010`,`2011`,`2012`,`2013`,`2014`,`2015`,`2016`,`2017`))
epiDisplay::tab1(sa$pattern, sort.group = "decreasing")
sa %>%
group_by(pattern) %>%
count() %>%
filter(n>10) %>%
ggplot(aes(x = fct_reorder(pattern,-n), y = n)) +
geom_col()
a %>%
inner_join(sa %>% dplyr::select(County.Code, pattern), by = "County.Code") %>%
mutate(n4 = str_count(pattern, "4")) %>%
#filter(n4<2) %>%
#filter(pattern != "44444444") %>%
group_by(County.Code) %>%
mutate(ord = sum(clust_LISA)) %>%
ggplot(aes(x = year, y = fct_reorder(as.factor(County.Code), ord), color = clust)) +
geom_point(alpha = .5, size = .3) +
scale_color_brewer(palette = "RdBu", direction = -1) +
labs(y = "County",
#caption = "Consistent (>4y) non-significant clusters excluded",
color = "Cluster category") +
theme_base() +
guides(colour = guide_legend(override.aes = list(size=5))) +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
ggsave("./_out/panel.png", height = 12)
# Coldspots
a %>%
# ===============
group_by(County.Code, loc) %>%
mutate(i = min(clust_LISA)) %>%
filter(i<4) %>%
# ==============
inner_join(sa %>% dplyr::select(County.Code, pattern), by = "County.Code") %>%
mutate(n4 = str_count(pattern, "4")) %>%
#filter(n4<2) %>%
#filter(pattern != "44444444") %>%
group_by(County.Code) %>%
mutate(ord = sum(clust_LISA)) %>%
ggplot(aes(x = year, y = fct_reorder(as.factor(County.Code), ord), color = clust)) +
geom_point(alpha = .5, size = .3) +
scale_color_brewer(palette = "RdBu", direction = -1) +
labs(y = "County",
#caption = "Consistent (>4y) non-significant clusters excluded",
color = "Cluster category") +
theme_base() +
guides(colour = guide_legend(override.aes = list(size=5))) +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
ggsave("./_out/panel_c.png", height = 12)
# Hotspot
a %>%
# ===============
group_by(County.Code, loc) %>%
mutate(i = max(clust_LISA)) %>%
filter(i>4) %>%
# ==============
inner_join(sa %>% dplyr::select(County.Code, pattern), by = "County.Code") %>%
mutate(n4 = str_count(pattern, "4")) %>%
#filter(n4<2) %>%
#filter(pattern != "44444444") %>%
group_by(County.Code) %>%
mutate(ord = sum(clust_LISA)) %>%
ggplot(aes(x = year, y = fct_reorder(as.factor(County.Code), ord), color = clust)) +
geom_point(alpha = .5, size = .3) +
scale_color_brewer(palette = "RdBu", direction = -1) +
labs(y = "County",
#caption = "Consistent (>4y) non-significant clusters excluded",
color = "Cluster category") +
theme_base() +
guides(colour = guide_legend(override.aes = list(size=5))) +
theme(axis.text.y = element_blank(),
axis.ticks.y = element_blank()) +
ggsave("./_out/panel_h.png", height = 12)
3.4.1 Overall
3.4.2 Coldspots
Counties with at least one year as coldspot
3.4.3 Hotspots
Counties with at least one year as hotspot