library(tidyverse)
Gazzelloni F. (2023), Data Visualization: Lyme Disease Map
Load libraries
Data for this visualization is from CDC: https://www.cdc.gov/lyme/stats/survfaq.html
This is an older dataset, last updated in 2011. I had a look at it, then used the most recent one.
library(jsonlite)
# Download the older CDC Lyme disease dataset (JSON export).
url <- "https://data.cdc.gov/api/views/smai-7mz9/rows.json?accessType=DOWNLOAD"
data <- fromJSON(url)

# The column metadata sits under meta$view$columns; the rows under data$data.
keys <- data$meta$view$columns
df <- as.data.frame(data$data)

# Label the row data with the column names taken from the metadata.
names(df) <- keys$name %>% t()

# Quick look at the first rows.
df %>% head()
# Reshape the older dataset to long format: one row per county and period.
my_df <- df %>%
  select(
    StateCode, CountyCode, StateName, CountyName,
    ConfirmedCount_1992_1996,
    ConfirmedCount_1997_2001,
    ConfirmedCount_2002_2006,
    ConfirmedCount_2007_2011
  ) %>%
  # Columns 5 to 8 are the four ConfirmedCount_* period columns.
  pivot_longer(5:8) %>%
  mutate(
    # "ConfirmedCount_1992_1996" -> "1992-1996"
    name = gsub("ConfirmedCount_", "", name),
    name = gsub("_", "-", name),
    # Counts are converted to numeric; anything that becomes NA is set to 0.
    value = as.numeric(value),
    value = ifelse(is.na(value), 0, value),
    # Normalise county names: lower case, drop " county", trim whitespace.
    CountyName = tolower(CountyName),
    CountyName = gsub(" county", "", CountyName),
    CountyName = trimws(CountyName)
  ) %>%
  rename(Years = name, subregion = CountyName)

# Quick look at the first rows.
my_df %>% head()
This dataset contains county-level Lyme disease data from 2001 to 2020.
# Download the county-level case counts (CSV) from the CDC website.
url2 <- "https://www.cdc.gov/lyme/resources/datasurveillance/LD-Case-Counts-by-County-01-20.csv"
df2 <- read.csv(url2)
Tidy the data: the Ctyname column is the one I want to use to look at the cases by county. I had to leave some information behind.
# Reshape the case counts to long format and normalise the state and county
# names so they can be compared with the names used by map_data("county").
my_df2 <- df2 %>%
  # Keep columns 1:2 and 6:25 (positional selection, as in the original).
  select(1:2, 6:25) %>%
  pivot_longer(3:22) %>%
  mutate(
    # Strip the "Cases" prefix from the pivoted column names.
    name = gsub("Cases", "", name),
    Stname = tolower(Stname),
    Ctyname = str_to_lower(Ctyname),
    Ctyname = gsub(" county", "", Ctyname),
    Ctyname = gsub(" parish", "", Ctyname),
    Ctyname = gsub(" city", "", Ctyname),
    # fixed = TRUE: match the literal "st." only. As a regular expression,
    # "st." means "st" plus ANY character, which corrupted names such as
    # "houston" (-> "houstn") and "stafford" (-> "stfford").
    Ctyname = gsub("st.", "st", Ctyname, fixed = TRUE),
    Ctyname = gsub(" bay borough", "", Ctyname),
    # Correct a misspelling present in the source data.
    Ctyname = case_when(
      Ctyname == "armstong" ~ "armstrong",
      TRUE ~ Ctyname
    ),
    # Bin the years into four equal-width intervals.
    name = as.numeric(name),
    name = cut_interval(name, 4)
  ) %>%
  rename(Years = name, subregion2 = Ctyname, region = Stname) %>%
  distinct()
Here is a double check of the differences between the county names in map_data("county") and those in the Lyme disease dataset (my_df2). I am going to leave some county names behind.
# Compare the county names used by the map data with those in the Lyme
# disease dataset.
us_county_map <- map_data("county")

# Unique county names on each side.
subregion <- us_county_map %>%
  count(subregion) %>%
  select(-n) %>%
  unlist()
subregion2 <- my_df2 %>%
  count(subregion2) %>%
  select(-n) %>%
  unlist()

# Names present in both sets, only in the dataset, and only in the map.
intersect(subregion, subregion2) %>% length()
setdiff(subregion2, subregion) %>% length()
setdiff(subregion, subregion2) %>% length()

# Spot check a single county name.
my_df2 %>%
  filter(str_detect(subregion2, "yellowstone"))
These are the sets that will be used in the Map.
- Base map layers
# Polygon data for the two base layers: state outlines and county outlines.
us_state_map <- map_data("state")
us_county_map <- map_data("county")
- Cases layer
# County polygons restricted to county names that also appear in the Lyme
# disease data. The map data has no `subregion2` column, so `subregion2`
# refers to the vector of names defined earlier in the script.
my_us_county_map <- map_data("county") %>%
  filter(subregion %in% subregion2)

# Attach the polygon coordinates to the case counts, matching on state
# (`region`) and county (`subregion`).
my_df_coords <- my_df2 %>%
  rename(subregion = subregion2) %>%
  inner_join(my_us_county_map, by = c("region", "subregion")) %>%
  distinct()

# Point layer: for counties with at least one case, replace every vertex
# coordinate with the midpoint of the county's long/lat range.
df_jitter <- my_df_coords %>%
  filter(value > 0) %>%
  group_by(subregion, region, Years) %>%
  mutate(long = mean(range(long)), lat = mean(range(lat))) %>%
  # value=sum(value))%>%
  ungroup() %>%
  distinct()
Change the facet labels:
# Facet labels: one string per year interval, showing the interval and the
# percentage change in total cases relative to the previous interval.
lbl <- my_df2 %>%
  group_by(Years) %>%
  reframe(value = sum(value)) %>%
  mutate(
    # Percentage change versus the previous interval; the first interval has
    # no predecessor, so it is shown as "-".
    pct = lag(value),
    pct = round((value - pct) / pct * 100, 2),
    pct = ifelse(is.na(pct), "-", pct),
    # Turn interval labels such as "(a,b]" into "a-b".
    Year = gsub("\\[|\\]", "", Years),
    Year = gsub("\\(|\\]", "", Year),
    Year = gsub(",", "-", Year),
    lbl = paste(Year, "pct change", pct, "%")
  ) %>%
  select(lbl) %>%
  unlist()
Set the fonts:
library(showtext)
library(sysfonts)
# Register the two Google fonts used for the facet strips and the caption.
sysfonts::font_add_google("David Libre", "David Libre")
sysfonts::font_add_google("Syne Mono", "Syne Mono")
Make the Map:
# Turn on showtext rendering so the registered fonts are used in the plot.
showtext_auto()

# Faceted map: county and state outlines as base layers, with jittered
# points sized by case count, one panel per year interval.
p <- ggplot() +
  # County outlines.
  geom_polygon(
    data = us_county_map,
    aes(x = long, y = lat, group = group),
    fill = "white", color = "#192d40", linewidth = 0.02
  ) +
  # State outlines drawn on top, with no fill.
  geom_polygon(
    data = us_state_map,
    aes(x = long, y = lat, group = group),
    fill = NA, color = "grey50", linewidth = 0.2
  ) +
  # Case points.
  geom_jitter(
    data = df_jitter,
    aes(x = long, y = lat, size = value),
    color = "#192d40"
  ) +
  coord_quickmap() +
  # Relabel each facet with the matching entry of `lbl`.
  facet_wrap(
    ~Years,
    scales = "free",
    labeller = as_labeller(setNames(lbl, sort(unique(df_jitter$Years))))
  ) +
  guides(size = "none") +
  scale_size(range = c(0.1, 1)) +
  labs(caption = "#30DayChartChallenge Day 29 Monochrome\nDataSource: CDC Lyme Disease | Map: Federica Gazzelloni") +
  ggthemes::theme_map() +
  theme(
    text = element_text(color = "white", size = 40),
    legend.background = element_blank(),
    legend.position = "right",
    strip.background = element_rect(color = NA, fill = NA),
    strip.text = element_text(family = "David Libre", color = "white"),
    plot.background = element_rect(color = "#192d40", fill = "#192d40"),
    plot.caption = element_text(family = "Syne Mono", lineheight = 0.5, hjust = 0.3),
    panel.background = element_rect(color = "#192d40", fill = "#192d40"),
    plot.margin = margin(0, 0, 0, 0, unit = "pt")
  )
Have a look at the Map:
# Display the map built above.
p
Write some background information about Lyme disease, and use ggtitle() to make a title plot to use in the main plot layout.
library(ggtext)
# Background paragraph shown above the map.
text <- "Lyme disease is caused by bacteria called Borrelia burgdorferi, which is spread to people through the bite of several types of blacklegged ticks. In the United States, most infections occur in Eastern states, Northern midwestern states, and West Coast. Recent estimates using other methods suggest that approximately 476,000 people may get Lyme disease each year in the United States."

# Wrap the paragraph in a one-column tibble, as the original code does.
text <- tibble(text)
# An empty plot whose only content is its title: the background paragraph,
# rendered as a wrapped text box on the dark background.
title <- ggplot() +
  ggtitle(label = text) +
  theme(
    plot.title.position = "plot",
    plot.title = element_textbox_simple(
      size = 50, lineheight = 0.3,
      color = "white",
      family = "Syne Mono",
      padding = margin(5.5, 5.5, 5.5, 5.5),
      margin = margin(0, 0, 5.5, 0),
      fill = "#192d40"
    ),
    plot.background = element_rect(color = "#192d40", fill = "#192d40"),
    panel.background = element_rect(color = "#192d40", fill = "#192d40"),
    plot.margin = margin(0, 0, 0, 0, unit = "pt")
  )
Here is the part where the layout design is done. Use the area() function from {patchwork} to set the areas that combine the title with the map. Then check the result with the plot() function, as shown below:
library(patchwork)
# Layout on a 100 x 100 grid. Argument order: area(t, l, b = t, r = l).
# The first area is for the title plot and the second for the map, matching
# the order in which the plots are later added together.
design <- c(
  area(1, 1, 20, 100),
  area(5, 1, 100, 100)
)

# Preview the layout.
plot(design)
Finally, combine all together in one plot:
# Combine the title plot and the map using the layout in `design`. The `&`
# operator applies the dark background theme to every plot in the result.
title + p + plot_layout(design = design) &
  theme(plot.background = element_rect(fill = "#192d40", color = "#192d40"))
And save it!
# Write the figure to "p.png" (9 x 7 at 320 dpi) with the dark background.
# No `plot` argument is passed, so ggsave() uses its default plot
# (presumably the last plot displayed; confirm against the ggplot2 docs).
ggsave("p.png",width = 9,height = 7,dpi=320,bg="#192d40")