rm(list=ls())setwd(dirname(rstudioapi::getActiveDocumentContext()$path))# load libraries and optionsoptions(scipen =999) # this is to avoid number truncationlibrary(countrycode)library(tidyverse)# set the colorsmycolors <-colorRampPalette(RColorBrewer::brewer.pal(n=9, name="Set1"))(5)# load datadeath_timeseries<-read.csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-04-12/death_timeseries.csv")# data wranglingdeath_timeseries1<-death_timeseries%>%rename(Deaths=`Deaths...Cause..All.causes...Risk..Household.air.pollution.from.solid.fuels...Sex..Both...Age..All.Ages..Number.`,Deaths1=`Deaths...Cause..All.causes...Risk..Household.air.pollution.from.solid.fuels...Sex..Both...Age..All.Ages..Number..1`)death_timeseries2 <- death_timeseries1%>%filter(Year==1990,Year.1==2019) %>%filter(!str_detect(Entity,"World|Region|Central|Countries|European"),!str_detect(Entity,"Western|Southern|Eastern|Northen"),!str_detect(Entity,"North|South|Southeast"),!str_detect(Entity,"income|High|Low"),!str_detect(Entity,"Commonwealth"),!str_detect(Entity,"G20|SDI"),!str_detect(Entity,"Europe|Asia|Africa|America|Oceania") ) # add missing continents' names with countrycode() function -----------death_timeseries2$continent <-countrycode(sourcevar = death_timeseries2[, "Entity"],origin ="country.name",destination ="continent")death_timeseries3 <- death_timeseries2%>%# check for some values that were not matched unambiguously ---------# filter(Entity%in%c("Australasia", "Caribbean", # "England", "Micronesia (country)", # "Scotland", "Timor", "Wales")) %>%mutate(Entity=as.character(Entity), # mutate Entity back as a character to use case_when() functioncontinent=case_when(Entity%in%c("England","Scotland","Wales")~"Europe", Entity=="Australasia"~"Oceania", Entity=="Caribbean"~"Americas", Entity=="Micronesia (country)"~"Asia", Entity=="Timor"~"Asia",TRUE~ continent),Entity=as.factor(Entity)) %>%filter(Deaths1>=1,Deaths>=1) # make the plotdeath_timeseries3 %>%ggplot(aes(x=Deaths1,y=Deaths))+geom_jitter(size=1.7,aes(fill=continent),shape=21,alpha=0.7,color="grey45")+geom_smooth(method ="lm",se=F,color="grey60",linetype="dashed",size=0.5)+geom_text(aes(label=Entity,color=continent),hjust ="left",show.legend = F,vjust="top",check_overlap = T,size=3)+scale_x_log10(breaks=c(1,10,100,1000,10000,100000),expand=expansion(add=c(0,0.05)),label=scales::comma_format(accuracy =NULL))+scale_y_log10(breaks=c(1,10,100,1000,10000,100000),expand=expansion(add=c(0,0.8)),label=scales::comma_format(accuracy =NULL))+scale_fill_manual(guide=guide_legend(nrow =1),values=mycolors)+scale_color_manual(values=mycolors)+labs(title="Deaths due to Household air pollution\nfrom solid fuels",subtitle="1990 vs 2019 All Ages and Gender",caption="\n#30DayChartChallenge 2022 day12 - theme day: The Economist\nDataSource: Our World in Data | #TidyTuesday week15 - Indoor Air Pollution\nDataViz: Federica Gazzelloni",x="Deaths from indoor air pollution in 2019",y="Deaths from indoor air pollution in 1990",fill="") + ggthemes::theme_economist() +theme(legend.text =element_text(size=10),plot.title =element_text(size=23),axis.title.x =element_text(vjust =-0.8),axis.title.y =element_text(vjust =0.8),plot.caption =element_text(vjust =-1,size=9),legend.position =c(0.4,0.85))#save the plotggsave("day12_the_economist.png",dpi=320,width =8,height =6)