# load the libraries
library(tidyverse)
library(forcats)
library(ggridges)
library(showtext)
library(cowplot)
# set the font
# Turn on showtext so that the registered font is used when text is rendered.
showtext_auto(enable = TRUE)
# Show the font families that can be fetched from Google Fonts.
sysfonts::font_families_google()
# Fetch "Abril Fatface" from Google Fonts and register it under that family
# name; the plot theme, labels and annotation below refer to it by this name.
sysfonts::font_add_google("Abril Fatface", "Abril Fatface")
# load data
# Both files come from the TidyTuesday 2022-05-10 folder; each object is
# named after the file it is read from.
nyt_titles <- readr::read_tsv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-10/nyt_titles.tsv")
nyt_full <- readr::read_tsv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-05-10/nyt_full.tsv")
# look at the data
# Print the first rows of each table.
nyt_titles %>% head()
nyt_full %>% head()
# a bit of wrangling
# Combine the two tables, matching rows on year, title and author.
df <- nyt_titles %>%
  full_join(nyt_full, by = c("year", "title", "author"))

df2 <- df %>%
  # Year as a factor with reversed level order (used as the plot's y axis).
  mutate(year_fct = fct_rev(as.factor(year))) %>%
  # Drop rows whose author field contains any of these markers.
  filter(!str_detect(author, "Edited|edited|created|compiled|Completed|NO AUTHOR| and |Illustrated| with |translated")) %>%
  # Strip these fragments from the author names.
  # NOTE(review): the `?` in "? by " and "?by " is not escaped, so it is a
  # regex quantifier rather than a literal question mark; "\\? by " was
  # probably intended. Left unchanged to avoid altering results - confirm.
  mutate(author = gsub("! by |? by |?by |\\?|\"|, Jr| Jr|\\.$| writing as.*", "", author)) %>%
  group_by(author) %>%
  # Per author: keep every year_fct value, and compute the mean rank, the
  # uncentered scaled ranks and their share of the author's total.
  summarise(year_fct,
            avg_rank = mean(rank),
            scale = scale(rank, center = FALSE),
            scale_pct = scale / sum(scale),
            avg_rank_pct = avg_rank / sum(avg_rank),
            id = n()) %>%
  ungroup()
# set the dataset for the text labels drawn next to each year's ridge
side_labels <- df2 %>%
  group_by(year_fct, author) %>%
  # One row per year/author: largest scaled rank and mean percentage.
  summarize(top = max(scale), scale_pct = mean(scale_pct)) %>%
  distinct() %>%
  # Keep the row(s) with the largest `top` in each remaining group
  # (presumably per year, via summarize()'s default drop-last grouping).
  filter(top == max(top)) %>%
  # `lab` is the label that gets plotted; `lab2` is an alternative wording
  # that is dropped by the select() below.
  mutate(lab = paste(author, "in", year_fct),
         lab2 = paste(author, "ranked", round(top), "on avg in", year_fct)) %>%
  ungroup() %>%
  select(year_fct, lab, scale_pct) %>%
  arrange(desc(year_fct))
# make the plot
df2 %>%
  # Reorder the authors by their percentage values; this reorders the
  # density curves of the different authors within the same year.
  mutate(author = fct_reorder(author, scale_pct)) %>%
  ggplot(aes(x = scale_pct, y = year_fct)) +
  # One ridge per year, filled by author, with densities evaluated on [0, 1].
  geom_density_ridges(aes(fill = author),
                      show.legend = FALSE,
                      size = 0.3,
                      scale = 1,
                      alpha = .8,
                      color = "grey25",
                      from = 0, to = 1) +
  # Text labels from `side_labels`, all anchored at x = 0.5 on their year row.
  geom_label(data = side_labels,
             aes(x = 0.5, y = year_fct, label = lab),
             label.padding = unit(0.05, "lines"),
             label.r = unit(0.5, "lines"),
             label.size = 0,
             family = "Abril Fatface", size = 10,
             inherit.aes = FALSE, hjust = 0, vjust = 0) +
  scale_y_discrete(expand = c(0, 0)) +
  # Negative expansion on the upper side trims the right end of the x axis.
  scale_x_continuous(expand = expansion(mult = c(0, -0.35),
                                        add = c(0, -0.02))) +
  scale_fill_grey(
    start = 0.2,
    end = 0.9,
    na.value = "red",
    aesthetics = "fill") +
  labs(title = "The New York Times",
       subtitle = "Solo author ranks from 1931 to 2020",
       caption = "DataSource: Post45 Data Collective NYT HARDCOVER FICTION BESTSELLERS\nDataViz: Federica Gazzelloni (@fgazzelloni)",
       x = "Rank density", y = "Year") +
  # Do not clip drawing to the panel area.
  coord_cartesian(clip = "off") +
  theme_ridges(grid = FALSE) +
  theme(text = element_text(family = "Abril Fatface", size = 45),
        plot.title = element_text(size = 90),
        plot.caption = element_text(hjust = 1),
        axis.text.y = element_text(size = 30, hjust = 0),
        axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        plot.background = element_rect(color = "grey90", fill = "grey90"),
        panel.background = element_rect(color = "grey90", fill = "grey90"))
# Write the plot built above to disk as an intermediate image
# (10 x 14 at 320 dpi).
ggsave(
  filename = "partial.png",
  width = 10,
  height = 14,
  dpi = 320
)
# Frame the saved image and overlay a "how to read it" note with {cowplot}.
# Drawing on top of the image makes it quicker to position the text.
ggdraw() +
  draw_image("partial.png") +
  draw_label(
    label = "How to read it:
On average, authors rank 7.6 based on weekly frequencies on NYT,
which corresponds to 3.4% of the total scaled avg-ranks.
Each year shown in the graph represents the density curve of the
ranks for the NYT's solo authors in that year.
The density curves are ordered by total percentage of scaled ranks.
On the right is the author with the avg-weekly highest rank for the year.",
    x = 0.04,
    y = 0.05,
    hjust = 0,
    fontfamily = "Abril Fatface",
    size = 25,
    lineheight = 0.25
  )
# Write the annotated figure to disk as the final image
# (9 x 12 at 320 dpi).
ggsave(
  filename = "w19_nyt.png",
  width = 9,
  height = 12,
  dpi = 320
)
# Housekeeping: clear the workspace before starting and set the working
# directory to the folder containing this .R file.
rm(list = ls())
# The path of the active document is only available inside an interactive
# RStudio session, so the working directory is left unchanged elsewhere.
if (interactive() &&
    requireNamespace("rstudioapi", quietly = TRUE) &&
    rstudioapi::isAvailable()) {
  setwd(dirname(rstudioapi::getActiveDocumentContext()$path))
}