Web page metrics

Welcome to #TidyTuesday 2022 week 46

Networks
Published

November 15, 2022

library(tidyverse)
# install.packages("ggsci")
library(ggsci)
library(ggnewscale)
# ggnewscale::new_scale_fill()
library(ggtext)
library(lubridate)

# set the fonts
library(showtext)
library(sysfonts)
library(extrafont)
# Text rendering ----
# Hand text drawing over to showtext and render it at 320 dpi.
showtext::showtext_auto()
showtext::showtext_opts(dpi = 320)

# Register the two Google font families referenced by the theme below.
sysfonts::font_add_google(
  name = "Roboto Condensed",
  family = "Roboto Condensed"
)
sysfonts::font_add_google(
  name = "Zen Dots",
  family = "Zen Dots"
)

# Global theme ----
# theme_void() is the base; the overrides are layered on top and the result is
# installed as the session default in a single step.
theme_set(
  theme_void(base_family = "Roboto Condensed", base_size = 9) +
    theme(
      # x axis: muted grey labels and ticks
      axis.text.x = element_text(color = "grey60", margin = margin(t = 4)),
      axis.ticks.x = element_line(color = "grey60"),
      axis.ticks.length.x = unit(0.4, "lines"),
      # panels and facet strips
      panel.grid = element_blank(),
      panel.spacing = unit(1.5, "cm"),
      strip.text = element_text(family = "Zen Dots"),
      # legends are switched off globally
      legend.position = "none",
      # canvas: wide right margin (70) next to the last panel
      plot.background = element_rect(fill = "grey98", color = "grey98"),
      plot.margin = margin(t = 5, r = 70, b = 5, l = 20),
      # title and caption typography
      plot.title = element_text(family = "Zen Dots", size = 14),
      plot.caption = element_text(
        family = "Roboto Condensed",
        color = "grey60",
        size = 8,
        margin = margin(t = 30, r = 50)
      )
    )
)
# Data ----
# Other tables from the same TidyTuesday folder; not read by this script.
# image_alt <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-15/image_alt.csv')
# color_contrast <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-15/color_contrast.csv')
# ally_scores <- readr::read_csv('https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-15/ally_scores.csv')

# The two tables that are joined further down.
bytes_total <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-15/bytes_total.csv"
)
speed_index <- readr::read_csv(
  "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2022/2022-11-15/speed_index.csv"
)

# Quick look at the first rows of each table.
head(bytes_total)
head(speed_index)
#' Rescale a vector to the [0, 1] interval (min-max scaling)
#'
#' Despite the name, this does not subtract the mean: it maps the smallest
#' value to 0 and the largest to 1.
#'
#' @param x A numeric vector.
#' @param na.rm If `TRUE` (default), missing values are ignored when computing
#'   the range and stay `NA` in the result. If `FALSE`, any `NA` in `x` makes
#'   the whole result `NA`, which is what the bare formula would do.
#' @return A numeric vector the same length as `x`. Empty or all-`NA` input
#'   yields `NA`s of the same length (no warnings); a constant input yields
#'   zeros instead of `NaN` from 0 / 0.
center <- function(x, na.rm = TRUE) {
  # Covers both empty and all-missing input, where min()/max() would warn.
  if (all(is.na(x))) {
    return(rep(NA_real_, length(x)))
  }

  lo <- min(x, na.rm = na.rm)
  span <- max(x, na.rm = na.rm) - lo

  # Range is undefined (e.g. NA present with na.rm = FALSE).
  if (is.na(span)) {
    return(rep(NA_real_, length(x)))
  }

  # Constant input: avoid 0 / 0; keep NA positions, return zeros elsewhere.
  if (span == 0) {
    return(0 * (x - lo))
  }

  (x - lo) / span
}
# Analysis table ----
# Join the speed-index and total-bytes tables, add monthly totals, and reshape
# the quantile columns to long format.
#
# After the join, columns suffixed `.x` come from `speed_index` (left table)
# and columns suffixed `.y` come from `bytes_total` (right table).
#
# NOTE(review): the two pivot_longer() calls run one after the other, so every
# speed quantile is paired with every bytes quantile (5 x 5 rows per original
# row). `bytes_seconds` therefore also mixes quantile levels (e.g. p10 bytes
# over p90 speed). Downstream code averages over `sp_quantiles`, so this is
# left as is; confirm it is intended.
df <- speed_index %>%
  inner_join(bytes_total, by = c("client", "date", "timestamp")) %>%
  select(-timestamp, -measure.x, -measure.y) %>%
  mutate(
    date = ymd(date),
    ym = zoo::as.yearmon(date),
    .after = date
  ) %>%
  # arrange() ignores grouping by default, so no group_by(client) is needed.
  arrange(client, date) %>%
  # Monthly sums of the medians, computed across both clients.
  group_by(ym) %>%
  mutate(
    speed_ym = sum(p50.x),
    bytes_ym = sum(p50.y),
    .after = ym
  ) %>%
  ungroup() %>%
  pivot_longer(
    cols = c("p10.x", "p25.x", "p50.x", "p75.x", "p90.x"),
    names_to = "sp_quantiles",
    values_to = "speed"
  ) %>%
  pivot_longer(
    cols = c("p10.y", "p25.y", "p50.y", "p75.y", "p90.y"),
    names_to = "by_quantiles",
    values_to = "bytes"
  ) %>%
  # Strip the join suffixes literally: as a regex, ".x" would mean "any
  # character followed by x".
  mutate(
    sp_quantiles = sub(".x", "", sp_quantiles, fixed = TRUE),
    by_quantiles = sub(".y", "", by_quantiles, fixed = TRUE)
  ) %>%
  distinct() %>%
  # Min-max rescaled helpers (over the whole table) plus a bytes/speed ratio.
  mutate(
    speed_ctr = center(speed),
    bytes_ctr = center(bytes),
    alpha = center(speed_ym),
    bytes_seconds = bytes / speed
  )


df
# Annotation table ----
# One row per bytes quantile and client on the most recent date in `df`.
# Values are averaged over the rows in each group; `bts_sec` is the ratio of
# the two raw averages, rounded to two decimals.
labels <- df %>%
  filter(date == max(date)) %>%
  group_by(by_quantiles, date, client) %>%
  summarise(
    avg_bytes = mean(bytes),
    avg_speed = mean(speed),
    avg_bytes_ctr = mean(bytes_ctr),
    avg_speed_ctr = mean(speed_ctr),
    .groups = "drop"
  ) %>%
  mutate(
    bts_sec = round(avg_bytes / avg_speed, digits = 2),
    # "p10" -> "10%"
    by_quantiles = sprintf("%s%%", gsub("p", "", by_quantiles, fixed = TRUE))
  )

labels
# Exploratory check of the rescaled monthly speed column.
summary(df$alpha)

# Figure ----
# Stacked areas of rescaled bytes by quantile, one facet per client; area
# transparency follows the rescaled speed. Labels from `labels` are placed at
# the latest date and stacked like the areas.
web_metrics_plot <- ggplot(df, aes(x = date)) +
  geom_area(
    aes(
      y = bytes_ctr,
      group = by_quantiles,
      alpha = speed_ctr,
      color = by_quantiles,
      fill = by_quantiles
    )
  ) +
  ggsci::scale_fill_futurama() +
  ggsci::scale_color_futurama() +
  geom_richtext(
    data = labels,
    aes(
      x = date,
      y = avg_bytes_ctr,
      group = by_quantiles,
      label = glue::glue("<b style='font-size:7.5pt;'>{by_quantiles}</b><b style='font-size:6pt;'> {round(bts_sec, 2)} Bps<br>"),
      color = by_quantiles
    ),
    # Constant justification is set outside aes() rather than mapped.
    hjust = 0.05,
    vjust = 0.7,
    inherit.aes = FALSE,
    position = "stack",
    family = "Roboto Condensed",
    size = 2.3,
    label.colour = NA,
    fill = NA
  ) +
  facet_wrap(
    vars(client),
    labeller = labeller(client = c("desktop" = "Desktop", "mobile" = "Mobile"))
  ) +
  scale_alpha(range = c(.5, 0.8)) +
  scale_x_date(
    date_labels = "%Y",
    date_breaks = "1 year",
    expand = c(0, 0)
  ) +
  scale_y_continuous(expand = c(0, 0)) +
  # Labels sit past the right edge of the panel, so clipping is disabled.
  coord_cartesian(clip = "off") +
  guides(alpha = "none", color = "none", fill = "none") +
  labs(title="Speed Index\n\n",
       #subtitle="Desktop vs Mobile Bps",
       caption="Data provides informations about median values of bytes loaded per second by type of device used\n#TidyTuesday 2022 week 46 | Data: Web Page metrics by httparchive.org | Viz: Federica Gazzelloni")

# Display the figure.
web_metrics_plot

# Save the figure. The plot is passed explicitly instead of relying on
# last_plot(). dpi matches showtext_opts(dpi = 320) set during font setup;
# a mismatch makes showtext-rendered text come out at the wrong size in the
# saved file.
ggsave(
  "w46_web_page_metrics.png",
  plot = web_metrics_plot,
  dpi = 320,
  width = 9,
  height = 6
)