For this analysis, we will use the Open source Data by the Ayuntamiento de Valencia.

library(dplyr)
library(lubridate)
library(ggplot2)
library(RColorBrewer)

First, we create a function to read the csv tables containing the air pollution data in Valencia (Spain).

load_station <- function(url) {
  read.csv(url, header=TRUE, stringsAsFactors=FALSE,
           fileEncoding="latin1", sep = ";")
}

We create another function to make plots and visualize pollution level in the city.

make_plot <- function(dataset, title) {
  dataset %>% 
  ggplot(aes(x = day, y = Name)) + 
  geom_raster(aes(fill = avgPM)) + 
  scale_fill_gradientn(colours = rev(brewer.pal(10, name="PiYG")), 
                       values = scales::rescale(c(0,40,120)), 
                       na.value = "grey50" ) +
  facet_grid(rows = vars(month)) +
  labs(x = "Day",
       y = "Cabin station", 
       title =  expression(paste("Mean PM"[2.5], "  ", mu, "g/m"^3)), 
       subtitle = paste(paste0(title, ","), "Valencia (Spain)"))
  
}

Next, we create a vector with the url for each cabin station in Valencia.

url = c("http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/1A.csv", 
        "http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/3A.csv",
        "http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/4A.csv", 
        "http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/5A.csv", 
        "http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/6A.csv",
        "http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/7A.csv")

Load air pollution data.

est1 = load_station(url[1])
est2 = load_station(url[2])
est3 = load_station(url[3])
est5 = load_station(url[5])

Add station name.

est1 <- est1 %>% mutate(Name = "UPV")
est2 <- est2 %>% mutate(Name = "Moli")
est3 <- est3 %>% mutate(Name = "PistaSilla")
est5 <- est5 %>% mutate(Name = "AvdFrancia")

Tidy data to select a given pollutant.

my_cols = c("Name", "Fecha", "PM2.5.µg.m..")

e1 = est1 %>% select(all_of(my_cols))
e2 = est2 %>% select(all_of(my_cols))
e3 = est3 %>% select(all_of(my_cols))
e5 = est5 %>% select(all_of(my_cols))

Build data set.

pm2.5 = bind_rows(e1, e2, e3, e5)

Wrangling data.

pm2.5_by_year = pm2.5 %>% 
  mutate(Fecha = as.Date(Fecha, format = "%d/ %m/ %Y")) %>% 
  mutate(year = year(Fecha)) %>% 
  select(year, PM2.5.µg.m..)

Look at general distribution.

pm2.5_by_year %>% 
  ggplot(aes(x = factor(year), y = PM2.5.µg.m..)) + 
  geom_boxplot() +
  labs(x = "", y = expression(paste("PM"[2.5], "  ", mu, "g/m"^3)))

Build 2 datasets : 2019, 2020.
For comparative purposes, each dataset will contain only from Jan to June (data available at the time od this writing for 2020).

pm2.5_2019 = pm2.5 %>% 
  mutate(Fecha = as.Date(Fecha, format = "%d/ %m/ %Y")) %>% 
  mutate(day =  day(Fecha), month = month(Fecha), year = year(Fecha)) %>% 
  group_by(Name, day, month, year) %>% 
  filter(year == '2019', month < 7) %>% 
  summarize(avgPM = mean(PM2.5.µg.m..))

pm2.5_2020 = pm2.5 %>% 
  mutate(Fecha = as.Date(Fecha, format = "%d/ %m/ %Y")) %>% 
  mutate(day =  day(Fecha), month = month(Fecha), year = year(Fecha)) %>% 
  group_by(Name, day, month, year) %>% 
  filter(year == '2020') %>% 
  summarize(avgPM = mean(PM2.5.µg.m..))

Make plots

make_plot(pm2.5_2019, 2019)

make_plot(pm2.5_2020, 2020)