For this analysis, we will use the Open source Data by the Ayuntamiento de Valencia.
library(dplyr)
library(lubridate)
library(ggplot2)
library(RColorBrewer)
First, we create a function to read the csv tables containing the air pollution data in Valencia (Spain).
load_station <- function(url) {
read.csv(url, header=TRUE, stringsAsFactors=FALSE,
fileEncoding="latin1", sep = ";")
}
We create another function to make plots and visualize pollution level in the city.
make_plot <- function(dataset, title) {
dataset %>%
ggplot(aes(x = day, y = Name)) +
geom_raster(aes(fill = avgPM)) +
scale_fill_gradientn(colours = rev(brewer.pal(10, name="PiYG")),
values = scales::rescale(c(0,40,120)),
na.value = "grey50" ) +
facet_grid(rows = vars(month)) +
labs(x = "Day",
y = "Cabin station",
title = expression(paste("Mean PM"[2.5], " ", mu, "g/m"^3)),
subtitle = paste(paste0(title, ","), "Valencia (Spain)"))
}
Next, we create a vector with the url for each cabin station in Valencia.
url = c("http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/1A.csv",
"http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/3A.csv",
"http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/4A.csv",
"http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/5A.csv",
"http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/6A.csv",
"http://mapas.valencia.es/WebsMunicipales/uploads/atmosferica/7A.csv")
Load air pollution data.
est1 = load_station(url[1])
est2 = load_station(url[2])
est3 = load_station(url[3])
est5 = load_station(url[5])
Add station name.
est1 <- est1 %>% mutate(Name = "UPV")
est2 <- est2 %>% mutate(Name = "Moli")
est3 <- est3 %>% mutate(Name = "PistaSilla")
est5 <- est5 %>% mutate(Name = "AvdFrancia")
Tidy data to select a given pollutant.
my_cols = c("Name", "Fecha", "PM2.5.µg.m..")
e1 = est1 %>% select(all_of(my_cols))
e2 = est2 %>% select(all_of(my_cols))
e3 = est3 %>% select(all_of(my_cols))
e5 = est5 %>% select(all_of(my_cols))
Build data set.
pm2.5 = bind_rows(e1, e2, e3, e5)
Wrangling data.
pm2.5_by_year = pm2.5 %>%
mutate(Fecha = as.Date(Fecha, format = "%d/ %m/ %Y")) %>%
mutate(year = year(Fecha)) %>%
select(year, PM2.5.µg.m..)
Look at general distribution.
pm2.5_by_year %>%
ggplot(aes(x = factor(year), y = PM2.5.µg.m..)) +
geom_boxplot() +
labs(x = "", y = expression(paste("PM"[2.5], " ", mu, "g/m"^3)))
Build 2 datasets : 2019, 2020.
For comparative purposes, each dataset will contain only from Jan to June (data available at the time od this writing for 2020).
pm2.5_2019 = pm2.5 %>%
mutate(Fecha = as.Date(Fecha, format = "%d/ %m/ %Y")) %>%
mutate(day = day(Fecha), month = month(Fecha), year = year(Fecha)) %>%
group_by(Name, day, month, year) %>%
filter(year == '2019', month < 7) %>%
summarize(avgPM = mean(PM2.5.µg.m..))
pm2.5_2020 = pm2.5 %>%
mutate(Fecha = as.Date(Fecha, format = "%d/ %m/ %Y")) %>%
mutate(day = day(Fecha), month = month(Fecha), year = year(Fecha)) %>%
group_by(Name, day, month, year) %>%
filter(year == '2020') %>%
summarize(avgPM = mean(PM2.5.µg.m..))
Make plots
make_plot(pm2.5_2019, 2019)
make_plot(pm2.5_2020, 2020)