# 第 41 章 探索性数据分析-驯鹿迁移

## 41.1 驯鹿位置跟踪

# devtools::install_github("thebioengineer/tidytuesdayR")
library(tidytuesdayR)

# or
# tuesdata <- tidytuesdayR::tt_load(2020, week = 26)
library(tidyverse)
library(lubridate)
library(gganimate)

locations <- readr::read_csv("./demo_data/caribou/locations.csv")

## 41.2 驯鹿的身份信息

individuals %>% glimpse()
## Rows: 286
## Columns: 14
## $animal_id <chr> "HR_151.510", "GR_C04... ##$ sex                  <chr> "f", "f", "f", "f", "...
## $life_stage <chr> NA, NA, NA, NA, NA, N... ##$ pregnant             <lgl> NA, NA, NA, NA, NA, N...
## $with_calf <lgl> NA, NA, NA, NA, NA, N... ##$ death_cause          <chr> NA, NA, NA, NA, NA, N...
## $study_site <chr> "Hart Ranges", "Graha... ##$ deploy_on_longitude  <dbl> NA, NA, NA, NA, NA, N...
## $deploy_on_latitude <dbl> NA, NA, NA, NA, NA, N... ##$ deploy_on_comments   <chr> NA, NA, NA, NA, NA, N...
## $deploy_off_longitude <dbl> NA, NA, NA, NA, NA, N... ##$ deploy_off_latitude  <dbl> NA, NA, NA, NA, NA, N...
## $deploy_off_type <chr> "unknown", "unknown",... ##$ deploy_off_comments  <chr> NA, NA, NA, NA, NA, N...
individuals %>% count(animal_id)
## # A tibble: 260 x 2
##    animal_id     n
##    <chr>     <int>
##  1 BP_car022     1
##  2 BP_car023     1
##  3 BP_car032     1
##  4 BP_car043     1
##  5 BP_car100     1
##  6 BP_car101     1
##  7 BP_car115     1
##  8 BP_car144     1
##  9 BP_car145     1
## 10 GR_C01        2
## # ... with 250 more rows

individuals %>% janitor::get_dupes(animal_id)
## # A tibble: 50 x 15
##    animal_id dupe_count sex   life_stage pregnant
##    <chr>          <int> <chr> <chr>      <lgl>
##  1 GR_C01             2 f     <NA>       NA
##  2 GR_C01             2 f     <NA>       NA
##  3 GR_C02             2 f     <NA>       NA
##  4 GR_C02             2 f     <NA>       NA
##  5 GR_C04             2 f     <NA>       NA
##  6 GR_C04             2 f     <NA>       NA
##  7 GR_C05             2 f     <NA>       NA
##  8 GR_C05             2 f     <NA>       NA
##  9 GR_C06             2 f     <NA>       NA
## 10 GR_C06             2 f     <NA>       NA
## # ... with 40 more rows, and 10 more variables:
## #   with_calf <lgl>, death_cause <chr>,
## #   study_site <chr>, deploy_on_longitude <dbl>,
## #   deploy_on_latitude <dbl>,
## #   deploy_off_longitude <dbl>,
## #   deploy_off_latitude <dbl>, deploy_off_type <chr>,
## #   deploy_off_comments <chr>
individuals %>%
filter(deploy_on_latitude > 50) %>%
ggplot(aes(x = deploy_on_longitude, y = deploy_on_latitude)) +
geom_point(aes(color = study_site)) #+

# borders("world", regions = "china")

## 41.5 驯鹿的活动信息

locations %>%
ggplot(aes(x = longitude, y = latitude)) +
geom_point(aes(color = study_site))

## 41.6 被追踪最多次的驯鹿的轨迹

top_animal_ids <-
count(locations, animal_id, sort = TRUE) %>%
slice(1:10) %>%
pull(animal_id)

locations %>%
filter(animal_id %in% top_animal_ids) %>%
arrange(animal_id, timestamp) %>%
group_by(animal_id) %>%
mutate(measurement_n = row_number()) %>%
ggplot(aes(
x = longitude,
y = latitude,
color = animal_id,
alpha = measurement_n
)) +
geom_point(show.legend = FALSE, size = 1) +
geom_path(show.legend = FALSE, size = 1) +
# scale_color_manual(values = ) +
theme_minimal() +
theme(
plot.title = element_text(size = 20, face = "bold"),
plot.subtitle = element_text(size = 10),
text = element_text(color = "White"),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray60", size = 0.05),
plot.background = element_rect(fill = "gray10"),
axis.text = element_text(color = "white")
) +
labs(
x = "\nLongitude", y = "Latitude\n",
title = "Caribou movement tracking",
subtitle = "Latitude and longitude locations of the animals with the highest number of measurements\n",
caption = "Tidy Tuesday: Caribou Location Tracking"
)

## 41.7 某一只驯鹿的轨迹

locations %>%
dplyr::filter(animal_id %in% c("QU_car143")) %>%
dplyr::arrange(animal_id, timestamp) %>%
dplyr::group_by(animal_id) %>%
dplyr::mutate(measurement_n = row_number()) %>%
ggplot(aes(
x = longitude,
y = latitude,
color = measurement_n,
alpha = measurement_n
)) +
geom_point(show.legend = FALSE, size = 1) +
geom_path(show.legend = FALSE, size = 1) +
scale_color_gradient(low = "white", high = "firebrick3") +
theme_minimal() +
theme(
plot.title = element_text(size = 20, face = "bold"),
plot.subtitle = element_text(size = 10),
text = element_text(color = "White"),
panel.grid.minor = element_blank(),
panel.grid.major = element_line(color = "gray60", size = 0.05),
plot.background = element_rect(fill = "gray10"),
axis.text = element_text(color = "white")
) +
labs(
x = "\nLongitude", y = "Latitude\n",
title = "QU_car143 movement tracking",
subtitle = "Latitude and longitude locations of the animals with the highest number of measurements\n Ligher colors indicate earlier measurements",
caption = "Tidy Tuesday: Caribou Location Tracking"
)

## 41.8 选择某个驯鹿，查看他的活动轨迹

example_animal <- locations %>%
dplyr::filter(animal_id == sample(animal_id, 1)) %>%
dplyr::arrange(timestamp)
example_animal
## # A tibble: 1,788 x 7
##    event_id animal_id study_site season
##       <dbl> <chr>     <chr>      <chr>
##  1   2.26e9 GR_GC08   Graham     Winter
##  2   2.26e9 GR_GC08   Graham     Winter
##  3   2.26e9 GR_GC08   Graham     Winter
##  4   2.26e9 GR_GC08   Graham     Winter
##  5   2.26e9 GR_GC08   Graham     Winter
##  6   2.26e9 GR_GC08   Graham     Winter
##  7   2.26e9 GR_GC08   Graham     Winter
##  8   2.26e9 GR_GC08   Graham     Winter
##  9   2.26e9 GR_GC08   Graham     Winter
## 10   2.26e9 GR_GC08   Graham     Winter
## # ... with 1,778 more rows, and 3 more variables:
## #   timestamp <dttm>, longitude <dbl>, latitude <dbl>
"2010-03-28 21:00:44" %>% lubridate::as_date()
"2010-03-28 21:00:44" %>% lubridate::as_datetime()
"2010-03-28 21:00:44" %>% lubridate::quarter()
example_animal %>%
dplyr::mutate(date = lubridate::as_date(timestamp)) %>%
ggplot(aes(x = longitude, y = latitude, color = date)) +
geom_path()

example_animal %>%
dplyr::mutate(quarter = lubridate::quarter(timestamp) %>% as.factor()) %>%
ggplot(aes(x = longitude, y = latitude, color = quarter)) +
geom_path() +
facet_wrap(vars(quarter)) +
labs(title = "一只小驯鹿到处啊跑")

## 41.9 季节模式

movement <- locations %>%
filter(study_site != "Hart Ranges") %>%
mutate(
season = fct_rev(season),
longitude = round(longitude, 2),
latitude = round(latitude, 2)
) %>%
distinct(season, study_site, longitude, latitude)

ggplot(movement) +
geom_point(aes(longitude, latitude,
group = study_site,
colour = study_site
), size = 0.1) +
gghighlight::gghighlight(
unhighlighted_params = list(colour = "grey70"), use_direct_label = FALSE
) +
scale_colour_manual(
values = c("#ffe119", "#4363d8", "#f58231", "#e6194B", "#800000", "#000075", "#f032e6", "#3cb44b"),
breaks = c("Graham", "Scott", "Moberly", "Burnt Pine", "Kennedy", "Quintette", "Narraway")
) +
guides(colour = guide_legend(title = "Herd", override.aes = list(size = 3))) +
coord_fixed(ratio = 1.5) +
facet_wrap(vars(season), ncol = 2) +
# labs(
#   title = "Migration patterns of Northern Caribou\nin the South Peace of British Columbia",
#     subtitle = str_wrap("In summer, most caribou migrate towards the central core of the Rocky Mountains where they use alpine and subalpine habitat. The result of this movement to the central core of the Rocky Mountains is that some of the east side herds can overlap with west side herds during the summer.", 100),
#     caption = str_wrap("Source: Seip DR, Price E (2019) Data from: Science update for the South Peace Northern Caribou (Rangifer tarandus caribou pop. 15) in British Columbia. Movebank Data Repository. https://doi.org/10.5441/001/1.p5bn656k | Graphic: Georgios Karamanis", 70)
# ) +
theme_void() +
theme(
legend.position = c(0.5, 0.6),
legend.text = element_text(size = 11, colour = "#F9EED9"),
legend.title = element_text(size = 16, hjust = 0.5, colour = "#F9EED9"),
panel.spacing.x = unit(3, "lines"),
plot.margin = margin(20, 20, 20, 20),
plot.background = element_rect(fill = "#7A6A4F", colour = NA),
strip.text = element_text(colour = "#F9EED9", size = 18),
plot.title = element_text(colour = "white", size = 20, hjust = 0, lineheight = 1),
plot.subtitle = element_text(colour = "white", size = 12, hjust = 0, lineheight = 1, margin = margin(10, 0, 50, 0)),
plot.caption = element_text(colour = "grey80", size = 7, hjust = 1, margin = margin(30, 0, 10, 0))
)

## 41.10 迁移速度

location_with_speed <- locations %>%
dplyr::group_by(animal_id) %>%
dplyr::mutate(
last_longitude = lag(longitude),
last_latitude = lag(latitude),
hours = as.numeric(difftime(timestamp, lag(timestamp), units = "hours")),
km = geosphere::distHaversine(
cbind(longitude, latitude), cbind(last_longitude, last_latitude)
) / 1000,
speed = km / hours
) %>%
dplyr::ungroup()

location_with_speed
## # A tibble: 249,450 x 12
##    event_id animal_id study_site season
##       <dbl> <chr>     <chr>      <chr>
##  1   2.26e9 GR_C01    Graham     Winter
##  2   2.26e9 GR_C01    Graham     Winter
##  3   2.26e9 GR_C01    Graham     Winter
##  4   2.26e9 GR_C01    Graham     Winter
##  5   2.26e9 GR_C01    Graham     Winter
##  6   2.26e9 GR_C01    Graham     Winter
##  7   2.26e9 GR_C01    Graham     Winter
##  8   2.26e9 GR_C01    Graham     Winter
##  9   2.26e9 GR_C01    Graham     Winter
## 10   2.26e9 GR_C01    Graham     Winter
## # ... with 249,440 more rows, and 8 more variables:
## #   timestamp <dttm>, longitude <dbl>, latitude <dbl>,
## #   last_longitude <dbl>, last_latitude <dbl>,
## #   hours <dbl>, km <dbl>, speed <dbl>
location_with_speed %>%
ggplot(aes(x = speed)) +
geom_histogram() +
scale_x_log10()

## 41.11 动态展示

library(gganimate)

example_animal %>%
ggplot(aes(x = longitude, y = latitude)) +
geom_point() +
transition_time(time = timestamp) +
labs(title = "date is {frame_time}")

## 41.12 更多

df <- locations %>%
dplyr::filter(
study_site == "Graham",
year(timestamp) == 2002
) %>%
dplyr::group_by(animal_id) %>%
dplyr::filter(
as_date(min(timestamp)) == "2002-01-01",
as_date(max(timestamp)) == "2002-12-31"
) %>%
dplyr::ungroup() %>%
dplyr::mutate(date = as_date(timestamp)) %>%
dplyr::group_by(animal_id, date) %>%
dplyr::summarise(
longitude_centroid = mean(longitude),
latitude_centroid = mean(latitude)
) %>%
dplyr::ungroup() %>%
tidyr::complete(animal_id, date) %>%
dplyr::arrange(animal_id, date) %>%
tidyr::fill(longitude_centroid, latitude_centroid, .direction = "down")
p <- df %>%
ggplot(aes(longitude_centroid, latitude_centroid, colour = animal_id)) +
geom_point(size = 2) +
coord_map() +
theme_void() +
theme(legend.position = "none") +
transition_time(time = date) +
shadow_mark(alpha = 0.2, size = 0.8) +
ggtitle("Caribou location on {frame_time}")
p