第 41 章 探索性数据分析-驯鹿迁移

本章我们分析加拿大哥伦比亚林地驯鹿追踪数据,数据包含了从1988年到2016年期间260只驯鹿,近250000个位置标签。

41.1 驯鹿位置跟踪

大家可以在这里了解数据集的信息,它包含了两个数据集

# devtools::install_github("thebioengineer/tidytuesdayR")
library(tidytuesdayR)

tuesdata <- tidytuesdayR::tt_load("2020-06-23")
# or
# tuesdata <- tidytuesdayR::tt_load(2020, week = 26)
library(tidyverse)
library(lubridate)
library(gganimate)

individuals <- readr::read_csv("./demo_data/caribou/individuals.csv")
locations <- readr::read_csv("./demo_data/caribou/locations.csv")

41.2 驯鹿的身份信息

individuals %>% glimpse()
## Rows: 286
## Columns: 14
## $ animal_id            <chr> "HR_151.510", "GR_C04...
## $ sex                  <chr> "f", "f", "f", "f", "...
## $ life_stage           <chr> NA, NA, NA, NA, NA, N...
## $ pregnant             <lgl> NA, NA, NA, NA, NA, N...
## $ with_calf            <lgl> NA, NA, NA, NA, NA, N...
## $ death_cause          <chr> NA, NA, NA, NA, NA, N...
## $ study_site           <chr> "Hart Ranges", "Graha...
## $ deploy_on_longitude  <dbl> NA, NA, NA, NA, NA, N...
## $ deploy_on_latitude   <dbl> NA, NA, NA, NA, NA, N...
## $ deploy_on_comments   <chr> NA, NA, NA, NA, NA, N...
## $ deploy_off_longitude <dbl> NA, NA, NA, NA, NA, N...
## $ deploy_off_latitude  <dbl> NA, NA, NA, NA, NA, N...
## $ deploy_off_type      <chr> "unknown", "unknown",...
## $ deploy_off_comments  <chr> NA, NA, NA, NA, NA, N...
individuals %>% count(animal_id)
## # A tibble: 260 x 2
##    animal_id     n
##    <chr>     <int>
##  1 BP_car022     1
##  2 BP_car023     1
##  3 BP_car032     1
##  4 BP_car043     1
##  5 BP_car100     1
##  6 BP_car101     1
##  7 BP_car115     1
##  8 BP_car144     1
##  9 BP_car145     1
## 10 GR_C01        2
## # ... with 250 more rows

我们发现有重复id的,怎么办?

individuals %>% janitor::get_dupes(animal_id)
## # A tibble: 50 x 15
##    animal_id dupe_count sex   life_stage pregnant
##    <chr>          <int> <chr> <chr>      <lgl>   
##  1 GR_C01             2 f     <NA>       NA      
##  2 GR_C01             2 f     <NA>       NA      
##  3 GR_C02             2 f     <NA>       NA      
##  4 GR_C02             2 f     <NA>       NA      
##  5 GR_C04             2 f     <NA>       NA      
##  6 GR_C04             2 f     <NA>       NA      
##  7 GR_C05             2 f     <NA>       NA      
##  8 GR_C05             2 f     <NA>       NA      
##  9 GR_C06             2 f     <NA>       NA      
## 10 GR_C06             2 f     <NA>       NA      
## # ... with 40 more rows, and 10 more variables:
## #   with_calf <lgl>, death_cause <chr>,
## #   study_site <chr>, deploy_on_longitude <dbl>,
## #   deploy_on_latitude <dbl>,
## #   deploy_on_comments <chr>,
## #   deploy_off_longitude <dbl>,
## #   deploy_off_latitude <dbl>, deploy_off_type <chr>,
## #   deploy_off_comments <chr>
individuals %>%
  filter(deploy_on_latitude > 50) %>%
  ggplot(aes(x = deploy_on_longitude, y = deploy_on_latitude)) +
  geom_point(aes(color = study_site)) #+

# borders("world", regions = "china")

41.3 性别比例

41.4 每个站点运动最频繁的前10的驯鹿

41.5 驯鹿的活动信息

简单点说,就是哪个驯鹿在什么时间出现在什么地方

locations %>%
  ggplot(aes(x = longitude, y = latitude)) +
  geom_point(aes(color = study_site))

41.6 被追踪最多次的驯鹿的轨迹

top_animal_ids <-
  count(locations, animal_id, sort = TRUE) %>%
  slice(1:10) %>%
  pull(animal_id)


locations %>%
  filter(animal_id %in% top_animal_ids) %>%
  arrange(animal_id, timestamp) %>%
  group_by(animal_id) %>%
  mutate(measurement_n = row_number()) %>%
  ggplot(aes(
    x = longitude,
    y = latitude,
    color = animal_id,
    alpha = measurement_n
  )) +
  geom_point(show.legend = FALSE, size = 1) +
  geom_path(show.legend = FALSE, size = 1) +
  # scale_color_manual(values = ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 10),
    text = element_text(color = "White"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray60", size = 0.05),
    plot.background = element_rect(fill = "gray10"),
    axis.text = element_text(color = "white")
  ) +
  labs(
    x = "\nLongitude", y = "Latitude\n",
    title = "Caribou movement tracking",
    subtitle = "Latitude and longitude locations of the animals with the highest number of measurements\n",
    caption = "Tidy Tuesday: Caribou Location Tracking"
  )

41.7 某一只驯鹿的轨迹

locations %>%
  dplyr::filter(animal_id %in% c("QU_car143")) %>%
  dplyr::arrange(animal_id, timestamp) %>%
  dplyr::group_by(animal_id) %>%
  dplyr::mutate(measurement_n = row_number()) %>%
  ggplot(aes(
    x = longitude,
    y = latitude,
    color = measurement_n,
    alpha = measurement_n
  )) +
  geom_point(show.legend = FALSE, size = 1) +
  geom_path(show.legend = FALSE, size = 1) +
  scale_color_gradient(low = "white", high = "firebrick3") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 20, face = "bold"),
    plot.subtitle = element_text(size = 10),
    text = element_text(color = "White"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "gray60", size = 0.05),
    plot.background = element_rect(fill = "gray10"),
    axis.text = element_text(color = "white")
  ) +
  labs(
    x = "\nLongitude", y = "Latitude\n",
    title = "QU_car143 movement tracking",
    subtitle = "Latitude and longitude locations of the animals with the highest number of measurements\n Ligher colors indicate earlier measurements",
    caption = "Tidy Tuesday: Caribou Location Tracking"
  )

41.8 选择某个驯鹿,查看他的活动轨迹

example_animal <- locations %>%
  dplyr::filter(animal_id == sample(animal_id, 1)) %>%
  dplyr::arrange(timestamp)
example_animal
## # A tibble: 1,788 x 7
##    event_id animal_id study_site season
##       <dbl> <chr>     <chr>      <chr> 
##  1   2.26e9 GR_GC08   Graham     Winter
##  2   2.26e9 GR_GC08   Graham     Winter
##  3   2.26e9 GR_GC08   Graham     Winter
##  4   2.26e9 GR_GC08   Graham     Winter
##  5   2.26e9 GR_GC08   Graham     Winter
##  6   2.26e9 GR_GC08   Graham     Winter
##  7   2.26e9 GR_GC08   Graham     Winter
##  8   2.26e9 GR_GC08   Graham     Winter
##  9   2.26e9 GR_GC08   Graham     Winter
## 10   2.26e9 GR_GC08   Graham     Winter
## # ... with 1,778 more rows, and 3 more variables:
## #   timestamp <dttm>, longitude <dbl>, latitude <dbl>
"2010-03-28 21:00:44" %>% lubridate::as_date()
"2010-03-28 21:00:44" %>% lubridate::as_datetime()
"2010-03-28 21:00:44" %>% lubridate::quarter()
example_animal %>%
  dplyr::mutate(date = lubridate::as_date(timestamp)) %>%
  ggplot(aes(x = longitude, y = latitude, color = date)) +
  geom_path()

example_animal %>%
  dplyr::mutate(quarter = lubridate::quarter(timestamp) %>% as.factor()) %>%
  ggplot(aes(x = longitude, y = latitude, color = quarter)) +
  geom_path() +
  facet_wrap(vars(quarter)) +
  labs(title = "一只小驯鹿到处啊跑")

41.9 季节模式

看看驯鹿夏季和冬季运动模式,这段代码来自gkaramanis

movement <- locations %>%
  filter(study_site != "Hart Ranges") %>%
  mutate(
    season = fct_rev(season),
    longitude = round(longitude, 2),
    latitude = round(latitude, 2)
  ) %>%
  distinct(season, study_site, longitude, latitude)



ggplot(movement) +
  geom_point(aes(longitude, latitude,
    group = study_site,
    colour = study_site
  ), size = 0.1) +
  gghighlight::gghighlight(
    unhighlighted_params = list(colour = "grey70"), use_direct_label = FALSE
  ) +
  scale_colour_manual(
    values = c("#ffe119", "#4363d8", "#f58231", "#e6194B", "#800000", "#000075", "#f032e6", "#3cb44b"),
    breaks = c("Graham", "Scott", "Moberly", "Burnt Pine", "Kennedy", "Quintette", "Narraway")
  ) +
  guides(colour = guide_legend(title = "Herd", override.aes = list(size = 3))) +
  coord_fixed(ratio = 1.5) +
  facet_wrap(vars(season), ncol = 2) +
  # labs(
  #   title = "Migration patterns of Northern Caribou\nin the South Peace of British Columbia",
  #     subtitle = str_wrap("In summer, most caribou migrate towards the central core of the Rocky Mountains where they use alpine and subalpine habitat. The result of this movement to the central core of the Rocky Mountains is that some of the east side herds can overlap with west side herds during the summer.", 100),
  #     caption = str_wrap("Source: Seip DR, Price E (2019) Data from: Science update for the South Peace Northern Caribou (Rangifer tarandus caribou pop. 15) in British Columbia. Movebank Data Repository. https://doi.org/10.5441/001/1.p5bn656k | Graphic: Georgios Karamanis", 70)
  # ) +
  theme_void() +
  theme(
    legend.position = c(0.5, 0.6),
    legend.text = element_text(size = 11, colour = "#F9EED9"),
    legend.title = element_text(size = 16, hjust = 0.5, colour = "#F9EED9"),
    panel.spacing.x = unit(3, "lines"),
    plot.margin = margin(20, 20, 20, 20),
    plot.background = element_rect(fill = "#7A6A4F", colour = NA),
    strip.text = element_text(colour = "#F9EED9", size = 18),
    plot.title = element_text(colour = "white", size = 20, hjust = 0, lineheight = 1),
    plot.subtitle = element_text(colour = "white", size = 12, hjust = 0, lineheight = 1, margin = margin(10, 0, 50, 0)),
    plot.caption = element_text(colour = "grey80", size = 7, hjust = 1, margin = margin(30, 0, 10, 0))
  )

41.10 迁移速度

location_with_speed <- locations %>%
  dplyr::group_by(animal_id) %>%
  dplyr::mutate(
    last_longitude = lag(longitude),
    last_latitude = lag(latitude),
    hours = as.numeric(difftime(timestamp, lag(timestamp), units = "hours")),
    km = geosphere::distHaversine(
      cbind(longitude, latitude), cbind(last_longitude, last_latitude)
    ) / 1000,
    speed = km / hours
  ) %>%
  dplyr::ungroup()

location_with_speed
## # A tibble: 249,450 x 12
##    event_id animal_id study_site season
##       <dbl> <chr>     <chr>      <chr> 
##  1   2.26e9 GR_C01    Graham     Winter
##  2   2.26e9 GR_C01    Graham     Winter
##  3   2.26e9 GR_C01    Graham     Winter
##  4   2.26e9 GR_C01    Graham     Winter
##  5   2.26e9 GR_C01    Graham     Winter
##  6   2.26e9 GR_C01    Graham     Winter
##  7   2.26e9 GR_C01    Graham     Winter
##  8   2.26e9 GR_C01    Graham     Winter
##  9   2.26e9 GR_C01    Graham     Winter
## 10   2.26e9 GR_C01    Graham     Winter
## # ... with 249,440 more rows, and 8 more variables:
## #   timestamp <dttm>, longitude <dbl>, latitude <dbl>,
## #   last_longitude <dbl>, last_latitude <dbl>,
## #   hours <dbl>, km <dbl>, speed <dbl>
location_with_speed %>%
  ggplot(aes(x = speed)) +
  geom_histogram() +
  scale_x_log10()

41.11 动态展示

library(gganimate)

example_animal %>%
  ggplot(aes(x = longitude, y = latitude)) +
  geom_point() +
  transition_time(time = timestamp) +
  shadow_mark(past = TRUE) +
  labs(title = "date is {frame_time}")

41.12 更多

df <- locations %>%
  dplyr::filter(
    study_site == "Graham",
    year(timestamp) == 2002
  ) %>%
  dplyr::group_by(animal_id) %>%
  dplyr::filter(
    as_date(min(timestamp)) == "2002-01-01",
    as_date(max(timestamp)) == "2002-12-31"
  ) %>%
  dplyr::ungroup() %>%
  dplyr::mutate(date = as_date(timestamp)) %>%
  dplyr::group_by(animal_id, date) %>%
  dplyr::summarise(
    longitude_centroid = mean(longitude),
    latitude_centroid = mean(latitude)
  ) %>%
  dplyr::ungroup() %>%
  tidyr::complete(animal_id, date) %>%
  dplyr::arrange(animal_id, date) %>%
  tidyr::fill(longitude_centroid, latitude_centroid, .direction = "down")
p <- df %>%
  ggplot(aes(longitude_centroid, latitude_centroid, colour = animal_id)) +
  geom_point(size = 2) +
  coord_map() +
  theme_void() +
  theme(legend.position = "none") +
  transition_time(time = date) +
  shadow_mark(alpha = 0.2, size = 0.8) +
  ggtitle("Caribou location on {frame_time}")
p