Chapter 15 On your own
Congratulations! You made it to the end of the planned content in one peice.
You now have the opportunity to apply some of the skills you have learnt to a completely new database. Cut and paste your code from previous chapters to check the data, write the analysis date frames and explore the dataset.
I will help you read in the data - and correct a mistake I made. Then you are on your own!
15.1 The new dataset
15.1.1 Data cleaning and creation
# Load your data
<- read.csv("data/raw_data/your_data/proj.csv", header=T)
pro $project_id <- pro$project_name
pro
<- read.csv("data/raw_data/your_data/img.csv", header=T)
img $project_id <- pro$project_name
img
<- read.csv("data/raw_data/your_data/dep.csv", header=T)
dep $project_id <- pro$project_name
dep
<- read.csv("data/raw_data/your_data/cam.csv", header=T)
cam $project_id <- pro$project_name cam
Load your packages:
# A list of the required packages
<- c("activity", "corrplot", "cowplot", "dplyr", "elevatr", "gfcanalysis", "ggplot2", "gridExtra", "iNEXT", "kableExtra", "Hmsc", "leaflet", "lme4", "lubridate", "magrittr", "MCMCvis", "MODISTools", "osmdata", "pals", "plotly", "remotes", "rmarkdown", "sf", "spOccupancy", "stars", "stringr", "terra", "tibble", "tidyr", "unmarked", "viridis", "jtools", "vegan", "MuMIn")
list.of.packages
# A check to see which ones you have and which are missing
<- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
new.packages
# Code which tells R to install the missing packages
if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, require, character.only = TRUE)
Away you go!
We have included the relevant code blocks below:
# start dates
$start_date <- ymd(dep$start_date)
dep
# end dates
$end_date <- ymd(dep$end_date)
dep
# camera days
$days <- interval(dep$start_date, dep$end_date)/ddays(1)
dep
# Image timestamp
$timestamp <- ymd_hms(img$timestamp)
img
# First, set a single categorical variable of interest from station covariates for summary graphs. If you do not have an appropriate category use "project_id".
<- "feature_type"
category
# We first convert this category to a factor with discrete levels
<- factor(dep[,category])
dep[,category] # then use the turbo() function to assign each level a color
<- turbo(length(levels(dep[,category])))
col.cat # then we apply it to the dataframe
$colours <- col.cat[dep[,category]]
dep
<- leaflet() %>%
m addProviderTiles(providers$Esri.WorldImagery, group="Satellite") %>%
addTiles(group="Base") %>% # Include a basemap option too
addCircleMarkers(lng=dep$longitude, lat=dep$latitude,
# Co lour the markers depending on the 'feature type'
color=dep$colours,
# Add a popup of the placename and feature_type together
popup=paste(dep$placename, dep[,category])) %>%
# Add a legend explaining what is going on
addLegend("bottomleft", colors = col.cat, labels = levels(dep[,category]),
title = category,
labFormat = labelFormat(prefix = "$"),
opacity = 1) %>%
# add a layer control box to toggle between the layers
addLayersControl(
baseGroups = c("Satellite", "Base"))
m
QUESTION What is the survey design? Hint: zoom in.
# Distance between traps
<- dep %>%
camera_locs ::select(placename, latitude, longitude) %>%
dplyrunique() %>% # remove duplicated rows (rows where the placename and coordinates match)
st_as_sf(coords = c("longitude", "latitude"), crs = "+proj=longlat") # Convert to `sf` format
# distance matrix for all cameras
<- st_distance(camera_locs) %>%
camera_dist as.dist() %>%
::dist_setNames(as.character(camera_locs$placename)) %>%
usedistas.matrix()
# convert to pairwise list
<- t(combn(colnames(camera_dist), 2))
camera_dist_list <- data.frame(camera_dist_list, dist = camera_dist[camera_dist_list]) %>%
camera_dist_list arrange(dist) # sort descending
# Duplicate and flip the stations so each one is represented on the left hand side
<- rbind(camera_dist_list, camera_dist_list[,c(2,1,3)])
camera_dist_list
# keep just the closest camera for each location
<- camera_dist_list %>%
camera_dist_list group_by(X1) %>%
slice(which.min(dist))
summary(camera_dist_list$dist)
Do you have anything to be worried about here?
# Do all deployments have images, and images have deployments?
table(unique(img$placename) %in% unique(dep$placename))
table(unique(dep$placename) %in% unique(img$placename))
# Activity check
# Correct the brken deploymenrt
#dep[dep$deployment_id=="LAC065_C187_030222", ]$end_date <- "2022-05-22"
# Call the plot
<- plot_ly()
p
# We want a separate row for each 'placename' - so lets turn it into a factor
$placename <- as.factor(dep$placename)
dep
# loop through each place name
for(i in seq_along(levels(dep$placename)))
{#Subset the data to just that placename
<- dep[dep$placename==levels(dep$placename)[i],]
tmp # Order by date
<- tmp[order(tmp$start_date),]
tmp # Loop through each deployment at that placename
for(j in 1:nrow(tmp))
{# Add a line to 'p'
<- add_trace(p,
p #Use the start and end date as x coordinates
x = c(tmp$start_date[j], tmp$end_date[j]),
#Use the counter for the y coordinates
y = c(i,i),
# State the type of chart
type="scatter",
# make a line that also has points
mode = "lines+markers",
# Add the deployment ID as hover text
hovertext=tmp$deployment_id[j],
# Color it all black
color=I("black"),
# Suppress the legend
showlegend = FALSE)
}
}# Add a categorical y axis
<- p %>% layout(yaxis = list(
p
ticktext = as.list(levels(dep$placename)),
tickvals = as.list(1:length(levels(dep$placename))),
tickmode = "array"))
p
QUESTION What do you think about the camera deployments?
### DETCTION CHECK
#Camera activity plot
# Make a separate plot for each 20 stations For each 20 stations
# To do this make a plot dataframe
<- data.frame("deployment_id"=unique(dep$deployment_id), "plot_group"=ceiling(1:length(unique(dep$deployment_id))/20))
tmp
<- left_join(dep,tmp, by="deployment_id")
dep_tmp
for(i in 1:max(dep_tmp$plot_group))
{ # Call the plot
<- plot_ly()
p
#Subset the data to just that placename
<- dep_tmp[dep_tmp$plot_group==i,]
tmp # Order by placename
<- tmp[order(tmp$placename),]
tmp
# Loop through each deployment at that placename
for(j in 1:nrow(tmp))
{#Subset the image data
<- img[img$deployment_id==tmp$deployment_id[j],]
tmp_img
if(nrow(tmp_img)>0)
{
<- add_trace(p,
p #Use the start and end date as x coordinates
x = c(tmp_img$timestamp),
#Use the counter for the y coordinates
y = rep(j, nrow(tmp_img)),
# State the type of chart
type="scatter",
# make a line that also has points
mode = "markers",
# Add the deployment ID as hover text
hovertext=paste(tmp_img$genus,tmp_img$species),
# Color it all black
marker = list(color = "red"),
# Suppress the legend
showlegend = FALSE)
}
# Add a line to 'p'
<- add_trace(p,
p #Use the start and end date as x coordinates
x = c(tmp$start_date[j], tmp$end_date[j]),
#Use the counter for the y coordinates
y = c(j,j),
# State the type of chart
type="scatter",
# make a line that also has points
mode = "lines",
# Add the deployment ID as hover text
hovertext=tmp$deployment_id[j],
# Color it all black
color=I("black"),
# Suppress the legend
showlegend = FALSE)
}# Add custom y axis labels
<- p %>% layout(yaxis = list(
p
ticktext = as.list(tmp$deployment_id),
tickvals = as.list(1:nrow(tmp)),
tickmode = "array"))
print(p)
}
QUESTION Do you need to make any edits? Note - there are three plot windows to check.
# Create a species list
# add an sp column to the img dataframe - remember the genus and species columns are not pasted together yet
$sp <- paste(img$genus, img$species, sep=".")
img### Species lists
# First define vector of the headings you want to see (we will use this trick a lot later on)
<- c("class", "order", "family", "genus", "species", "common_name")
taxonomy_headings
# Subset the image data to just those columns
<- img[,colnames(img)%in% taxonomy_headings]
tmp# Remove duplicates
<- tmp[duplicated(tmp)==F,]
tmp
# Create an ordered species list
<- tmp[order(tmp$class, tmp$order, tmp$family, tmp$genus, tmp$species),]
sp_list $sp <- paste(sp_list$genus, sp_list$species, sep=".")
sp_list
write.csv(sp_list, paste0("data/raw_data/",pro$project_id[1],"_raw_species_list.csv"))
QUESTION Are there any classifications you will remove when we create our analysis dataframes?
# Diel activity check
# First lets convert our timestamp to decimal hours
$hours <- hour(img$timestamp) + minute(img$timestamp)/60 + second(img$timestamp)/(60*60)
img
# Count all of the captures
<- img %>% group_by(common_name) %>% summarize(count=n())
tmp
<- list(categoryorder = "array",
yform categoryarray = tmp$common_name)
<- plot_ly(x = img$hours, y = img$common_name,type="scatter",
fig height=1000, text=img$deployment_id, hoverinfo='text',
mode = 'markers',
marker = list(size = 5,
color = 'rgba(50, 100, 255, .2)',
line = list(color = 'rgba(0, 0, 0, 0)',
width = 0))) %>%
layout(yaxis = yform)
fig
# Remove the column
$hours <- NULL img
QUESTION Are there any images you would like to check?
# Create your dataframe folder
dir.create("data/processed_data")
# Filter your species
# Remove observations without animals detected, where we don't know the species, and non-mammals
<- img %>% filter(is_blank==0, # Remove the blanks
img_sub is.na(img$species)==FALSE, # Remove classifications which don't have species
=="Mammalia", # Subset to mammals
class!="sapiens") # Subset to anything that isn't human
species
table(img_sub$sp)
QUESTION Are there any other classifications you would like to remove?
# Create your dataframe folder
# Filter your species
# Remove observations without animals detected, where we don't know the species, and non-mammals
<- img_sub %>% filter(sp!=".") # Subset to anything that isn't human
img_sub
table(img_sub$sp)
##########################################
# Create your daily lookup
# Remove any deployments without end dates
<- dep[is.na(dep$end_date)==F,]
tmp
# Create an empty list to store our days
<- list()
daily_lookup
# Loop through the deployment dataframe and create a row for every day the camera is active
for(i in 1:nrow(tmp))
{if(ymd(tmp$start_date[i])!=ymd(tmp$end_date[i]))
{<- data.frame("date"=seq(ymd(tmp$start_date[i]), ymd(tmp$end_date[i]), by="days"), "placename"=tmp$placename[i])
daily_lookup[[i]]
}
}
# Merge the lists into a dataframe
<- bind_rows(daily_lookup)
row_lookup
# Remove duplicates - when start and end days are the same for successive deployments
<- row_lookup[duplicated(row_lookup)==F,]
row_lookup
###################################
# Create your independent detections
<- 30
independent
# Check for a `group_size` variable?
table(img_sub$group_size)
# Check for a 'number_of_objects' variable
table(img_sub$number_of_objects)
QUESTION WHich variable will you use for your animal_count
?
$animal_count <- ###INSERT YOUR SELECTION HERE### img_sub
# Create your independent data
<- img_sub %>%
img_tmp arrange(deployment_id) %>% # Order by deployment_id
group_by(deployment_id, sp) %>% # Group species together
mutate(duration = int_length(timestamp %--% lag(timestamp))) # Calculate the gap bet
library(stringr)
# Give a random value to all cells
$event_id <- 9999
img_tmp
# Create a counter
<- 1
counter
# Make a unique code that has one more zero than rows in your dataframe
<- as.numeric(paste0(nrow(img_sub),0))
num_code
# Loop through img_tmp - if gap is greater than the threshold -> give it a new event ID
for (i in 2:nrow(img_tmp)) {
$event_id[i-1] <- paste0("E", str_pad(counter, nchar(num_code), pad = "0"))
img_tmp
if(is.na(img_tmp$duration[i]) | abs(img_tmp$duration[i]) > (independent * 60))
{<- counter + 1
counter
}
}
# Update the information for the last row - the loop above always updates the previous row... leaving the last row unchanged
# group ID for the last row
if(img_tmp$duration[nrow(img_tmp)] < (independent * 60)|
is.na(img_tmp$duration[nrow(img_tmp)])){
$event_id[nrow(img_tmp)] <- img_tmp$event_id[nrow(img_tmp)-1]
img_tmpelse{
} <- counter + 1
counter $event_id[nrow(img_tmp)] <- paste0("E", str_pad(counter, nchar(num_code), pad = "0"))
img_tmp
}
# remove the duration column
$duration <- NULL
img_tmp
# find out the last and the first of the time in the group
<- img_tmp %>% group_by(event_id) %>% top_n(1,timestamp) %>% dplyr::select(event_id, timestamp)
top <- img_tmp %>% group_by(event_id) %>% top_n(-1,timestamp) %>% dplyr::select(event_id, timestamp)
bot names(bot)[2] <- c("timestamp_end")
<- img_tmp %>% group_by(event_id) %>% summarise(event_observations=n()) # number of images in the event
img_num <- img_tmp %>% group_by(event_id) %>% summarise(event_groupsize=max(animal_count))
event_grp
# calculate the duration and add the other elements
<- top %>% left_join(bot, by="event_id") %>%
diff mutate(event_duration=abs(int_length(timestamp %--% timestamp_end))) %>%
left_join(event_grp, by="event_id")%>%
left_join(img_num, by="event_id")
# Remove columns you don't need
$timestamp <-NULL
diff$timestamp_end <-NULL
diff<- diff[duplicated(diff)==F,]
diff # Merge the img_tmp with the event data
<-
img_tmp left_join(img_tmp,diff,by="event_id")
# Remove duplicates
<- img_tmp[duplicated(img_tmp$event_id)==F,]
ind_dat
# Make a unique code for ever day and deployment where cameras were functioning
<- paste(row_lookup$date, row_lookup$placename)
tmp
#Subset ind_dat to data that matches the unique codes
<- ind_dat[paste(substr(ind_dat$timestamp,1,10), ind_dat$placename) %in% tmp, ]
ind_dat
$sp <- as.factor(ind_dat$sp)
ind_dat
# Export your data frames
write.csv(ind_dat, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_detections.csv"), row.names = F)
# also write the cleaned all detections file (some activity analyses require it)
write.csv(img_tmp, paste0("data/processed_data/",ind_dat$project_id[1], "_raw_detections.csv"), row.names = F)
write.csv(row_lookup, paste0("data/processed_data/",ind_dat$project_id[1], "_daily_lookup.csv"), row.names = F)
#Subset the columns
<- dep[, c("project_id", "placename", "longitude", "latitude", "feature_type")]
tmp # Remove duplicated rows
<- tmp[duplicated(tmp)==F,]
tmp# write the file
write.csv(tmp, paste0("data/processed_data/",ind_dat$project_id[1], "_camera_locations.csv"), row.names = F)
<- sp_list[sp_list$sp %in% ind_dat$sp,]
tmp
write.csv(tmp, paste0("data/processed_data/",ind_dat$project_id[1], "_species_list.csv"), row.names = F)
In the next step, we will won’t create the daily dataframe as it takes too long!
# Total counts
# Station / Month / deport / Species
<- row_lookup
tmp
# Calculate the number of days at each site
<- tmp %>%
total_obs group_by(placename) %>%
summarise(days = n())
# Convert to a data frame
<- as.data.frame(total_obs)
total_obs
# Add columns for each species
levels(ind_dat$sp)] <- NA
total_obs[, # Duplicate for counts
<- total_obs
total_count # Test counter
<-1
i # For each station, count the number of individuals/observations
for(i in 1:nrow(total_obs))
{<- ind_dat[ind_dat$placename==total_obs$placename[i],]
tmp
<- tmp %>% group_by(sp, .drop=F) %>% summarise(obs=n(), count=sum(animal_count))
tmp_stats
as.character(tmp_stats$sp)] <- tmp_stats$obs
total_obs[i,as.character(tmp_stats$sp)] <- tmp_stats$count
total_count[i,
}
# Save them
write.csv(total_obs, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_total_observations.csv"), row.names = F)
write.csv(total_count, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_total_counts.csv"), row.names = F)
# Monthly counts
# Station / Month / days / Covariates / Species
<- row_lookup
tmp # Simplify the date to monthly
$date <- substr(tmp$date,1,7)
tmp
# Calculate the number of days in each month
<- tmp %>%
mon_obs group_by(placename,date ) %>%
summarise(days = n())
# Convert to a data frame
<- as.data.frame(mon_obs)
mon_obs
levels(ind_dat$sp)] <- NA
mon_obs[, <- mon_obs
mon_count # For each month, count the number of individuals/observations
for(i in 1:nrow(mon_obs))
{<- ind_dat[ind_dat$placename==mon_obs$placename[i] & substr(ind_dat$timestamp,1,7)== mon_obs$date[i],]
tmp
<- tmp %>% group_by(sp, .drop=F) %>% summarise(obs=n(), count=sum(animal_count))
tmp_stats
as.character(tmp_stats$sp)] <- tmp_stats$obs
mon_obs[i,as.character(tmp_stats$sp)] <- tmp_stats$count
mon_count[i,
}
write.csv(mon_obs, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_monthly_observations.csv"), row.names = F)
write.csv(mon_count, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_monthly_counts.csv"), row.names = F)
### Weekly
# Weekly format
# Station / Month / days / Covariates / Species
<- row_lookup
tmp # Simplify the date to year-week
$date <- strftime(tmp$date, format = "%Y-W%U")
tmp# The way this is coded is the counter W01 starts at the first Sunday of the year, everything before that is W00. Weeks do not roll across years.
# Calculate the number of days in each week
<- tmp %>%
week_obs group_by(placename,date ) %>%
summarise(days = n())
# Convert to a data frame
<- as.data.frame(week_obs)
week_obs
# Add species columns
levels(ind_dat$sp)] <- NA
week_obs[,
# Duplicate for counts
<- week_obs
week_count
# For each week, count the number of individuals/observations
for(i in 1:nrow(week_obs))
{<- ind_dat[ind_dat$placename==week_obs$placename[i] & strftime(ind_dat$timestamp, format = "%Y-W%U")== week_obs$date[i],]
tmp
<- tmp %>% group_by(sp, .drop=F) %>% summarise(obs=n(), count=sum(animal_count))
tmp_stats
as.character(tmp_stats$sp)] <- tmp_stats$obs
week_obs[i,as.character(tmp_stats$sp)] <- tmp_stats$count
week_count[i,
}
write.csv(week_obs, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_weekly_observations.csv"), row.names = F)
write.csv(week_count, paste0("data/processed_data/",ind_dat$project_id[1], "_",independent ,"min_independent_weekly_counts.csv"), row.names = F)
# Do an observations check
<- cbind(data.frame("Time"=c("Total", "Monthly", "Weekly")),
tmp rbind(colSums(total_obs[,2:ncol(total_obs)]),
colSums(mon_obs[,3:ncol(mon_obs)]),
colSums(week_obs[,3:ncol(week_obs)])))
%>%
tmp kbl() %>%
kable_styling(full_width = T) %>%
column_spec(1, bold = T, border_right = T)%>%
::scroll_box(width = "100%") kableExtra
15.1.2 Covariates
# LOad the packages
library(kableExtra);library(dplyr); library(sf); library(MODISTools); library(lubridate); library(corrplot); library(traitdata); library(terra); library(osmdata); library(elevatr)
# Start by reading in your species list
<- read.csv(paste0("data/processed_data/",pro$project_id[1],"_species_list.csv"), header=T)
sp_summary <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_camera_locations.csv"), header=T)
locs
# Species traits
library(traitdata)
data("elton_mammals")
$sp <- paste0(elton_mammals$Genus,"." ,elton_mammals$Species)
elton_mammals
<- elton_mammals[, c("sp","BodyMass.Value", "Activity.Nocturnal", "Activity.Crepuscular", "Activity.Diurnal")]
tmp
# Lets rename the columns to make them more usable
<- tmp %>% rename(
tmp mass_g = BodyMass.Value,
act_noct = Activity.Nocturnal,
act_crep = Activity.Crepuscular,
act_diur = Activity.Diurnal)
<- left_join(sp_summary, tmp) sp_summary
QUESTION Are there any missing values you need to fill in?
write.csv(sp_summary, paste0("data/processed_data/", locs$project_id[1],"_species_list.csv"), row.names = F)
# Spatial covariates
<- st_as_sf(locs, # We specify the dataframe
locs_sf coords=c("longitude", "latitude"), # The XY coordinates
crs=4326) # And the projection code
library(elevatr)
<- get_elev_point(locs_sf,
locs_sf src="aws", #Amazon Web Service Terrain Tiles - available globally
z = 12) # z specifies the zoom level, the lower the value the faster the code runs, but the coarser the elevation values are
boxplot(locs_sf$elevation)
library(osmdata)
<- st_bbox(st_buffer(locs_sf, 10000)) # Units are in meters
aoi
<- opq(aoi) %>%
water add_osm_feature(key="water") %>%
osmdata_sf()
# NO OPEN STREET MAP DATA FOR THIS AREA
library(MODISTools)
<- locs %>%
modis_locs select("placename", "longitude", "latitude") %>%
rename(site_name=placename, lat=latitude, lon=longitude)
# list available dates for a product at a location
<- mt_dates(product = "MOD13Q1", lat = modis_locs$lat[1], lon = modis_locs$lon[1]) #MOD15A2H
dates
# Get the first and last date!
first(dates$calendar_date); last(dates$calendar_date)
<- mt_batch_subset(product = "MOD13Q1",
site_ndvi df=modis_locs,
band = "250m_16_days_NDVI",
start = "2022-01-01",
end = "2022-02-28",
km_lr = 0, # Use these options if you want to buffer the value (km left)
km_ab = 0, # Use these options if you want to buffer the value (km above)
internal = TRUE)
<- site_ndvi %>%
ndvi_simple select( site, band, calendar_date, value) %>%
rename(placename=site)
<- ndvi_simple %>% #Take the NDVI layer
tmp group_by(placename) %>% # Group observations by the placename
summarize(mean_ndvi=mean(value)) # Take the mean of the values and call the new column `mean_ndvi`
# Add the new data to our locations dataframe
<- left_join(locs_sf, tmp)
locs_sf
# Convert it back to a dataframe
$geometry <- NULL
locs_sf
<- left_join(locs, locs_sf)
locs
# Write the dataset
write.csv(locs, paste0("data/processed_data/", locs$project_id[1],"_camera_locations_and_covariates.csv"), row.names=F)
15.1.3 Data exploration
<- c("kableExtra", "tidyr", "leaflet", "dplyr", "viridis", "corrplot", "lubridate", "plotly")
list.of.packages
<- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
new.packages if(length(new.packages)) install.packages(new.packages)
lapply(list.of.packages, require, character.only = TRUE)
# Final locations plot
<- read.csv(paste0("data/processed_data/",pro$project_id[1],"_camera_locations_and_covariates.csv"))
locs
# If you want to color by a category do it here:
<- "feature_type"
category # First lets choose a category to color
<- factor(locs[,category])
locs[,category] <- turbo(length(levels(locs[,category])))
col.cat # Add it to the dataframe
$colours <- col.cat[locs[,category]]
locs
<- leaflet() %>%
m # Add a satellite image layer
addProviderTiles(providers$Esri.WorldImagery, group="Satellite") %>%
addProviderTiles(providers$Esri.WorldTopoMap, group="Base") %>%
addCircleMarkers(lng=locs$longitude, lat=locs$latitude,
# Color the markers depending on the 'feature type'
color=locs$colours,
# Add a popup of the deployment code
popup=paste(locs$placename, locs[,category])) %>%
# Add a legend explaining what is going on
addLegend("bottomleft", colors = col.cat, labels = levels(locs[,category]),
title = category,
labFormat = labelFormat(prefix = "$"),
opacity = 1
%>%
) # add a layer control box to toggle between the layers
addLayersControl(
baseGroups = c("Satellite", "Base"),
options = layersControlOptions(collapsed = FALSE)
) m
<- read.csv(paste0("data/processed_data/",pro$project_id[1],"_species_list.csv"), header=T)
sp_summary <- read.csv(paste0("data/processed_data/",pro$project_id[1],"_30min_independent_total_observations.csv"), header=T)
total_obs
# Convert to long
<- total_obs %>%
long_obs pivot_longer(cols=sp_summary$sp, # The columns we want to create into rows - species
names_to="sp", # What we what the number column to be called
values_to = "count") # Takes the values in the species columns and calls them `count`
# We can them summaries those using dplyr
<- long_obs %>% # Take the long observation data frame `long_obs`
tmp group_by(sp) %>% # Group by species
summarise(count=sum(count)) # Sum all the independent observations
# Add it to the sp_summary dataframe
<- left_join(sp_summary, tmp)
sp_summary
## Occupancy
# We use the mutate function to mutate the column
<- total_obs %>% # The total obs dataframe
total_binary mutate(across(sp_summary$sp, ~+as.logical(.x))) # across all of the species columns, make it binary
# Flip the dataframe to longer - as before
<- total_binary %>%
long_bin pivot_longer(cols=sp_summary$sp, names_to="sp", values_to = "count") # Takes the species names columns, and makes them unique rows with "sp" as the key
# We can now sum the presence/absences and divide by the number of survey locations
<- long_bin %>%
tmp group_by(sp) %>%
summarise(occupancy=sum(count)/nrow(locs)) # divided the sum by the number of sites
# add the results to the sp_summary
<- left_join(sp_summary, tmp)
sp_summary
###########################
# Comparison plot
# Lets put the dataframes in a sensible order
<- sp_summary[order(sp_summary$count),]
sp_summary
<- list(categoryorder = "array",
yform categoryarray = sp_summary$sp)
<- list(title="Captures")
xform
# Capture rate
<- plot_ly(x = sp_summary$count, y = sp_summary$common_name, type = 'bar', orientation = 'h') %>%
fig1 layout(yaxis = yform, xaxis=xform)
<- list(categoryorder = "array",
yform categoryarray = sp_summary$sp,
showticklabels=F)
<- list(title="Occupancy")
xform
# Occupancy
<- plot_ly(x = sp_summary$occupancy, y = sp_summary$common_name, type = 'bar', orientation = 'h') %>%
fig2 layout(yaxis = yform, xaxis=xform)
subplot(nrows=1,fig1, fig2, titleX = T) # We could stack them on top of one another using nrows=2
QUESTION What does this tell you about the behavior of the target species?
# Temporal patterns
<- read.csv(paste0("data/processed_data/",pro$project_id[1],"_30min_independent_monthly_observations.csv"), header=T)
mon_obs
# Count up the number of stations and the number of camera nights
<- mon_obs %>% # Use the monthly observations dataframe
mon_summary group_by(date) %>% # Group by the date
summarise(locs_active=n(), # Count the number of active cameras
cam_days=sum(days)) # And sum the active days
# Add in the species specific counts - and join it with the mon_summary dataframe
<- mon_obs %>%
mon_summary group_by(date) %>%
summarise(across(sp_summary$sp, sum, na.rm=TRUE)) %>% # summarise across all of
# the species columns
left_join(x=mon_summary) # Join with the mon_summary dataframe
# We first need to convert the date column to a date object
$date <- ym(mon_summary$date)
mon_summary
# Set up a two panel plot (side by side)
par(mfrow=c(1,2))
plot(mon_summary$date, mon_summary$locs_active,
type="o",
pch=19,
ylim=c(0, max(mon_summary$locs_active)),
las=1,
ylab="Number of cameras active", xlab="Date")
# Sum all the captures rates for the species columns
$all.sp <- rowSums(mon_summary[, sp_summary$sp])
mon_summary
# Plot them
plot(mon_summary$date, mon_summary$all.sp/(mon_summary$cam_days/100),
type="o",
pch=19,
las=1, ylab="Detections per 100 cam days", xlab="Date")
Question What is going on in these plots?
# Species specific plots
par(mfrow=c(2,2))
<- 1
i for(i in 1:length(sp_summary$sp))
{plot(mon_summary$date, pull(mon_summary, sp_summary$sp[i])/(mon_summary$cam_days/100), # The pull command allows you to grab a specific column in a dataframe and turn it into a vector!
type="o",
pch=19,
las=1, ylab="Detections per 100 cam days", xlab="Date",
main=sp_summary$sp[i])
}
Question Any interesting patterns?
# Spatial plots
<- left_join(total_obs, locs)
total_obs
# Jaguar
<- "Panthera.onca"
focal_species
<- pull(total_obs, focal_species)/(total_obs$days/100)
focal_cr
<- leaflet() %>%
m addProviderTiles(providers$Esri.WorldTopoMap, group="Base") %>%
addCircleMarkers(lng=locs$longitude, lat=locs$latitude,
# Add a popup of the deployment code
popup=paste(locs$placename),
radius=(focal_cr/max(focal_cr)*10)+1, stroke=F,
fillOpacity=0.6)
m
Question Any interesting patterns?
# Co-occurances
par(mfrow=c(1,1))
# Pull the data for each of the species from
<- total_obs[, sp_summary$sp]
tmp <- cor(tmp)
M
corrplot(M, method="color",
type="upper",
order="hclust",
# addCoef.col = "black", # We suppress the coefs to make a cleaner plot
tl.col="black", tl.srt=45, #Text label color and rotation
diag=FALSE
)
Question Any interesting patterns?
You can now do some data exploration plots.
# Prepare the data
<- locs %>%
locs mutate_if(is.character,as.factor) # If a column is a character string, make it a factor
<- left_join(total_obs, locs) total_obs
We have the following species:
$sp sp_summary
And the following categories to explore:
c("feature_type", "elevation", "mean_ndvi")