# Chapter 2 Analysis

## 2.1 Dataset

``````library(tidyverse)
library(lubridate)

# NOTE(review): the head of this pipeline (presumably `dat <- <raw data> %>%`)
# is not present in this document; as written, filter() has no data argument.
# Keep only records with a positive latitude.
filter(latitude>0) %>%
# Derive calendar fields from dtRecorded (lubridate) and running period counters.
mutate(month = month(dtRecorded),
year = year(dtRecorded),
semester = semester(dtRecorded),
# NOTE(review): column name is spelled `quater` (sic); it is reused by index_q below.
quater = quarter(dtRecorded),
# Month counter: equals 1 for October 2013 and increases by 1 per month.
index = (((year-2013)*12) + month)-9,
# Semester counter: equals 1 for the second semester of 2013.
index_sem = (((year-2013)*2) + semester)-1,
# Quarter counter: equals 1 for the fourth quarter of 2013.
index_q = (((year-2013)*4) + quater)-3,
# 1 when the point lies outside latitude [20, 48] or longitude [-123, -80], else 0.
out_bound = ifelse(latitude>48|latitude<20|longitude<(-123)|longitude>(-80),1,0))

# Nest each participant's coordinates into a `data` list-column, count the
# total and the distinct (longitude, latitude) rows per participant, and keep
# only participants with more than two of each.
dat.n <- dat %>%
  dplyr::select(patientID, longitude, latitude) %>%
  # Named form of nest(); the bare `nest(-patientID)` spelling is deprecated.
  nest(data = -patientID) %>%
  # map_int() yields plain integer columns instead of list-columns, so the
  # numeric comparison in filter() operates on atomic vectors.
  mutate(
    n = map_int(data, nrow),
    uniq = map_int(data, ~ nrow(unique(.x)))
  ) %>%
  filter(n > 2 & uniq > 2) # individuals with enough sample size``````

## 2.2 Example single Participant

### 2.2.1 Hierarchical Clustering

``````library(factoextra)
source("./_data/vdot_helpers.R")

# Coordinates of participant 100: the first nested data frame whose
# patientID matches.
a <- dat.n$data[dat.n$patientID == 100][[1]]

# Scatter plot of that participant's recorded locations.
a %>%
  ggplot() +
  geom_point(aes(longitude, latitude), size = 3, alpha = .5) +
  theme_bw()``````

``n.clust(a, method = "silhouette", full = TRUE)``
``````## $`Optimal k`
## [1] 2
##
## $`Plot optimization```````

``````##
## $`Cluster Plot```````

### 2.2.2 Spatial Clustering

``````library(sp)
library(rgdal)
library(geosphere)

# Spatial clustering of the participant's points using n.clust.sp()'s default `d`.
n.clust.sp(a, full = TRUE)``````
``````## $K
## [1] 13
##
## $`Cluster Plot```````

``n.clust.sp(a, full = TRUE, d = 10000)``
``````## $K
## [1] 6
##
## $`Cluster Plot```````

``n.clust.sp(a, full = TRUE, d = 200000)``
``````## $K
## [1] 3
##
## $`Cluster Plot```````