# Clusters
n.clust <- function(dat, method = c("silhouette","gap_stat"), full) {
if (nrow(unique(dat))>20) {
k<-20
} else{
k<-nrow(unique(dat))-1
}
b<-fviz_nbclust(dat, kmeans, method = method, k.max = k)
if (method == "gap_stat") {
c<-b$layers[[4]]$data %>% as.numeric()
} else{
c<-b$layers[[3]]$data %>% as.numeric()
}
k <- kmeans(dat, centers = c, nstart = 25)
p <- fviz_cluster(k, data = dat)
if (isTRUE(full)) {
a <- list(c,b,p)
names(a) <- c("Optimal k", "Plot optimization", "Cluster Plot")
} else{
a <- c
}
return(a)
}
# Spatial
library(sp)
library(rgdal)
library(geosphere)
library(cowplot)
library(scales)
library(stringr)
library(Imap)
#library(enmSdm)
n.clust.sp <- function(dat, d = 50, full, df=F, method="single") {
a <- matrix(c(dat$longitude,dat$latitude), ncol=2)
# convert data to a SpatialPointsDataFrame object
xy <- SpatialPoints(a,
proj4string=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))
# use the distm function to generate a geodesic distance matrix in meters
mdist <- distm(xy)
#mdist2 <- pointDist(xy)
# cluster all points using a hierarchical clustering approach
hc <- hclust(as.dist(mdist), method=method)
# define clusters based on a tree "height" cutoff "d" and add them to the SpDataFrame
xy$clust <- cutree(hc, h=d)
DF <- as.data.frame(xy)
c<-max(DF$clust)
p <- DF %>%
ggplot() +
geom_point(aes(coords.x1, coords.x2, col = factor(clust)), alpha=.6) +
theme_bw() +
labs(col="K") +
theme(legend.position = "bottom")
g <- DF %>%
ggplot() +
geom_point(aes(coords.x1, coords.x2, col = factor(clust))) +
theme_bw(base_size = 5) +
labs(title = paste0("Clusters within ",d," m [k-specific]")) +
guides(col=F) +
scale_y_continuous(labels= scales::label_number(.1)) +
scale_x_continuous(labels= scales::label_number(.1)) +
facet_wrap(.~clust, scales = "free")
t<-plot_grid(p, g, labels = c('A', 'B'), label_size = 12, rel_widths = c(2,5))
if (isTRUE(full)) {
a <- list(c,t)
names(a) <- c("K", "Cluster Plot")
} else{
a <- c
}
if (isTRUE(df)) {
z <- DF
} else{
z <- a
}
return(z)
}
mfl <- function(dat, method = "random") {
dat %>%
group_by(clust) %>%
summarise(x1=mean(coords.x1),x2=mean(coords.x2), loc = n()) %>%
ungroup() %>%
mutate(mloc=max(loc),
mfl1 = ifelse(loc==mloc,1,0),
mfl2 = rank(-mfl1,ties.method = method),
mfl = ifelse(mfl1==mfl2,1,0)) %>%
dplyr::select(-mloc, -mfl1, -mfl2)
}
geodesic.dist <- function(d) {
t <- d %>%
dplyr::select(clust, loc) %>%
rename(to_cluster = clust)
a <- matrix(c(d$x1,d$x2), ncol=2)
# convert data to a SpatialPointsDataFrame object
xy <- SpatialPoints(a,
proj4string=CRS("+proj=longlat +ellps=WGS84 +datum=WGS84"))
# use the distm function to generate a geodesic distance matrix in meters
mdist <- as.data.frame(distm(xy)) %>%
mutate(mfl = d$mfl) %>%
filter(mfl==1) %>%
dplyr::select(-mfl) %>%
gather(to_cluster, distance_m) %>%
mutate(to_cluster = as.numeric(str_remove(to_cluster, "V")),
dist_cat = cut(distance_m, breaks = c(-Inf,0,65000,500000,Inf), label = c("base","local","mid","distant"))) #%>%
#filter(distance_m!=0)
# Great-circle dist
lo <- d %>% filter(mfl==1) %>% dplyr::select(x1) %>% unlist()
la <- d %>% filter(mfl==1) %>% dplyr::select(x2) %>% unlist()
d2 <- d %>%
mutate(lo = lo, la = la,
GC_dist = gdist(x1,x2,lo,la, units = "m"),
GC_cat = cut(GC_dist, breaks = c(-Inf,0,65000,500000,Inf), label = c("base","local","mid","distant"))) %>%
dplyr::select(GC_dist, GC_cat) #%>%
#filter(GC_dist!=0)
dist <- mdist %>%
bind_cols(d2) %>%
inner_join(t, by="to_cluster")
return(dist)
}
dist_s <- function(gd) {
gd %>%
group_by(dist_cat) %>%
summarise(avg_gd = mean(distance_m),
min_gd = min(distance_m),
max_gd = max(distance_m),
sd_gd = sd(distance_m),
avg_GC = mean(GC_dist),
min_GC = min(GC_dist),
max_GC = max(GC_dist),
sd_GC = sd(GC_dist),
freq = sum(loc),
n_clust = n())
}
plot.pattern <- function(d) {
d %>%
ggplot(aes(distance_m, loc, col = dist_cat, size = loc)) +
geom_point(alpha=.5) +
labs(y = "freq", size = "freq", x = "distance (m)", col = "distance \ncategory") +
scale_x_log10(labels = label_comma()) +
scale_y_log10() +
geom_vline(data = tibble(xi=c(65000,500000)),
aes(xintercept=xi), linetype = "dashed") +
geom_hline(data = tibble(yi=c(10,50)),
aes(yintercept=yi), linetype = "dashed") +
theme_bw()
}