Supplementary Material (R code and figures) for the occupancy analysis of Sambar Deer
Read in key data:
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
cache = TRUE,
message = FALSE,
warning = FALSE,
cache = FALSE
)
mapview::mapviewOptions(fgb = FALSE)
library(deervic)
library(galah)
library(dbplyr)
library(dplyr)
library(sf)
library(DBI)
library(VicmapR)
library(lme4)
library(dismo)
library(raster)
library(terra)
library(Hmisc)
library(kableExtra)
library(data.table)
library(ubms)
library(terra)
library(dismo)
library(galah)
library(SDMtune)
library(tibble)
library(tidyterra)
library(ROCR)
library(brms)
library(ggtext)
galah_config(email = "justin.cally@delwp.vic.gov.au")
options(mc.cores=4)
source("/Users/justincally/Documents/Work/R Repositories/blog/R/themes.R")
source("/Users/justincally/Documents/Work/R Repositories/blog/R/interaction_function.R") # some helpful functions for ubms
delwp_cols <- c(Teal = "#00B2A9",
Navy = "#201547",
Environment = "#CEDC00",
`Climate Change` = "#FDDA24",
Energy = "#0072CE",
Water = "#71C5E8",
Planning = "#642667",
Infrastructure = "#AF272F",
FFR = "#E57200",
Corporate = "#201547")
delwp_cols2 <- c(delwp_cols, "#737373", "#00B2A9")
# delwp_cols_seq <- lapply(delwp_cols, function(x) {
# tinter::tinter(x, direction = "tints", steps = 10)
# })
# Restrict the area of interest in this study to Victoria by pulling down Victoria BBOX and neighbouring LGAs
vic_bbox <- VicmapR::vicmap_query("datavic:VMLITE_LGA") %>%
collect() %>%
sf::st_bbox()
vic_bound <- vicmap_query("datavic:VMLITE_VICTORIA_POLYGON_SU5") %>%
filter(STATE == "VIC") %>% # Philip Island and French Island
collect() %>%
st_transform(3111)
# Select deer species of interest
deer_species <- c("Dama dama", "Rusa unicolor", "Cervus elaphus")
# Connect to database
con <- RPostgreSQL::dbConnect(RPostgres::Postgres(),
host = '10.110.7.201',
dbname = 'ari-dev-weda-psql-01',
user = "psql_admin",
password = keyring::key_get(service = "ari-dev-weda-psql-01", username = "psql_admin"),
port = 5432,
service = NULL,
list(sslmode = "require"))
# Pull down presence absence database vies and filter for species of interest
curated_presence_absence_species_data <- tbl(con, dbplyr::in_schema(schema = "deervic",
table = "curated_presence_absence_species_data")) %>%
filter(Species %in% deer_species) %>%
collect() %>%
rename(SiteID = Site) %>%
mutate(SiteID = as.numeric(SiteID)) %>%
filter(SiteID != 11084)
# Pull down site variable information
curated_site_data <- tbl(con, dbplyr::in_schema(schema = "deervic", table = "curated_site_data")) %>%
collect() %>%
st_as_sf(coords = c("Longitude", "Latitude"), crs = 4283) %>%
filter(SiteID != 11084)
# Pull down grids of sites selected but only select the watercourse distance as that was precisely measured
# All data apart from distance to water is obtained from the raster grid (50m resolution). However for the distance to water we use a more precise Euclidian distance from sf layers. Noting that any distance below 10m was rounded to 10m (in some cases this may be applicable to swampy areas), however for projection reasons they should be discounted (otherwise projections would include water bodies)
spatial_data_wc <- sf::st_read(dsn = con,
layer = Id(schema = "deervic", table = "curated_combined_spatial_data")) %>%
dplyr::select(SiteID, DistancetoWater = WatercourseDistanceClipped) %>%
sf::st_drop_geometry()
# read in spatial data rasters
processed_tifs <- list.files("/Volumes/DeerVic\ Photos/Processed_Rasters", full.names = TRUE)[stringr::str_detect(list.files("/Volumes/DeerVic\ Photos/Processed_Rasters"), ".tif$")]
processed_stack <- terra::rast(processed_tifs)
combined_spatial_data <- bind_cols(curated_site_data %>%
dplyr::select(SiteID),
terra::extract(x = processed_stack, terra::vect(curated_site_data %>%
sf::st_transform(3111))) %>%
dplyr::select(-ID)) %>%
# left_join(combined_spatial_data %>% dplyr::select(SiteID, WatercourseDistance) %>% st_drop_geometry()) %>%
mutate(DeerHunt = as.factor(DeerHunt),
SambarHunt = as.factor(DeerHunt))
# dplyr::select(-DistancetoWater)
# dplyr::left_join(spatial_data_wc, by = "SiteID")
combined_spatial_data <- combined_spatial_data %>%
mutate(DistancetoWater = case_when(DistancetoWater == 0 ~ 10,
TRUE ~ DistancetoWater))
combined_spatial_data[combined_spatial_data$SiteID == 44911 | is.na(combined_spatial_data$LANDMANAGER),]$LANDMANAGER <- "OTHER"
detection_history <- deervic::presence_absence_daily(con, species = c("Rusa unicolor", "Dama dama", "Cervus elaphus"))
curated_site_daily_temp <- tbl(con, dbplyr::in_schema(schema = "deervic", table = "curated_site_daily_temp")) %>%
collect() %>%
as.data.frame() %>%
`rownames<-`(.$SiteID) %>%
dplyr::select(-SiteID)
curated_site_daily_precip <- tbl(con, dbplyr::in_schema(schema = "deervic", table = "curated_site_daily_precip")) %>%
collect() %>%
as.data.frame() %>%
`rownames<-`(.$SiteID) %>%
dplyr::select(-SiteID)
curated_site_daily_day <- tbl(con, dbplyr::in_schema(schema = "deervic", table = "curated_site_daily_day")) %>%
collect() %>%
as.data.frame() %>%
`rownames<-`(.$SiteID) %>%
dplyr::select(-SiteID)
curated_site_daily_understory <- tbl(con, dbplyr::in_schema(schema = "deervic", table = "curated_site_daily_understory")) %>%
collect() %>%
as.data.frame() %>%
`rownames<-`(.$SiteID) %>%
dplyr::select(-SiteID)
curated_site_daily_woody_understory <- tbl(con, dbplyr::in_schema(schema = "deervic", table = "curated_site_daily_woody_understory")) %>%
collect() %>%
as.data.frame() %>%
`rownames<-`(.$SiteID) %>%
dplyr::select(-SiteID)
curated_site_daily_herbaceous_understory <- tbl(con, dbplyr::in_schema(schema = "deervic", table = "curated_site_daily_herbaceous_understory")) %>%
collect() %>%
as.data.frame() %>%
`rownames<-`(.$SiteID) %>%
dplyr::select(-SiteID)
combined_model_data <- curated_presence_absence_species_data %>%
left_join(curated_site_data %>% sf::st_drop_geometry(), by = "SiteID") %>%
left_join(combined_spatial_data, by = "SiteID")
combined_model_data_wide <- curated_presence_absence_species_data %>%
data.table::as.data.table() %>%
data.table::dcast(SiteID ~ Species, value.var = "Presence") %>%
left_join(curated_site_data %>% sf::st_drop_geometry(), by = "SiteID") %>%
left_join(combined_spatial_data, by = "SiteID") %>%
dplyr::mutate(UnderstoryCover = NWUCover + NNWHUCover + EWUCover + ENWHUCover) %>%
filter(SiteID != 11084) # Murray Sunset not picked up cam
regions <- vicmap_query("datavic:VMADMIN_DELWP_REGION") %>%
collect()
options(vicmap.chunk_limit = 15000)
statewide_water <- vicmap_query("datavic:VMLITE_HY_WATERCOURSE") %>%
collect() %>%
sf::st_transform(3111)
options(vicmap.chunk_limit = 1500)
regions_3111 <- regions %>%
sf::st_transform(3111) %>%
sf::st_simplify(500, preserveTopology = TRUE)
statewide_water_simp <- statewide_water %>%
filter(FEATURE_TYPE_CODE == "watercourse_river") %>%
sf::st_simplify(500, preserveTopology = TRUE) %>%
sf::st_intersection(regions_3111)
# One site was placed next to state forest (but technically out of SF)
# Kernel density function
st_kde <- function(points,cellsize, bandwith, extent = NULL){
if(is.null(extent)){
extent_vec <- sf::st_bbox(points)[c(1,3,2,4)]
} else{
extent_vec <- sf::st_bbox(extent)[c(1,3,2,4)]
}
n_y <- ceiling((extent_vec[4]-extent_vec[3])/cellsize)
n_x <- ceiling((extent_vec[2]-extent_vec[1])/cellsize)
extent_vec[2] <- extent_vec[1]+(n_x*cellsize)-cellsize
extent_vec[4] <- extent_vec[3]+(n_y*cellsize)-cellsize
coords <- sf::st_coordinates(points)
matrix <- MASS::kde2d(coords[,1],coords[,2],h = bandwith,n = c(n_x,n_y),lims = extent_vec)
raster::raster(matrix)
}
nrd_new <- function (x)
{
r <- quantile(x, c(0.25, 0.75))
h <- (r[2L] - r[1L])/1.34
4 * 1.06 * min(sqrt(var(x)), h, na.rm = T) * length(x)^(-1/5)
}
For each species we develop a smoothed kernel estimate of their distribution based on presence-only reecords from the past 30 years. This kernel density estimate uses a bandwith determined via the normal reference distribution and is configured so that the species distribution contains 99.5% of presence records (Gormley et al. 2011).
area <- vic_bound %>%
sf::st_union() %>%
sf::st_buffer(25000) %>%
sf::st_simplify(dTolerance = 10000) %>%
sf::st_transform(4283) %>%
sf::st_as_sf()
if(!file.exists("data/historic_deer_ala_records.rds")) {
historic_deer_ala_records <- galah_call() %>%
galah_identify(c("Cervus unicolor", "Dama dama", "Cervus elaphas")) %>%
galah::galah_geolocate(area) %>%
galah_filter(year >= 1992) %>%
atlas_occurrences() %>%
filter(!is.na(decimalLongitude) & !is.na(decimalLatitude)) %>%
sf::st_as_sf(coords = c("decimalLongitude", "decimalLatitude"), crs = 4283) %>%
mutate(scientificName = case_when(scientificName == "Dama dama dama" ~ "Dama dama",
scientificName == "Cervus unicolor" ~ "Rusa unicolor",
TRUE ~ scientificName))
saveRDS(historic_deer_ala_records, "data/historic_deer_ala_records.rds")
} else {
historic_deer_ala_records <- readRDS("data/historic_deer_ala_records.rds")
}
smoothed_kernel_species <- function(sr, all_area = vic_bound) {
hist_3111 <- sr %>%
sf::st_transform(3111)
points_kde <- st_kde(hist_3111,
cellsize = 1000,
bandwith = nrd_new(hist_3111 %>% sf::st_coordinates()),
extent = terra::ext(all_area))
pred_vals <- raster::extract(points_kde, hist_3111)
q995 <- quantile(pred_vals, 0.001, na.rm = T)
points_kde_cut <- points_kde
terra::values(points_kde_cut)[terra::values(points_kde_cut) >= q995] <- as.factor("1")
terra::values(points_kde_cut)[terra::values(points_kde_cut) < q995] <- NA
points_kde_cut <- terra::rast(points_kde_cut)
points_kde_cut <- terra::crop(points_kde_cut, terra::vect(vic_bound), mask = TRUE)
points_kde_cut
}
split_deer_records <- split(historic_deer_ala_records, historic_deer_ala_records$scientificName)
deer_kde <- lapply(split_deer_records, smoothed_kernel_species) %>%
terra::rast()
names(deer_kde) <- paste(names(deer_kde), "distribution")
At each site a camera was deployed and 3 transects were walked in order to detect deer signs (pellets, rubbings, footprints and wallows). Below we show sites that had at least one sign of deer and whether they were detected using the various methods. Generally, cameras appear to perform the best but do not appear to have 100 % detection probability.
transect_detection <- curated_presence_absence_species_data %>%
left_join(curated_site_data) %>%
group_by(SiteID) %>%
summarise(`Camera` = sum(Presence),
Pellets = sum(Pellets),
Wallows = sum(Wallows),
Rubbings = sum(Rubbings),
Footprints = if(any(Footprints == "Observed")) 1 else 0) %>%
mutate(across(c(`Camera`, Pellets, Wallows, Rubbings, Footprints),
~ case_when(. > 0 ~ "Detected",
. == 0 ~ "Not Detected")))
# transect_detection[transect_detection$SiteID %in% c(4905, 4257, 30755), "Pellets"] <- "Not Detected"
transect_detection_long <- transect_detection %>%
tidyr::gather(key="Method", value="Presence", -SiteID) %>%
mutate(SiteID = factor(SiteID)) %>%
arrange(Method, desc(Presence)) %>%
group_by(SiteID) %>%
filter(any(Presence == "Detected"))
transect_plot <- ggplot(transect_detection_long, aes(x=Method, y=SiteID, fill=Presence))+
geom_tile(colour="white", size=0.2) +
scale_fill_manual(na.value = "transparent", values = c(Detected = unname(delwp_cols["Navy"]), `Not Detected` = unname(delwp_cols["Environment"]))) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90))
transect_plot
Because we did not assign species to the deer sign records in the field or with genetic analysis we have to attempt to do it logically. Below we show the code required to assign deer sign to one of the given species based on the following logic:
For a given detection (1), we assign that observation to a given species based on the following conditions:
# Format other deer signs as a transect based detection method:
## Pellets
## Footprints
## Rubbings
## Wallows
raw_transects <- tbl(con, dbplyr::in_schema(schema = "deervic", table = "raw_site_transects")) %>%
collect() %>%
na.omit()
day1vars <- data.frame(SiteID = as.numeric(rownames(curated_site_daily_herbaceous_understory)),
HerbaceousUnderstoryCover = curated_site_daily_herbaceous_understory[,1],
DaysSinceDeploy = 1L,
MaxTemp = curated_site_daily_temp[,1],
Precipitation = curated_site_daily_precip[,1]) %>%
na.omit()
transects_long <- raw_transects %>%
group_by(Data_File_Id, I0_TransectNo) %>%
summarise(Pellet = if(sum(I0_Pellets) > 0) 1 else 0,
Footprint = if(any(I0_Footprints == "Observed")) 1 else 0,
Rubbing = if(sum(I0_Rubbings) > 0) 1 else 0,
Wallow = if(sum(I0_Wallows) > 0) 1 else 0) %>%
ungroup() %>%
left_join(curated_site_data[, c("SiteID", "Data_File_Id")] %>%
sf::st_drop_geometry(), by = "Data_File_Id") %>%
dplyr::select(-Data_File_Id) %>%
dplyr::select(SiteID, everything()) %>%
full_join(day1vars, by = "SiteID") %>%
left_join(curated_presence_absence_species_data %>%
as.data.table() %>%
dcast(SiteID ~ Species), by = "SiteID") %>%
as.data.table() %>%
filter(!is.na(SiteID))
# Assign detections to species based on:
# species seen on camera = 1
# species not seen on camera and another species seen = 0
# species not seen on camera and no others seen, but within KDE distribution = 1
# species not seen on camera, and not within KDE distribution = 0
hif_hany <- function(..., values) {
mtx <- do.call(cbind, list(...))
mtx <- array(mtx %in% values, dim = dim(mtx))
rowSums(mtx) > 0
}
detection_assign <- function(sp, species, othersp) {
sp.d <- paste(species, "distribution")
sf_m <- dplyr::mutate(sp, across(c(Pellet, Footprint, Rubbing, Wallow),
~ dplyr::case_when(.x == 0 ~ 0,
.x == 1 & !! sym(species) == 1 ~ 1,
.x == 1 & !! sym(species) != 1 & hif_hany(!!! syms(othersp), values = 1) ~ 0,
.x == 1 & !! sym(species) != 1 & !hif_hany(!!! syms(othersp), values = 1) & !! sym(sp.d) == 1 ~ 1,
.x == 1 & !! sym(species) != 1 & !! sym(sp.d) != 1 ~ 0)))
transects_detection <- sf_m %>%
as.data.table() %>%
dcast(SiteID ~ I0_TransectNo, value.var = c("Pellet", "Footprint", "Rubbing", "Wallow")) %>%
column_to_rownames(var = "SiteID")
return(transects_detection)
}
species_overlap <- bind_cols(curated_site_data[, "SiteID"], terra::extract(deer_kde, vect(curated_site_data %>% sf::st_transform(3111)))) %>%
sf::st_drop_geometry() %>%
dplyr::select(-ID)
species_overlap[is.na(species_overlap)] <- 0
transects_sp <- left_join(transects_long, species_overlap, by = "SiteID")
Damadama_Detection <- detection_assign(transects_sp, "Dama dama", othersp = c("Rusa unicolor", "Cervus elaphus"))
Rusaunicolor_Detection <- detection_assign(transects_sp, "Rusa unicolor", othersp = c("Dama dama", "Cervus elaphus"))
Cervuselaphus_Detection <- detection_assign(transects_sp, "Cervus elaphus", othersp = c("Dama dama", "Rusa unicolor"))
transects_method_long <- transects_long
for(m in c("Pellet", "Footprint", "Rubbing", "Wallow")) {
transects_method_long[[m]] <- m
}
transects_method <- dcast(transects_method_long, SiteID ~ I0_TransectNo,
value.var = c("Pellet", "Footprint", "Rubbing", "Wallow")) %>%
column_to_rownames(var = "SiteID")
transects_HerbaceousUnderstoryCover <- transects_long %>%
mutate(across(c("Pellet", "Footprint", "Rubbing", "Wallow"), ~ HerbaceousUnderstoryCover)) %>%
dcast(SiteID ~ I0_TransectNo, value.var = c("Pellet", "Footprint", "Rubbing", "Wallow")) %>%
column_to_rownames(var = "SiteID")
transects_DaysSinceDeploy <- transects_long %>%
mutate(across(c("Pellet", "Footprint", "Rubbing", "Wallow"), ~ DaysSinceDeploy)) %>%
dcast(SiteID ~ I0_TransectNo, value.var = c("Pellet", "Footprint", "Rubbing", "Wallow")) %>%
column_to_rownames(var = "SiteID")
transects_MaxTemp <- transects_long %>%
mutate(across(c("Pellet", "Footprint", "Rubbing", "Wallow"), ~ MaxTemp)) %>%
dcast(SiteID ~ I0_TransectNo, value.var = c("Pellet", "Footprint", "Rubbing", "Wallow")) %>%
column_to_rownames(var = "SiteID")
transects_Precipitation <- transects_long %>%
mutate(across(c("Pellet", "Footprint", "Rubbing", "Wallow"), ~ Precipitation)) %>%
dcast(SiteID ~ I0_TransectNo, value.var = c("Pellet", "Footprint", "Rubbing", "Wallow")) %>%
column_to_rownames(var = "SiteID")
curated_site_daily_method <- curated_site_daily_day
curated_site_daily_method[!is.na(curated_site_daily_method)] <- "Camera"
# Combine the detection histories
siteorder <- as.character(combined_model_data_wide$SiteID)
detection_history_joined <- detection_history
detection_history_joined$`Rusa unicolor`$detection_history <- cbind(detection_history_joined$`Rusa unicolor`$detection_history[siteorder,],
Rusaunicolor_Detection[siteorder,])
detection_history_joined$`Dama dama`$detection_history <- cbind(detection_history_joined$`Dama dama`$detection_history[siteorder,],
Damadama_Detection[siteorder,])
detection_history_joined$`Cervus elaphus`$detection_history <- cbind(detection_history_joined$`Cervus elaphus`$detection_history[siteorder,],
Cervuselaphus_Detection[siteorder,])
# Combine the other detection variables
joined_HerbaceousUnderstoryCover <- cbind(curated_site_daily_herbaceous_understory[siteorder,],
transects_HerbaceousUnderstoryCover[siteorder,])
joined_DaysSinceDeploy <- cbind(curated_site_daily_day[siteorder,],
transects_DaysSinceDeploy[siteorder,])
joined_MaxTemp <- cbind(curated_site_daily_temp[siteorder,],
transects_MaxTemp[siteorder,])
joined_Precipitation <- cbind(curated_site_daily_precip[siteorder,],
transects_Precipitation[siteorder,])
joined_Method <- cbind(curated_site_daily_method[siteorder,],
transects_method[siteorder,])
# update combined_model_data
bound_detections <- bind_rows(detection_history_joined)
bound_presence <- data.frame(SiteID = rep(siteorder, times = length(detection_history_joined)),
Species = rep(names(detection_history_joined), each = length(siteorder)),
Presence = rowSums(bound_detections, na.rm = T)) %>%
mutate(Presence = case_when(Presence > 0 ~ 1,
TRUE ~ 0),
SiteID = as.numeric(SiteID))
combined_model_data_allMethod <- left_join(combined_model_data %>%
dplyr::select(-Presence),
bound_presence, by = c("SiteID", "Species"))
Below we view the distribution of sites sampled as well as the convex hull we used to influence sampling probability. The convex hull represents areas with a deer record within 50km and within the past 10 years.
deer_polygon <- readRDS("data/deer_polygon.rds")
crownland_mask <- terra::rast("/Volumes/DeerVic\ Photos/MaxentStack/maxent_stack_masked_cl.grd")[[1]]
crownland <- crownland_mask
crownland_ag <- terra::spatSample(x = crownland, size = 100000, method = "regular", as.raster = TRUE)
values(crownland_ag)[!is.na(values(crownland_ag))] <- 1
site_plot <- ggplot() +
geom_sf(data = regions_3111, fill = "grey", inherit.aes = FALSE, alpha = 0.2, colour = "grey70") +
tidyterra::geom_spatraster(data = crownland_ag, aes(fill = BIO04), alpha = 0.5, na.rm = T, show.legend = FALSE) +
geom_sf(data = deer_polygon, fill = "DarkGreen", inherit.aes = FALSE, alpha = 0.5, colour = "grey70") +
geom_point(aes(x = X, y = Y), size = 2.5, shape = 21, fill = "Orange", inherit.aes = FALSE,
data = combined_spatial_data %>%
st_transform(3111) %>%
st_coordinates() %>%
as.data.frame()) +
scale_fill_continuous(na.value = "transparent") +
ylab("Latitude") +
xlab("Longitude") +
theme(axis.title = element_text(size = 20),
axis.text = element_text(size = 15),
axis.text.x = element_text(hjust = -1),
legend.title = element_text(size=24),
legend.text = element_text(size=18),
legend.key.size = unit(7, 'cm')) +
theme_bw()
site_plot
Figure 1: Locations of sites sampled across Victoria (2021 – 2022). The Orange points represent the sites visited, with the blue area being the crown land sampling was restricted to. The green area is the convex hull bounding area, where a deer record from the previous 10 years (since 2011) has occurred. This area thus received a five-fold increased inclusion probability in comparison to the area outside of it. Notably, Murray-Sunset National Park, Wyperfeld National Park and Hattah-Kulkyne National Park. Thus, the likelihood of selecting sites outside of the green area was five times less than within the green area (Balanced Acceptance Sampling considerations/methods applied).
combined_model_data_allMethod_loc <- cbind(combined_model_data_allMethod,
sf::st_transform(combined_model_data_allMethod %>%
sf::st_as_sf(), 3111) %>% sf::st_coordinates()) %>%
mutate(Presence = case_when(Presence == 1 ~ "Present",
TRUE ~ "Absent"),
Species = case_when(Species == "Rusa unicolor" ~ "Sambar Deer",
Species == "Cervus elaphus" ~ "Red Deer",
Species == "Dama dama" ~ "Fallow Deer")) %>%
arrange(Presence)
state_plot <- ggplot() +
geom_sf(data = regions_3111, fill = "grey", inherit.aes = FALSE, alpha = 0.2, colour = "grey70") +
geom_point(aes(x = X, y = Y, fill = Presence), size = 2, shape = 21, data = combined_model_data_allMethod_loc, inherit.aes = FALSE) +
scale_fill_manual(values = c(Absent = delwp_cols[["Environment"]], Present = delwp_cols[["Teal"]])) +
ylab("Latitude") +
xlab("Longitude") +
theme(axis.title = element_text(size = 20),
axis.text = element_text(size = 15),
axis.text.x = element_text(hjust = -1),
legend.title = element_text(size=24),
legend.text = element_text(size=18),
legend.key.size = unit(7, 'cm')) +
facet_wrap(~ Species, nrow = 2) +
theme_bw()
shift_legend2(state_plot)
Figure 2: Presence/Absence map of species across Victoria. Presence of deer species was based on camera data or deer signs at the site (footprints, pellets, rubbings and wallows)
We collected a suite of vegetation metrics in the field that may relate to deer impacts and/or presence. Below we briefly visualise the correlation between the measure and the presence of deer.
anydeer <- transect_detection %>%
tidyr::gather(key="Method", value="Presence", -SiteID) %>%
mutate(SiteID = factor(SiteID)) %>%
arrange(Method, desc(Presence)) %>%
group_by(SiteID) %>%
summarise(DeerSign = if(any(Presence == "Detected")) "Present" else "Absent") %>%
ungroup()
vegetation <- curated_site_data %>%
sf::st_drop_geometry() %>%
dplyr::select(SiteID,
NWUCover,
NNWHUCover,
EWUCover,
ENWHUCover,
BGroundCover,
TopHeight,
CanopyCov,
Saplings,
Seedlings) %>%
mutate(SiteID = as.character(SiteID)) %>%
inner_join(anydeer %>%
mutate(SiteID = as.character(SiteID)),
by = "SiteID") %>%
mutate(Exotics = case_when(EWUCover + ENWHUCover > 0 ~ 1,
TRUE ~ 0),
ExoticSum = EWUCover + ENWHUCover)
vegetation_long <- vegetation %>%
as.data.table() %>%
melt(id.vars = c("SiteID", "DeerSign"))
# Vegetation Plot
veg1 <- vegetation_long %>%
filter(!(variable %in% c("EWUCover", "ENWHUCover", "Saplings", "Seedlings", "Exotics", "ExoticSum"))) %>%
ggplot(aes(x = DeerSign, y = value)) +
geom_boxplot() +
facet_wrap(~variable, scales = "free", nrow = 5) +
theme_bw()
veg2 <- vegetation_long %>%
filter((variable %in% c("Saplings", "Seedlings", "ExoticSum"))) %>%
ggplot(aes(x = DeerSign, y = value)) +
geom_boxplot() +
scale_y_sqrt(limits = c(0,100)) +
facet_wrap(~variable, scales = "free", nrow = 3) +
theme_bw()
# veg3 <- ggcorrplot::ggcorrplot(cor(vegetation %>%
# mutate(DeerSign = case_when(DeerSign == "Present" ~ 1L,
# TRUE ~ 0L)) %>%
# dplyr::select(DeerSign, Exotics)),
# hc.order = TRUE,
# type = "lower",
# lab = TRUE)
cowplot::plot_grid(veg1, veg2, labels = "", ncol = 2)
Figure 3: Relation between deer presence and various vegetation metrics
vegetation_long %>%
filter(!(variable %in% c("Exotics", "EWUCover", "ENWHUCover"))) %>%
mutate(variable = as.character(variable)) %>%
mutate(variable = case_when(variable == "NWUCover" ~ "Native Woody Understorey (%)",
variable == "NNWHUCover" ~ "Native Herbaceous Understorey (%)",
variable == "BGroundCover" ~ "Bare Ground (%)",
variable == "TopHeight" ~ "Top Height (m)",
variable == "CanopyCov" ~ "Canopy Cover (%)",
variable == "ExoticSum" ~ "Exotics (%)",
TRUE ~ variable)) %>%
arrange(variable) %>%
mutate(value = case_when(value > 250 ~ 250,
TRUE ~ value)) %>%
ggplot(aes(y = value, x = DeerSign, fill = DeerSign)) +
ggbeeswarm::geom_quasirandom(shape = 21) +
geom_violin(draw_quantiles = c(0.5), size = 1, alpha = 0.5) +
facet_wrap(~variable, ncol = 4, scales = "free") +
scale_fill_manual(values = c(`Absent` = delwp_cols[["Environment"]],
`Present` = delwp_cols[["Navy"]])) +
theme_classic()
Based on the plots above, there only appear to be two of-interest relationships for this analysis (at the given time and in regards to occupancy). The first being canopy cover may be higher when deer are present; which may reflect Sambar deer preference for forested environments. The second is native non-woody herbaceous understorey; which may promote increased browsing around the camera or detectibility distance.
We undertake several data checks before modelling to assure certain assumptions (e.g. colinearity and normality) are not violated.
Covariates that are strongly correlated may proove to be an issue for fitting models. We assess the covariation in predictor variables at the sites includided in this analysis (not including factors):
panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- abs(cor(x, y, use="pairwise.complete.obs"))
txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- paste0(prefix, txt)
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
text(0.5, 0.5, txt, cex = cex.cor * r)
}
pairs(combined_model_data_wide %>%
dplyr::select(BIO01,
BIO04,
BIO06,
BIO12,
BIO15,
NPP,
TreeDensity,
BareSoil,
PhotosyntheticVeg,
NonPhotosyntheticVeg,
Elevation,
TWIND,
DistancetoWater,
CrownGrazingDistance,
DeerControl,
DeerHunt,
SambarHunt,
MRVBF,
Nitrogen,
PastureDistance,
SLOPE), lower.panel=panel.smooth, upper.panel=panel.cor)
Due to the correlation of predictors we remove:
+ Elevation
+ BIO06
+ PhotosyntheticVeg
+ NPP
+ Nitrogen
+ Only use DeerHunt
or SambarHunt
combined_model_data_wide %>%
dplyr::select(BIO01,
BIO04,
BIO06,
BIO12,
BIO15,
NPP,
TreeDensity,
BareSoil,
PhotosyntheticVeg,
NonPhotosyntheticVeg,
Elevation,
TWIND,
DistancetoWater,
CrownGrazingDistance,
DeerControl,
MRVBF,
Nitrogen,
PastureDistance,
SLOPE) %>%
hist.data.frame(nclass = 10)
There is some evidence that several variables should be transformed (log). Ideally the following predictors can be transformed:
Doing so would result in the following:
combined_model_data_wide %>%
dplyr::select(BIO01,
BIO04,
BIO06,
BIO12,
BIO15,
NPP,
TreeDensity,
BareSoil,
PhotosyntheticVeg,
NonPhotosyntheticVeg,
Elevation,
TWIND,
DistancetoWater,
CrownGrazingDistance,
DeerControl,
MRVBF,
Nitrogen,
PastureDistance,
SLOPE) %>%
dplyr::mutate(`log(TreeDensity)` = log(TreeDensity),
`sqrt(BareSoil)` = sqrt(BareSoil),
`log(Elevation)` = log(Elevation),
`log(DistancetoWater)` = log(DistancetoWater),
`log(PastureDistance)` = log(PastureDistance),
`sqrt(CrownGrazingDistance)` = sqrt(CrownGrazingDistance),
`sqrt(MRVBF)` = sqrt(MRVBF),
`scrt(SLOPE)` = sqrt(SLOPE)) %>%
dplyr::select(-TreeDensity, -BareSoil, -Elevation, -DistancetoWater, -PastureDistance, -CrownGrazingDistance, -MRVBF, -SLOPE) %>%
hist.data.frame(nclass = 10)
Apart from tree density and MRVBF (which is zero-inflated) these transformations have the desired effect
Boosted regression trees (BRTs) can be useful in understanding the role of a wide variety of model covariates. Here we develop BRTs for Sambar Deer. The plots for these BRTs are shown below, keeping in mind that these plots are not necessarily linear and may be inclusive of interactions. However, using these responses will help inform which covariates should be included in a global imperfect detection model.
brt_data <- combined_model_data_allMethod %>%
dplyr::select(SiteID,
Species,
Presence,
BIO01,
BIO04,
# BIO06,
BIO12,
BIO15,
# NPP,
TreeDensity,
BareSoil,
# PhotosyntheticVeg,
NonPhotosyntheticVeg,
# Elevation,
TWIND,
DistancetoWater,
CrownGrazingDistance,
DeerControl,
MRVBF,
# Nitrogen,
PastureDistance,
SLOPE,
TSLF_bin,
DeerControl,
SambarHunt,
DeerHunt) %>%
dplyr::mutate(TreeDensity = log(TreeDensity),
BareSoil = sqrt(BareSoil),
# Elevation = log(Elevation),
DistancetoWater = log(DistancetoWater),
PastureDistance = log(PastureDistance),
CrownGrazingDistance = sqrt(CrownGrazingDistance),
MRVBF = sqrt(MRVBF),
SLOPE = sqrt(SLOPE))
brt_sambar <- brt_data %>%
filter(Species == "Rusa unicolor") %>%
as.data.frame()
brt <- gbm.step(data=brt_sambar,
gbm.x = 4:19, gbm.y = 3, family = "bernoulli", tree.complexity = 5,
learning.rate = 0.001, bag.fraction = 0.75, plot.main = FALSE, plot.folds = FALSE)
sambar.int <- gbm.interactions(brt)
b <- summary(brt)
We use Bayesian imperfect detection models via the implementation of functions and methods from the ubms
R package (Kellner 2021).
Given the variable surveying effort and potential variation in detection probabilities due to camera setup features (e.g. amount of surrounding vegetation blocking camera and immediate surrounding grazing area) as well as daily features (temperature, precipitation, days since deployment) we assume that there is variable/imperfect detection across the deployment and possibly between sites. Thus we provide five key detection-level variables for inclusion in a global model. Their use and reasoning are listed below:
Site-level covariates for the global model have been chosen based on the apparent contributions and effect of BRT models. This includes the potential role of interactions between predictor variables.
We generate a global model based on the apparent responses of the BRT model. Specifically including the variables explaining a large amount of the response. We also include several interaction effects based on the BRT results. The model is regularised strongly with a lasso prior (scale = 1) and contains an RSR component with a threshold of 100km. Some variables were not included in the global model as their contribution increased model complexity and BRT results suggested nearly no impact (e.g. hunting and time since last fire). in the detection submodel precipitation and herbaceous understorey were chosen as the two continuous variables of interest for the ‘camera’ method; ‘method’ being a five-class categorical variable.
# siteorder <- as.character(combined_model_data_wide$SiteID)
model_data_spatial <- bind_cols(combined_model_data_wide, combined_model_data_wide %>% sf::st_as_sf() %>% sf::st_transform(3111) %>% sf::st_coordinates()) %>%
mutate(Null = 1)
SambarStack <- unmarked::unmarkedFrameOccu(y = detection_history_joined$`Rusa unicolor`$detection_history[siteorder, ],
obsCovs =list(MaxTemp = joined_MaxTemp[siteorder, ],
Precipitation = joined_Precipitation[siteorder, ],
DaysSinceDeploy = joined_DaysSinceDeploy[siteorder, ],
HerbaceousUnderstoryCover = joined_HerbaceousUnderstoryCover[siteorder, ],
Method = joined_Method[siteorder, ]),
siteCovs = model_data_spatial)
if(!file.exists("models/SambarGlobal.rds")) {
SambarGlobal <- ubms::stan_occu(~ Method*scale(sqrt(Precipitation))
+ Method*scale(sqrt(HerbaceousUnderstoryCover))
~ scale(BIO12)
+ scale(BIO04)
+ scale(BIO01)
+ scale(BIO12)
+ scale(sqrt(BareSoil))
+ scale(log(DistancetoWater))
+ scale(BIO12*BIO04)
+ scale(BIO01*BIO12)
+ scale(sqrt(BareSoil)*BIO12)
+ scale(BIO04*log(DistancetoWater))
+ scale(TreeDensity)
+ scale(sqrt(SLOPE)*BIO04)
+ scale(NonPhotosyntheticVeg)
+ scale(log(PastureDistance))
+ scale(sqrt(CrownGrazingDistance))
+ scale(TWIND)
+ scale(BIO15)
+ RSR(X,Y, threshold = 100000),
prior_coef_state = laplace(scale = 1),
prior_coef_det = laplace(scale = 1),
data = SambarStack,
chains=4, iter=4000)
saveRDS(SambarGlobal, "models/SambarGlobal.rds", compress = "xz")
} else {
SambarGlobal <- readRDS("models/SambarGlobal.rds")
}
# SambarGlobal
# (fit_top_gof <- gof(SambarGlobal, draws=100, quiet=TRUE))
Our global model shows variable impacts of predictor variables on Sambar deer detectability and occupancy. Here, we utilise leave-one-out cross-validation (LOO) (Vehtari, Gelman, and Gabry 2017). Model selection using LOO allows for the comparison of several models based on their predictive performance. Specifically the expected predictive accuracy (elpd) is calculated for each model, with higher elpds suggesting better predictive performance. the ‘stacking’ weight is recomended for posterior model averaging and not the pseudo BMA+ weight (Yao et al. 2018). A set of candidate models were chosen based on the influence of their covariates from BRT.
if(!file.exists("models/SambarCandidateModels.rds")) {
SambarNull <- ubms::stan_occu(~Null
~Null,
data = SambarStack,
chains=4, iter=4000)
SambarNullDet <- ubms::stan_occu(~ Null
~ scale(BIO12*BIO04)
+ scale(BIO01*BIO12)
+ scale(sqrt(BareSoil)*BIO12)
+ scale(BIO04*log(DistancetoWater))
+ scale(TreeDensity)
+ scale(sqrt(SLOPE)*BIO04)
+ scale(NonPhotosyntheticVeg)
+ scale(log(PastureDistance))
+ scale(sqrt(CrownGrazingDistance))
+ scale(TWIND)
+ scale(BIO15)
+ RSR(X,Y, threshold = 100000),
data = SambarStack,
chains=4, iter=4000)
SambarNullOcc <- ubms::stan_occu(~ Method*scale(sqrt(Precipitation))
+ Method*scale(sqrt(HerbaceousUnderstoryCover))
~ Null,
data = SambarStack,
chains=4, iter=4000)
SambarRE <- ubms::stan_occu(~ Method*scale(MaxTemp)
+ Method*scale(sqrt(Precipitation))
+ Method*scale(DaysSinceDeploy)
+ Method*scale(sqrt(HerbaceousUnderstoryCover))
~ scale(BIO12*BIO04)
+ scale(BIO01*BIO12)
+ scale(sqrt(BareSoil)*BIO12)
+ scale(BIO04*log(DistancetoWater))
+ scale(TreeDensity)
+ scale(sqrt(SLOPE)*BIO04)
+ scale(NonPhotosyntheticVeg)
+ scale(log(PastureDistance))
+ scale(sqrt(CrownGrazingDistance))
+ scale(TWIND)
+ scale(BIO15)
+ TSLF_bin
+ (1|BIOREGION)
+ (1|EVC),
data = SambarStack,
chains=4, iter=4000)
SambarNoRE <- ubms::stan_occu(~ Method*scale(sqrt(Precipitation))
+ Method*scale(sqrt(HerbaceousUnderstoryCover))
~ scale(BIO12*BIO04)
+ scale(BIO01*BIO12)
+ scale(sqrt(BareSoil)*BIO12)
+ scale(BIO04*log(DistancetoWater))
+ scale(TreeDensity)
+ scale(sqrt(SLOPE)*BIO04)
+ scale(NonPhotosyntheticVeg)
+ scale(log(PastureDistance))
+ scale(sqrt(CrownGrazingDistance))
+ scale(TWIND)
+ scale(BIO15),
data = SambarStack,
chains=4, iter=4000)
SambarKeyVars <- ubms::stan_occu(~ Method*scale(sqrt(Precipitation))
+ Method*scale(sqrt(HerbaceousUnderstoryCover))
~ scale(BIO12*BIO04)
+ scale(BIO01*BIO12)
+ scale(sqrt(BareSoil)*BIO12)
+ scale(sqrt(SLOPE)*BIO04)
+ scale(NonPhotosyntheticVeg)
+ scale(log(PastureDistance))
+ scale(BIO15)
+ RSR(X,Y, threshold = 100000),
data = SambarStack,
chains=4, iter=4000)
SambarCandidateModels <- fitList(SambarGlobal,
SambarNull,
SambarNullDet,
SambarNullOcc,
SambarRE,
SambarNoRE,
SambarKeyVars)
saveRDS(SambarCandidateModels, "models/SambarCandidateModels.rds", compress = "xz")
} else {
SambarCandidateModels <- readRDS("models/SambarCandidateModels.rds")
}
# loolist <- lapply(SambarCandidateModels@models, function(x) x@loo)
# loo::loo_model_weights(loolist, method = "pseudobma")
SambarModSel <- ubms::modSel(SambarCandidateModels)
SambarModSel %>%
tibble::rownames_to_column(var = "model") %>%
kbl("html", caption = "Comparison Sambar Deer Models", digits = 3) %>%
kable_styling()
model | elpd | nparam | elpd_diff | se_diff | weight |
---|---|---|---|---|---|
SambarKeyVars | -867.703 | 42.533 | 0.000 | 0.000 | 0.661 |
SambarNoRE | -871.173 | 46.688 | -3.470 | 2.380 | 0.000 |
SambarGlobal | -871.389 | 50.323 | -3.686 | 2.033 | 0.000 |
SambarRE | -877.017 | 61.243 | -9.314 | 5.528 | 0.142 |
SambarNullOcc | -896.905 | 35.038 | -29.202 | 7.574 | 0.000 |
SambarNullDet | -933.880 | 23.441 | -66.177 | 20.935 | 0.126 |
SambarNull | -959.889 | 9.542 | -92.185 | 22.808 | 0.071 |
Based on the above model selection table, all assessed models apart from the null models have relatively similar predictive accuracy. This outcome provides us with two options in undertaking further analyses. Firstly, we could integrate over the posterior distributions for the non-zero weighted models in the above table. Essentially this would average 7 models and provide us with results that account for model uncertainty. Secondly, we could use the global model with regularised priors; which essentially down-weights parameters without strong informative influence on the presence or detection of Sambar Deer. The latter is a more straightforward task. Given this, SambarGlobal
will be henceforth called SambarTopModel
in the analysis. Specifically SambarGlobal
has an elpd_diff
within 2 x the se_diff
of the top model.
The regularised top model has several key features:
- laplace/double exponential priors.
- Restricted Spatial Regression (RSR) random effect (Johnson et al. 2013) with a threshold of 20km.
- Detection submodel with interactions between method and (i) Precipitation and (ii) Herbaceous Understory.
Below we present the model summary (marginal coefficients on the logit-scale) of the occupancy submodel for Sambar deer.
# SambarTopModel <- SambarCandidateModels@models[[rownames(SambarModSel[1,])]]
SambarTopModel <- SambarCandidateModels@models[["SambarGlobal"]]
sambar_top_summary <- summary(SambarTopModel, submodel = "state")
sambar_top_summary_sig <- which((sambar_top_summary$`2.5%` * sambar_top_summary$`97.5%`) > 0)
sambar_top_summary%>%
dplyr::select(-`25%`, -`50%`, -`75%`) %>%
kbl("html", caption = "Model estimates showing the effects of various covariates on the probability of occupancy for Sambar Deer. Estimates with a 95% CI not overlapping zero are shown in bold", digits = 3) %>%
kable_styling(full_width = FALSE) %>%
row_spec(sambar_top_summary_sig, bold = TRUE)
mean | se_mean | sd | 2.5% | 97.5% | n_eff | Rhat | |
---|---|---|---|---|---|---|---|
(Intercept) | -1.311 | 0.015 | 0.484 | -2.368 | -0.482 | 1043.379 | 1.004 |
scale(BIO12) | -0.103 | 0.015 | 0.845 | -1.946 | 1.573 | 3302.707 | 1.000 |
scale(BIO04) | 0.363 | 0.014 | 0.685 | -0.937 | 1.835 | 2536.609 | 1.001 |
scale(BIO01) | -0.630 | 0.012 | 0.644 | -2.052 | 0.453 | 2860.975 | 1.002 |
scale(sqrt(BareSoil)) | -0.912 | 0.026 | 1.167 | -3.727 | 0.917 | 2053.415 | 1.001 |
scale(log(DistancetoWater)) | 0.358 | 0.013 | 0.644 | -0.835 | 1.816 | 2480.713 | 1.000 |
scale(BIO12 * BIO04) | 0.199 | 0.015 | 0.838 | -1.486 | 1.963 | 3233.612 | 1.000 |
scale(BIO01 * BIO12) | 0.206 | 0.015 | 0.659 | -1.036 | 1.648 | 2013.326 | 1.003 |
scale(sqrt(BareSoil) * BIO12) | 1.218 | 0.016 | 0.736 | 0.012 | 2.910 | 2120.000 | 1.002 |
scale(BIO04 * log(DistancetoWater)) | -0.213 | 0.015 | 0.775 | -1.924 | 1.214 | 2581.848 | 1.001 |
scale(TreeDensity) | 0.198 | 0.013 | 0.593 | -0.971 | 1.453 | 2112.457 | 1.004 |
scale(sqrt(SLOPE) * BIO04) | 1.388 | 0.021 | 0.754 | 0.049 | 3.027 | 1328.784 | 1.003 |
scale(NonPhotosyntheticVeg) | -0.175 | 0.014 | 0.547 | -1.369 | 0.843 | 1458.317 | 1.001 |
scale(log(PastureDistance)) | 0.339 | 0.007 | 0.373 | -0.325 | 1.104 | 3109.023 | 1.001 |
scale(sqrt(CrownGrazingDistance)) | -0.246 | 0.007 | 0.357 | -0.992 | 0.402 | 2470.382 | 1.001 |
scale(TWIND) | -0.149 | 0.008 | 0.471 | -1.136 | 0.770 | 3170.897 | 1.001 |
scale(BIO15) | -0.724 | 0.009 | 0.487 | -1.751 | 0.120 | 2703.923 | 1.000 |
RSR [tau] | 63.166 | 18.696 | 108.698 | 0.002 | 386.572 | 33.802 | 1.070 |
Below we present the model summary (marginal coefficients on the logit-scale) of the detection submodel for Sambar deer. Interactions are not of interest as the reference level (camera) is the key variable that the continuous variables change over (precipitation and herbaceous understory).
sambar_top_summary_det <- summary(SambarTopModel, submodel = "det")
sambar_top_summary_sig_det <- which((sambar_top_summary_det$`2.5%` * sambar_top_summary_det$`97.5%`) > 0)
sambar_top_summary_det %>%
dplyr::select(-`25%`, -`50%`, -`75%`) %>%
kbl("html", caption = "Model estimates showing the effects of various covariates on the probability of detection for Sambar Deer. Estimates with a 95% CI not overlapping zero are shown in bold", digits = 3) %>%
kable_styling(full_width = FALSE) %>%
row_spec(sambar_top_summary_sig_det, bold = TRUE)
mean | se_mean | sd | 2.5% | 97.5% | n_eff | Rhat | |
---|---|---|---|---|---|---|---|
(Intercept) | -3.155 | 0.003 | 0.135 | -3.422 | -2.907 | 1575.793 | 1.001 |
MethodFootprint | 1.681 | 0.007 | 0.290 | 1.109 | 2.226 | 1836.700 | 1.000 |
MethodPellet | 1.654 | 0.006 | 0.302 | 1.022 | 2.224 | 2505.193 | 1.000 |
MethodRubbing | 0.481 | 0.008 | 0.382 | -0.286 | 1.203 | 2388.648 | 1.001 |
MethodWallow | -1.994 | 0.034 | 1.097 | -4.637 | -0.291 | 1069.413 | 1.010 |
scale(sqrt(Precipitation)) | -0.194 | 0.002 | 0.085 | -0.365 | -0.036 | 2103.555 | 1.002 |
scale(sqrt(HerbaceousUnderstoryCover)) | 0.727 | 0.003 | 0.123 | 0.497 | 0.972 | 2165.896 | 1.001 |
MethodFootprint:scale(sqrt(Precipitation)) | 0.279 | 0.003 | 0.203 | -0.122 | 0.678 | 4063.211 | 1.003 |
MethodPellet:scale(sqrt(Precipitation)) | 0.807 | 0.003 | 0.192 | 0.437 | 1.188 | 3133.304 | 1.003 |
MethodRubbing:scale(sqrt(Precipitation)) | 0.111 | 0.004 | 0.291 | -0.490 | 0.662 | 4618.527 | 1.000 |
MethodWallow:scale(sqrt(Precipitation)) | -0.583 | 0.024 | 1.007 | -2.871 | 0.987 | 1818.775 | 1.004 |
MethodFootprint:scale(sqrt(HerbaceousUnderstoryCover)) | -0.639 | 0.007 | 0.307 | -1.234 | -0.030 | 1906.878 | 1.001 |
MethodPellet:scale(sqrt(HerbaceousUnderstoryCover)) | -0.166 | 0.005 | 0.299 | -0.748 | 0.417 | 3173.286 | 1.001 |
MethodRubbing:scale(sqrt(HerbaceousUnderstoryCover)) | -0.454 | 0.009 | 0.406 | -1.257 | 0.358 | 2205.935 | 1.000 |
MethodWallow:scale(sqrt(HerbaceousUnderstoryCover)) | -0.615 | 0.019 | 0.953 | -2.398 | 1.349 | 2493.971 | 1.001 |
We undertake several additional checks of model convergence and posterior predictive capability.
Experimental Bayesian R2 for occupancy models has been implemented here. With 100 posterior draws of the R2 for both detection and occupancy submodels.
TopModelR2 <- bayes_R2_res_ubms(SambarTopModel, re.form = NULL, draws = 1000) %>%
as.data.table() %>%
melt()
R2sum <- TopModelR2 %>%
group_by(variable) %>%
summarise(R2 = quantile(value, 0.5),
LCI = quantile(value, 0.05),
UCI = quantile(value, 0.95)) %>%
as.data.table()
TopModelR2 %>%
# filter(variable %in% c("det", "state")) %>%
ggplot(aes(x = value, fill = variable)) +
geom_density(alpha = 0.5) +
xlim(0,1) +
ggtitle("Bayes R2 of detection and occupancy models") +
scale_fill_brewer(type = "qual", palette = 2) +
labs(fill = "submodel") +
theme_bw()
Figure 4: Bayes R2 values for detection and occupancy submodels
The average Bayes R2 for the ‘state’ submodel was 0.34 [90 % CI: 0.13, 0.49]. The average Bayes R2 for the ‘detection’ submodel was 0.22 [90 % CI: 0.11, 0.39]. Together, this generates an R2 for both models as being 0.57 [90 % CI: 0.52, 0.63]
Using simulated posterior values we compare simulated presences (and absences against the mean true value of presence) for (A) observation-level detections and (B) site-level detections. If the red line falls approximately in the middle of the distribution it is evidence that the model has posterior predictive power.
n_draws <- 500
nrows <- nrow(SambarStack@siteCovs)
# species <- unique(umf_lbp@siteCovs)
sim_y <- ubms::posterior_predict(SambarTopModel, "y", draws=n_draws, re.form = NULL)
prop0 <- apply(sim_y, 1, function(x) mean(x==0, na.rm=TRUE)) %>%
data.table::as.data.table() %>%
melt()
prop1 <- apply(sim_y, 1, function(x) mean(x==1, na.rm=TRUE)) %>%
data.table::as.data.table() %>%
melt()
actual_prop0 <- mean(getY(SambarTopModel) == 0, na.rm=TRUE)
actual_prop1 <- mean(getY(SambarTopModel) == 1, na.rm=TRUE)
# For each model get the prop quantile
propObvsQ <- prop1 %>%
group_by(model = variable) %>%
summarise(percentile = ecdf(value)(actual_prop1))
#Model posterior predictive comparisons
broad_props <- prop1 %>%
ggplot(aes(x = value)) +
geom_density(fill = "grey90") +
geom_vline(xintercept = actual_prop1, colour = "red") +
xlim(0.01, 0.05) +
xlab("Proportion of observations that \nrecorded presence")+
theme_bw()
obs_it <- dim(SambarStack@obsToY)[1]
vec <- integer()
for(n in 1:nrow(SambarStack@siteCovs)) {
vec <- c(vec, ((n*obs_it)-(obs_it-1)):(n*obs_it))
}
prop0 <- numeric()
prop1 <- numeric()
for(i in 1:n_draws) {
mat <- apply(matrix(sim_y[i,vec], ncol = obs_it, byrow = TRUE), 1, function(x) max(x, na.rm = TRUE))
mat[which(!is.finite(mat))] <- NA
prop1[i] <- mean(mat, na.rm = TRUE)
prop0[i] <- 1 - prop1[i]
}
p1 <- mean(apply(getY(SambarTopModel), 1, function(x) {max(x,na.rm = T)}))
sim_y_props <- data.frame(model = "Sambar Top Model",
prop0 = prop0,
prop1 = prop1,
actual_prop0 = 1-p1,
actual_prop1 = p1)
bound_sim <- bind_rows(sim_y_props)
station_props <- bound_sim %>%
ggplot(aes(x = prop1)) +
geom_density(fill = "grey90") +
geom_vline(aes(xintercept = actual_prop1), colour = "red") +
xlim(0.25, 0.45) +
xlab("Proportion of stations\n where species were recorded")+
theme_bw()
cowplot::plot_grid(broad_props, station_props, labels = "AUTO")
Figure 5: Comparison of posterior predictions from the Bayesian models to true mean values of presences. The results indicate that predictive power is high for being able to predict the broad detection patterns across all sites (A) and the proportion of sites that Sambar Deer were recorded at (B).
Below we generate plots of the model covariates and their response on either the probability of occupancy or the probability of detection.
newdata_means <- get_mean_df(SambarTopModel["det"], lev = 1)
nd_params <- list()
for(i in 1:ncol(newdata_means)) {
cols_but <- setdiff(1:ncol(newdata_means), i)
nd_params[[colnames(newdata_means)[i]]] <- newdata_means[,cols_but] %>%
left_join(data.frame(d = if(is.factor(SambarTopModel["det"]@data[[colnames(newdata_means)[i]]])) {
levels(SambarTopModel["det"]@data[[colnames(newdata_means)[i]]])
} else {
seq.int(from = min(SambarTopModel["det"]@data[[colnames(newdata_means)[i]]], na.rm = TRUE),
to = max(SambarTopModel["det"]@data[[colnames(newdata_means)[i]]], na.rm = TRUE), length.out = 50) %>% round(., digits = 4)
}) %>%
`colnames<-`(colnames(newdata_means)[i]), by = character())
}
# wt_used <- weights_ordered %>%
# filter(include) %>%
# pull(weight)
ppd <- list()
for(i in 1:length(nd_params)) {
ppd[[i]] <- ubms::posterior_linpred(SambarTopModel,
newdata = nd_params[[i]],
transform = TRUE,
draws = 1000, submodel = "det")
df <- ppd[[i]] %>%
`colnames<-`(nd_params[[i]][,names(nd_params)[i]]) %>%
as.data.table() %>%
melt(variable.name = names(nd_params)[i])
if(!is.character(nd_params[[i]][,ncol(nd_params[[i]])])) {
df[[names(nd_params)[i]]] <- as.numeric(as.character(df[[names(nd_params)[i]]])) %>% round(., digits = 4)
}
ppd[[i]] <- nd_params[[i]] %>%
full_join(df)
}
names(ppd) <- names(nd_params)
ppd_av <- purrr::map(ppd, ~ .x %>%
group_by_at(colnames(.x %>% dplyr::select(-value))) %>%
summarise(median = median(value, na.rm = TRUE),
LCI_95 = quantile(value, 0.025),
LCI_50 = quantile(value, 0.25),
UCI_50 = quantile(value, 0.75),
UCI_95 = quantile(value, 0.975)))
plot_numeric <- function(data_list, number) {
data_list[[number]] %>%
ggplot(aes_string(x = names(data_list)[number], y = "median")) +
geom_ribbon(aes(ymin = LCI_95, ymax = UCI_95), alpha = 0.3, fill = delwp_cols2[number]) +
geom_ribbon(aes(ymin = LCI_50, ymax = UCI_50), alpha = 0.3, fill = delwp_cols2[number]) +
geom_line(colour = delwp_cols2[number], size = 1.5) +
ylab("Detection Probability (0-1)") +
ylim(c(0, 1)) +
theme_bw()
}
plot_cat <- function(data_list, number) {
data_list[[number]] %>%
ggplot(aes_string(x = names(data_list)[number], y = "value")) +
# ggbeeswarm::geom_quasirandom(shape = 21, fill = delwp_cols2[number]) +
geom_violin(draw_quantiles = c(0.025,0.5,0.975), scale = "width", size = 1, alpha = 0.5, colour = "black", fill = delwp_cols2[number]) +
ylab("Detection Probability (0-1)") +
ylim(c(0, 1)) +
theme_bw()
}
ppd_mod <- ppd
ppd_mod[["Method"]] <- ppd_mod[["Method"]] %>%
mutate(value = case_when(Method == "Camera" ~ 1-((1-value)^42),
TRUE ~ 1-((1-value)^3))) %>%
as.data.frame()
meth <- plot_cat(ppd_mod, 1) +
ylab("Detection Probability (0-1)")
meth_table <- ppd_mod[["Method"]] %>%
group_by(Method) %>%
summarise(Median = median(value) %>% round(2),
`2.5%` = quantile(value, 0.025) %>% round(2),
`97.5%` = quantile(value, 0.975) %>% round(2))
md_sum <- unname(round(1-apply(1-meth_table[,2:4], 2, prod), 2))
# tmax <- plot_numeric(ppd_av, 1) +
# xlab("Maximum Daily Temperature") +
# ylim(c(0,0.25))
precip <- plot_numeric(ppd_av, 2) +
xlab("Daily precipitation (mm)") +
ylim(c(0,0.16))
herbund <- plot_numeric(ppd_av, 3) +
xlab("Herbaceous understory (%)") +
ylim(c(0,0.16))
cowplot::plot_grid(meth, precip, herbund, labels = "AUTO", ncol = 2)
Figure 6: Model predictions for the variables that impact detection probability. Predictions have been generated for each variable independently, with other covariates fixed at their mean value. For the ‘method’ covariate (A), we transform the the predictions so that camera detection is for a 6 week period (42 days), and the other deer signs are based on walking three transects. For continuous plots (B and C), 50 % and 95 % CIs are shown with respective shading around the mean line.
Using the estimated detection probabilities we can see that cameras, followed by pellet detection, footprints, rubbings and wallows are methods that can detect deer. When combined the likelihood of detecting Sambar deer at a site is: 0.97 (95% CI: 0.92, 0.99).
newdata_means <- get_mean_df(SambarTopModel["state"], lev = 1)
nd_params <- list()
for(i in 1:ncol(newdata_means)) {
cols_but <- setdiff(1:ncol(newdata_means), i)
nd_params[[colnames(newdata_means)[i]]] <- newdata_means[,cols_but] %>%
left_join(data.frame(d = if(is.factor(SambarTopModel["state"]@data[[colnames(newdata_means)[i]]])) {
levels(SambarTopModel["state"]@data[[colnames(newdata_means)[i]]])
} else {
seq.int(from = min(SambarTopModel["state"]@data[[colnames(newdata_means)[i]]], na.rm = TRUE),
to = max(SambarTopModel["state"]@data[[colnames(newdata_means)[i]]], na.rm = TRUE), length.out = 50) %>% round(., digits = 4)
}) %>%
`colnames<-`(colnames(newdata_means)[i]), by = character())
}
# wt_used <- weights_ordered %>%
# filter(include) %>%
# pull(weight)
ppd <- list()
for(i in 1:length(nd_params)) {
ppd[[i]] <- ubms::posterior_linpred(SambarTopModel,
newdata = nd_params[[i]],
transform = TRUE, re.form = NA,
draws = 1000, submodel = "state")
df <- ppd[[i]] %>%
`colnames<-`(nd_params[[i]][,names(nd_params)[i]]) %>%
as.data.table() %>%
melt(variable.name = names(nd_params)[i])
if(!is.character(nd_params[[i]][,ncol(nd_params[[i]])])) {
df[[names(nd_params)[i]]] <- as.numeric(as.character(df[[names(nd_params)[i]]])) %>% round(., digits = 4)
}
ppd[[i]] <- nd_params[[i]] %>%
full_join(df)
}
names(ppd) <- names(nd_params)
# ppd[["TSLF_bin"]] <- ppd[["TSLF_bin"]] %>%
# mutate(TSLF_bin = factor(TSLF_bin, levels = c("2 - 5", "5 - 20", "> 20", "Unburnt")))
ppd_av <- purrr::map(ppd, ~ .x %>%
group_by_at(colnames(.x %>% dplyr::select(-value))) %>%
summarise(median = median(value, na.rm = TRUE),
LCI_95 = quantile(value, 0.025),
LCI_50 = quantile(value, 0.25),
UCI_50 = quantile(value, 0.75),
UCI_95 = quantile(value, 0.975)))
plot_numeric <- function(data_list, number) {
data_list[[number]] %>%
ggplot(aes_string(x = names(data_list)[number], y = "median")) +
geom_ribbon(aes(ymin = LCI_95, ymax = UCI_95), alpha = 0.3, fill = delwp_cols2[number]) +
geom_ribbon(aes(ymin = LCI_50, ymax = UCI_50), alpha = 0.3, fill = delwp_cols2[number]) +
geom_line(colour = delwp_cols2[number], size = 1.5) +
ylab("Occupancy Probability (0-1)") +
ylim(c(0, 1)) +
theme_bw()
}
plot_cat <- function(data_list, number) {
data_list[[number]] %>%
ggplot(aes_string(x = names(data_list)[number], y = "value")) +
ggbeeswarm::geom_quasirandom(shape = 21, fill = delwp_cols2[number]) +
geom_violin(draw_quantiles = c(0.5), size = 1, alpha = 0.5) +
ylab("Occupancy Probability (0-1)") +
ylim(c(0, 1)) +
theme_bw()
}
pasturedist <- plot_numeric(ppd_av, 9) +
xlab("Distance to pasture (m)") +
scale_x_log10() +
ylim(c(0,1))
treedensity <- plot_numeric(ppd_av, 6) +
xlab("Tree density (%)") +
scale_x_sqrt() +
ylim(c(0,1))
npv <- plot_numeric(ppd_av, 8) +
xlab("Non-photosynthetic vegetation (%)") +
scale_x_sqrt() +
ylim(c(0,1))
slope <- plot_numeric(ppd_av, 10) +
xlab("Crown grazing distance (%)") +
scale_x_sqrt() +
ylim(c(0,1))
twind <- plot_numeric(ppd_av, 11) +
xlab("Topographic wetness index (TWIND)") +
scale_x_sqrt() +
ylim(c(0,1))
bio15 <- plot_numeric(ppd_av, 12) +
xlab('BIO15: Precipitation Seasonality (kg m^(2))') +
ylim(c(0,1)) +
theme(axis.title.x = element_markdown())
# sambarhunt <- plot_cat(ppd, 12) +
# xlab("Sambar Hunting Area") +
# ylim(c(0,1))
#### Interctions on a continous scale ####
# BIO01
# BIO12
# BIO04
# BareSoil
# Water
# Slope
nd_params_int <- list()
nd_params_int[["BIO12_BIO04"]] <- nd_params[["BIO12"]] %>%
dplyr::select(-BIO04) %>%
left_join(nd_params[["BIO04"]] %>%
dplyr::select(BIO04), by = character())
nd_params_int[["BIO04_DistancetoWater"]] <- nd_params[["BIO04"]] %>%
dplyr::select(-DistancetoWater) %>%
left_join(nd_params[["DistancetoWater"]] %>%
dplyr::select(DistancetoWater), by = character())
nd_params_int[["BareSoil_BIO12"]] <- nd_params[["BareSoil"]] %>%
dplyr::select(-BIO12) %>%
left_join(nd_params[["BIO12"]] %>%
dplyr::select(BIO12), by = character())
nd_params_int[["BIO01_BIO12"]] <- nd_params[["BIO01"]] %>%
dplyr::select(-BIO12) %>%
left_join(nd_params[["BIO12"]] %>%
dplyr::select(BIO12), by = character())
nd_params_int[["SLOPE_BIO04"]] <- nd_params[["SLOPE"]] %>%
dplyr::select(-BIO04) %>%
left_join(nd_params[["BIO04"]] %>%
dplyr::select(BIO04), by = character())
ppd_int <- list()
for(i in 1:length(nd_params_int)) {
ppd_int[[i]] <- ubms::posterior_linpred(SambarTopModel,
newdata = nd_params_int[[i]],
transform = TRUE, re.form = NA,
draws = 100, submodel = "state")
two_names <- as.character(stringr::str_split(names(nd_params_int)[i], "_", simplify = TRUE))
df <- ppd_int[[i]] %>%
`colnames<-`(apply(nd_params_int[[i]][,two_names], 1, paste, collapse = "_")) %>%
as.data.table() %>%
melt(variable.name = names(nd_params_int)[i]) %>%
tidyr::separate(col = !!names(nd_params_int)[i], into = two_names, sep = "_")
if(!is.character(nd_params_int[[i]][,ncol(nd_params_int[[i]])])) {
df[,c(two_names) := lapply(.SD, as.numeric), .SDcols = two_names]
}
ppd_int[[i]] <- nd_params_int[[i]] %>%
full_join(df)
}
names(ppd_int) <- names(nd_params_int)
ppd_int_av <- purrr::map(ppd_int, ~ .x %>%
group_by_at(colnames(.x %>% dplyr::select(-value))) %>%
summarise(median = median(value, na.rm = TRUE),
LCI_95 = quantile(value, 0.025),
LCI_50 = quantile(value, 0.25),
UCI_50 = quantile(value, 0.75),
UCI_95 = quantile(value, 0.975)))
plot_numeric_int <- function(data_list, number) {
name_split <- as.character(stringr::str_split(names(data_list)[number], "_", simplify = TRUE))
data_list[[number]] %>%
ggplot(aes_string(x = name_split[1], y = name_split[2])) +
# geom_ribbon(aes(ymin = LCI_95, ymax = UCI_95), alpha = 0.3, fill = delwp_cols2[number]) +
# geom_ribbon(aes(ymin = LCI_50, ymax = UCI_50), alpha = 0.3, fill = delwp_cols2[number]) +
geom_contour_filled(aes_string(z = "median"), bins = 11, breaks = seq(0, 1, 0.2), alpha = 0.8) +
geom_point(data = SambarTopModel@data@siteCovs %>%
mutate(Presence = as.character(`Rusa unicolor`)),
aes(colour = `Presence`), shape = 21, lwd = 2.5, stroke = 2.5, fill = "White") +
scale_colour_manual(values = c(`1` = delwp_cols[["Navy"]], `0` = delwp_cols[["Environment"]])) +
labs(fill = "Occupancy\nProbability") +
lims(fill = c(0,1)) +
theme_bw()
}
bio12_bio4 <- plot_numeric_int(ppd_int_av, 1) +
scale_fill_viridis_d(option = "D", direction = -1, guide = guide_legend(reverse = T)) +
xlab("BIO12: Annual precipitation (kg m^(2))") +
ylab("BIO04: Temperature seasonality (°C)") +
theme(axis.title.x = element_markdown())
bio4_water <- plot_numeric_int(ppd_int_av, 2) +
scale_fill_viridis_d(option = "D", direction = -1, guide = guide_legend(reverse = T)) +
xlab("BIO04: Temperature seasonality (°C)") +
ylab("Distance to water (m)") +
scale_y_log10()
baresoil_bio12 <- plot_numeric_int(ppd_int_av, 3) +
scale_fill_viridis_d(option = "D", direction = -1, guide = guide_legend(reverse = T)) +
xlab("Bare soil (%)") +
ylab("BIO12: Annual precipitation (kg m^(2))") +
scale_x_sqrt() +
theme(axis.title.y = element_markdown())
bio01_bio12 <- plot_numeric_int(ppd_int_av, 4) +
scale_fill_viridis_d(option = "D", direction = -1, guide = guide_legend(reverse = T)) +
xlab("BIO01: Annual mean temperature (°C)") +
ylab("BIO12: Annual precipitation (kg m^(2))") +
theme(axis.title.y = element_markdown())
slope_bio4 <- plot_numeric_int(ppd_int_av, 5) +
scale_fill_viridis_d(option = "D", direction = -1, guide = guide_legend(reverse = T)) +
xlab("Slope") +
ylab("BIO04: Temperature seasonality (°C)") +
scale_x_sqrt()
cowplot::plot_grid(cowplot::plot_grid(pasturedist, treedensity, npv, slope, twind, bio15, labels = "AUTO", ncol = 3),
cowplot::plot_grid(bio12_bio4, bio4_water, baresoil_bio12, bio01_bio12, slope_bio4, labels = c("G", "H", "I", "J", "K"), ncol = 2), ncol = 1, rel_heights = c(0.4,0.6))
Figure 7: Model predictions for the variables that impact occupancy probability. Predictions have been generated for each variable independently, with other covariates fixed at their mean value. For continuous plots (A-F), 50 % and 95 % CIs are shown with respective shading around the mean line. For interaction plots (G-K) the median prediction is shown (no confidence intervals are displayed). However, presences and absences are plotted based on their respective covariate values to highlight the range of covariates from the sampled sites.
We make predictions to Victoria. This prediction generates median predictions as well as 95 % confidence intervals. The resolution of these predictions is set at 1km by 1km grid cells.
# Read in crown land mask
crownland_mask <- terra::rast("/Volumes/DeerVic\ Photos/MaxentStack/maxent_stack_masked_cl.grd")[[1]]
top_model_vars <- SambarTopModel@call[["formula"]][[3]] %>% all.vars()
if(!file.exists("/Volumes/DeerVic\ Photos/MaxentStack/vic_rast_ubms_mask.tif")) {
vic_rast_ubms_mask <- terra::mask(processed_stack, crownland_mask)
terra::writeRaster(vic_rast_ubms_mask,
filename = "/Volumes/DeerVic\ Photos/MaxentStack/vic_rast_ubms_mask.tif",
overwrite = TRUE)
vic_bound_terra_proj <- rast(raster(vic_bound, res = 1000))
vic_model_data_resampled <- terra::resample(x = vic_rast_ubms_mask,
y = vic_bound_terra_proj,
method = "bilinear")
# vic_model_data_resampled <- terra::spatSample(x = vic_rast_ubms_mask,
# 1000000,
# "regular",
# as.raster = TRUE)
terra::writeRaster(vic_model_data_resampled,
filename = "/Volumes/DeerVic\ Photos/MaxentStack/vic_model_data_resampled.tif",
overwrite = TRUE)
# vic_rast_data <- vic_rast_ubms_mask[[top_model_vars]] %>%
# terra::as.data.frame(., xy = TRUE)
# saveRDS(vic_rast_data, "/Volumes/DeerVic\ Photos/MaxentStack/vic_rast_data.rds")
} else {
vic_rast_ubms_mask <- rast("/Volumes/DeerVic\ Photos/MaxentStack/vic_rast_ubms_mask.tif")
vic_model_data_resampled <- rast("/Volumes/DeerVic\ Photos/MaxentStack/vic_model_data_resampled.tif")
}
vic_rast_data <- terra::as.data.frame(vic_model_data_resampled, xy = TRUE) %>%
mutate(SambarHunt = factor(SambarHunt))
vic_rast_data$BIOREGION <- factor(round(vic_rast_data$BIOREGION))
levels(vic_rast_data$BIOREGION) <- levels(processed_stack$BIOREGION)[[1]][1:28]
if(!file.exists("models/vic_preds.rds")) {
vic_preds <- ubms::predict(object = SambarTopModel,
submodel = "state",
newdata = vic_rast_data,
transform = TRUE, re.form = NA)
saveRDS(vic_preds, "models/vic_preds.rds")
} else {
vic_preds <- readRDS("models/vic_preds.rds")
}
vic_preds_bound <- cbind(vic_rast_data, vic_preds)
vic_preds_terra <- terra::rast(vic_preds_bound[,c("x","y", "Predicted")])
vic_preds_terra_LCI <- terra::rast(vic_preds_bound[,c("x","y", "2.5%")])
vic_preds_terra_UCI <- terra::rast(vic_preds_bound[,c("x","y", "97.5%")])
vic_preds_terra_SD <- terra::rast(vic_preds_bound[,c("x","y", "SD")])
plotstatePA <- function(x) {
rasterVis::gplot(x, maxpixels = 1000000) +
geom_sf(data = regions_3111, fill = "grey", inherit.aes = FALSE, alpha = 0.2, colour = "grey70") +
geom_tile(aes(fill = value), na.rm = TRUE) +
scale_fill_gradient2(na.value = "transparent", low = delwp_cols[["Environment"]], mid = delwp_cols[["Teal"]], high = delwp_cols[["Navy"]], midpoint = 0.5, limits = c(0,1)) +
ylab("Latitude") +
xlab("Longitude") +
labs(fill = "Occupancy \nProbability") +
theme(axis.title = element_text(size = 20),
axis.text = element_text(size = 15),
legend.title = element_text(size=20),
legend.text = element_text(size=15),
legend.key.size = unit(3, 'cm')) +
theme_bw()
}
sw_bayes_mean <- plotstatePA(vic_preds_terra) + ggtitle("Median")
sw_bayes_lci <- plotstatePA(vic_preds_terra_LCI) + ggtitle("2.5% CI") + theme(legend.position = "none")
sw_bayes_uci <- plotstatePA(vic_preds_terra_UCI)+ ggtitle("97.5% CI") + theme(legend.position = "none")
bottom_row <- cowplot::plot_grid(sw_bayes_lci, sw_bayes_uci, labels = c('B', 'C'), ncol = 2)
bayes_vic_grid <- cowplot::plot_grid(sw_bayes_mean, bottom_row, labels = c('A', ''), ncol = 1, rel_heights = c(1.5,1))
ggsave(plot = bayes_vic_grid,
filename = "plots/bayes_vic_grid.pdf",
width = 16, height = 16, device='pdf', dpi=700)
bayes_vic_grid
Figure 8: Predictions of Smabar Deer occupancy across Victorian crown land from a presence-absence imperfect detection occupancy model. (A) shows the median derived occupancy from the posterior distribution with (B) and (C) showing the respective 95 % confidence intervals.
Based on the top Sambar model we determine the logistic threshold to classify area as either present or absent. From this threshold we can estimate the range size of Sambar Deer of crown land.
predSum <- data.frame(SiteID = names(apply(SambarTopModel@response@y, 1, max, na.rm = T)),
Presence = apply(SambarTopModel@response@y, 1, max, na.rm = T)) %>%
bind_cols(predict(SambarTopModel, "state"))
pred <- prediction(predSum$Predicted, predSum$Presence)
perf <- performance(pred, measure="fpr")
pdata <- tibble(Measure = "False positives",
`Logistic threshold` = perf@x.values[[1]],
`Fractional value` = perf@y.values[[1]])
perf2 <- performance(pred, measure="fnr")
pdata2 <- tibble(Measure = "False negatives",
`Logistic threshold` = perf2@x.values[[1]],
`Fractional value` = perf2@y.values[[1]])
perfcombined <- bind_rows(pdata, pdata2) %>%
group_by(`Logistic threshold`) %>%
mutate(CombinedVal = sum(`Fractional value`, na.rm = T),
DiffVal = abs(diff(`Fractional value`))) %>%
ungroup()
lt <- perfcombined[which.min(perfcombined$DiffVal),][["Logistic threshold"]]
rv <- perfcombined[which.min(perfcombined$DiffVal),][["Fractional value"]]
th_conv <- function(x, lte = lt) {
vic_preds_terra_th <- x
values(vic_preds_terra_th)[values(vic_preds_terra_th) >= lte] <- 1
values(vic_preds_terra_th)[values(vic_preds_terra_th) < lte] <- 0
values(vic_preds_terra_th) <- factor(values(vic_preds_terra_th))
levels(vic_preds_terra_th) <- c("Absent", "Present")
names(vic_preds_terra_th) <- "Predicted"
return(vic_preds_terra_th)
}
vic_preds_terra_th <- th_conv(vic_preds_terra)
vic_preds_terra_th_LCI <- th_conv(vic_preds_terra_LCI)
vic_preds_terra_th_UCI <- th_conv(vic_preds_terra_UCI)
area_dist <- vic_preds_bound %>%
filter(Predicted >= lt) %>%
nrow()
area_dist_lci <- vic_preds_bound %>%
filter(`2.5%` >= lt) %>%
nrow()
area_dist_uci <- vic_preds_bound %>%
filter(`97.5%` >= lt) %>%
nrow()
area_dist_form <- paste0(format(units::set_units(area_dist, "km2"), big.mark = ","), " [95% CI:", format(units::set_units(area_dist_lci, "km2"), big.mark = ","), ", ", format(units::set_units(area_dist_uci, "km2"), big.mark = ","), "]")
perfcombined %>%
ggplot(aes(x = `Logistic threshold`, y = `Fractional value`, colour = Measure)) +
geom_line() +
geom_vline(xintercept = lt,
linetype = "dotted",
colour = "red") +
geom_hline(yintercept = rv,
linetype = "dotted",
colour = "red") +
ylim(0,1) +
xlim(0,1) +
scale_colour_manual(values = c(`False positives` = "Dark Green",
`False negatives` = "Dark blue")) +
theme_classic()
Based on the assessment of false positive and false negative errors the logistic threshold to minimize false positive and false negative error rates is 0.53. If we use this logistic threshold to restrict the Sambar Deer predictions we obtain the following map/distribution. The total crown land covered by this threshold is 31,789 [km^2] [95% CI:17,710 [km^2], 41,521 [km^2]].
plotstatePATH <- function(x) {
name_x <- names(x)
ggplot() +
geom_sf(data = regions_3111, fill = "grey", inherit.aes = FALSE, alpha = 0.2, colour = "grey70") +
tidyterra::geom_spatraster(aes_string(fill = name_x), na.rm = TRUE, data = x) +
scale_fill_manual(na.value = "transparent",
values = c(`Absent` = delwp_cols[["Environment"]],
`Present` = delwp_cols[["Navy"]])) +
geom_point(aes(x = X, y = Y),
size = 1.5,
shape = 21,
alpha = 1,
fill = delwp_cols2[["Water"]],
data = combined_model_data_allMethod_loc %>%
filter(Species == "Rusa unicolor" & Presence == "Absent"),
inherit.aes = FALSE) +
geom_point(aes(x = X, y = Y),
size = 2.5,
shape = 21,
alpha = 1,
fill = delwp_cols2[["FFR"]],
data = combined_model_data_allMethod_loc %>%
filter(Species == "Rusa unicolor" & Presence == "Present"),
inherit.aes = FALSE) +
ylab("Latitude") +
xlab("Longitude") +
labs(fill = "Occupancy \nprobability") +
theme(axis.title = element_text(size = 20),
axis.text = element_text(size = 15),
legend.title = element_text(size=20),
legend.text = element_text(size=15),
legend.key.size = unit(3, 'cm')) +
theme_bw()
}
# plotstatePATH(vic_preds_terra_th_LCI) +
# ggtitle("Threshold Sambar Distribution")
#
# plotstatePATH(vic_preds_terra_th_UCI) +
# ggtitle("Threshold Sambar Distribution")
thmean <- plotstatePATH(vic_preds_terra_th) +
ggtitle("Median")
th_lci <- plotstatePATH(vic_preds_terra_th_LCI) +
ggtitle("2.5% CI") +
theme(legend.position = "none")
th_uci <- plotstatePATH(vic_preds_terra_th_UCI) +
ggtitle("97.5% CI") +
theme(legend.position = "none")
th_bot <- cowplot::plot_grid(th_lci, th_uci, ncol = 2,labels = c('B', 'C'))
cowplot::plot_grid(thmean, th_bot, nrow = 2, rel_heights = c(1.5,1), labels = c('A', ''))
Figure 9: Sambar Deer Distribution using a threshold that minimises false negatives and false postives results in a distribution of 31,439 [km^2] over crown land area. Orange dots represent the site locations where Sambar Deer were detected, while blue dots are absences. Plots B and C represent the lower and upper 95 % confidence intervals respectively.
# To Compare predictions vs Gormley read in Gormley data
gormley_dist_rat <- terra::rast("data/CurrentDist.asc")
gormleySambarOcc2 <- terra::rast("data/SambarOcc2.asc")
terra::crs(gormleySambarOcc2) <- terra::crs(processed_stack)
terra::crs(vic_preds_terra_th) <- terra::crs(processed_stack)
terra::crs(gormley_dist_rat) <- terra::crs(processed_stack)
values(gormley_dist_rat)[values(gormley_dist_rat) == 0] <- NA
values(gormleySambarOcc2) <- values(gormleySambarOcc2)/100
# Use the same logistic threshold to crop gormley
gormley_th <- th_conv(gormleySambarOcc2, lte = 0.4)
gormley_th_proj <- terra::project(gormley_th, vic_preds_terra_th)
gormley_th_mask <- terra::mask(gormley_th_proj, vic_preds_terra_th)
gormley_th_anti_mask <- terra::mask(gormley_th_proj, vic_preds_terra_th, inverse = T)
gormley_dist_rat_proj <- terra::project(gormley_dist_rat, vic_preds_terra_th)
gormley_th_mask_kde <- terra::mask(gormley_th_mask, gormley_dist_rat_proj)
gormley_th_mask_df <- as.data.frame(gormley_th_mask)
gormley_th_anti_mask_df <- as.data.frame(gormley_th_anti_mask)
gormley_th_mask_df_kde <- as.data.frame(gormley_th_mask_kde, xy = TRUE, na.rm = FALSE)
gormley_dist <- gormley_th_mask_df %>%
filter(Predicted == "Present") %>%
nrow()
gormley_dist_anti <- gormley_th_anti_mask_df %>%
filter(Predicted == "Present") %>%
nrow()
gormley_dist_kde <- gormley_th_mask_df_kde %>%
filter(Predicted == "Present") %>%
nrow()
expert_elicitation <- sf::read_sf("data/Sambar_Deer_Distributions_Final_20201203")
expert_elicitation_rast <- terra::rasterize(vect(expert_elicitation %>%
mutate(Predicted = 1L) %>%
sf::st_transform(3111)), vic_preds_terra_th, field = "Predicted", background = NA)
expert_elicitation_rast_cl <- terra::mask(expert_elicitation_rast, vic_preds_terra_th)
expert_elicitation_rast_cl_df <- as.data.frame(expert_elicitation_rast_cl)
ee_dist <- expert_elicitation_rast_cl_df %>%
filter(Predicted == 1) %>%
nrow()
gormley_dist_form <- format(units::set_units(gormley_dist, "km2"), big.mark = ",")
gormley_dist_form_kde <- format(units::set_units(gormley_dist_kde, "km2"), big.mark = ",")
# Create table comparing estimates
all_preds_list <- list()
all_preds_list[["EE_2015"]] <- as.data.frame(expert_elicitation_rast_cl, xy = TRUE, na.rm = FALSE) %>%
rename(`Expert Elicitation` = Predicted)
all_preds_list[["Gormley_2011"]] <- gormley_th_mask_df_kde %>%
mutate(Predicted = case_when(Predicted == "Present" ~ 1L,
TRUE ~ 0L)) %>%
rename(`KDE + P/A + PO` = Predicted)
all_preds_list[["Median"]] <- as.data.frame(vic_preds_terra_th, xy = TRUE, na.rm = FALSE) %>%
mutate(Predicted = case_when(Predicted == "Present" ~ 1L,
TRUE ~ 0L)) %>%
rename(`Median Estimate` = Predicted)
df_distribution <- all_preds_list %>% purrr::reduce(full_join, by = c("x", "y"))
df_distribution[is.na(df_distribution)] <- 0
df_distribution_summary <- df_distribution %>%
mutate(Overlap = paste0(`Expert Elicitation`, `KDE + P/A + PO`, `Median Estimate`)) %>%
group_by(Overlap) %>%
summarise(Area = n()) %>%
filter(Overlap != "000") %>%
mutate(Name = case_when(Overlap == "001" ~ "This Study ONLY",
Overlap == "010" ~ "Gormley 2011 ONLY",
Overlap == "011" ~ "This Study and Gormley 2011",
Overlap == "100" ~ "Forsyth 2015 ONLY",
Overlap == "101" ~ "This Study and Forsyth 2015",
Overlap == "110" ~ "Gormley 2011 and Forsyth 2015",
Overlap == "111" ~ "All",
TRUE ~ "No Prediction"),
Area = units::set_units(Area, "km2")) %>%
arrange(desc(Area)) %>%
dplyr::select(Name, Area)
df_distribution_summary %>%
kbl("html", caption = "Comparison of deer range between three studies on Sambar deer", digits = 0) %>%
kable_styling(full_width = FALSE)
Name | Area |
---|---|
All | 28105 [km^2] |
Forsyth 2015 ONLY | 7035 [km^2] |
Gormley 2011 and Forsyth 2015 | 4482 [km^2] |
This Study and Forsyth 2015 | 3479 [km^2] |
This Study ONLY | 168 [km^2] |
This Study and Gormley 2011 | 37 [km^2] |
Gormley 2011 ONLY | 20 [km^2] |
all_distrib <- df_distribution %>%
mutate(Overlap = paste0(`Expert Elicitation`, `KDE + P/A + PO`, `Median Estimate`)) %>%
filter(Overlap != "000") %>%
mutate(Name = case_when(Overlap == "001" ~ "This Study ONLY",
Overlap == "010" ~ "Gormley 2011 ONLY",
Overlap == "011" ~ "This Study and Gormley 2011",
Overlap == "100" ~ "Forsyth 2015 ONLY",
Overlap == "101" ~ "This Study and Forsyth 2015",
Overlap == "110" ~ "Gormley 2011 and Forsyth 2015",
Overlap == "111" ~ "All",
TRUE ~ "No Prediction"))
ggplot() +
geom_sf(data = regions_3111, fill = "grey", inherit.aes = FALSE, alpha = 0.2, colour = "grey70") +
geom_tile(aes(x=x, y=y, fill=Name), data=vic_preds_bound, fill = "grey70", inherit.aes = FALSE, na.rm = TRUE) +
geom_tile(aes(x=x, y=y, fill=Name), data=all_distrib) +
geom_point(data = historic_deer_ala_records %>%
filter(scientificName == "Rusa unicolor") %>%
st_transform(3111) %>%
st_intersection(regions_3111) %>%
st_coordinates() %>%
as.data.frame(),
aes(x = X, y = Y),
shape = 21, fill = "Orange", size = 1) +
viridis::scale_fill_viridis(discrete=TRUE, direction=1, option="D", na.value = "transparent") +
ylab("Latitude") +
xlab("Longitude") +
labs(fill = "Predicted distribution") +
theme(axis.title = element_text(size = 20),
axis.text = element_text(size = 15),
legend.title = element_text(size=20),
legend.text = element_text(size=15),
legend.key.size = unit(3, 'cm')) +
theme_bw()
Figure 10: Comparison of threshold from this study to two previous studies estimating Sambar Deer Distribution. Atlas of Living Australia records from the past 30 years are plotted as orange points.
Deer density is anticipated to impact on vegetation measures such as seedling and sapling count. Here we investigate whether the predicted Sambar occupancy relates to seedlings or saplings via a multivariate response model.
sambarseeddata <- data.frame(SiteID = names(apply(SambarTopModel@response@y, 1, max, na.rm = T)),
Presence = apply(SambarTopModel@response@y, 1, max, na.rm = T)) %>%
bind_cols(predict(SambarTopModel, "state")) %>%
left_join(vegetation %>%
dplyr::select(SiteID, Saplings, Seedlings),
by = "SiteID") %>%
left_join(combined_spatial_data %>%
dplyr::select(SiteID, BIOREGION, EVC) %>%
mutate(SiteID = as.character(SiteID)),
by = "SiteID")
sambarseeddata %>%
group_by(Presence) %>%
summarise(Seed = median(Seedlings),
Saplings = median(Saplings))
#> # A tibble: 2 × 3
#> Presence Seed Saplings
#> <dbl> <dbl> <dbl>
#> 1 0 0 1
#> 2 1 5 8
bform1 <- bf(mvbind(Saplings, Seedlings) ~ Predicted)
bform2 <- bf(mvbind(Saplings, Seedlings) ~ Predicted + (BIOREGION|EVC))
if(!file.exists("models/vegfit1.rds")) {
vegfit1 <- brm(bform1, family = zero_inflated_poisson(), data = sambarseeddata, chains = 3, iter = 1000)
saveRDS(vegfit1, "models/vegfit1.rds")
} else {
vegfit1 <- readRDS("models/vegfit1.rds")
}
summary(vegfit1)
#> Family: MV(zero_inflated_poisson, zero_inflated_poisson)
#> Links: mu = log; zi = identity
#> mu = log; zi = identity
#> Formula: Saplings ~ Predicted
#> Seedlings ~ Predicted
#> Data: sambarseeddata (Number of observations: 130)
#> Draws: 3 chains, each with iter = 1000; warmup = 500; thin = 1;
#> total post-warmup draws = 1500
#>
#> Population-Level Effects:
#> Estimate Est.Error l-95% CI u-95% CI Rhat
#> Saplings_Intercept 3.62 0.03 3.56 3.68 1.01
#> Seedlings_Intercept 4.93 0.02 4.89 4.97 1.00
#> Saplings_Predicted 0.49 0.05 0.40 0.58 1.00
#> Seedlings_Predicted -0.39 0.03 -0.45 -0.32 1.00
#> Bulk_ESS Tail_ESS
#> Saplings_Intercept 1548 908
#> Seedlings_Intercept 1309 1051
#> Saplings_Predicted 1691 1242
#> Seedlings_Predicted 1604 1163
#>
#> Family Specific Parameters:
#> Estimate Est.Error l-95% CI u-95% CI Rhat Bulk_ESS
#> zi_Saplings 0.41 0.04 0.33 0.49 1.00 1564
#> zi_Seedlings 0.49 0.04 0.41 0.58 1.00 2020
#> Tail_ESS
#> zi_Saplings 871
#> zi_Seedlings 1046
#>
#> Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
#> and Tail_ESS are effective sample size measures, and Rhat is the potential
#> scale reduction factor on split chains (at convergence, Rhat = 1).
# pp_check(vegfit1, resp = "Seedlings")
plot(conditional_effects(vegfit1), ask = FALSE, theme = theme_classic())
Figure 11: Relationship between Predicted Sambar Deer occupancy and Seedlings/Saplings count
Figure 12: Relationship between Predicted Sambar Deer occupancy and Seedlings/Saplings count
If you see mistakes or want to suggest changes, please create an issue on the source repository.
For attribution, please cite this work as
Cally (2022, Aug. 2). Justin's Code Blog: Monitoring deer distribution, abundance and impacts across Victoria. Retrieved from https://justincally.github.io/blog/posts/2022-06-02-sambar-occupancy/
BibTeX citation
@misc{cally2022sambaraoccupancy, author = {Cally, Justin G}, title = {Justin's Code Blog: Monitoring deer distribution, abundance and impacts across Victoria}, url = {https://justincally.github.io/blog/posts/2022-06-02-sambar-occupancy/}, year = {2022} }