#####################################################################
## Ant_Nest_Density.R	                                           ##
#####################################################################
## This script compares the density of ant nests in two different  ##
## habitat types using a Monte Carlo approach (via a simple        ##
## complete randomization test), a frequentist approach (via a     ##
## parametric analysis of variance test and a Bayesian approach.   ##                                             ##
#####################################################################

rm(list=ls())
library(rethinking)
library(rstan)
library(bbmle)
##Set working directory manually or with the following command##
#setwd("insert_your_pathway")
#setwd("C:/Users/Pedro/Documents/Classes/Methods in Ecology/2013 fall") # May have to adapt that

## Part I##
## USER-DEFINED VARIABLES ##

## the name of the tab-delimited text file containing the data
file_name <- "Ant_Nest_Data.txt"

## the number of randomizations for the Monte Carlo analysis
iterations <- 5000

## PROGRAM CODE ##

## load the ant nest density data from the text file
nd <- read.table(file_name, header=T)
nd$Habitat<-as.factor(nd$Habitat)
## PLOT THE DATA

boxplot(nd$NestsPerQuadrat ~ nd$Habitat,col=c("orange","lightblue3"),
        xlab="Habitat",ylab="Number")

## ANALYSIS

## Part II: Frequentist Analysis ##

## calculate the sample mean for each habitat type
mean_forest <- mean(nd$NestsPerQuadrat[nd$Habitat=="Forest"])
mean_field <- mean(nd$NestsPerQuadrat[nd$Habitat=="Field"])
obs <-c(mean_forest,mean_field)

## create a linear model (lm) relating nests/quadrat to habitat
model <- lm(NestsPerQuadrat ~ Habitat, data = nd)

## now perform a one-way ANOVA on the model and display the results
print(anova(model))

## Part III: Monte Carlo Analysis ##

## calculate the observed test statistic, which is just the absolute
## value (abs) of the difference between the group means
diff_obs <- mean_forest - mean_field

## create an empty array to hold the test statistic for each iteration
diffs <- numeric(iterations)

## for each iteration, randomize the data and compute new test statistic
for (i in 1:iterations) {
  
  ## randomize the nest data
  Habitat_random <- sample(nd$Habitat, length(nd$Habitat))
  Nests_random <- nd$NestsPerQuadrat
  random_data <- data.frame(Habitat_random, Nests_random)
  
  ## compute the group means for the randomized data
  mean_forest <- mean(Nests_random[Habitat_random=="Forest"])
  mean_field <- mean(Nests_random[Habitat_random=="Field"])
  
  ## compute and save the test statistic
  diffs[i] <- mean_forest - mean_field
}

## show a histogram of the test statistic
hist(diffs,10)
abline(v=diff_obs, col = "red")

## calculate the tail probability
P_Mc <- length(diffs[diffs <= diff_obs])/iterations
print(P_Mc)

## Part IV: Bayesian Analysis ##


model1 <- ulam(
  alist(
    NestsPerQuadrat ~ dnorm(mu,sigma),
    mu <- a,
    a ~ dnorm(0,100),
    sigma ~ dunif(0,100)
  ),
  data = nd,chains =3, iter=4000, log_lik = TRUE
)

precis(model1,digits=2)
plot(model1,pars=c("a","sigma"))
pairs(model1, horInd = c(1:2), verInd = c(1:2))
# pairs(model1,pars=c("a","sigma"))

post1 <- extract.samples(model1)
par(mfrow=c(1,2))
plot(post1$a, type = "l")
plot(post1$sigma, type = "l")

nd$Forest <- 0
nd$Forest[nd$Habitat=="Forest"] <-1

model2 <- ulam(
  alist(
    NestsPerQuadrat ~ dnorm(mu,sigma),
    mu <- a + b*Forest,
    a ~ dnorm(0,100),
    b ~ dnorm(0,100),
    sigma ~ dunif(0,1000)
  ),
  data = nd,chains =3, iter = 4000, log_lik = TRUE 
)

compare(model1,model2)

precis(model2,digits=2)
par(mfrow=c(1,1))
plot(model2,pars=c("a","b","sigma"))
pairs(model2, horInd = c(1:3), verInd = c(1:3))
# pairs(model2,pars=c("a","sigma"))

post2 <- extract.samples(model2)
par(mfrow=c(1,3))
plot(post2$a, type = "l")
plot(post2$b, type = "l")
plot(post2$sigma, type = "l")

post <- extract.samples(model2)

par(mfrow=c(1,3),mar=c(4,4,3,2),mgp=c(2.2,0.6,0))
plot(density(post$a),xlim=c(-5,20),main="a",cex.axis=1.5,cex.main=2.1,cex.lab=1.8,lwd=1.5)
lines(density(rnorm(10000,0,100)),col="red",lwd=1.5)
plot(density(post$b),xlim=c(-20,20),main="b",cex.axis=1.5,cex.main=2.1,cex.lab=1.8,lwd=1.5)
lines(density(rnorm(10000,0,100)),col="red",lwd=1.5)
plot(density(post$sigma),xlim=c(0,10),main="sigma",cex.axis=1.5,cex.main=2.1,cex.lab=1.8,lwd=1.5)
lines(density(runif(1000000,0,100)),col="red",lwd=1.5)

sum((post$b < -4)*1)/length(post$b)

par(mfrow=c (1,2))
plot(density(post$a),xlim=c(2,18),main="field",cex.axis=1.5,cex.main=2.1,cex.lab=1.8,lwd=1.5)
plot(density(post$a+post$b),xlim=c(2,18),main="forest",cex.axis=1.5,cex.main=2.1,cex.lab=1.8,lwd=1.5)