#### 1. Getting ready #### rm(list=ls()) getwd() # Use output from this line to modify the next one ... #setwd("PATHWAY TO YOUR WORKING DIRECTORY") Hc_data <- read.table("hypericum_data_94_07.txt", header=T) Hc_pop <- read.table("popmeanHc.txt", header=T) Height_data <-Hc_data$ht_init[Hc_data$stage !="sg" & Hc_data$ht_init<90] #### 2. Plots and calculation of averages using a frequentist approach #### hist(Height_data,100,main="Histogram of Hypericum cumulicola height (cm)",xlab= "Height(cm)") abline(v=Hc_pop$pop_mean, col="blue") abline(v=mean(Height_data), col="red",lwd=3) mean(Height_data) var(Height_data) sqrt(var(Height_data)) round(sqrt(var(Height_data))/sqrt(length(Height_data)),4) summary(lm(Height_data~1)) mean.pop.mean <- mean(Hc_pop$pop_mean) mean.pop.sd <- mean(Hc_pop$pop_sd) par(mfrow=c (1,2)) hist(Hc_pop$pop_mean,8,main="population mean", xlab="height mean") abline(v = mean(Hc_pop$pop_mean), col="blue") abline(v=mean(Height_data), col="red",lwd=3) hist(Hc_pop$pop_sd^2,8,main="population variance",xlab="height variance") abline(v = mean(Hc_pop$pop_sd^2), col="blue") abline(v=var(Height_data), col="red",lwd=3) #### 3. Sampling from the dataset #### size <-10 n <- length(size) pop.mean.mean <- mean(Hc_pop$pop_mean) pop.mean.sd <- sqrt(var(Hc_pop$pop_mean)) log.pop.var.mean <- log(mean(Hc_pop$pop_sd)) log.pop.sd.sd <- log(sqrt(var(Hc_pop$pop_sd))) #### 4. Calculation of averages using a Bayesian approach #### library(rethinking) library(rstan) library(ggplot2) x <- sample(Height_data,size) samp_data <- as.data.frame(cbind(1:size,x)) colnames(samp_data) <- c("id","height") ## a) Bayesian analysis with diffuse priors ## model.difuse <- ulam( alist( height ~ dnorm(mu,sigma), mu <- a, a ~ dnorm(0,100), #sigma ~ dunif(0,100) sigma ~ dcauchy(0,1) ), data = samp_data ,chains =3, ) precis(model.difuse,digits=2) summary(lm(height~1,data=samp_data)) par(mfrow=c(1,1)) plot(model.difuse,pars=c("a","sigma")) pairs(model.difuse,pars=c("a","sigma")) vv1 <- precis(model.difuse,digits=2) post1 <- extract.samples(model.difuse) par(mfrow=c(1,2)) for(i in names(post1)){ dfpost1 <- data.frame(post1[i]) plot(as.numeric(rownames(dfpost1)), dfpost1[,1], type = "l", xlab = "Index", ylab = i) } ht <- 33 mu <- link(model.difuse,data=data.frame(height=ht)) mu.mean1 <- apply(mu,2,mean) mu.PI1 <- apply(mu,2,PI,prob=0.89) par(mfrow=c (1,2)) plot(density(post1$a), main="") lines(density(rnorm(10000,0,100)),col="red",lty=2) plot(density(post1$sigma), main="") lines(density(runif(10000,0,100)),col="red",lty=2) median(post1$sigma) ## a) Bayesian analysis with informed priors ## model.informed <- ulam( alist( height ~ dnorm(mu,sigma), mu <- a, a ~ dnorm(34.47,4.19), sigma ~ dlnorm(2.52,0.82) ), data = samp_data ,chains =3, ) par(mfrow=c(1,1)) precis(model.informed,digits=2) plot(model.informed,pars=c("a","sigma")) pairs(model.informed,pars=c("a","sigma")) vv2 <- precis(model.informed,digits=2) post2 <- extract.samples(model.informed) par(mfrow=c(1,2)) for(i in names(post2)){ dfpost2 <- data.frame(post2[i]) plot(as.numeric(rownames(dfpost2)), dfpost2[,1], type = "l", xlab = "Index", ylab = i) } ht <- 33 mu <- link(model.informed,data=data.frame(height=ht)) mu.mean2 <- apply(mu,2,mean) mu.PI2 <- apply(mu,2,PI,prob=0.89) #### 5. Comparing all approaches #### Results <- array(0,c(5,8)) colnames(Results) <- c("Mean", "StdDev", "l m 0.89", "u m 0.89", "Sigma", "Stdev", "l sd 0.89", "u sd 0.89") rownames(Results) <- c("Frequentist", "B diffuse priors","B informed priors", "Population","Overall") #Frequentist# rx <- summary(lm(x~1)) sample_mean <-mean(Hc_pop$pop_mean) Results[1,1] <- rx$coefficients[1] Results[1,2] <- rx$coefficients[2] Results[1,3] <- sample_mean - qt(0.89, size-1)*sqrt(var(x)/size) Results[1,4] <- sample_mean + qt(0.89, size-1)*sqrt(var(x)/size) Results[1,5] <- rx$sigma Results[1,6] <- NA Results[1,7] <- NA Results[1,8] <- NA #Bayesian - diffuse priors# Results[2,1] <- vv1[1,1] Results[2,2] <- vv1[1,2] Results[2,3] <- vv1[1,3] Results[2,4] <- vv1[1,4] Results[2,5] <- vv1[2,1] Results[2,6] <- vv1[2,2] Results[2,7] <- vv1[2,3] Results[2,8] <- vv1[2,4] #Bayesian - informed priors# Results[3,1] <- vv2[1,1] Results[3,2] <- vv2[1,2] Results[3,3] <- vv2[1,3] Results[3,4] <- vv2[1,4] Results[3,5] <- vv2[2,1] Results[3,6] <- vv2[2,2] Results[3,7] <- vv2[2,3] Results[3,8] <- vv2[2,4] #Population data# Results[4,1] <- sample_mean <-mean(Hc_pop$pop_mean) Results[4,2] <- NA Results[4,3] <- NA Results[4,4] <- NA Results[4,5] <- mean(Hc_pop$pop_sd) Results[4,6] <- NA sample_std_error <- sd(Height_data)/sqrt(length(Height_data)) #Individual data# rxp <- summary(lm(Height_data~1)) Results[5,1] <- mean(Height_data) Results[5,2] <- rxp$coefficients[2] Results[5,3] <- mean(Height_data) - qt(0.89, length(Height_data)-1)*sample_std_error Results[5,4] <- mean(Height_data) + qt(0.89, length(Height_data)-1)*sample_std_error Results[5,5] <- sqrt(var(Height_data)) Results[5,6] <- NA round(Results,2) par(mfrow=c(1,4)) b=seq(-10000,10000,5) b1=seq(-50,100,1) hist(rnorm(1000,0,sqrt(1e+06)),breaks=b, ylim=c(0,700),xlim=c(0,100), xlab="height",main ="diffuse prior") hist(rnorm(10000,pop.mean.mean,pop.mean.sd) ,breaks=b1,xlim=c(0,100), xlab="height",main ="informed prior") hist(Height_data,breaks=b1,xlim=c(0,100),xlab="height",main ="all data") hist(sample(Height_data,10),breaks=b1, ylim=c(0,10),xlab="height",main ="sample of data") par(mfrow=c(1,2)) hist(rcauchy(1000, location = 0, scale = 1),xlab="sd height", main ="diffuse prior",xlim=c(0,100)) hist(rlnorm(1000,log.pop.var.mean,log.pop.sd.sd),100, xlab="sd height",main ="informed prior",xlim=c(0,60)) abline(v=exp(log.pop.var.mean),col="red") ############################################################## s_p <- 100000 prior_m_no <-rnorm(s_p,0, 100) prior_m_in <-rnorm(s_p,pop.mean.mean,pop.mean.sd) post_m_no <- rnorm(s_p, 41.02, 4.56) post_m_in <- rnorm(s_p, 37.61, 3.08) ############################################################## priors <-c(prior_m_no,prior_m_in,post_m_no,post_m_in) length(priors) type <- c(rep(1,s_p),rep(2,s_p),rep(3,s_p),rep(4,s_p)) levels(type) <- c("prior_diffuse","prior informed","posterior diffuse","posterior informed") length(type) p.graph <- cbind(type,priors) p.graph <-as.data.frame(p.graph) p.graph$type <- factor(p.graph$type) levels(p.graph$type) <- c("difussed","informed","posterior_diffuse","posterior informed") qplot(p.graph$priors,geom="density",colour=p.graph$type,size=p.graph$type, xlab="Height")+ scale_color_manual(values = c("orange","red","darkgreen","blue")) + scale_size_manual(values=c(0.7,0.7,0.7,0.7))+ geom_vline(xintercept =sample_mean,lwd=1.5) + xlim(-30,80)+ylim(-0,0.15) ############################################################## priors <-c(prior_m_in,post_m_no,post_m_in) length(priors) type <- c(rep(1,s_p),rep(2,s_p),rep(3,s_p)) levels(type) <- c("prior informed","posterior diffuse","posterior informed") length(type) p.graph <- cbind(type,priors) p.graph <-as.data.frame(p.graph) p.graph$type <- factor(p.graph$type) levels(p.graph$type) <- c("informed","posterior_diffuse","posterior informed") qplot(p.graph$priors,geom="density",colour=p.graph$type,size=p.graph$type, xlab="Height")+ scale_color_manual(values = c("red","darkgreen","blue")) + scale_size_manual(values=c(0.7,0.7,0.7,0.7))+ geom_vline(xintercept =sample_mean,lwd=1.5) + xlim(-0,60)+ylim(-0,0.15) #####################################################################