### Part I: Preparing the data rm(list=ls()) getwd() #setwd("pathway") # Read the Hypericum survival data from file orig_data <- read.table("Hypericum_survival.txt", header=T) attach(orig_data) # Change variables to include only reproductive individuals survival <- survival[rep_structures>0] rep_structures <- rep_structures[rep_structures>0] ### Part II: Logistic regression with a Frequentist approach # A. Construct a logistic regression model of survival vs. reproductive structures model1 <- glm(survival ~ rep_structures, family=binomial) summary(model1) # Calculate the G^2 statistic from the deviance values and get the p-value by comparing to the Chi-square distribution G_sq <- model1$null.deviance - model1$deviance pchisq(G_sq, 1, lower.tail=F) # Plot model 1 par(mfrow=c(1,2)) # LEFT: in the natural scale x_values_rs <- seq(0, max(rep_structures), 1) y_values_rs <- predict(model1, list(rep_structures=x_values_rs), type="response") plot(rep_structures, survival,type="n",ylab="P(survival)") rug(jitter(rep_structures[survival==0]),ticksize = 0.03,) rug(jitter(rep_structures[survival==1]),ticksize = 0.03, side=3) lines(x_values_rs, y_values_rs) b <- seq(0, max(rep_structures),30) z <- cut(rep_structures, b) prebyden <- tapply(survival,z,sum) tab <- table(z) probs <-prebyden/tab probs <- as.vector(probs) points(b[2:length(b)],probs,pch=16,cex=1) se2<-sqrt(probs*(1-probs)/tab) up2 <-probs+as.vector(se2) down2 <-probs-as.vector(se2) for (i in 1:length(b-1)){ lines(c(b[i+1],b[i+1]),c(up2[i],down2[i]))} # Add confidence intervals MyData <- expand.grid(x_values_rs) X <- model.matrix(~x_values_rs,data=MyData) eta <- X%*%coef(model1) MyData$pi <- exp(eta)/ (1+exp(eta)) SE <- sqrt(diag(X%*%vcov(model1)%*%(t(X)))) MyData$SEup <- exp(eta+1.96*SE)/(1+exp(eta+1.96*SE)) MyData$SElow <- exp(eta-1.96*SE)/(1+exp(eta-1.96*SE)) lines(MyData$Var1,MyData$SEup,lty=2) lines(MyData$Var1,MyData$SElow,lty=2) # RIGHT: in the logit scale coef <-model1$coefficients plot(b[2:length(b)],log(probs/(1-probs)),xlab="# reproductive structures") abline(a=coef[1],b=coef[2]) # Use model1 to predict survival probability of plant with 500 reproductive structures rs <- 500 coeffs <- summary(model1)$coefficients odds_ratio <- coeffs[1,1] + coeffs[2,1]*rs prob <- 1/(1 + (1/exp(odds_ratio))) prob # B. Construct a logistic regression model of survival vs. log(reproductive structures) rep <- log(rep_structures) model2 <-glm(survival ~ rep, family=binomial) summary(model2) # Calculate the G^2 statistic from the deviance values and get the p-value by comparing to the Chi-square distribution G_sq <- model2$null.deviance - model2$deviance pchisq(G_sq, 1, lower.tail=F) # Plot model 2 par(mfrow=c(1,2)) # LEFT: in the natural scale x_values_lrs <- seq(0, max(rep)+1, 1) y_values_lrs <- predict(model2, list(rep=x_values_lrs), type="response") plot(rep, survival,type="n",ylab="P(survival)",xlab="log(# reproductive structures)") rug(jitter(rep[survival==0]),ticksize = 0.03,) rug(jitter(rep[survival==1]),ticksize = 0.03, side=3) lines(x_values_lrs, y_values_lrs) b <- seq(0, max(rep),0.5) z <- cut(rep, b) prebyden <- tapply(survival,z,sum) tab <- table(z) probs <-prebyden/tab probs <- as.vector(probs) points(b[2:length(b)],probs,pch=16,cex=1) se2<-sqrt(probs*(1-probs)/tab) up2 <-probs+as.vector(se2) down2 <-probs-as.vector(se2) for (i in 1:length(b-1)){ lines(c(b[i+1],b[i+1]),c(up2[i],down2[i]))} # Add confidence intervals MyData <- expand.grid(x_values_lrs) X <- model.matrix(~x_values_lrs,data=MyData) eta <- X%*%coef(model2) MyData$pi <- exp(eta)/ (1+exp(eta)) SE <- sqrt(diag(X%*%vcov(model2)%*%(t(X)))) MyData$SEup <- exp(eta+1.96*SE)/(1+exp(eta+1.96*SE)) MyData$SElow <- exp(eta-1.96*SE)/(1+exp(eta-1.96*SE)) lines(MyData$Var1,MyData$SEup,lty=2) lines(MyData$Var1,MyData$SElow,lty=2) # RIGHT: in the logit scale coef <-model2$coefficients plot(b[2:length(b)],log(probs/(1-probs)),xlab="log(reproductive structures)") abline(a=coef[1],b=coef[2]) # Use model2 to predict survival probability of plant with 500 reproductive structures rs <- log(500) coeffs <- summary(model2)$coefficients odds_ratio <- coeffs[1,1] + coeffs[2,1]*rs prob <- 1/(1 + (1/exp(odds_ratio))) prob # C. Compare and plot the two models AIC(model1,model2) par(mfrow=c(1,1)) plot(rep_structures, survival,type="n",ylab="P(survival)",xlab="# reproductive structures") rug(jitter(rep_structures[survival==0]),ticksize = 0.03,) rug(jitter(rep_structures[survival==1]),ticksize = 0.03, side=3) x_values_rs <- seq(0, max(rep_structures),1) y_values_rs <- predict(model1, list(rep_structures=x_values_rs), type="response") lines(x_values_rs, y_values_rs, col="blue") x_values_lrs <- seq(0, max(rep), 0.1) y_values_lrs <- predict(model2, list(rep=x_values_lrs), type="response") lines(exp(x_values_lrs), y_values_lrs, col="red") b <- seq(0, max(rep_structures),30) z <- cut(rep_structures, b) prebyden <- tapply(survival,z,sum) tab <- table(z) probs <-prebyden/tab probs <- as.vector(probs) points(b[2:length(b)],probs,pch=16,cex=1) se2<-sqrt(probs*(1-probs)/tab) up2 <-probs+as.vector(se2) down2 <-probs-as.vector(se2) for (i in 1:length(b-1)){ lines(c(b[i+1],b[i+1]),c(up2[i],down2[i]))} # Use the predict function to check our result new_data <- data.frame(list(rep=log(500))) predict(model2, newdata=new_data, type="response") ###Part III: Logistic regression with a Bayesian approach # A. Untransformed (model 1) library(rjags) n <- NROW(rep_structures) x <- rep_structures y <- survival model = "Logmodel_1.R" # Bundle data # Bundle data win.data=list( x=x, y=y, n=n ) zeta1 <- 0.01 zeta2 <- 0.02 # Inits inits=list( list(b0=runif(1,zeta1,zeta2), b1=runif(1,zeta1,zeta2)), list(b0=runif(1,zeta1,zeta2), b1=runif(1,zeta1,zeta2)), list(b0=runif(1,zeta1,zeta2), b1=runif(1,zeta1,zeta2)) ) # Parameters to estimate params <- c("b0","b1","prob_est") # MCMC settings nc = 3 ni=10000 nb=1000 # MCMC settings, start Gibbs sampler and plot results and diagnostics jm=jags.model(model,data=win.data,inits=inits,n.chains=nc,n.adapt=nb) update(jm, n.iter=ni) zc=coda.samples(jm,variable.names=params,n.iter=10000) # Display results gelman.diag(zc) plot(zc) summary(zc) out1<- summary(zc) # B. Log-transformed (model 2) model = "Logmodel_2.R" # Bundle data win.data_1=list( x=rep, y=survival, n=n ) # MCMC settings nc = 3 ni=10000 nb=1000 # MCMC settings, start Gibbs sampler and plot results and diagnostics jm1=jags.model(model,data=win.data_1,inits=inits,n.chains=nc,n.adapt=nb) update(jm1, n.iter=ni) zc1=coda.samples(jm1,variable.names=params,n.iter=10000) # Display results gelman.diag(zc1) plot(zc1) summary(zc1) out2 <- summary(zc1) # Plot Bayesian models with credibility envelopes and frequentist models together (WARNING: may take a few minutes) estim1 <- estim2 <- array(0,c(140,100)) x <- seq(1,1400,10) s1 <- round(runif(100)*1000,0) s2 <- round(runif(100)*1000,0) for(i in 1:100){ for(j in 1:140){ estim1[j,i] <- 1/(1 + (1/exp(as.numeric(zc[[3]][s1[i],1]) + as.numeric(zc[[3]][s1[i],2])*x[j]))) estim2[j,i] <- 1/(1 + (1/exp(as.numeric(zc1[[3]][s2[i],1]) + as.numeric(zc1[[3]][s2[i],2])*log(x[j])))) } } plot(x_values_rs, y_values_rs, col="blue", type="l",xlab ="Number of reproductive structures", ylab ="Probability") for (i in 1:100){ lines(exp(log(seq(1,1400,10))),estim1[,i],type="l",col="gray") lines(seq(1,1400,10),estim2[,i],type="l",col="cyan") } points(exp(x_values_lrs), y_values_lrs, col="red",type="l") points(x_values_rs, y_values_rs, col="blue", type="l") b <- seq(0, max(rep_structures),30) z <- cut(rep_structures, b) prebyden <- tapply(survival,z,sum) tab <- table(z) probs <-prebyden/tab probs <- as.vector(probs) points(b[2:length(b)],probs,pch=16,cex=1) se2<-sqrt(probs*(1-probs)/tab) up2 <-probs+as.vector(se2) down2 <-probs-as.vector(se2) for (i in 1:length(b-1)){ lines(c(b[i+1],b[i+1]),c(up2[i],down2[i]))} # Detach the data detach(orig_data)