###### Ordinations in R, by Dave Jenkins & Leo Ohyama, using data collected by Leo! ################## # Here we use Principal Components Analysis (PCA) and # Non-metric Multi-dimensional Scaling (NMDS) on the same data set (diplo2017.csv). # Data list ant species (columns) per site (rows). # We rely on the awesome vegan package for both types of analyses, with the goal to answer: # 1. Do ant communities differ between the two habitat types? # 2. Are certain species more associated with specific habitat types? # 3. How dissimilar are habitats in terms of beta diversity? (for NMDS) ##### Data import and set up ## Import the "diplo2017.csv". dim(diplo2017) # 16 species x 32 sites matrix, containing species abundances # The first 16 rows are sites in pine flatwoods and the next 16 are in high pine sandhill habitats<-diplo2017[,1:2] # to make a dataframe of just habitat and site columns df<-diplo2017[,3:17] #to make a dataframe of just species abundances colSums(df) # Get total counts per species # Notice that there are four species only found once # These are called singletons and are often removed from the data because they are over- # influential in the analysis library(vegan) ##### Squint at data: correlations mean ordinations can simplify # where correlations may be L-shaped graphs (where one goes high, the other goes low) pairs(df, lower.panel=NULL) ##### Evaluate multivariate homogeneity of variance: a PCA is not legit without it ## first calculate Bray-Curtis distances between samples (requires the numbers-only data) dis <- vegdist(df) groups <- habitats[[1]] # where the [[1]] returns a vector (not a df) of the first column ### Calculate multivariate dispersions homog <- betadisper(dis, groups) homog ## Perform anova of group homogeneities anova(homog) ## And a permutation test for F between groups permutest(homog, pairwise = TRUE) ## And a Tukey's HSD for multi-group differences, with a plot (homog.HSD <- TukeyHSD(homog)) plot(homog.HSD) # if 95% CI overlaps 0 there is not a sig dif in variance between groups ##### Run a PCA: could use base princomp, but here use rda in vegan (another name for unconstrained pca is redundancy analysis) antspca <- rda(df) summary(antspca) # You should report the eigenvalues and % explained for PCA plots screeplot(antspca, bstick=T)# How many axes are enough to capture the story? Look for a break in the plot below the broken stick line # Now plot the PCA biplot(antspca) # black are sites, red are species vectors ordihull(antspca, group=habitats\$Habitat) # simple "hull: around points of groups ### Now the questions: # 1. Do ant communities differ between the two habitat types? # 2. Are certain species more associated with specific habitat types? # Note that we cannot really address Question 3 yet ##### Run NMDS on the df data, again in vegan, and needing no assumptions tests # We run the nmds analysis using the metaMDS() function antsmds=metaMDS(df, k=2, trymax=1000, autotransform = F, distance = 'bray') # Where: # k indicates the number of dimensions we choose to ordinate the data # trymax= indicates the number of runs the analysis is done with, it will # stop when the analysis successfully runs. Note the analysis ran till the 20th run # autotransform applies a transformation to the data if it's set to true # distance specifies which measure of dissimilarity we use to ordinate # Bray-Curtis dissimilarity index is used in this case because we deal with abundance-based data # This is a strong measure of beta diversity antsmds\$stress # you must report the stress value of the result (how well the analysis was able to ordinate the points in multivariate space) stressplot(antsmds) # a good visual of stress; ideally a linear step-wise increase #acceptable stress is seen between 0.10 to 0.20 although this is debatable # Try increasing k to reduce stress, which then calls for more axes in plots # Is the plot more linear? #For the sake of plotting let's go with the first analysis at k=2 plot(antsmds) #not really informative yet but can see what will be used next habs=c(rep("Flatwoods",16),rep("Sandhills",16)) #create an object specifying habitat types #used in the sampling. Note these correspond to each row, the first 16 rows are sites in #pine flatwoods and the next 16 were in high pine sandhill colors=c(rep("dodgerblue3",16),rep("grey",16)) #make another object specifying colors ordiplot(antsmds,type="n", main = "NMDS Between Habitats") #code above gives us a blank nmds plot with a title, to see what next plot is using ordispider(antsmds, groups = habs) # Plots lines from each habitat's centroid to its sites points(antsmds, cex = 1, pch = c(rep(15,16), rep(16,16)), col = c(rep("dodgerblue3",16), rep("grey",16))) # Greater distance in the plot indicates greater Bray-Curtis dissimilarity (beta diversity) # For species position in this space: orditorp(antsmds,display="species",col="black",air=0.6, cex=0.5) ### Now the questions again: # 1. Do ant communities differ between the two habitat types? # 2. Are certain species more associated with specific habitat types? # 3. How dissimilar are habitats in terms of beta diversity? # Let's get a littly fancy with the graphics for(i in unique(habs)){ ordihull(antsmds\$point[grep(i,habs),],draw="polygon", groups=habs[habs==i], col=colors[grep(i,habs)], alpha = 0.3,label=F)} orditorp(antsmds,display="species",col="black",air=0.6, cex=0.5) legend("topright", legend = c("Flatwoods", "Sandhill"), col = c("dodgerblue3", "grey"), pch = 15, bty = "l", cex = 1, text.col = c("black", "black"), xpd = T) #the polygon colors may be slightly overboard because the separation of sites and habitats #based on beta diversity is pretty clear #note the ordispider function is better for analyses run at 2+ dimensions or k > 2