# This set of commands calculates means, standard deviations, and confidence
# intervals for a data set.
# Written by Dave Jenkins, Oct 2018, based on code at
# http://www.cookbook-r.com/Manipulating_data/Summarizing_data

# First run the entire summarySE function - from the line below through its
# closing } sign.

# Summarize one measured variable within groups.
#
# Arguments:
#   data          data frame holding the measurements.
#   measurevar    name of the column to summarize.
#   groupvars     names of the columns to group by (handed to plyr::ddply()
#                 as its grouping variables).
#   na.rm         if TRUE, NAs are ignored in N, the mean, and the sd.
#   conf.interval confidence level for the interval (.95 means 95%).
#   .drop         handed to plyr::ddply() unchanged.
#
# Returns a data frame with one row per group holding the grouping columns,
# N, the mean (in a column named after measurevar), sd, se (standard error of
# the mean), and ci (the half-width of the confidence interval).
summarySE <- function(data = NULL, measurevar, groupvars = NULL,
                      na.rm = FALSE, conf.interval = .95, .drop = TRUE) {
  # Fail early with a readable message instead of an obscure error deep
  # inside plyr.
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame", call. = FALSE)
  }
  if (is.character(measurevar) && !all(measurevar %in% names(data))) {
    stop("`measurevar` not found in `data`: ",
         paste(setdiff(measurevar, names(data)), collapse = ", "),
         call. = FALSE)
  }
  # plyr functions are called with an explicit plyr:: prefix so that other
  # packages (e.g. dplyr, which also has a rename()) cannot mask them.
  if (!requireNamespace("plyr", quietly = TRUE)) {
    stop("Package 'plyr' is required by summarySE(); ",
         "install it with install.packages(\"plyr\")", call. = FALSE)
  }

  # New version of length which can handle NA's: if na.rm is TRUE, don't
  # count them.
  length2 <- function(x, na.rm = FALSE) {
    if (na.rm) sum(!is.na(x)) else length(x)
  }

  # This does the summary. For each group's data frame, return a vector with
  # N, mean, and sd.
  datac <- plyr::ddply(
    data, groupvars,
    .drop = .drop,
    .fun = function(xx, col) {
      c(
        N    = length2(xx[[col]], na.rm = na.rm),
        mean = mean(xx[[col]], na.rm = na.rm),
        sd   = sd(xx[[col]], na.rm = na.rm)
      )
    },
    measurevar
  )

  # Rename the "mean" column to the name of the measured variable.
  datac <- plyr::rename(datac, c("mean" = measurevar))

  # Calculate standard error of the mean.
  datac$se <- datac$sd / sqrt(datac$N)

  # Confidence interval multiplier for standard error.
  # Calculate t-statistic for confidence interval:
  # e.g., if conf.interval is .95, use .975 (above/below), and use df = N - 1.
  # Groups with fewer than two observations have no valid df, so their
  # multiplier (and therefore ci) is NA rather than a NaN plus a warning.
  ciMult <- rep(NA_real_, nrow(datac))
  enough <- !is.na(datac$N) & datac$N > 1
  ciMult[enough] <- qt(conf.interval / 2 + .5, datac$N[enough] - 1)
  datac$ci <- datac$se * ciMult

  datac
}

# Now tell summarySE to work on some data. For example, Time ~ Group within
# the data file.
# NOTICE that this assumes you have already loaded a file called "data" that
# contains these variables.
# Packages needed for plotting. library() stops with an error right away if
# one of them is not installed.
library(ggplot2)
library(cowplot)
library(gridExtra)

# Run summarySE once, keep the results in a data frame, and print them.
result <- summarySE(data, measurevar = "Time", groupvars = c("Group"),
                    na.rm = TRUE)
print(result)

# And now graph the result.
# attach(result) is not needed: ggplot() looks up Group, Time, se, ci, and sd
# inside `result` itself, and attaching would mask the base sd() function.

# Now we plot outcomes, as means with error bars. The three plots share the
# same axes and differ only in which column sets the error-bar length.
# NOTE(review): ggplot2 >= 3.4.0 prefers `linewidth` over `size` for line
# widths; `size` is kept so older ggplot2 installs still work.
base_plot <- ggplot(result, aes(x = Group, y = Time)) +
  ylim(3, 6) +
  xlab("Groups")

# for SEs
plot1 <- base_plot +
  geom_errorbar(aes(ymin = Time - se, ymax = Time + se),
                width = 0.05, size = 0.5) +
  geom_point() +
  ylab("Time (means & SEs)")

# for 95% CIs
plot2 <- base_plot +
  geom_errorbar(aes(ymin = Time - ci, ymax = Time + ci),
                width = 0.05, size = 0.5) +
  geom_point() +
  ylab("Time (means & 95% CIs)")

# for SDs
plot3 <- base_plot +
  geom_errorbar(aes(ymin = Time - sd, ymax = Time + sd),
                width = 0.05, size = 0.5) +
  geom_point() +
  ylab("Time (means & SDs)")

# Show the three plots side by side.
grid.arrange(plot1, plot2, plot3, nrow = 1, ncol = 3)

# Notice how appearances change with different error bars? Only 95% CIs will
# translate well to inferred differences.
# Also see http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
# for plotting using ggplot2