# This code is for use in the freely available statistical programming # language 'R', accompanies Lloyd et al. 2008 (Dinosaurs and the Cretaceous # Terrestrial Revolution. Proc. Roy. Soc. B, 275, 2483-2490) and is based on code # taught by John Alroy as part of the PBDB intensive summer course, details # at: http://paleodb.org/ # # To use the code you must first install 'R' for your machine (Mac, Windows # & Linux versions are available) from: http://cran.r-project.org/ # You will also need to install the 'plotrix' library, which can be done # from within R. # # To run the code simply copy and paste this entire file into R (you may have # to hit enter) and it should run without a hitch. It will take a little while to # perform all the various calculations, but should eventually output a file # called 'Plots.pdf' which can be opened with Adobe Reader, available from: # http://www.adobe.com/ # # If you have any problems then feel free to contact me at: # graemetlloyd@gmail.com # First import dataset from web: data<-read.table("http://www.graemetlloyd.com/pubdata/dinorare.txt",header=T) # Load libraries: library(base) library(stats) library(graphics) library(plotrix) # Set number of trials to perform: trials<-1000 # Find unique time bins: uniquebins<-unique(data[,"Time_Bin"]) # How many occurrences in each bin?: binsize<-vector(mode="numeric",length=length(uniquebins)) names(binsize)<-uniquebins for (i in 1:length(data[,"Time_Bin"])) binsize[match(data[i,"Time_Bin"],uniquebins)]<-binsize[match(data[i,"Time_Bin"],uniquebins)]+1 # Set quota as smallest bin: quota<-min(binsize) # Create variables to store subsamples<-matrix(0,ncol=length(uniquebins),nrow=trials) rawdiversity<-vector(mode="numeric",length=length(uniquebins)) meansubsampleddiversity<-vector(mode="numeric",length=length(uniquebins)) # Main loop (performs actual subsampling, may take a while to run): seen<-vector(mode="numeric") for (i in 1:length(uniquebins)) { if (i == length(uniquebins)) occurrences<-data[match(uniquebins,as.vector(data[,"Time_Bin"]))[i]:length(data[,1]),"Taxon_Name"] if (i < length(uniquebins)) occurrences<-data[match(uniquebins,as.vector(data[,"Time_Bin"]))[i]:match(uniquebins,as.vector(data[,"Time_Bin"]))[i+1],"Taxon_Name"] rawdiversity[i]<-length(unique(occurrences)) for (j in 1:trials) { sampledGenera<-0 pool<-occurrences occsLeft<-length(pool) seen[1:10000]<-0 for (k in 1:quota) { cellNo<-floor(runif(1,min=1,max=(occsLeft))) if (seen[pool[cellNo]] == 0) { sampledGenera<-sampledGenera+1 seen[pool[cellNo]]<-1 } pool[cellNo]<-pool[occsLeft] occsLeft<-occsLeft-1 } subsamples[j,i]<-sampledGenera } meansubsampleddiversity[i]<-mean(subsamples[,i]) } # Diversification: diversify<-matrix(0,nrow=trials,ncol=length(uniquebins)-1) meandiversify<-vector(mode="numeric",length=length(uniquebins)-1) binlengths<-c(12.2,13.45,16,16.95,15.7,15.35,14.1,12.7,14.25,17.05) for (i in 1:length(uniquebins)-1) { for (j in 1:trials) diversify[j,i]<-((subsamples[j,i+1]-subsamples[j,i])/subsamples[j,i])/binlengths[i] meandiversify[i]<-mean(diversify[,i]) } # Create sorted versions: subsamples_sorted<-subsamples for (i in 1:length(uniquebins)) subsamples_sorted[,i]<-sort(subsamples[,i]) diversify_sorted<-diversify for (i in 1:length(uniquebins)-1) diversify_sorted[,i]<-sort(diversify[,i]) # Get upper and lower 95% bound values: upperdiversify<-vector(mode="numeric",length=length(uniquebins)-1) lowerdiversify<-vector(mode="numeric",length=length(uniquebins)-1) for (i in 1:length(uniquebins)-1) upperdiversify[i]<-diversify_sorted[floor(0.975*trials),i] for (i in 1:length(uniquebins)-1) lowerdiversify[i]<-diversify_sorted[ceiling(0.025*trials),i] # Calculate ghost diversity: ghostdiversity<-c(9,14,19,19,19,40,49,44,37,43,0) # These are the number of ghost lineages in each time bin ghostdiversity<-ghostdiversity+rawdiversity # Added to raw diversity gives estimated diversity # Now get diversification values for raw and ghost-corrected data: rawdiversify<-vector(mode="numeric",length=length(uniquebins)-1) ghostdiversify<-vector(mode="numeric",length=length(uniquebins)-1) for (i in 1:length(uniquebins)-1) rawdiversify[i]<-((rawdiversity[i+1]-rawdiversity[i])/rawdiversity[i])/binlengths[i] for (i in 1:length(uniquebins)-1) ghostdiversify[i]<-((ghostdiversity[i+1]-ghostdiversity[i])/ghostdiversity[i])/binlengths[i] # To create percentages multiple all by 100: meandiversify<-meandiversify*100 upperdiversify<-upperdiversify*100 lowerdiversify<-lowerdiversify*100 rawdiversify<-rawdiversify*100 ghostdiversify<-ghostdiversify*100 # Finish by creating a multi-page PDF file with plots (including Figure 2): plottimes<-c(222.25,210.05,196.6,180.6,163.65,147.95,132.6,118.5,105.8,91.55,74.5) pdf(file="Plots.pdf",paper="a4r",width=11,height=8) plot(plottimes,rawdiversity,ylim=c(0,max(rawdiversity)),type="l",xlim=c(max(plottimes),min(plottimes)),ylab="Species richness",xlab="Time (Ma)",main="Results from different diversity measures",col="blue",lwd=2,sub="(blue=raw diversity, green=ghost-range corrected diversity, red=subsampled diversity)") points(plottimes,ghostdiversity,type="l",col="green",lwd=2) points(plottimes,meansubsampleddiversity,type="l",col="red",lwd=2) plotCI(plottimes,meansubsampleddiversity,liw=(subsamples_sorted[ceiling(0.025*trials),]-meansubsampleddiversity)*-1,uiw=subsamples_sorted[floor(0.975*trials),]-meansubsampleddiversity,xlim=c(max(plottimes),min(plottimes)),ylim=c(0,max(subsamples)),ylab="Species richness",xlab="Time (Ma)",sfrac=0.005,main="Subsampled Diversity",sub="(error bars indicate 95% bounds)") plot(plottimes[2:length(plottimes)],rawdiversify,xlim=c(sort(plottimes,decreasing=T)[2],min(plottimes)),ylim=c(min(c(rawdiversify,ghostdiversify,meandiversify)),max(c(rawdiversify,ghostdiversify,meandiversify))),type="l",col="blue",lwd=2,xlab="Time (Ma)",ylab="% Change in richness per million years",main="Diversification Rates",sub="(blue=raw data, green=ghost-range corrected data, red=subsampled data, with 95% bounds)") points(plottimes[2:length(plottimes)],ghostdiversify,col="green",lwd=2,type="l") points(plottimes[2:length(plottimes)],meandiversify,col="red",lwd=2,type="l") points(plottimes[2:length(plottimes)],upperdiversify,col="red",lwd=1,type="l") points(plottimes[2:length(plottimes)],lowerdiversify,col="red",lwd=1,type="l") dev.off()