# R code written by Melanie M. Wall - first version 10/29/2010
# Last updated 11/19/2010, when a user-defined random seed was added.
#
# This program performs a non-parametric bootstrap for the Total Information
# Area Index (TIA) in order to provide a confidence interval for the TIA that
# can then be compared across different item sets.
#
# This program calculates the Total Information Function and calculates the
# TIA (Total Information Area index) for the 2 parameter logistic IRT model.
# The TIA is calculated as the integral of the total test information
# function. It can equivalently be calculated as the sum of the p
# discrimination parameters (where p is the number of items in the set).
#
# Note the TIA reflects the total information across the entire
# (-infinity, +infinity) latent trait, but it is straightforward to adapt the
# index (and the associated bootstrap in this program) in order to summarize
# the total information across some prespecified region of the latent trait.
# This is done in the function below by setting the lower and upper range
# parameters to something other than the default values of (-10, 10). The
# default goes from -10 to 10 since this is effectively the same as
# -infinity to +infinity on the logit scale.
# This program utilizes the 'ltm' latent trait modeling package, so it must
# first be installed on your system from the CRAN site for the program to work.
library(ltm)

################ Beginning of the function called boot.irt.info ##########
#
# Purpose: fit a 2 parameter logistic IRT model to `dataset` with ltm(),
# plot the test information curve, compute the Total Information Area index
# (TIA) over the requested trait range, and bootstrap the individuals (rows)
# to obtain a 95% percentile confidence interval for that TIA.
#
################### Inputs ####################################################
###
### dataset: should be an n x p matrix or data frame containing only the n
###    individuals and p variables you want to use to fit your 2 parameter
###    logistic IRT model.
###
### number.boot.samples: a single number (>= 1) giving the total number of
###    bootstrap samples used to obtain the confidence interval. The larger
###    the value, the longer the program takes to run but the more reliable
###    the bootstrap confidence interval will be. It is typical to take
###    somewhere between 200 and 1000 bootstrap samples. WARNING: it is
###    recommended that a smaller value (e.g. 10) is used first in order to
###    gauge computational time. Running 500 bootstrap re-samples takes
###    approximately 8 minutes on a standard PC for a sample with n=700 and
###    p=12 variables.
###
### trait.lower.range, trait.upper.range: to obtain the TIA across the whole
###    range of the trait use the values -10, 10 (the defaults). A restricted
###    range can also be given by simply giving different values, e.g. 0, 10
###    would summarize the total information on the positive end of the trait.
###
### myseed: the user should provide a value for the random seed so that the
###    bootstrap results, which randomly resample the data, can be replicated.
###    The seed can be any integer, e.g. 54321.
###
################### Output ####################################################
###
### A list with three components:
###    coef.observed:    coef() of the model fitted to the observed data
###    tia.original:     information over the requested range, observed data
###    boot.confint.tia: 2.5% and 97.5% quantiles of the bootstrap values
###
### Side effects: draws the test information curve on the active graphics
### device and resets the global random number generator via set.seed(myseed).
###
###############################################################################
boot.irt.info <- function(dataset, number.boot.samples,
                          trait.lower.range = -10, trait.upper.range = 10,
                          myseed) {
  # Fail early (before any model fitting) on an unusable bootstrap count.
  if (!is.numeric(number.boot.samples) || length(number.boot.samples) != 1L ||
      is.na(number.boot.samples) || number.boot.samples < 1) {
    stop("number.boot.samples must be a single number >= 1", call. = FALSE)
  }

  trait.range <- c(trait.lower.range, trait.upper.range)

  #############################################################################
  #### The following uses the ltm function to obtain the ML estimators of the
  #### discrimination and severity parameters for the observed data.
  #############################################################################
  parms.of.observed <- ltm(dataset ~ z1, na.action = NULL, IRT.param = TRUE)
  coef.observed <- coef(parms.of.observed)

  #############################################################################
  ### The following makes the plot of the total information curve.
  ### The test information curve is the sum of item information curves and
  ### provides a visual way to examine where the item set, i.e. "test",
  ### yields the most precise estimate of the underlying trait.
  ### Often the location of the peak is referred to as the place along the
  ### trait where the item set is "most discriminating", e.g. (Reise and
  ### Waller 2002).
  ### Reise, S. and Waller, N. (2002) Item response theory for dichotomous
  ### assessment data. In Drasgow, F. and Schmitt, N., editors, Measuring and
  ### Analyzing Behavior in Organizations. San Francisco: Jossey-Bass.
  #############################################################################
  plot(parms.of.observed, type = "IIC", items = 0, xlab = "Latent trait")

  #############################################################################
  ### The following calculates the Total Information Area Index.
  ###
  ### information() in the ltm package uses R's integrate() function. Its
  ### InfoTotal component is always integrated over (-10, 10), whereas
  ### InfoRange is integrated over the `range` argument. InfoRange is used
  ### here so that a restricted trait range is actually honoured; for the
  ### range (-10, 10) the two components coincide.
  #############################################################################
  tia.original <- information(parms.of.observed, range = trait.range)$InfoRange

  #############################################################################
  ### The following performs the bootstrap for the Total Information Area
  ### Index: resample individuals with replacement, refit, recompute the TIA.
  #############################################################################
  set.seed(myseed)
  nb <- number.boot.samples
  n <- nrow(dataset)
  outinfo.boot <- numeric(nb)
  for (i in seq_len(nb)) {
    sample.individuals <- sample.int(n, n, replace = TRUE)
    # drop = FALSE keeps the resample two-dimensional in every case.
    boot.sample <- dataset[sample.individuals, , drop = FALSE]
    parms.boot <- ltm(boot.sample ~ z1, na.action = NULL, IRT.param = TRUE)
    ### If a plot of all the bootstrap information curves is wanted, add it
    ### here.
    outinfo.boot[i] <- information(parms.boot, range = trait.range)$InfoRange
  }

  ##### Note you can change the percentiles of the confidence interval you
  ##### want to use; here the default is 2.5 to 97.5, thus giving a 95%
  ##### confidence interval.
  q1 <- quantile(outinfo.boot, c(.025, .975))

  list(
    coef.observed = coef.observed,
    tia.original = tia.original,
    boot.confint.tia = q1
  )
}
########## End of the Function ############

################### Here is an example of how to run the function called
### boot.irt.info. The example data is simulated data created below and
### stored in the object called "symptoms".
### The data has n=700 individuals with p=12 dichotomous symptoms each.
### A bootstrap 95% confidence interval of the TIA is created across the
### entire range of the latent trait, i.e. from -10 to 10.
### NOTE: the original description mentions 500 bootstrap re-samples, but the
### call below passes 200.

####### Example - simulated data
set.seed(123499)
n <- 700

# Item parameters of the simulating 2 parameter logistic model.
disc <- rep(1, 12)
severity <- c(-1.5, -1.5, -1, -1, 0, 0, 0, 0, 1, 1, 1.5, 1.5)

# Latent trait value for each individual.
f <- rnorm(n, mean = 0, sd = 1)

# Containers: endorsement probabilities and simulated 0/1 responses
# (99 is a placeholder that every row of the loop overwrites).
prob <- matrix(0, nrow = n, ncol = 12)
symptoms <- matrix(99, nrow = n, ncol = 12)

# Row by row, so the random draws are consumed one individual at a time.
for (i in seq_len(n)) {
  prob[i, ] <- 1 / (1 + exp(-disc * (f[i] - severity)))
  symptoms[i, ] <- rbinom(12, size = 1, prob = prob[i, ])
}

####### This is the way to call the function - this takes about 8 minutes to
####### run; of course time will vary based on the speed and memory of your
####### computer.
boot.irt.info(
  symptoms,
  number.boot.samples = 200,
  trait.lower.range = -10,
  trait.upper.range = 10,
  myseed = 12345
)

# Here are the results
#$coef.observed
#              Dffclt    Dscrmn
#Item 1  -1.55058371 1.2111926
#Item 2  -1.53095794 1.0543602
#Item 3  -1.24211903 0.8660448
#Item 4  -1.27343552 0.7020015
#Item 5   0.05502064 0.9345833
#Item 6  -0.19743686 1.1093184
#Item 7   0.08471676 0.8292031
#Item 8  -0.01683570 1.0667659
#Item 9   1.04264012 0.8543570
#Item 10  0.97723563 0.9642812
#Item 11  1.62733107 0.8793472
#Item 12  1.44888562 1.1536152
#
#$tia.original
#[1] 11.62063
#
#$boot.confint.tia
#    2.5%    97.5%
#10.65383 12.76426