# R code written by Melanie M. Wall - first version 10/29/2010
#                                     last updated 11/19/2010 when
#                                     a user defined random seed was added

# This program performs a non-parametric bootstrap for the Total Information
# Area Index (TIA) in order to provide a confidence interval for the TIA that 
# can then be compared across different item sets

# This program calculates the Total Information Function and calculates the 
# TIA (Total information area index) for the 
# 2 parameter logistic IRT model.  The TIA is calculated
# as the integral of the total test information function.  
# It can equivalently be calculated
# as the sum of the p discrimination parameters (where p is the number of items in the set).

# Note the TIA reflects the total information across the entire (-infinity, +infinity) latent
# trait, but it is straightforward to adapt the index (and the associated bootstrap in this
# program) in order to summarize the total information across
# some prespecified region of the latent trait.  This is done in the function
# below by setting the lower and upper range parameters to something
# other than (-10, 10).  The range -10 to 10 is used for the whole trait
# since it is effectively the same as -infinity to +infinity on
# the logit scale.

# This program utilizes the 'ltm' latent trait modeling package, so it must first be loaded
# onto your system from the CRAN site for the program to work


library(ltm)


################Beginning of the function called boot.irt.info ##########

boot.irt.info <- function(dataset, number.boot.samples,
                          trait.lower.range = -10, trait.upper.range = 10,
                          myseed) {

  ###################  Inputs      ###########################################################
  ###
  ### dataset:             should be an nXp matrix or dataframe containing only the n individuals
  ###                      and p variables you want to use to fit your 2 parameter logistic IRT model
  ###
  ### number.boot.samples: a single non-negative number giving the total number of bootstrap
  ###                      samples used to obtain the confidence interval.  The larger the value,
  ###                      the longer the program takes to run but the more reliable the bootstrap
  ###                      confidence interval will be.  It is typical to take somewhere between
  ###                      200 to 1000 bootstrap samples. WARNING: It is recommended that a smaller
  ###                      value (e.g. 10) is used first in order to gauge computational time.
  ###                      Running 500 bootstrap re-samples takes approximately 8 minutes on a
  ###                      standard PC for a sample with n=700 and p=12 variables.
  ###                      A value of 0 fits only the observed data; the interval is then NA.
  ###
  ### trait.lower.range:   to obtain TIA across the whole range of the trait use
  ### trait.upper.range    the values -10,10 (these are the defaults).  A restricted range can
  ###                      also be given by simply giving different values, e.g. 0,10 would
  ###                      summarize the total information on the positive end of the trait
  ###
  ### myseed:              The user should provide a value for the random seed so that the bootstrap
  ###                      results which randomly resample the data can be replicated
  ###                      The seed can be any integer, e.g. 54321
  ###
  ###################  Output      ###########################################################
  ###
  ### A list with three elements:
  ###   coef.observed    - coef() of the model fitted to the observed data
  ###   tia.original     - the TIA of the observed data over the requested trait range
  ###   boot.confint.tia - the 2.5% and 97.5% quantiles of the bootstrapped TIA values
  ###
  ### Side effects: draws the test information curve on the current graphics device
  ### and resets the random number generator through set.seed(myseed).
  ###
  ############################################################################################

  ### Reject a bootstrap count that cannot be interpreted before any model is fitted
  stopifnot(
    is.numeric(number.boot.samples),
    length(number.boot.samples) == 1,
    !is.na(number.boot.samples),
    number.boot.samples >= 0
  )

  ### The same trait range is used for the observed data and for every re-sample
  trait.range <- c(trait.lower.range, trait.upper.range)

  ###########################################################################
  #### The following uses the ltm function to obtain the ML estimators of the
  #### discrimination and severity parameters for the observed data
  ###########################################################################

  parms.of.observed <- ltm(dataset ~ z1, IRT.param = TRUE)
  coef.observed <- coef(parms.of.observed)

  ##########################################################################
  ### The following makes the plot of the total information curves
  ### The test information curve is the sum of item information curves and
  ### provides a visual way to examine where the item set, i.e. "test",
  ### yields the most precise estimate of the underlying trait.

  ### Often the location of the peak is
  ### referred to as the place along the trait where the item set is
  ### "most discriminating", e.g. (Reise and Waller 2002).
  ###  Reise, S. and Waller, N. (2002) Item response theory for dichotomous assessment data.
  ### In Drasgow, F. and Schmitt, N., editors, Measuring and Analyzing Behavior in
  ### Organizations. San Francisco: Jossey-Bass.
  #############################################################################

  ### Called only for its drawing side effect, so the return value is not kept
  plot(parms.of.observed, type = "IIC", items = 0, xlab = "Latent trait")

  ###############################################################################
  ###
  ### The following calculates the Total Information Area Index
  ###
  ### To get the total information, the information function in the ltm package uses the
  ### integrate() function built into R
  ###############################################################################

  tia.original <- information(parms.of.observed, range = trait.range)$InfoTotal

  ###############################################################################
  ###
  ### The following performs the Bootstrap for the Total Information Area Index
  ###
  ##############################################################################

  set.seed(myseed)

  nb <- as.integer(number.boot.samples)
  n <- nrow(dataset)

  outinfo.boot <- numeric(nb)

  ### seq_len() gives an empty loop when nb is 0 (1:0 would iterate over 1 and 0)
  for (i in seq_len(nb)) {
    ### Draw n individuals with replacement; sample.int(n, n, ...) consumes the
    ### random number stream exactly as sample(1:n, n, ...) does
    sample.individuals <- sample.int(n, n, replace = TRUE)
    ### drop = FALSE keeps the re-sample two-dimensional whatever the number of columns
    boot.sample <- dataset[sample.individuals, , drop = FALSE]
    parms.boot <- ltm(boot.sample ~ z1, na.action = NULL, IRT.param = TRUE)
    ###If I want to add a plot of all the information curves do it here
    outinfo.boot[i] <- information(parms.boot, range = trait.range)$InfoTotal
  }

  ##### Note you can change the percentiles of the confidence interval you want to use
  ##### here the default is 2.5 to 97.5 thus giving a 95% confidence interval

  q1 <- quantile(outinfo.boot, c(.025, .975))

  list(
    coef.observed = coef.observed,
    tia.original = tia.original,
    boot.confint.tia = q1
  )
}


########## End of the Function ############


###################  Here is an example of how to run the function called boot.irt.info.
### The example data is simulated data created below and stored in the
### object called "symptoms".  The data has n=700 individuals, each with p=12 dichotomous symptoms.
### 200 bootstrap re-samples (the value passed in the call below) are used to create a
### bootstrap 95% confidence interval of the TIA
### across the entire range of the latent trait, i.e. from -10 to 10

#######Example - simulated data
# Simulate 2PL item responses: 700 individuals, 12 dichotomous symptoms
set.seed(123499)
n <- 700

# Latent trait values, drawn before any item response so the random
# number stream is consumed in the same order as a row-by-row loop
f <- rnorm(n)

# Item parameters: unit discrimination, severities spread from -1.5 to 1.5
disc <- rep(1, 12)
severity <- rep(c(-1.5, -1, 0, 1, 1.5), times = c(2, 2, 4, 2, 2))

# Endorsement probabilities for every person (rows) and item (columns)
prob <- 1 / (1 + exp(-(outer(f, severity, "-") * rep(disc, each = n))))

# One Bernoulli draw per item, person by person; vapply returns items in
# rows, so transpose to get the n x 12 response matrix
symptoms <- t(vapply(
  seq_len(n),
  function(person) rbinom(12, 1, prob[person, ]),
  numeric(12)
))

#######This is the way to call the function.  The arguments are, in order: the data
#######(symptoms), the number of bootstrap re-samples (200), the lower and upper ends
#######of the trait range (-10, 10) and the random seed (12345).
#######Run time grows with the number of re-samples and will vary based on the speed
#######and memory of your computer (the function's input notes quote roughly 8 minutes
#######for 500 re-samples with n=700 and p=12).

boot.irt.info(symptoms,200,-10,10,12345)


#Here are the results
#$coef.observed
#             Dffclt    Dscrmn
#Item 1  -1.55058371 1.2111926
#Item 2  -1.53095794 1.0543602
#Item 3  -1.24211903 0.8660448
#Item 4  -1.27343552 0.7020015
#Item 5   0.05502064 0.9345833
#Item 6  -0.19743686 1.1093184
#Item 7   0.08471676 0.8292031
#Item 8  -0.01683570 1.0667659
#Item 9   1.04264012 0.8543570
#Item 10  0.97723563 0.9642812
#Item 11  1.62733107 0.8793472
#Item 12  1.44888562 1.1536152
#
#$tia.original
#[1] 11.62063
#
#$boot.confint.tia
#    2.5%    97.5% 
#10.65383 12.76426