/*====================================================== MODIFIED MANTEL-HAENSZEL STATISTIC FOR CORRELATED DATA. ====================================================== INSTRUCTIONS FOR USE OF SAS MACRO PROGRAM: ========================================= Data must be inputted in the following order: (1) ID (2) RESPONSE (3) EXPOSURE (4) STRATUM The following commands can be used to open up the dataset and invoke the macro MHADJUST. You need not directly enter the macro into your SAS program. Instead, use the %INCLUDE command as shown below. Note: The macro will delete records with any missing observations. A warning message will then be printed. References: Begg (1999), Biometrics 55: 302-307. Begg & Paykin (2001), Journal of Statistical Computation and Simulation 70: 175-195. START HERE=====> DATA mhdat; INFILE 'C:\yourdata.dat'; INPUT ID RESPONSE EXPOSURE STRATUM; RUN; %INCLUDE 'C:\MHADJUST.SAS'; %MHADJUST(DATA=mhdat) ====================================================*/ %MACRO MHADJUST(data=_last_); OPTIONS LS=76 PS=59 PAGENO=1; PROC IML; RESET NOLOG NONAME NOCENTER; /*Sort the dataset by id.*/ SORT &data out=sorted BY ID; USE sorted; /*Count rows in entire dataset, including those with missing values.*/ READ ALL INTO DATAMISS; NROWMISS=nrow(DATAMISS); /*Read observations without any missing values into a matrix, DATAMAT. Subset out vectors ID, Y and Z.*/ READ ALL INTO DATAMAT WHERE(ID^=. & RESPONSE^=. & EXPOSURE^=. & STRATUM^=.);; ID=DATAMAT[,1]; Y=DATAMAT[,2]; Z=DATAMAT[,3]; NUMOBS=nrow(DATAMAT); /*If any missing values, print out message.*/ IF NUMOBS<] + J(nrow(datamat),1,1); NUMSTRAT=STRATVEC[<>]; STRATMAT=J(NUMOBS,NUMSTRAT,0); DO i=1 TO NUMOBS; DO j=1 TO NUMSTRAT; IF STRATVEC[i]=j THEN STRATMAT[i,j]=1; END; END; /*Compute stratum-specific cell counts. Cell and margin names correspond to paper by Begg, 1999, pg.166.*/ a=J(1,NUMSTRAT,0); b=a; c=a; d=a; m1=a; m2=a; n1=a; n2=a; t=a; DO k=1 TO NUMSTRAT; a[k]=(Y#Z#STRATMAT[,k])[+]; b[k]=(Y#(1-Z)#STRATMAT[,k])[+]; m1[k]=(Z#STRATMAT[,k])[+]; m2[k]=((1-Z)#STRATMAT[,k])[+]; t[k]=m1[k]+m2[k]; c[k]=m1[k]-a[k]; d[k]=m2[k]-b[k]; n1[k]=a[k]+b[k]; n2[k]=t[k]-n1[k]; END; /*Compute unadjusted MH test statistic, MHUNADJ, (same as SAS PROC FREQ), and MH test statistic MHSCORE based on working score test (see Begg, 1999, pg.166).*/ NUMNSTAT=0; DENNSTAT=0; DENWSTAT=0; DO k=1 TO NUMSTRAT; NUMNSTAT=NUMNSTAT + (a[k] - (m1[k]*n1[k]/t[k])) ; DENNSTAT=DENNSTAT + ((m1[k]*m2[k]*n1[k]*n2[k]) / ((t[k]**2)*(t[k]-1))); DENWSTAT=DENWSTAT + ((m1[k]*m2[k]*n1[k]*n2[k]) / (t[k]**3)); END; MHUNADJ=(NUMNSTAT**2)/DENNSTAT; MHSCORE=(NUMNSTAT**2)/DENWSTAT; /*Compute adjusted MH test statistic, MHADJUS, in the following 4 steps: 1. Create vector with GAMMA hat. Estimate of gamma under the null hypothesis is the solution of the GEE score equation which yields ln(n1k/n2k) where n1k=# diseased in stratum k and n2k=# nondiseased in stratum k.*/ GAMMA=J(1,NUMOBS,0); DO n=1 to NUMOBS; DO k=1 to NUMSTRAT; IF STRATVEC[n]=k THEN GAMMA[n]=log(n1[k]/n2[k]); END; END; /*2. Define vectors for muhat and variance function.*/ MUHAT=(exp(GAMMA))/(1+(exp(GAMMA))); VARFCN=MUHAT#(1-MUHAT); /*3. Generate the variance adjustment factor=f.*/ VA=0; VB=0; ENDVAL=0; DO i=1 TO NUMIDS; STARTVAL = ENDVAL + 1; ENDVAL = ENDVAL + VECLEN[i]; XMAT = Z[STARTVAL:ENDVAL]||STRATMAT[STARTVAL:ENDVAL,]; COVYMAT = (Y[STARTVAL:ENDVAL]-MUHAT[STARTVAL:ENDVAL])* (Y[STARTVAL:ENDVAL]-MUHAT[STARTVAL:ENDVAL])`; VA = VA + (XMAT`*(diag(VARFCN[STARTVAL:ENDVAL]))*XMAT); VB = VB + (XMAT`*COVYMAT*XMAT); END; VARN = 1/((inv(VA))[1,1]); VARR = ((inv(VA)*(VB)*inv(VA))[1,1])*VARN*VARN; f=VARR/VARN; /*4. Compute the adjusted Mantel-Haenszel test statistic.*/ MHADJUS=(MHSCORE/f); /*Compute p-values for the 3 different MH test statistics.*/ PVALUEA=1-probchi(MHADJUS,1); PVALUEU=1-probchi(MHUNADJ,1); PVALUES=1-probchi(MHSCORE,1); /*Print out results.*/ print " -----------------------------------------------------------"; print " MANTEL-HAENSZEL TEST CORRECTED FOR INTRACLUSTER CORRELATION"; print " -----------------------------------------------------------"; print " Total number of clusters:" NUMIDS[format=6.0]; print " Total number of observations in all clusters:" NUMOBS[format=6.0]; print " Minimum number of observations in a cluster:"(VECLEN[><])[format=7.0]; print " Maximum number of observations in a cluster:"(VECLEN[<>])[format=7.0]; print " -----------------------------------------------------------"; print " 2 X 2 TABLES FOR EACH STRATUM ARE AS FOLLOWS:"; DO i=1 TO NUMSTRAT; print," Stratum" i[format=2.0]; print " Exposed"[format=$22.] "Unexposed"[format=$9.] " Total"[format=$6.]; print " Diseased"[format=$13.] (a[i])[format=6.0] (b[i])[format=8.0] (n1[i])[format=8.0]; print " Nondiseased"[format=$13.] (c[i])[format=6.0] (d[i])[format=8.0] (n2[i])[format=8.0]; print " Total"[format=$13.] (m1[i])[format=6.0] (m2[i])[format=8.0] (t[i])[format=8.0]; END; print " -----------------------------------------------------------"; print " UNADJUSTED MANTEL-HAENSZEL TEST STATISTICS AND P-VALUES:"; print " Unadjusted Test:"[format=$24.] MHUNADJ[format=8.4] " p-value:"[format=$14.] PVALUEU[format=8.4]; print " Working Score Test:"[format=$24.] MHSCORE[format=8.4] " p-value:"[format=$14.] PVALUES[format=8.4]; print " -----------------------------------------------------------"; print " ADJUSTED MANTEL-HAENSZEL TEST STATISTIC AND P-VALUE:"; print " Variance Adjustment Factor f is:"[format=$42.] f[format=8.4]; print " Adjusted Test:"[format=$24.] MHADJUS[format=8.4] " p-value:"[format=$14.] PVALUEA[format=8.4]; print " -----------------------------------------------------------"; QUIT; %mend mhadjust; /*******************************************************************/ DATA mhdat; INFILE 'c:\yourdata.dat'; INPUT ID RESPONSE EXPOSURE STRATUM; RUN; %mhadjust(data=mhdat)