/*Conditional likelihood method to deal with nonignorable missing in matched case-control study*/ %macro CLest (gamma=, /*dataset which includes the initial value of parameters in imputation model, the estimated value will also be saved in the dataset*/ delta=, /*dataset which includes the initial value of parameters in missing-data mecanism model,the estimated value will also be saved in the dataset*/ indata=, /*input dataset*/ outdata=, /*output dataset with duplicated records and weights*/ finaldata=, /*dataset with imputated x* and offset*/ xindex=, /*name of the variable that could be missing*/ windex=,/*name of the variable that always be observed*/ yindex=,/*name of the case-control indicator*/ strataindex=,/*name of the stratum indicator*/ mcov=, /*covariates in missing mechanism model except x, when the model only depends on x, mcov=NULL*/ mdy=, /*If missing model depends on Y. FALSE: does not depend, TRUE=depend*/ icov=, /*covariates in the imputation model except y*/ mcovindex=, /*A character vector which indicate the covaraites names in missing mechanism model except x and y*/ icovindex=, /*A character vector which indicate the covaraites names in imputation model except y*/ varnames=/*variable names in the dataset, e.g."v1"//"v2"*/); /*Define the index used in matrix*/ %let xindexc="&xindex"; %let yindexc="&yindex"; %let coln=&varnames//"weight"// "observe"; /*add two variable weight and observe to the original dataset*/ data _newdata_; set &indata; weight=1;observe=1; if &xindex=. then observe=0;run;/*this dataset will be updated*/ data _indata0_; set _newdata_; run;/*the original dataset used in the last step*/ /*duplicate the record with missing x*/ proc iml; use _newdata_; read all into mat[colname=col]; col=&coln; close _newdata_; mat[loc(mat[,"observe"]=0),&xindexc]=0; matmiss=mat[loc(mat[,"observe"]=0),]; mat[loc(mat[,"observe"]=0),&xindexc]=1; matall=mat//matmiss; create _newdata_ from matall[c=col]; append from matall; close _newdata_; quit; /*use the current delta and gamma to produce the new parameter*/ %let diff=1; %do %while(&diff>=0.0000001); data _delta0_; set δ run; data _gamma0_; set γ run; %put the starting value &diff; /*compute the weights based on the current delta and gamma values*/ %if &mcov=NULL %then %do; proc iml; use _newdata_; read all into matall[c=col]; col=&coln; close _newdata_; use δ read point {1} var {intercept} into deltainc; read point {1} var {&xindex} into deltax; close δ use γ read point {1} var {intercept &icov &yindex} into gammaall; close γ n=nrow(matall); do i=1 to n; if (matall[i,"observe"]^=1) then do; d1=gammaall*t(1||matall[i,&icovindex||&yindexc]); d2=deltainc; d3=d2+deltax; d=d1+log((1+exp(d2))/(1+exp(d3))); if (matall[i,&xindexc]=1) then matall[i,"weight"]=exp(d)/(1+exp(d)); else if (matall[i,&xindexc]=0) then matall[i,"weight"]=1-(exp(d)/(1+exp(d))); end; end; create _newdata_ from matall[c=col]; append from matall; close _newdata_; quit; %end; %else %do; proc iml; use _newdata_; read all into matall[c=col]; col=&coln; close _newdata_; use δ read point {1} var {intercept &mcov} into deltainc; read point {1} var {&xindex} into deltax; close δ use γ read point {1} var {intercept &icov &yindex} into gammaall; close γ n=nrow(matall); Do i=1 to n; if (matall[i,"observe"]^=1) then do; d1=gammaall*t(1||matall[i,&icovindex||&yindexc]); d2=deltainc*t(1|| matall[i,&mcovindex] ); d3=d2+deltax; d=d1+log((1+exp(d2))/(1+exp(d3))); if (matall[i,&xindexc]=1) then matall[i,"weight"]=exp(d)/(1+exp(d)); else if (matall[i,&xindexc]=0) then matall[i,"weight"]=1-exp(d)/(1+exp(d)); end; end; create _newdata_ from matall[c=col]; append from matall; close _newdata_; quit; %end; /*comput new delta and gamma based on the expended dataset*/ proc logistic data=_newdata_ descending outest=&gamma covout noprint; model &xindex=&icov &yindex; weight weight;run; %if &mcov= NULL %then %do; proc logistic data=_newdata_ descending outest=&delta covout noprint; model observe= &xindex; weight weight;run; %end; %else %do; proc logistic data=_newdata_ descending outest=&delta covout noprint; model observe=&mcov &xindex; weight weight;run; %end; /*compute the difference between the new and previous parameters*/ %if &mcov=NULL %then %do; proc iml; use δ read point {1} var {intercept &xindex} into deltaall1; close δ use γ read point {1} var {intercept &icov &yindex} into gammaall1; close γ use _delta0_; read point {1} var {intercept &xindex} into deltaall2; close _delta0_; use _gamma0_; read point {1} var {intercept &icov &yindex} into gammaall2; close _gamma0_; df1=(deltaall1-deltaall2)*t(deltaall1-deltaall2); df2=(gammaall1-gammaall2)*t(gammaall1-gammaall2); df=Round(max(df1||df2),0.0000001); vname="d"; create _diff0_ from df[c=vname]; append from df; close _diff0_; quit; %end; %else %do; proc iml; use δ read point {1} var {intercept &mcov &xindex} into deltaall1; close δ use γ read point {1} var {intercept &icov &yindex} into gammaall1; close γ use _delta0_; read point {1} var {intercept &mcov &xindex} into deltaall2; close _delta0_; use _gamma0_; read point {1} var {intercept &icov &yindex} into gammaall2; close _gamma0_; df1=(deltaall1-deltaall2)*t(deltaall1-deltaall2); df2=(gammaall1-gammaall2)*t(gammaall1-gammaall2); df=Round(max(df1||df2),0.0000001); vname="d"; create _diff0_ from df[c=vname]; append from df; close _diff0_; quit; %end; data _null_; set _diff0_; call symput("diff", d); run; %put the difference is &diff; %end;/*do while end*/ data &outdata; set _newdata_;run;/*&outdata saves the expanded dataset with correct weights*/ /*compute the offset and X*for the logistic regression*/ %if &mcov=NULL %then %do; proc iml; use _indata0_; read all into originm[c=col]; col=&coln; close _indata0_; use δ read point {1} var {intercept} into deltainc; read point {1} var {&xindex} into deltax; close δ use γ read point {1} var {intercept &icov} into gammainc; read point {1} var {&yindex} into gammay; close γ nsample=nrow(originm); offset=j(nsample,1,0); do i=1 to nsample; if (originm[i,"observe"]=0) then do; d1=gammainc*t(1||originm[i,&icovindex]); d2=d1+gammay; g3=deltainc; g4=g3+deltax; px1=exp(d2)/(1+exp(d2));px0=exp(d1)/(1+exp(d1)); pr3=1/(1+exp(g4));pr4=1/(1+exp(g3)); phi_s=log((px1*pr3+(1-px1)*pr4)/(px0*pr3+(1-px0)*pr4)); originm[i,&xindexc]=log((1+exp(d2))/(1+exp(d1)))/gammay; offset[i]=phi_s; end; else if (originm[i,"observe"]=1) then do; offset[i]=0; end; end; newmat=originm||offset; newcoln=&coln// "offset"; create &finaldata from newmat[c=newcoln]; append from newmat; close &finaldata; quit; %end; %else %do; %if &mdy=False %then %do; /*the missing mechanism model does not depends on Y*/ proc iml; use _indata0_; read all into originm[c=col]; col=&coln; close _indata0_; use δ read point {1} var {intercept &mcov} into deltainc; read point {1} var {&xindex} into deltax; close δ use γ read point {1} var {intercept &icov} into gammainc; read point {1} var {&yindex} into gammay; close γ nsample=nrow(originm); offset=j(nsample,1,0); do i=1 to nsample; if (originm[i,"observe"]=0) then do; d1=gammainc*t(1||originm[i,&icovindex]); d2=d1+gammay; g3=deltainc*t(1||originm[i,&mcovindex]); g4=g3+deltax; px1=exp(d2)/(1+exp(d2));px0=exp(d1)/(1+exp(d1)); pr3=1/(1+exp(g4));pr4=1/(1+exp(g3)); phi_s=1+log((px1*pr3+(1-px1)*pr4)/(px0*pr3+(1-px0)*pr4)); originm[i,&xindexc]=log((1+exp(d2))/(1+exp(d1)))/gammay; offset[i]=phi_s; end; else if (originm[i,"observe"]=1) then do; offset[i]=0; end; end; newmat=originm||offset; newcoln=&coln// "offset"; create &finaldata from newmat[c=newcoln]; append from newmat; close &finaldata; quit;/*finaldata save the dataset with imputed x* and offset*/ %end; %else %if &mdy=TRUE %then %do; proc iml; use _indata0_; read all into originm[c=col]; read all into originmi[c=col]; col=&coln; close _indata0_; use δ read point {1} var {intercept &mcov} into deltainc; read point {1} var {&yindex} into deltay; read point {1} var {&xindex} into deltax; close δ use γ read point {1} var {intercept &icov} into gammainc; read point {1} var {&yindex} into gammay; close γ nsample=nrow(originm); originmi[,&yindexc]=0; offset=j(nsample,1,0); do i=1 to nsample; if (originm[i,"observe"]=0) then do; d1=gammainc*t(1||originm[i,&icovindex]); d2=d1+gammay; g3=deltainc*t(1||originmi[i,&mcovindex]); g4=g3+deltax; g5=g3+deltay; g6=g3+deltax+deltay; px1=exp(d2)/(1+exp(d2));px0=exp(d1)/(1+exp(d1)); pr1=1/(1+exp(g6));pr2=1/(1+exp(g5));pr3=1/(1+exp(g4));pr4=1/(1+exp(g3)); phi_s=1+log((px1*pr1+(1-px1)*pr2)/(px0*pr3+(1-px0)*pr4)); originm[i,&xindexc]=log((1+exp(d2))/(1+exp(d1)))/gammay; offset[i]=phi_s; end; else if (originm[i,"observe"]=1) then do; g1=deltainc*t(1|| originmi[i,&mcovindex])+deltax*originm[i,&xindexc]; g2=g1+deltay; phi_b=log(exp(g2)/(1+exp(g2))/(exp(g1)/(1+exp(g1)))); offset[i]=phi_b; end; end; newmat=originm||offset; newcoln=&coln// "offset"; create &finaldata from newmat[c=newcoln]; append from newmat; close &finaldata; quit;/*finaldata save the dataset with imputed x* and offset*/ %end; %end; /*Logistic regression with revised x* and offset*/ data &finaldata; set &finaldata; _time_=2-&yindex;run; Proc phreg data=&finaldata; model _time_*&yindex(0)=&windex &xindex/offset=offset ties=discrete; strata &strataindex; run; /*delete intermediate dataset produced during the program*/ proc datasets nolist; delete _diff0_ _gamma0_ _delta0_ _newdata_ _indata0_; quit; %mend CLest; data gammainitial; input intercept w y z; cards; 0 0.2 0.3 1 ; run; data deltainitial; input intercept y x; cards; 3 1 -2 ; run; %CLest(gamma=gammainitial,delta=deltainitial, indata=test0,outdata=od, finaldata=fd, mcov=y,mdy=TRUE, icov=w z, mcovindex="y" , icovindex="w"||"z", xindex=x, yindex=y, strataindex=mid,windex=w,varnames="id"//"mid"//"w"// "x"//"y"//"z"//"r" );