/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 2 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* Demonstration using NY SPARCS AMI diagnoses, 1994 - 2004 */

/* Library that holds the demo data set.  The path is blank in this
   script and has to be filled in before anything below will run. */
LIBNAME mylib " ";

/* Two-sample t-test: AGE compared between the levels of NYC. */
PROC TTEST DATA=mylib.demo_1;                               /* data set being analysed        */
    TITLE 'Comparing ages nyc and upstate ami patients ';   /* heading printed on the output  */
    CLASS nyc;                                              /* variable that defines groups   */
    VAR age;                                                /* continuous variable to compare */
RUN;

/* Univariate look at AGE: histogram first, then a probability plot. */
PROC UNIVARIATE DATA=mylib.demo_1;
    VAR age;
    HISTOGRAM;      /* histogram of the VAR variable */
    PROBPLOT;
RUN;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 3 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* A DEMONSTRATION OF SIMPLE COLUMN INPUT FROM THE EDITOR WINDOW */

/* Builds WORK.PATIENTS from in-stream records using column input.
   Each field is taken from a fixed column range, so the records below
   must stay aligned: lastname 1-10, fname 12-21, ssn 23-31,
   status 33-43.  (Older SAS code uses CARDS in place of DATALINES.) */
DATA patients;
    INPUT lastname $ 1-10
          fname    $ 12-21
          ssn        23-31
          status   $ 33-43;
    DATALINES;
Green      Samual     888888888 Discharged
Brennon    Carol      123456789 Discharged
Wang       Robert     999999999 Inpatient
Randolph   Virginia   987654321 Inpatient
;   /* the terminating semicolon sits on a line by itself */
RUN;

/* List the data set that was just created. */
PROC PRINT DATA=patients;
RUN;
/**********************************************************************
CREATE SAS FILE FOR CALIFORNIA ACUTE MI FILE
**********************************************************************/

/* List input: eleven blank-delimited numeric fields per record. */
data ami;
    infile "my-file-path\Hert95.txt" lrecl=999;
    input FACNUM AGEYRS SEX RACE ZIP LOS DISP PMTSOR DRG APRSEV RLN;
run;

proc contents data=ami;
run;

proc print data=ami (obs=10);
run;

/**********************************************************************
READING SPARCS DATA: Date, Age, Diagnosis
**********************************************************************/

/* Formatted input with column pointers.  The INFILE path is blank in
   this script and must be supplied.
   FIX: the INPUT statement was missing its terminating semicolon, so
   the following "run" was being read as one more variable name. */
DATA sparcs_1;
    INFILE '' LRECl=450;   * OBS=100;
    INPUT @18 DATE yymmn6.
          @44 AGE  3.
          @71 PDX  $CHAR6.;
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 4 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* creating fitness data set (list input, one runner per record) */
data fitness;
    length Name $ 9;
    input Name $ Gender $ Runtime Age Weight Oxygen_Consumption
          Run_Pulse Rest_Pulse Maximum_Pulse Performance;
    datalines;
Donna F 8.17 42 68.15 59.57 166 40 172 14
Gracie F 8.63 38 81.87 60.06 170 48 186 13
Luanne F 8.65 43 85.84 54.3 156 45 168 13
Mimi F 8.92 50 70.87 54.63 146 48 155 11
Chris M 8.95 49 81.42 49.16 180 44 185 11
Allen M 9.22 38 89.02 49.87 178 55 180 12
Nancy F 9.4 49 76.32 48.67 186 56 188 10
Patty F 9.63 52 76.32 45.44 164 48 166 10
Suzanne F 9.93 57 59.08 50.55 148 49 155 9
Teresa F 10 51 77.91 46.67 162 48 168 9
Bob M 10.07 40 75.07 45.31 185 62 185 9
Harriett F 10.08 49 73.37 50.39 168 67 168 9
Jane F 10.13 44 73.03 50.54 168 45 168 9
Harold M 10.25 48 91.63 46.77 162 48 164 9
Sammy M 10.33 54 83.12 51.85 166 50 170 8
Buffy F 10.47 52 73.71 45.79 186 59 188 8
Trent M 10.5 52 82.78 47.47 170 53 172 8
Jackie F 10.6 47 79.15 47.27 162 47 164 8
Ralph M 10.85 43 81.19 49.09 162 64 170 7
Jack M 10.95 51 69.63 40.84 168 57 172 7
Annie F 11.08 51 67.25 45.12 172 48 172 7
Kate F 11.12 45 66.45 44.75 176 51 176 7
Carl M 11.17 54 79.38 46.08 156 62 165 7
Don M 11.37 44 89.47 44.61 178 62 182 6
Effie F 11.5 48 61.24 47.92 170 52 176 6
George M 11.63 47 77.45 44.81 176 58 176 6
Iris F 11.95 40 75.98 45.68 176 70 180 5
Mark M 12.63 57 73.37 39.41 174 58 176 4
Steve M 12.88 54 91.63 39.2 168 44 172 4
Vaughn M 13.08 44 81.42 39.44 174 63 176 2
William M 14.03 45 87.66 37.39 186 56 192 0
;
run;

/* demonstration of proc print */
proc print data=fitness (obs=5) noobs;
    var name age weight;
    where gender = 'F';
    sum runtime;
run;

/* demonstration of proc sort */
proc sort data=fitness out= sortdat;   /* output data set is in the work library */
    by gender;                         /* will be sorted by gender ascending */
run;

proc print data=sortdat;               /* note using the output sorted data set */
    by gender;                         /* variable by which we sorted */
    sum runtime;                       /* request sum of numeric variable */
run;

/* demonstration titles and footnotes */
proc print data=sortdat;               /* note using the output sorted data set */
    title  'Printout of Fitness Data';
    title2 'Sorted by Gender';
    title3 'With Sums for Runtime';
    footnote  'Analyst: DiMaggio';
    footnote2 'Conducted December 2008';
    by gender;                         /* variable by which we sorted */
    sum runtime;                       /* request sum of numeric variable */
run;

/* demonstration format */
proc print data=fitness (obs=5) noobs;
    var name age weight;
    where gender = 'F';
    sum runtime;
    *format name $UPCASE10.;
run;

/* demonstration proc format */
proc format;
    value old_runner 0-40   = 'young'
                     41-100 = 'old';
run;

proc print data=fitness (obs=5) noobs;
    var name age weight;
    where gender = 'F';
    sum runtime;
    format age old_runner.;
run;

/* demonstration ODS */
ods html file = 'C:\Users\Charles DiMaggio\Desktop\example.html' style=sasweb;   /* open ODS */
proc print data=fitness (obs=5) noobs;
    var name age weight;
    where gender = 'F';
    sum runtime;
    format age old_runner.;
    title  'RUNNER DATA';
    title2 '12 December 2008';
    footnote 'Analyst: Charles DiMaggio';
run;
ods html close;   /* close ODS */

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 5 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

libname ch5 ' ';   /* refer to the location of your chapter 5 data sets */

/* demo creating new data set using Set */
/* FIX: this step pointed at ch4.ch4demo1, but no CH4 libref is assigned
   anywhere in this script; the data set read by the next step is
   ch5.ch5demo1, so that is what gets described here. */
proc contents data=ch5.ch5demo1;
run;

data nyc_deaths;
    set ch5.ch5demo1;
    /* "+" returns missing when either operand is missing */
    tot_911_deaths = female_911_deaths + male_911_deaths;
run;   /* note there is no output, check log window */

/* demo concatenating new data set using Set */
data tot_deaths;                    /* creating new file in work library */
    set nyc_deaths ch5.ch5demo2;    /* stacking the file created above on top of ch5demo2 */
run;

/* demo merging data sets using Merge-By */
proc contents data=ch5.ch5demo3;
run;

/* NOTE(review): this first merge runs before either input is sorted by
   ZIP; presumably it is here to show that MERGE-BY needs sorted input.
   Confirm, or drop it if it was left in by accident. */
data pop_deaths;
    merge tot_deaths ch5.ch5demo3;
    by zip;
run;

proc sort data=tot_deaths;
    by zip;
run;

proc sort data=ch5.ch5demo3;
    by zip;
run;

data pop_deaths(keep= zip tot_911_deaths mhi);
    merge tot_deaths ch5.ch5demo3;
    by zip;
run;

/* IF-THEN-ELSE Demonstration */
DATA SPARCS;   /* name the data set in work directory */
    /* tell SAS where the file is, that it's a long file, and to read in
       the 1st 15,000 observations */
    INFILE 'my-file-path\sparcs.TXT' MISSOVER LRECl=452 OBS=15000;
    INPUT                     /* input the variables you are interested in */
        @18  DATE  yymmn6.    /* informat based on SPARCS raw file */
        @44  AGE   3.
        @71  PDX   $CHAR6.
        @331 ECODE $CHAR6.
        @343 DISPO $CHAR2.
    ;
RUN;

PROC CONTENTS DATA=SPARCS;   /* check your file was read in */
RUN;

proc freq data=sparcs order=freq;
    tables pdx;
run;

/* Derive 0/1 indicator variables from disposition, diagnosis and E-code.
   FIX: this step used to read and write NYCSPARCS04, a data set that is
   not created anywhere in this script, while the PROC PRINT below looks
   for the new flags in SPARCS.  It now updates SPARCS so that the three
   steps agree. */
/******************** MORTALITY **************************/
data sparcs;
    set sparcs;

    IF DISPO = '20' then death=1;
    else death=0;

    /*********************** SUBSTANCE ABUSE *****************/
    if pdx in   /* use ICD9 codes to create diagnoses */
       ('2910','2911','2912','2913','2914','2915','29181','29189','2919','2920','29211','29212',
        '2922','29281','29282','29283','29284','29289','2929',
        '30300','30301','30302','30303','30390','30391','30392','30393','30400',
        '30401','30402','30403','30410','30411','30412','30413','30420',
        '30421','30422','30423','30430','30431','30432','30433','30440',
        '30441','30442','30443','30450','30451','30452','30453','30460',
        '30461','30462','30463','30470','30471','30472','30473','30480',
        '30481','30482','30483','30490','30491','30492','30493','30500',
        '30501','30502','30503','3051','30520','30521','30522','30523',
        '30530','30531','30532','30533','30540','30541','30542','30543',
        '30550','30551','30552','30553','30560','30561','30562','30563',
        '30570','30571','30572','30573','30580','30581','30582','30583',
        '30590','30591','30592','30593')
    Then subst_ab=1;
    Else subst_ab=0;

    /************ CHILD ABUSE ***********************************/
    /* a missing AGE also satisfies "LE 10" in SAS */
    if age LE 10 AND ecode in
       ('E9670','E9671','E9672','E9673','E9674','E9675','E9676','E9677','E9678','E9679',
        'E9684','E9040','E9041','E9042','V1541','V1542','V1549','V6121')
    then child_ab=1;
    else child_ab=0;
RUN;

PROC PRINT DATA=SPARCS;
    VAR death subst_ab child_ab;
    sum death subst_ab child_ab;
run;

/* DATE Demonstration */
*libname ch5 ' ';

/* NOTE(review): both WHERE clauses below use a strict inequality
   against 01OCT2001, so a record dated exactly 01OCT2001 is in neither
   analysis.  Confirm the intended cut-off. */
proc ttest data=ch5.ch5demo4;
    class nyc;
    var age;
    where date < '1oct2001'd;
    title 'Comparing ages nyc and upstate ami patients: pre 9/11 ';
run;

proc ttest data=ch5.ch5demo4;
    class nyc;
    var age;
    where date > '1oct2001'd;
    title 'Comparing ages nyc and upstate ami patients: post 9/11';
run;
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 6 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

libname ch6 ' ';   /* refer to the location of your chapter 6 data */

proc contents data=ch6.ch6demo1;
run;

proc print data=ch6.ch6demo1 (obs=10);
run;

proc means data=ch6.ch6demo1 maxdec=1;
    var ageyrs;
    class sex;
run;

proc freq data=ch6.ch6demo1;
    tables sex*race2 / measures;
run;

proc format;
    value gndr 1 = "male"
               2 = "female";
run;

proc freq data=ch6.ch6demo1;
    tables sex*race2 / measures;
    format sex gndr.;
run;

/* proc tabulate: columns */
proc tabulate data=ch6.ch6demo1;
    class drg;
    table drg;
run;

proc freq data=ch6.ch6demo1 order=freq;
    tables drg;
run;

/* proc tabulate: one-dimension totals */
proc tabulate data=ch6.ch6demo1;
    class drg;
    table drg all;
run;

/* proc tabulate: two x two totals */
proc tabulate data=ch6.ch6demo1;
    class drg race2;
    table drg all, race2 all;
run;

proc tabulate data=ch6.ch6demo1;
    class drg race2;
    var ageyrs;
    table drg, race2*ageyrs;
run;

proc tabulate data=ch6.ch6demo1;
    class drg race2;
    var ageyrs;
    table drg, race2*ageyrs*mean;
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* tabulate syntax (Chapter 6) */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* create output data set for surveillance from tabulate procedure */
/* NOTE(review): the IND libref is not assigned anywhere in the visible
   part of this script; it has to exist before this step runs. */
proc tabulate data=ind.sparcs;     /* individual-level data set */
    where nyc=1;                   /* subset to particular geographic area */
    class month;                   /* surveillance period, using a previously defined
                                      variable created with the MONTH() function */
    var injury ami child_ab;       /* diagnoses to monitor; note have to be numeric */
    table month, injury ami child_ab;   /* note default statistic is sum or total */
    ods output table=sparcs;       /* create output data set based on table */
run;                               /* explicit step boundary (was missing) */

data sparcs;   /* clean up the dataset */
    set sparcs;
    drop _type_ _page_ _table_;
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 7 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

libname ch7 ' ';   /* location of your chapter 7 data sets */

/* Demonstration PROC GCHART */
proc contents data=ch7.ch7demo1;
run;

PROC GCHART DATA = ch7.ch7demo1;
    HBAR case_control;
RUN;
quit;

PROC GCHART DATA = ch7.ch7demo1;
    VBAR case_control;
RUN;
quit;

/* Work copy with CC, a character version of CASE_CONTROL. */
data ch7demo1;
    set ch7.ch7demo1;
    cc = put(case_control, $8.);
run;

/* FIX: the next two charts named WORK.CH6DEMO1, which is never created;
   CC exists only in WORK.CH7DEMO1, built in the step above. */
PROC GCHART DATA = ch7demo1;
    HBAR cc;
RUN;
quit;

PROC GCHART DATA = ch7demo1;
    VBAR cc;
RUN;
quit;

/* FIX: the grouping option is spelled GROUP= in the GCHART
   documentation; the original used "groupvar=". */
PROC GCHART DATA = ch7.ch7demo1;
    VBAR sex / group=case_control;
RUN;
quit;

PROC GCHART DATA = ch7.ch7demo1;
    VBAR sex / patternid=midpoint;
RUN;
quit;

PROC GCHART DATA = ch7.ch7demo1;
    VBAR sex / group=case_control patternid=midpoint;
RUN;
quit;

proc gplot data= ch7.ch7demo2;
    plot ami_sum * date;
run;
quit;

/* Demonstration PROC GPLOT */
/************* AMI Time Series *************************/
goptions reset=all;
*goptions cback=white device=win;

axis1 major = (h=2.0 c=black)
      minor = (h=1.0 c=black)
      order = 0 to 4000 by 1000
      label = (h=1.5 a=90 f=arial c=black 'Number of AMI-related diagnoses');

axis2 major = (h=2.0 c=black)
      minor = (h=1.0 c=black)
      /*order = 1 to 132 by 6*/
      label = (h=1.5 f=arial c=black 'Month and Year');

proc gplot data= ch7.ch7demo2;
    /* href=15219 is the SAS date value of 01SEP2001 */
    plot ami_sum * date / href=15219 vaxis = axis1 haxis = axis2;
    symbol v='.' f=arial h=2 i=sm5 c=black;
    note h=1.0 move=(59,60)pct 'September 2001';   * percent (over, up);
    title  h=3.0 c=black f=arial j=center 'New York City AMI-Related Hospital Discharges';
    title2 h=2.0 c=black f=arial j=center '1994 to 2004 By Month and Year';
run;
quit;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 8 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* NOTE(review): WORK.EXPEND is not created in the visible part of this
   script; it must already exist in the session. */

/* DEMONSTRATION: ONE-WAY TABLE */
proc freq data=expend;
    tables ses;
run;

/* DEMONSTRATION: CROSSTABS */
proc format;   /* note use of proc format to create spendfmt and format to apply it */
    value spendfmt 1="High Utilization"
                   0="Low Utilization";
run;

proc freq data=expend;
    tables expenditure gender ses age   /* note age might be a problematic cat var */
           gender*expenditure ses*expenditure;
    format expenditure spendfmt.;
run;

/* DEMONSTRATION: REORDERING VARIABLES */
/* first need to create a new data set that transforms a text variable
   into a numeric variable so the sort order corresponds to the logical
   order */
data expend;
    set expend;
    SES_level=1*(SES='Low') + 2*(SES='Medium') + 3*(SES='High');
run;

/* neat bit of Boolean logic, instead of many IF THEN statements
   an expression enclosed in parentheses is a logical operator that returns
   the value 1 if the expression is true and 0 if it is false.
here we tell SAS SAS to look at each observation, if the oldvar ='low' set it to 1 (true) if not, set it to 0 (false) , do that for each level (med and high) the multiply that 1 or 0 by 1, 2 or 3 as indicated. Effectively transforms a text variable into an ordinal numeric variable */ /* use PROC FORMAT to create a user defined format */ proc format; value sesfmt 1='Low SES' 2='Medium SES' 3='High SES'; run; /* use PROC FREQ with a FORMAT statement */ proc freq data=expend; tables SES_level*expenditure; format SES_level sesfmt. expenditure spendfmt.; run; /* expenditure chi square exercise */ proc freq data=expend; tables gender*expenditure / chisq expected nocol nopercent; run; /* DEMONSTRATION: EXACT PEARSON CHI SQUARE TEST */ /* read in a data set */ data exact; input a b ; datalines; 1 2 1 2 1 2 2 1 2 1 2 2 2 2 ; run; proc freq data=exact; tables a*b; exact pchi; run; /* DEMONSTRATION OF SPEARMAN CORRLEATION STATISTIC */ proc freq data=expend; tables SES_level*expenditure / chisq measures cl;; format SES_level sesfmt. expenditure spendfmt.; run; /* signficicance vs. association */ data expend1; set expend; run; data expend2; set expend expend1; run; proc freq data=expend2; tables SES_level*expenditure / chisq measures cl;; format SES_level sesfmt. 
purchase spendfmt.; run; /********************************************************************** XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX **********************************************************************/ /* Chapter 9*/ /********************************************************************** XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX **********************************************************************/ /* Odds Ratio */ proc freq data=birth nlevels;/* nlevels displays num levels for variables */ tables prev_pretrm*low / nocol norow measures; /* 2x2 table measures request OR */ title 'odds ratio for association between previous preterm and lbw'; run; /* Stratified Analysis */ proc freq data=birth nlevels; tables uterine_irr*prev_pretrm*low / measures; title 'odds ratio for association between previous preterm and lbw'; title2 'controlling for uterine irritibility'; run; /*Contingency Table Analysis */ proc freq data=birth nlevels; tables uterine_irr*prev_pretrm*low / all bdt; /* Tarone adjustment for small sample size */ exact or comor; /* exact preferred for small sample sizes comor gives CI for the exact OR*/ title 'Contingency Table Analysis'; run; /* 2x2 analysis */ data contingency; input exposure $ disease $ count ; cards; unexp nodis 16 unexp dis 48 exp nodis 40 exp dis 20 ; proc freq data=contingency; weight count; tables exposure*disease; run; /* stratified 2x2 table */ data stress; input confounder $ exposure $ outcome $ count @@; cards; conf unexp dis 50 conf unexp nodis 10 conf exp dis 100 conf exp nodis 90 noconf unexp dis 60 noconf unexp nodis 140 noconf exp dis 10 noconf exp nodis 50 ; proc freq data=stress order=data; weight count; tables confounder*exposure*outcome / all; run; /********************************************************************** XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX 
**********************************************************************/
/* Chapter 10 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

LIBNAME ch10 ' ';          /* folder that holds the chapter 10 data sets */

OPTIONS NODATE NONUMBER;   /* page options */

/* First ten observations only (OBS= limits what is printed). */
PROC PRINT DATA=ch10.infants (OBS=10);
    TITLE ' list of observations';
RUN;

/* Descriptive statistics for LOS; the statistic keywords belong to the
   PROC statement, which ends at its semicolon. */
PROC MEANS DATA=ch10.infants MAXDEC=4 N MEAN MEDIAN STDERR CLM VAR;
    TITLE ' descriptive stats: length of stay';
    VAR los;
RUN;

/* Distribution of LOS. */
PROC UNIVARIATE DATA=ch10.infants;
    TITLE ' univariate stats: length of stay';
    VAR los;               /* with no VAR statement every numeric variable is analysed */
    ID date;               /* variable used to label individual observations */
    HISTOGRAM / NORMAL;
    PROBPLOT;
RUN;

/* Sorts CH10.INFANTS in place (no OUT=), then box plots of LOS by DISPO. */
PROC SORT DATA=ch10.infants;
    BY dispo;
RUN;

SYMBOL COLOR = salmon;

PROC BOXPLOT DATA=ch10.infants;
    TITLE 'Boxplot';
    PLOT los*dispo / CFRAME   = vligb
                     CBOXES   = dagr
                     CBOXFILL = ywh;
RUN;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 11 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* Run this step to create WORK.BP_DRUG: one record per subject with
   Drug (numeric), Disease (character) and BP.
   NOTE(review): BP is read with the 3. informat, i.e. a fixed
   three-column field, so presumably only the leading digits of each
   value (e.g. 119 from 119.70095982) reach the data set.  Confirm this
   is intended; plain list input (BP with no informat) would keep the
   decimals. */
DATA bp_drug;
    INPUT Drug Disease $ BP 3.;
    DATALINES;
1 A 119.70095982
1 A 121.36188924
1 A 119.69180851
1 A 119.60217745
1 A 120.96573695
1 A 119.18963686
1 A 120.04126616
1 A 120.6485107
1 A 121.39677755
1 A 121.2941861
2 A 134.70095982
2 A 136.36188924
2 A 134.69180851
2 A 134.60217745
2 A 135.96573695
2 A 134.18963686
2 A 135.04126616
2 A 135.6485107
2 A 136.39677755
2 A 136.2941861
3 A 139.17670991
3 A 139.7596925
3 A 139.61692792
3 A 139.47841248
3 A 140.32687732
3 A 139.19503074
3 A 139.82084261
3 A 139.33853642
3 A 139.18391356
3 A 141.16519705
4 A 151.0314732
4 A 148.45628664
4 A 150.90924814
4 A 150.36754897
4 A 150.40537109
4 A 150.27207554
4 A 148.84462813
4 A 148.98134213
4 A 148.67342047
4 A 151.22752311
1 B 159.47367321
1 B 157.8492618
1 B 159.71157707
1 B 161.08606542
1 B 157.88059214
1 B 160.70808134
1 B 158.93686029
1 B 159.59337556
1 B 161.04857471
1 B 162.4582518
2 B 149.44343695
2 B 149.51766786
2 B 151.67540302
2 B 148.43104013
2 B 150.04534684
2 B 149.53540532
2 B 149.21944044
2 B 149.29473811
2 B 149.80104295
2 B 150.2553889
3 B 140.84243239
3 B 139.80385608
3 B 140.01491297
3 B 140.84663178
3 B 139.47817427
3 B 139.34930573
3 B 140.06553297
3 B 139.90122485
3 B 140.48542888
3 B 139.48601375
4 B 129.32979505
4 B 130.1515735
4 B 130.44367458
4 B 129.75982193
4 B 129.05742185
4 B 130.10355508
4 B 130.70093917
4 B 130.88766817
4 B 131.63619676
4 B 129.8023949
1 C 126.23902904
1 C 125.75600563
1 C 125.87874742
1 C 125.04661693
1 C 122.97475788
1 C 123.59937382
1 C 125.16139122
1 C 124.73864001
1 C 123.42349457
1 C 125.38733391
2 C 125.69890209
2 C 125.56064272
2 C 123.83433943
2 C 124.6489834
2 C 123.29421225
2 C 125.24488558
2 C 123.753593
2 C 125.1478912
2 C 126.59647236
2 C 124.56548104
3 C 125.09941212
3 C 126.00783602
3 C 123.7285012
3 C 123.81351742
3 C 124.62018186
3 C 124.68909051
3 C 124.86764454
3 C 124.00137446
3 C 125.56161889
3 C 124.01149493
4 C 124.8987427
4 C 124.38445589
4 C 124.91319163
4 C 125.86522746
4 C 123.22694283
4 C 125.61937452
4 C 126.41012761
4 C 125.39699702
4 C 125.05360115
4 C 125.49297642
;
RUN;

/* I.
One-Way ANOVA */

/* explore data */
proc univariate data=bp_drug;
    var BP;
    histogram / normal;
    probplot;
run;

proc sort data=bp_drug;
    by drug;
run;

symbol color = salmon;
proc boxplot data=bp_drug;
    plot BP*Drug / cframe   = vligb
                   cboxes   = dagr
                   cboxfill = ywh;
    title 'Boxplot Blood Pressure Drugs';
run;

/* one-way ANOVA */
options ls=75 ps=45;   /* page and line spacing options */
proc glm data = bp_drug;
    class drug;
    model BP=drug;
    means drug / hovtest;
    /* creates an output data set called 'check': r= names the variable
       that receives the residuals, p= the variable that receives the
       predicted values */
    output out=check r=resid p=pred;
    title 'testing for equality of mean BP by Drug with GLM';
run;
quit;   /* have to quit out of GLM */

/* now run gplot on the 'check' dataset created above */
/* FIX: AXIS and SYMBOL definitions are additive, so without a reset
   AXIS1/AXIS2/SYMBOL1 still carry whatever earlier graphs in the session
   set (for example ORDER=0 to 4000, an axis label, or a smoothing
   interpolation).  Clear them before redefining. */
goptions reset=(axis, symbol);
axis1 w=2 major=(w=2) minor=none offset=(10pct);
axis2 w=2 major=(w=2) minor=none;
proc gplot data=check;
    plot resid*pred / haxis=axis1 vaxis=axis2 vref=0;   /* can leave out if ok with defaults */
    title 'plot residuals vs predictors for drugs';
run;
quit;

/* now run proc univariate to get histogram, normal plot, kurtosis and
   skewness on residuals */
proc univariate data = check normal;
    var resid;
    histogram resid / normal;
    probplot;   * resid / mu=est sigma=est color=blue w=1;
    title;
run;

/* compare means */
proc glm data = bp_drug;
    class drug;
    model BP=drug;
    lsmeans drug / pdiff=all adjust=tukey;   /* pdiff=all requests all pairwise p values */
    /* 'check' again receives residuals (r=) and predicted values (p=) */
    output out=check r=resid p=pred;
    title 'testing for equality of mean BP by Drug with GLM';
run;
quit;

/* II. Two-Way ANOVA */
/* means for combinations of disease and drug */
proc means data=bp_drug mean var std;
    class disease drug;
    var BP;
    title 'Selected Descriptive Statistics for drug-disease combinations';
run;

/* means plot */
proc gplot data=bp_drug;
    symbol  c=blue  w=2 interpol=std1mtj line=1;   /* interpolation method gives s.e. bars */
    symbol2 c=green w=2 interpol=std1mtj line=2;
    symbol3 c=red   w=2 interpol=std1mtj line=3;
    plot BP*drug=disease;   /* vertical by horizontal */
    title 'Illustrating the Interaction Between Disease and Drug';
run;
quit;

/* PROC GLM for ANOVA with interaction term */
proc glm data=bp_drug;
    class disease drug;
    model BP=disease drug disease*drug;   /* note interaction term */
    title  'Analyze the Effects of Drug and Disease';
    title2 'Including Interaction';
run;
quit;

/* compare means with interaction term */
proc glm data=bp_drug;
    class disease drug;
    model BP=drug disease drug*disease;
    lsmeans disease*drug / adjust=tukey pdiff=all;   /* note looking at combinations */
    title 'Multiple Comparisons Tests for Drug and Disease';
run;
quit;

/***************************************************************************************/
/* SAMPLE GLM SYNTAX FOR FUTURE USE */
/***************************************************************************************/
/* Template only: your.data, cat_var and cont_var are placeholders. */

options ls=75 ps=45;   /* page and line spacing options */
proc glm data = your.data;
    class cat_var;            /* categorical variable by which you want to group the
                                 outcome variable */
    model cont_var=cat_var;   /* specify the model i.e. response variable = predictor(s) */
    means cat_var / hovtest;  /* computes means of dependent variable for each value of
                                 specified effect */
    /* hovtest above is Levene's test for homogeneity (equality) of
       variances (one of the assumptions of ANOVA, i.e. homoscedasticity);
       the null hypothesis is that the variances are equal, so do not want
       to reject the null: want a large p value (look under 'Pr > F') on
       output */
    lsmeans cat_var / pdiff=all adjust=tukey;   /* pdiff=all requests all pairwise p values */
    /* output data set 'check': r= residuals, p= predicted values */
    output out=check r=resid p=pred;
    title 'testing for equality of means with GLM';
run;
quit;   /* note that you have to quit out of glm, or it will keep running */

/* now run gplot on the 'check' dataset created above */
/* reset first: AXIS/SYMBOL definitions are additive and would otherwise
   inherit options from earlier graphs (e.g. interpol=std1mtj) */
goptions reset=(axis, symbol);
axis1 w=2 major=(w=2) minor=none offset=(10pct);
axis2 w=2 major=(w=2) minor=none;
proc gplot data=check;
    plot resid*pred / haxis=axis1 vaxis=axis2 vref=0;   /* can leave this out if you're ok
                                                           with defaults */
    title 'plot residuals vs predictors for cereal';
run;
quit;

/* now run proc univariate to get histogram, normal plot, kurtosis and
   skewness on residuals */
proc univariate data = check normal;
    var resid;
    histogram / normal;
    /* FIX: mu=, sigma=, color= and w= are sub-options of the NORMAL
       distribution option and have to be given inside normal( ... ) */
    probplot / normal(mu=est sigma=est color=blue w=1);
    title;
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 12 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* run to create the corr data set (a stray null statement before
   CARDS was removed) */
data corr;
    input age height outcome1 outcome2;
    cards;
35 170 340 -340
45 165 341 -341
46 170 380 -380
16 156 390 -390
32 166 400 -400
16 157 410 -410
14 165 446 -446
16 152 455 -455
35 177 483 -483
33 158 510 -510
40 150 544 -544
28 165 550 -550
23 160 573 -573
52 150 577 -577
46 180 580 -580
29 173 600 -600
30 172 630 -630
21 163 655 -655
21 164 660 -660
20 189 662 -662
34 182 689 -689
43 184 690 -690
35 174 700 -700
39 177 720 -720
43 183 730 -730
37 175 765 -765
32 173 780 -780
24 173 790 -790
20 162 805 -805
25 180 810 -790
22 173 870 -710
25 171 945 -700
;
run;

/* example scatterplots */
/* FIX: clear SYMBOL definitions first; the means plot above leaves
   interpol=std1mtj in effect, which would draw standard-deviation bars
   instead of plain points in these scatterplots */
goptions reset=symbol;

proc gplot data=corr;
    title1 'direct relationship?';
    plot age*height;
run;
quit;

proc gplot data=corr;
    title1 'indirect relationship?';
    plot height*outcome2;
run;
quit;

proc gplot data=corr;
    title1 'curvilinear relationship?';
    plot age*outcome2;
run;
quit;

/* example correlations from corr data */
proc corr data=corr rank;
    var age;
    with height;
run;

proc corr data=corr rank;
    var height;
    with outcome2;
run;

proc corr data=corr rank;
    var age;
    with outcome2;
run;

/* dow jones and asthma */
data dow;
    input year dow asthma;   /* yearly dow jones industrial avg, asthma hospitalizations
                                per 10,000 0-17 year olds */
    cards;
1980 963.99 21
1981 875 22.2
1982 1066.54 25.7
1983 1258.64 23.7
1984 1211.57 25.5
1985 1546.67 25.4
1986 1895.95 27.3
1987 1938.83 25.9
1988 2168.57 28.5
1989 2753.2 27.9
1990 2633.66 28.4
1991 3168.83 30.6
1992 3301.11 30.8
1993 3754.09 25.4
1994 3834.44 26.8
1995 5117.12 32.7
1996 6448.27 29.8
1997 7908.25 32.4
1998 9181.4 24.4
1999 11497.12 28.2
2000 10786.85 29.6
;
run;

symbol1 h=2.0 v=star;
proc gplot data=dow;
    plot dow*asthma;
run;
quit;

proc corr data=dow rank;
    var dow;
    with asthma;
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 13 */
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* run to create the fitness data set */
data fitness;
    length Name $ 9;
    input Name $ Gender $ Runtime Age Weight Oxygen_Consumption
          Run_Pulse Rest_Pulse Maximum_Pulse Performance;
    datalines;
Donna F 8.17 42 68.15 59.57 166 40 172 14
Gracie F 8.63 38 81.87 60.06 170 48 186 13
Luanne F 8.65 43 85.84 54.3 156 45 168 13
Mimi F 8.92 50 70.87 54.63 146 48 155 11
Chris M 8.95 49 81.42 49.16 180 44 185 11
Allen M 9.22 38 89.02 49.87 178 55 180 12
Nancy F 9.4 49 76.32 48.67 186 56 188 10
Patty F 9.63 52 76.32 45.44 164 48 166 10
Suzanne F 9.93 57 59.08 50.55 148 49 155 9
Teresa F 10 51 77.91 46.67 162 48 168 9
Bob M 10.07 40 75.07 45.31 185 62 185 9
Harriett F 10.08 49 73.37 50.39 168 67 168 9
Jane F 10.13 44 73.03 50.54 168 45 168 9
Harold M 10.25 48 91.63 46.77 162 48 164 9
Sammy M 10.33 54 83.12 51.85 166 50 170 8
Buffy F 10.47 52 73.71 45.79 186 59 188 8
Trent M 10.5 52 82.78 47.47 170 53 172 8
Jackie F 10.6 47 79.15 47.27 162 47 164 8
Ralph M 10.85 43 81.19 49.09 162 64 170 7
Jack M 10.95 51 69.63 40.84 168 57 172 7
Annie F 11.08 51 67.25 45.12 172 48 172 7
Kate F 11.12 45 66.45 44.75 176 51 176 7
Carl M 11.17 54 79.38 46.08 156 62 165 7
Don M 11.37 44 89.47 44.61 178 62 182 6
Effie F 11.5 48 61.24 47.92 170 52 176 6
George M 11.63 47 77.45 44.81 176 58 176 6
Iris F 11.95 40 75.98 45.68 176 70 180 5
Mark M 12.63 57 73.37 39.41 174 58 176 4
Steve M 12.88 54 91.63 39.2 168 44 172 4
Vaughn M 13.08 44 81.42 39.44 174 63 176 2
William M 14.03 45 87.66 37.39 186 56 192 0
;
run;

/* simple regression demonstration fitness data */
proc reg data=fitness;
    model oxygen_consumption=performance;
    title 'Simple Linear Regression of Oxygen Consumption and Performance';
run;
quit;

/* predicted values demonstration fitness data */
data need_predictions;
    input performance @@;   /* @@ tells SAS there is more than one value for the same
                               variable on a line; note same variable name */
    datalines;
0 3 6 9 12
;   /* semi-colon on its own line */
run;

data predoxy;   /* appending above data set to the fitness data set */
    set fitness need_predictions;
run;

/* calculate the prediction */
proc reg data=predoxy;
    model oxygen_consumption=performance / p;   /* tell SAS to predict values for oxygen */
    id performance;
    title 'Oxygen_Consumption=Performance with Predicted Values';
run;
quit;

/* plot prediction
and confidence intervals */
options ps=50 ls=76;
goptions reset=all fontres=presentation ftext=swissb htext=1.5;

proc reg data=predoxy;   /* using data set you created for prediction */
    model oxygen_consumption=performance / clm cli alpha=.05;
    id name performance;
    plot oxygen_consumption*performance / conf pred;
    symbol1 c=red v=dot;
    symbol2 c=red;
    symbol3 c=blue;
    symbol4 c=blue;
    symbol5 c=green;
    symbol6 c=green;
    title;
run;
quit;

/* multiple reg demonstration fitness data */
proc reg data=fitness;
    model oxygen_consumption=performance runtime;
    title 'Multiple Linear Regression fitness Data';
run;
quit;

/* dummy variable coding */
/* This step is kept to show the problem: GENDER is a character variable
   and PROC REG has no CLASS statement, so reg doesn't work on
   categorical variables. */
proc reg data=fitness;
    model oxygen_consumption=performance gender;
    title 'Multiple Linear Regression fitness Data';
run;
quit;

/* need "dummy" variables for categorical variables */
/* Three age bands: 0 = under 40, 1 = 40 to 50, 2 = over 50.
   FIX 1: the middle band was written "40 => age <= 50".  SAS reads "=>"
          as ">=", so the test was "40 >= age AND age <= 50" and every
          age from 41 to 50 fell through to missing.
   FIX 2: a missing AGE compares lower than any number, so it used to be
          coded as band 0; it is now tested first and left missing. */
data fitness2;
    set fitness;
    if missing(age)         then age_cat = .;
    else if age lt 40       then age_cat = 0;
    else if 40 <= age <= 50 then age_cat = 1;
    else                         age_cat = 2;
run;

/* Indicator variables for bands 1 and 2; band 0 is the reference. */
data fitness2;
    set fitness2;
    if age_cat=1 then dummy1=1; else dummy1=0;
    if age_cat=2 then dummy2=1; else dummy2=0;
run;

proc reg data=fitness2;
    model oxygen_consumption=performance dummy1 dummy2;
    agedum: test dummy1, dummy2;   /* joint test of the two age indicators */
    title 'Multiple Linear Regression fitness Data';
run;
quit;

/* FIX: this remark was written as "* / ... * /" (without the spaces),
   which SAS reads as the start of a "*" comment statement running to the
   next semicolon, so it swallowed the DATA statement that follows. */
/* ...but why only 2? */

/* dummy example (From Introduction to SAS. UCLA: Academic Technology Services,
   Statistical Consulting Group.
http://www.ats.ucla.edu/stat/sas/faq/dummy.htm) */

/* Nine observations in three groups of three. */
DATA dummy;
   INPUT id group score;
   CARDS;
1 1 48
2 1 49
3 1 50
4 2 17
5 2 20
6 2 23
7 3 28
8 3 30
9 3 32
;
RUN;

PROC MEANS DATA=dummy; /* overall mean */
   VAR score;
RUN;

PROC GLM DATA=dummy; /* run standard ANOVA: group is important */
   CLASS group;
   MODEL score = group;
   MEANS group;
RUN;
QUIT; /* end the interactive procedure, as the other regression steps in this file do */

DATA dummy2; /* create dummy variables using 3 indicators for group */
   SET dummy;
   IF (group = 1) THEN group1 = 1; ELSE group1 = 0;
   IF (group = 2) THEN group2 = 1; ELSE group2 = 0;
   IF (group = 3) THEN group3 = 1; ELSE group3 = 0;
RUN;

PROC REG DATA=dummy2; /* run proc reg using 2-level dummy variables */
   MODEL score = group1 group2;
RUN;
QUIT;
/* same as ANOVA */

/* group 3 omitted because referent (i.e. intercept, when other groups set to 0):
      parameter est for intercept (30) = group 3 mean from PROC MEANS
      parameter estimate for group 1 = mean of group1 - mean of group3 (49-30=19)
      parameter estimate for group 2 = mean of group2 - mean of group3 (20-30=-10)
*/

/* Ypred for group1 = 30 + 1 * 19 + 0 * -10 = 49
   Ypred for group2 = 30 + 0 * 19 + 1 * -10 = 20
   Ypred for group3 = 30 + 0 * 19 + 0 * -10 = 30

   coding (group1 group2):
   Ypred for group1: 1 0
   Ypred for group2: 0 1
   Ypred for group3: 0 0
*/

/* note same F as from ANOVA (ANOVA and REG really essentially the same)

   why did we leave out the third indicator variable? Because it is the
   intercept. group3 is the referent category = the category when the other
   variables are zero = intercept

   see that the parameter estimate for the intercept (30) is the same as the
   mean for group 3 from proc means

   see that other parameter estimates also related to the mean of the
   referent category, e.g.
      parameter estimate for group 1 = mean of group1 - mean of group3 (49-30=19)
      parameter estimate for group 2 = mean of group2 - mean of group3 (20-30=-10)

   in summary
      Ypred for group1 = 30 + 1 * 19 + 0 * -10 = 49
      Ypred for group2 = 30 + 0 * 19 + 1 * -10 = 20
      Ypred for group3 = 30 + 0 * 19 + 0 * -10 = 30

   In procs you will learn later, e.g.
LOGISTIC, GENMOD (for Poisson) SAS has automated options to do dummy coding
   where you essentially just put in coefficients for comparisons,
   so the above three just in terms of coefficients would be
      Ypred for group1: 1 0
      Ypred for group2: 0 1
      Ypred for group3: 0 0
*/

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 14*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* Fitness data, re-created here for the Chapter 14 demonstrations.
   One subject per data line, read with list input. */
data fitness;
   length Name $ 9;
   input Name $ Gender $ Runtime Age Weight Oxygen_Consumption
         Run_Pulse Rest_Pulse Maximum_Pulse Performance;
   datalines;
Donna F 8.17 42 68.15 59.57 166 40 172 14
Gracie F 8.63 38 81.87 60.06 170 48 186 13
Luanne F 8.65 43 85.84 54.3 156 45 168 13
Mimi F 8.92 50 70.87 54.63 146 48 155 11
Chris M 8.95 49 81.42 49.16 180 44 185 11
Allen M 9.22 38 89.02 49.87 178 55 180 12
Nancy F 9.4 49 76.32 48.67 186 56 188 10
Patty F 9.63 52 76.32 45.44 164 48 166 10
Suzanne F 9.93 57 59.08 50.55 148 49 155 9
Teresa F 10 51 77.91 46.67 162 48 168 9
Bob M 10.07 40 75.07 45.31 185 62 185 9
Harriett F 10.08 49 73.37 50.39 168 67 168 9
Jane F 10.13 44 73.03 50.54 168 45 168 9
Harold M 10.25 48 91.63 46.77 162 48 164 9
Sammy M 10.33 54 83.12 51.85 166 50 170 8
Buffy F 10.47 52 73.71 45.79 186 59 188 8
Trent M 10.5 52 82.78 47.47 170 53 172 8
Jackie F 10.6 47 79.15 47.27 162 47 164 8
Ralph M 10.85 43 81.19 49.09 162 64 170 7
Jack M 10.95 51 69.63 40.84 168 57 172 7
Annie F 11.08 51 67.25 45.12 172 48 172 7
Kate F 11.12 45 66.45 44.75 176 51 176 7
Carl M 11.17 54 79.38 46.08 156 62 165 7
Don M 11.37 44 89.47 44.61 178 62 182 6
Effie F 11.5 48 61.24 47.92 170 52 176 6
George M 11.63 47 77.45 44.81 176 58 176 6
Iris F 11.95 40 75.98 45.68 176 70 180 5
Mark M 12.63 57 73.37 39.41 174 58 176 4
Steve M 12.88 54 91.63 39.2 168 44 172 4
Vaughn M 13.08 44 81.42 39.44 174 63 176 2
William M 14.03 45 87.66 37.39 186 56 192 0
;
run;

/* DEMONSTRATION: RESIDUAL PLOTS FOR FITNESS DATA */
options ps=50 ls=97;
goptions reset=all fontres=presentation ftext=swissb htext=1.5;

proc reg data=fitness;
   PREDICT: model oxygen_consumption = runtime age run_pulse maximum_pulse;
   /* residuals against the predicted values and against each predictor */
   plot r.*(p. runtime age run_pulse maximum_pulse);
   /* studentized residuals by observation number, which identifies the
      observation; reference lines drawn at +/-2 and +/-3 */
   plot student.*obs. / vref=3 2 -2 -3
                        haxis=0 to 32 by 1;
   /* nqq is another name for the normal probability plot */
   plot student.*nqq.;
   symbol v=dot;
   title 'PREDICT Model - Plots of Diagnostic Statistics';
run;
quit;

/* DEMONSTRATION: INFLUENTIAL OBSERVATIONS FITNESS DATA */
goptions reset=all;

proc reg data=fitness;
   /* R requests the residual analysis, INFLUENCE the influence statistics */
   PREDICT: model oxygen_consumption = runtime age run_pulse maximum_pulse
                  / r influence;
   id name; /* lets us identify the outlier by name */
   /* Write the outlier statistics of interest to a data set so that SAS can
      screen them for us: the studentized residual is saved as rstud and
      Cook's D as cooksd. */
   output out=ck4outliers rstudent=rstud cookd=cooksd;
   title;
run;
quit;

/* MACRO FOR OUTLIERS */
/* set the values of these macro variables, */
/* based on your data and model.
*/
%let numparms=5;   /* number of predictor variables plus one; not referenced by the steps in this section */
%let numobs=31;    /* number of observations */
%let idvars=name;  /* identification variable(s) to print */

/* Keep only the observations that exceed at least one suggested cutoff:
   absolute studentized residual above 3, or Cook's D above 4/numobs. */
data influential;
   set ck4outliers;
   cutcookd = 4/&numobs;
   rstud_i  = (abs(rstud) > 3);      /* 1 when the residual cutoff is exceeded */
   cookd_i  = (cooksd > cutcookd);   /* 1 when the Cook's D cutoff is exceeded */
   sum_i    = rstud_i + cookd_i;     /* number of cutoffs exceeded */
   if sum_i > 0;                     /* subsetting IF: drop unflagged rows */
run;

/* then print out the list of influential observations */
proc print data=influential;
   var sum_i &idvars cooksd rstud cutcookd cookd_i rstud_i;
   title 'Observations that Exceed Suggested Cutoffs';
run;

/* DEMONSTRATION: COLLINEARITY IN FITNESS DATA */
proc reg data=fitness;
   FULLMODEL: model oxygen_consumption = performance runtime age weight
                    run_pulse rest_pulse maximum_pulse
                    / vif; /* collinearity diagnostic */
   title 'Collinearity -- Full Model';
run;
quit;

/* remove performance */
proc reg data=fitness;
   NOPERF: model oxygen_consumption = runtime age weight
                 run_pulse rest_pulse maximum_pulse
                 / vif;
   title 'Collinearity -- Performance Removed';
run;
quit;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/*LOGISTIC*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* The variable Low is used to determine whether the subject is a case
   (Low=1, low-birth-weight baby) or a control (Low=0, normal-weight baby).
   The dummy time variable Time takes the value 1 for cases and 2 for controls.
*/
/* from http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_phreg_sect036.htm */

/* Low-birth-weight data. Each record is seven values
   (id Age Low LWT Smoke HT UI); the trailing @@ lets several records share
   one data line. Time is derived as 2-Low. */
data LBW;
   input id Age Low LWT Smoke HT UI @@;
   Time = 2 - Low;
   datalines;
25 16 1 130 0 0 0   143 16 0 110 0 0 0   166 16 0 112 0 0 0
167 16 0 135 1 0 0   189 16 0 135 1 0 0   206 16 0 170 0 0 0
216 16 0 95 0 0 0   37 17 1 130 1 0 1   45 17 1 110 1 0 0
68 17 1 120 1 0 0   71 17 1 120 0 0 0   83 17 1 142 0 1 0
93 17 0 103 0 0 0   113 17 0 122 1 0 0   116 17 0 113 0 0 0
117 17 0 113 0 0 0   147 17 0 119 0 0 0   148 17 0 119 0 0 0
180 17 0 120 1 0 0   49 18 1 148 0 0 0   50 18 1 110 1 0 0
89 18 0 107 1 0 1   100 18 0 100 1 0 0   101 18 0 100 1 0 0
132 18 0 90 1 0 1   133 18 0 90 1 0 1   168 18 0 229 0 0 0
205 18 0 120 1 0 0   208 18 0 120 0 0 0   23 19 1 91 1 0 1
33 19 1 102 0 0 0   34 19 1 112 1 0 1   85 19 0 182 0 0 1
96 19 0 95 0 0 0   97 19 0 150 0 0 0   124 19 0 138 1 0 0
129 19 0 189 0 0 0   135 19 0 132 0 0 0   142 19 0 115 0 0 0
181 19 0 105 0 0 0   187 19 0 235 1 1 0   192 19 0 147 1 0 0
193 19 0 147 1 0 0   197 19 0 184 1 1 0   224 19 0 120 1 0 0
27 20 1 150 1 0 0   31 20 1 125 0 0 1   40 20 1 120 1 0 0
44 20 1 80 1 0 1   47 20 1 109 0 0 0   51 20 1 121 1 0 1
60 20 1 122 1 0 0   76 20 1 105 0 0 0   87 20 0 105 1 0 0
104 20 0 120 0 0 1   146 20 0 103 0 0 0   155 20 0 169 0 0 1
160 20 0 141 0 0 1   172 20 0 121 1 0 0   177 20 0 127 0 0 0
201 20 0 120 0 0 0   211 20 0 170 1 0 0   217 20 0 158 0 0 0
20 21 1 165 1 1 0   28 21 1 200 0 0 1   30 21 1 103 0 0 0
52 21 1 100 0 0 0   84 21 1 130 1 1 0   88 21 0 108 1 0 1
91 21 0 124 0 0 0   128 21 0 185 1 0 0   131 21 0 160 0 0 0
144 21 0 110 1 0 1   186 21 0 134 0 0 0   219 21 0 115 0 0 0
42 22 1 130 1 0 1   67 22 1 130 1 0 0   92 22 0 118 0 0 0
98 22 0 95 0 1 0   137 22 0 85 1 0 0   138 22 0 120 0 1 0
140 22 0 130 1 0 0   161 22 0 158 0 0 0   162 22 0 112 1 0 0
174 22 0 131 0 0 0   184 22 0 125 0 0 0   204 22 0 169 0 0 0
220 22 0 129 0 0 0   17 23 1 97 0 0 1   59 23 1 187 1 0 0
63 23 1 120 0 0 0   69 23 1 110 1 0 0   82 23 1 94 1 0 0
130 23 0 130 0 0 0   139 23 0 128 0 0 0   149 23 0 119 0 0 0
164 23 0 115 1 0 0   173 23 0 190 0 0 0   179 23 0 123 0 0 0
182 23 0 130 0 0 0   200 23 0 110 0 0 0   18 24 1 128 0 0 0
19 24 1 132 0 1 0   29 24 1 155 1 0 0   36 24 1 138 0 0 0
61 24 1 105 1 0 0   118 24 0 90 1 0 0   136 24 0 115 0 0 0
150 24 0 110 0 0 0   156 24 0 115 0 0 0   185 24 0 133 0 0 0
196 24 0 110 0 0 0   199 24 0 110 0 0 0   225 24 0 116 0 0 0
13 25 1 105 0 1 0   15 25 1 85 0 0 1   24 25 1 115 0 0 0
26 25 1 92 1 0 0   32 25 1 89 0 0 0   46 25 1 105 0 0 0
103 25 0 118 1 0 0   111 25 0 120 0 0 1   120 25 0 155 0 0 0
121 25 0 125 0 0 0   169 25 0 140 0 0 0   188 25 0 95 1 0 1
202 25 0 241 0 1 0   215 25 0 120 0 0 0   221 25 0 130 0 0 0
35 26 1 117 1 0 0   54 26 1 96 0 0 0   75 26 1 154 0 1 0
77 26 1 190 1 0 0   95 26 0 113 1 0 0   115 26 0 168 1 0 0
154 26 0 133 1 0 0   218 26 0 160 0 0 0   16 27 1 150 0 0 0
43 27 1 130 0 0 1   125 27 0 124 1 0 0   4 28 1 120 1 0 1
79 28 1 95 1 0 0   105 28 0 120 1 0 0   109 28 0 120 0 0 0
112 28 0 167 0 0 0   151 28 0 140 0 0 0   159 28 0 250 1 0 0
212 28 0 134 0 0 0   214 28 0 130 0 0 0   10 29 1 130 0 0 1
94 29 0 123 1 0 0   114 29 0 150 0 0 0   123 29 0 140 1 0 0
190 29 0 135 0 0 0   191 29 0 154 0 0 0   209 29 0 130 1 0 0
65 30 1 142 1 0 0   99 30 0 107 0 0 1   141 30 0 95 1 0 0
145 30 0 153 0 0 0   176 30 0 110 0 0 0   195 30 0 137 0 0 0
203 30 0 112 0 0 0   56 31 1 102 1 0 0   107 31 0 100 0 0 1
126 31 0 215 1 0 0   163 31 0 150 1 0 0   222 31 0 120 0 0 0
22 32 1 105 1 0 0   106 32 0 121 0 0 0   134 32 0 132 0 0 0
170 32 0 134 1 0 0   175 32 0 170 0 0 0   207 32 0 186 0 0 0
;
run;

/* Summary statistics of Low within each level of Smoke. */
proc means data=lbw;
   class smoke;
   var low;
run;

/* Logistic regression of Low on Smoke, run with the DESCENDING option and
   the RL model option. */
proc logistic data=lbw descending;
   model low = smoke / rl;
run;
quit;