/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 2*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* Demonstration using NY SPARCS AMI Dx's 1994 - 2004*/

/** compare ages upstate and nyc ami admissions with ttest **/

/* Assign the MYLIB libref; the path is left blank in this script and must be filled in */
libname mylib " ";

/* Two-sample t-test: compare AGE between the groups defined by NYC */
title 'Comparing ages nyc and upstate ami patients ';
proc ttest data = mylib.demo_1;
   class nyc;   /* grouping variable that defines the groups being compared */
   var   age;   /* continuous variable being analyzed */
run;


/* univariate analysis of age */

/* Descriptive statistics for AGE plus two diagnostic graphics */
proc univariate data = mylib.demo_1;
   var age;
   histogram;   /* histogram of the VAR variable */
   probplot;    /* probability plot of the VAR variable */
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 3*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* A DEMONSTRATION OF SIMPLE COLUMN INPUT FROM THE EDITOR WINDOW */

/* Column input: every variable is read from a fixed range of columns.
   The step writes WORK.PATIENTS. */
data patients;
   input lastname $  1-10
         fname    $ 12-21
         ssn        23-31
         status   $ 33-43;
   datalines;   /* older SAS code uses CARDS instead of DATALINES */
Green      Samual     888888888 Discharged
Brennon    Carol      123456789 Discharged
Wang       Robert     999999999 Inpatient
Randolph   Virginia   987654321 Inpatient
;
run;

/* List the rows that were just read */
proc print data = patients;
run;

/**********************************************************************
CREATE SAS FILE FOR CALIFORNIA ACUTE MI FILE
**********************************************************************/

/* Read eleven numeric fields per record with list input from the external text file */
data ami;
   infile "my-file-path\Hert95.txt" lrecl = 999;
   input FACNUM AGEYRS SEX RACE ZIP LOS
         DISP PMTSOR DRG APRSEV RLN;
run;

/* Describe the variables of WORK.AMI */
proc contents data = ami;
run;

/* Show only the first ten observations */
proc print data = ami (obs = 10);
run;


/**********************************************************************
READING SPARCS DATA: Date, Age, Diagnosis
**********************************************************************/

/* Read three fixed-position fields (date, age, principal diagnosis) from a SPARCS record.
   The INFILE path is left blank in this script and must be filled in. */
DATA sparcs_1;
   INFILE ''  LRECl=450;   /* add OBS=100 here to read only the first 100 records */
   INPUT
      @18   DATE   yymmn6.
      @44   AGE    3.
      @71   PDX    $CHAR6.
   ;   /* terminates INPUT; without it RUN would be read as a variable name */
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 4*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/


/* creating fitness data set */

/* Build WORK.FITNESS with list input from the in-stream records that follow */
data fitness;
   length Name $ 9;   /* declare Name as a 9-character variable before it is read */
   input Name   $
         Gender $
         Runtime
         Age
         Weight
         Oxygen_Consumption
         Run_Pulse
         Rest_Pulse
         Maximum_Pulse
         Performance;
   datalines;
Donna    F  8.17  42  68.15  59.57   166   40   172  14
Gracie   F  8.63  38  81.87  60.06   170   48   186  13
Luanne   F  8.65  43  85.84  54.3    156   45   168  13
Mimi     F  8.92  50  70.87  54.63   146   48   155  11
Chris    M  8.95  49  81.42  49.16   180   44   185  11
Allen    M  9.22  38  89.02  49.87   178   55   180  12
Nancy    F  9.4   49  76.32  48.67   186   56   188  10
Patty    F  9.63  52  76.32  45.44   164   48   166  10
Suzanne  F  9.93  57  59.08  50.55   148   49   155   9
Teresa   F 10     51  77.91  46.67   162   48   168   9
Bob      M 10.07  40  75.07  45.31   185   62   185   9
Harriett F 10.08  49  73.37  50.39   168   67   168   9
Jane     F 10.13  44  73.03  50.54   168   45   168   9
Harold   M 10.25  48  91.63  46.77   162   48   164   9
Sammy    M 10.33  54  83.12  51.85   166   50   170   8
Buffy    F 10.47  52  73.71  45.79   186   59   188   8
Trent    M 10.5   52  82.78  47.47   170   53   172   8
Jackie   F 10.6   47  79.15  47.27   162   47   164   8
Ralph    M 10.85  43  81.19  49.09   162   64   170   7
Jack     M 10.95  51  69.63  40.84   168   57   172   7
Annie    F 11.08  51  67.25  45.12   172   48   172   7
Kate     F 11.12  45  66.45  44.75   176   51   176   7
Carl     M 11.17  54  79.38  46.08   156   62   165   7
Don      M 11.37  44  89.47  44.61   178   62   182   6
Effie    F 11.5   48  61.24  47.92   170   52   176   6
George   M 11.63  47  77.45  44.81   176   58   176   6
Iris     F 11.95  40  75.98  45.68   176   70   180   5
Mark     M 12.63  57  73.37  39.41   174   58   176   4
Steve    M 12.88  54  91.63  39.2    168   44   172   4
Vaughn   M 13.08  44  81.42  39.44   174   63   176   2
William  M 14.03  45  87.66  37.39   186   56   192   0
;
run;

/* demonstration of proc print */

/* PROC PRINT with row subsetting, a variable list and a column total */
proc print data = fitness (obs = 5) noobs;
   where gender = 'F';     /* keep only rows with gender 'F' */
   var   name age weight;  /* columns requested explicitly */
   sum   runtime;          /* request a total for runtime */
run;

/* demonstration of proc sort */

/* Sort FITNESS by gender (ascending) and write the result to WORK.SORTDAT */
proc sort data = fitness
          out  = sortdat;
   by gender;
run;


/* Print the sorted copy in BY groups, with runtime summed */
proc print data = sortdat;
   by  gender;    /* same variable the data were sorted by */
   sum runtime;   /* numeric variable to total */
run;


/* demonstration titles and footnotes */

/* Three title lines and two footnote lines, then the same BY-group listing */
title  'Printout of Fitness Data';
title2 'Sorted by Gender';
title3 'With Sums for Runtime';
footnote  'Analyst: DiMaggio';
footnote2 'Conducted December 2008';

proc print data = sortdat;   /* uses the sorted output data set */
   by  gender;
   sum runtime;
run;

/* demonstration format */
/* Listing without any FORMAT statement in effect */
proc print data = fitness (obs = 5) noobs;
   where gender = 'F';
   var   name age weight;
   sum   runtime;
   /* format name $UPCASE10.;   <- left disabled, as in the original */
run;

/* demonstration proc format */
/* User-defined numeric format: 0-40 displays as 'young', 41-100 as 'old' */
proc format;
   value old_runner
         0  - 40  = 'young'
         41 - 100 = 'old';
run;

/* Same listing with AGE displayed through OLD_RUNNER. */
proc print data = fitness (obs = 5) noobs;
   where  gender = 'F';
   var    name age weight;
   sum    runtime;
   format age old_runner.;
run;


/* demonstration ODS */

/* Open the HTML destination; output produced until ODS HTML CLOSE goes to this file */
ods html file = 'C:\Users\Charles DiMaggio\Desktop\example.html' style = sasweb;

title    'RUNNER DATA';
title2   '12 December 2008';
footnote 'Analyst: Charles DiMaggio';

proc print data = fitness (obs = 5) noobs;
   where  gender = 'F';
   var    name age weight;
   sum    runtime;
   format age old_runner.;
run;

/* Close the HTML destination */
ods html close;


/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 5*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

libname ch5 ' '; /* refer to the location of your chapter 5 data sets */

/* demo creating new data set using Set */

/* Describe the input file. The libref must be CH5, the only library assigned
   for this chapter; the original CH4 libref is never defined in this script. */
proc contents data=ch5.ch5demo1;
run;

/* Copy CH5.CH5DEMO1 into WORK.NYC_DEATHS and add a total-deaths column.
   The + operator returns missing if either operand is missing. */
data nyc_deaths;
   set ch5.ch5demo1;
   tot_911_deaths = female_911_deaths + male_911_deaths;
run; /* a DATA step prints nothing; check the log window */

/* demo concatenating new data set using Set */

/* Concatenate: rows of NYC_DEATHS (built above) followed by rows of CH5.CH5DEMO2,
   written to WORK.TOT_DEATHS */
data tot_deaths;
   set nyc_deaths
       ch5.ch5demo2;
run;

/* demo merging data sets using Merge-By */

/* Describe the file that will be merged in */
proc contents data = ch5.ch5demo3;
run;

/* First match-merge by ZIP. This step comes BEFORE the PROC SORT steps below,
   so no visible step has sorted either input by zip at this point.
   NOTE(review): presumably kept to show why sorting is required - confirm. */
data pop_deaths;
   merge tot_deaths
         ch5.ch5demo3;
   by zip;
run;

/* Sort both inputs by the merge key */
proc sort data = tot_deaths;
   by zip;
run;

proc sort data = ch5.ch5demo3;
   by zip;
run;

/* Second match-merge, keeping only three variables in the output */
data pop_deaths (keep = zip tot_911_deaths mhi);
   merge tot_deaths
         ch5.ch5demo3;
   by zip;
run;


/* IF-THEN-ELSE Demonstration */

/* Read five fixed-position fields from the SPARCS text file into WORK.SPARCS */
data SPARCS;

   /* MISSOVER: short records leave remaining variables missing;
      LRECL=452 sets the record length; OBS=15000 stops after 15,000 records */
   infile 'my-file-path\sparcs.TXT' MISSOVER LRECl=452 OBS=15000;

   input
      @18   DATE    yymmn6.    /* informat based on the SPARCS raw file */
      @44   AGE     3.
      @71   PDX     $CHAR6.
      @331  ECODE   $CHAR6.
      @343  DISPO   $CHAR2.
   ;
run;

/* Confirm the file was read in */
proc contents data = SPARCS;
run;


/* One-way frequency of PDX, most frequent value first */
proc freq data = sparcs order = freq;
   tables pdx;
run;


/********************MORTALITY **************************/
/* IF-THEN-ELSE: add three 0/1 indicator variables to WORK.SPARCS.
   The step reads and rewrites SPARCS (the data set created above) so that the
   PROC PRINT at the end, which lists these variables from SPARCS, can find them.
   The original read and wrote NYCSPARCS04, which this script never creates. */
data sparcs;
   set sparcs;

   /********************MORTALITY **************************/
   if DISPO = '20' then death = 1;
                   else death = 0;

   /*********************** SUBSTANCE ABUSE *****************/
   /* flag principal diagnoses found in this list of ICD9 codes */
   if pdx in
   ('2910','2911','2912','2913','2914','2915','29181','29189','2919','2920','29211','29212',
    '2922','29281','29282','29283','29284','29289','2929',
    '30300','30301','30302','30303','30390','30391','30392','30393','30400',
    '30401','30402','30403','30410','30411','30412','30413','30420',
    '30421','30422','30423','30430','30431','30432','30433','30440',
    '30441','30442','30443','30450','30451','30452','30453','30460',
    '30461','30462','30463','30470','30471','30472','30473','30480',
    '30481','30482','30483','30490','30491','30492','30493','30500',
    '30501','30502','30503','3051','30520','30521','30522','30523',
    '30530','30531','30532','30533','30540','30541','30542','30543',
    '30550','30551','30552','30553','30560','30561','30562','30563',
    '30570','30571','30572','30573','30580','30581','30582','30583',
    '30590','30591','30592','30593')
   then subst_ab = 1;
   else subst_ab = 0;

   /************ CHILD ABUSE ***********************************/
   /* both conditions must hold: age of 10 or less AND a listed external-cause code.
      NOTE(review): a missing AGE also satisfies LE 10 in SAS - confirm this is acceptable */
   if age LE 10 AND ecode in
   ('E9670','E9671','E9672','E9673','E9674','E9675','E9676','E9677','E9678','E9679',
    'E9684','E9040','E9041','E9042','V1541','V1542','V1549','V6121')
   then child_ab = 1;
   else child_ab = 0;

run;

/* List the three indicators with column totals */
proc print data=sparcs;
   var death subst_ab child_ab;
   sum death subst_ab child_ab;
run;


/* DATE Demonstration */

*libname ch5 ' ';

/* Same t-test run twice, subsetting on DATE with a SAS date constant.
   NOTE(review): rows dated exactly 01OCT2001 satisfy neither WHERE clause
   (one uses <, the other >) - confirm that is intended. */

title 'Comparing ages nyc and upstate ami patients: pre 9/11 ';
proc ttest data = ch5.ch5demo4;
   where date < '1oct2001'd;
   class nyc;
   var   age;
run;

title 'Comparing ages nyc and upstate ami patients: post 9/11';
proc ttest data = ch5.ch5demo4;
   where date > '1oct2001'd;
   class nyc;
   var   age;
run;
 
  
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 6*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* Assign the chapter 6 library; fill in the path to your data */
libname ch6 ' ';

/* Structure of the demo file */
proc contents data = ch6.ch6demo1;
run;

/* First ten rows */
proc print data = ch6.ch6demo1 (obs = 10);
run;

/* Summary statistics for AGEYRS within each level of SEX, one decimal place */
proc means data = ch6.ch6demo1 maxdec = 1;
   class sex;
   var   ageyrs;
run;

/* Two-way table with measures of association, unformatted */
proc freq data = ch6.ch6demo1;
   tables sex * race2 / measures;
run;

/* Labels for the codes 1 and 2 */
proc format;
   value gndr
         1 = "male"
         2 = "female";
run;

/* Same table with SEX displayed through the GNDR format */
proc freq data = ch6.ch6demo1;
   tables sex * race2 / measures;
   format sex gndr.;
run;


/* proc tabulate: columns */
/* One class variable in a one-dimensional table */
proc tabulate data = ch6.ch6demo1;
   class drg;
   table drg;
run;

/* PROC FREQ on the same variable, ordered by descending frequency */
proc freq data = ch6.ch6demo1 order = freq;
   tables drg;
run;

/* proc tabulate: one dimension plus the ALL total */
proc tabulate data = ch6.ch6demo1;
   class drg;
   table drg all;
run;

/* proc tabulate: two dimensions (DRG by RACE2), each with ALL totals */
proc tabulate data = ch6.ch6demo1;
   class drg race2;
   table drg all,
         race2 all;
run;


/* analysis variable AGEYRS crossed with RACE2; no statistic keyword is given */
proc tabulate data = ch6.ch6demo1;
   class drg race2;
   var   ageyrs;
   table drg,
         race2 * ageyrs;
run;


/* as above, explicitly requesting the MEAN of AGEYRS */
proc tabulate data = ch6.ch6demo1;
   class drg race2;
   var   ageyrs;
   table drg,
         race2 * ageyrs * mean;
run;




/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* tabulate syntax (Chapter 6)*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/*create output data set for surveillance from tabulate procedure */
/* Tabulate three numeric diagnosis variables by MONTH for rows with nyc=1,
   capturing the table as WORK.SPARCS through ODS OUTPUT */
proc tabulate data = ind.sparcs;          /* individual-level data set */
   where nyc = 1;                         /* subset to a particular geographic area */
   class month;                           /* surveillance period; per the original note,
                                             created earlier with the MONTH() function */
   var   injury ami child_ab;             /* diagnoses to monitor; must be numeric */
   table month,
         injury ami child_ab;             /* per the original note, the default statistic is the sum */
   ods output table = sparcs;             /* create an output data set from the table */
run;

/* Clean up the data set: drop three bookkeeping variables */
data sparcs;
   set sparcs;
   drop _type_ _page_ _table_;
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 7*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* Assign the chapter 7 library; fill in the path to your data */
libname ch7 ' ';

/* Demonstration PROC GCHART */

proc contents data = ch7.ch7demo1;
run;

/* Horizontal bar chart of CASE_CONTROL */
proc gchart data = ch7.ch7demo1;
   hbar case_control;
run;
quit;

/* Vertical bar chart of the same variable */
proc gchart data = ch7.ch7demo1;
   vbar case_control;
run;
quit;

/* Make a WORK copy of the demo file and add CC, built from CASE_CONTROL with PUT.
   NOTE(review): the $8. format assumes CASE_CONTROL is character - confirm. */
data ch7demo1;
   set ch7.ch7demo1;
   cc = put(case_control, $8.);
run;

/* Chart the new CC variable. The charts must read WORK.CH7DEMO1, the data set
   created just above; the original pointed at CH6DEMO1, which does not contain CC
   and is never created in WORK by this script. */
PROC GCHART DATA = ch7demo1;
     HBAR cc;
RUN;

quit;

PROC GCHART DATA = ch7demo1;
     VBAR cc;
RUN;

quit;


/* Bars of SEX grouped by CASE_CONTROL */
proc gchart data = ch7.ch7demo1;
   vbar sex / groupvar = case_control;
run;

/* Bars of SEX with the pattern varying by midpoint */
proc gchart data = ch7.ch7demo1;
   vbar sex / patternid = midpoint;
run;

/* Both options together */
proc gchart data = ch7.ch7demo1;
   vbar sex / groupvar  = case_control
              patternid = midpoint;
run;

/* Plot of AMI_SUM (vertical) against DATE (horizontal) with current graphics settings */
proc gplot data = ch7.ch7demo2;
   plot ami_sum * date;
run;


/* Demonstration PROC GPLOT */

/*************   AMI Time Series  *************************/

/* Start from default graphics options */
goptions reset = all;
/* goptions cback=white device=win;   <- left disabled, as in the original */

/* Vertical axis: 0 to 4000 in steps of 1000, label rotated 90 degrees */
axis1 label = (h=1.5 a=90 f=arial c=black 'Number of AMI-related diagnoses')
      order = 0 to 4000 by 1000
      major = (h=2.0  c=black)
      minor = (h=1.0 c=black);

/* Horizontal axis: default tick order (order = 1 to 132 by 6 left disabled) */
axis2 label = (h=1.5 f=arial c=black 'Month and Year')
      major = (h=2.0  c=black)
      minor = (h=1.0 c=black);

symbol v='.' f=arial h=2 i=sm5 c=black;

title  h=3.0 c=black f=arial j=center
       'New York City AMI-Related Hospital Discharges';
title2 h=2.0 c=black f=arial j=center
       '1994 to 2004 By Month and Year';

proc gplot data = ch7.ch7demo2;
   /* href=15219 draws a vertical reference line at SAS date value 15219 (01SEP2001) */
   plot ami_sum * date / href  = 15219
                         vaxis = axis1
                         haxis = axis2;
   /* text placed at (59,60), given as percent (over, up) */
   note h=1.0 move=(59,60)pct 'September 2001';
run;
quit;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 8*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/


/* DEMONSTRATION: ONE-WAY TABLE*/

/* One-way frequency table of SES */
proc freq data = expend;
   tables ses;
run;

/* DEMONSTRATION: CROSSTABS*/

/* PROC FORMAT defines SPENDFMT; the FORMAT statement below applies it */
proc format;
   value spendfmt
         1 = "High Utilization"
         0 = "Low Utilization";
run;


/* Four one-way tables and two crosstabs in one TABLES statement.
   As the original notes, AGE may be a problematic categorical variable. */
proc freq data = expend;
   tables expenditure
          gender
          ses
          age
          gender * expenditure
          ses * expenditure;
   format expenditure spendfmt.;
run;

/* DEMONSTRATION: REORDERING VARIABLES */

/* first need to create a new data set that transforms a text variable into a numeric variable
so the sort order corresponds to the logical order */

/* Rewrite EXPEND in place, adding a numeric SES_level:
   1 for 'Low', 2 for 'Medium', 3 for 'High', 0 for any other SES value */
data expend;
   set expend;
   SES_level = 1 * (SES = 'Low')
             + 2 * (SES = 'Medium')
             + 3 * (SES = 'High');
run;

/* neat bit of Boolean logic, used instead of many IF-THEN statements:
a comparison enclosed in parentheses is a logical expression that returns the value 1 if it is true
and 0 if it is false. Here we tell SAS to look at each observation: if SES = 'Low' the first comparison is 1 (true),
otherwise it is 0 (false); the same is done for 'Medium' and 'High'. Each 1 or 0 is then multiplied by 1, 2 or 3 as
indicated and the products are added. This effectively transforms a text variable into an ordinal numeric variable */

/* use PROC FORMAT to create a user defined format */

/* Labels for the three numeric SES levels */
proc format;
   value sesfmt
         1 = 'Low SES'
         2 = 'Medium SES'
         3 = 'High SES';
run;

/* Crosstab of the recoded SES by expenditure, both shown through their formats */
proc freq data = expend;
   tables SES_level * expenditure;
   format SES_level   sesfmt.
          expenditure spendfmt.;
run;


/* expenditure chi square exercise:
   chi-square tests and expected counts, with column and overall percents suppressed */
proc freq data = expend;
   tables gender * expenditure / chisq expected nocol nopercent;
run;




/* DEMONSTRATION:  EXACT PEARSON CHI SQUARE TEST */

/* read in a data set */

/* Seven in-stream observations of two numeric variables */
data exact;
   input a b;
   datalines;
1 2
1 2
1 2
2 1
2 1
2 2
2 2
;
run;

/* Crosstab of A by B with the exact Pearson chi-square test */
proc freq data = exact;
   tables a * b;
   exact  pchi;
run;

/* DEMONSTRATION OF SPEARMAN CORRELATION STATISTIC */

/* MEASURES requests the measures of association; CL adds their confidence limits */
proc freq data = expend;
   tables SES_level * expenditure / chisq measures cl;
   format SES_level   sesfmt.
          expenditure spendfmt.;
run;

/* significance vs. association */

/* Copy EXPEND, then stack the original on the copy so that EXPEND2 holds
   every observation twice */
data expend1;
   set expend;
run;

data expend2;
   set expend expend1;
run;


/* Same table as the previous PROC FREQ, now on the doubled data.
   The FORMAT statement must name EXPENDITURE, the variable in the TABLES request;
   the original named PURCHASE, which no other step in this script references. */
proc freq data=expend2;
   tables SES_level*expenditure / chisq measures cl;
   format SES_level sesfmt. expenditure spendfmt.;
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 9*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* Odds Ratio */
/* 2x2 table; MEASURES on a 2x2 table requests the odds ratio.
   NLEVELS displays the number of levels of each variable. */
title 'odds ratio for association between previous preterm and lbw';
proc freq data = birth nlevels;
   tables prev_pretrm * low / nocol norow measures;
run;

/* Stratified Analysis: the same table within each level of UTERINE_IRR */
title  'odds ratio for association between previous preterm and lbw';
title2 'controlling for uterine irritibility';
proc freq data = birth nlevels;
   tables uterine_irr * prev_pretrm * low / measures;
run;

/* Contingency Table Analysis:
   BDT is the Tarone adjustment for small sample size;
   EXACT is preferred for small samples, and COMOR gives the CI for the exact OR */
title 'Contingency Table Analysis';
proc freq data = birth nlevels;
   tables uterine_irr * prev_pretrm * low / all bdt;
   exact  or comor;
run;

/* 2x2 analysis */
/* Enter a 2x2 table as four cell counts: one row per exposure/disease combination */
data contingency;
   input exposure $
         disease  $
         count;
   cards;
   unexp nodis  16
   unexp dis    48
   exp    nodis 40
   exp    dis   20
   ;

/* WEIGHT makes each row stand for COUNT observations */
proc freq data = contingency;
   weight count;
   tables exposure * disease;
run;


/* stratified 2x2 table */
/* Stratified 2x2 table entered as cell counts.
   The trailing @@ holds the line so several observations are read from each record. */
data stress;
   input confounder $
         exposure   $
         outcome    $
         count      @@;
   cards;
 conf unexp  dis 50 conf  unexp  nodis  10
 conf exp dis 100 conf  exp nodis  90
 noconf unexp  dis 60 noconf  unexp  nodis 140
 noconf exp dis  10 noconf  exp nodis  50
   ;

/* ORDER=DATA keeps levels in the order they appear in the data */
proc freq data = stress order = data;
   weight count;
   tables confounder * exposure * outcome / all;
run;


/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 10*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* Assign the chapter 10 library; fill in the path to your data */
libname ch10 ' ';

/* Page options: no date and no page number */
options nodate nonumber;

/* OBS=10 limits the number of observations printed */
title ' list of observations';
proc print data = ch10.infants (obs = 10);
run;

/* Statistics keywords belong to the PROC statement, which ends at the semicolon */
title ' descriptive stats: length of stay';
proc means data = ch10.infants
           maxdec = 4
           n mean median stderr clm var;
   var los;
run;

/* Without a VAR statement all numeric variables would be analyzed */
title ' univariate stats: length of stay';
proc univariate data = ch10.infants;
   var los;
   id  date;             /* variable used to identify observations */
   histogram / normal;
   probplot;
run;

/* Sort CH10.INFANTS in place by DISPO, the group variable of the box plot */
proc sort data = ch10.infants;
   by dispo;
run;

symbol color = salmon;

/* Box plots of LOS for each DISPO group, with frame, outline and fill colors set */
title 'Boxplot';
proc boxplot data = ch10.infants;
   plot los * dispo / cframe   = vligb
                      cboxes   = dagr
                      cboxfill = ywh;
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 11*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* run to create data set */ 
data bp_drug;
  /* All three variables use list input. The original read BP with the informat 3.,
     which is formatted input: only three columns are read, so a value such as
     119.70095982 was stored as 119 and every decimal was lost. */
  input Drug Disease $ BP;
datalines;
1 A 119.70095982
1 A 121.36188924
1 A 119.69180851
1 A 119.60217745
1 A 120.96573695
1 A 119.18963686
1 A 120.04126616
1 A 120.6485107
1 A 121.39677755
1 A 121.2941861
2 A 134.70095982
2 A 136.36188924
2 A 134.69180851
2 A 134.60217745
2 A 135.96573695
2 A 134.18963686
2 A 135.04126616
2 A 135.6485107
2 A 136.39677755
2 A 136.2941861
3 A 139.17670991
3 A 139.7596925
3 A 139.61692792
3 A 139.47841248
3 A 140.32687732
3 A 139.19503074
3 A 139.82084261
3 A 139.33853642
3 A 139.18391356
3 A 141.16519705
4 A 151.0314732
4 A 148.45628664
4 A 150.90924814
4 A 150.36754897
4 A 150.40537109
4 A 150.27207554
4 A 148.84462813
4 A 148.98134213
4 A 148.67342047
4 A 151.22752311
1 B 159.47367321
1 B 157.8492618
1 B 159.71157707
1 B 161.08606542
1 B 157.88059214
1 B 160.70808134
1 B 158.93686029
1 B 159.59337556
1 B 161.04857471
1 B 162.4582518
2 B 149.44343695
2 B 149.51766786
2 B 151.67540302
2 B 148.43104013
2 B 150.04534684
2 B 149.53540532
2 B 149.21944044
2 B 149.29473811
2 B 149.80104295
2 B 150.2553889
3 B 140.84243239
3 B 139.80385608
3 B 140.01491297
3 B 140.84663178
3 B 139.47817427
3 B 139.34930573
3 B 140.06553297
3 B 139.90122485
3 B 140.48542888
3 B 139.48601375
4 B 129.32979505
4 B 130.1515735
4 B 130.44367458
4 B 129.75982193
4 B 129.05742185
4 B 130.10355508
4 B 130.70093917
4 B 130.88766817
4 B 131.63619676
4 B 129.8023949
1 C 126.23902904
1 C 125.75600563
1 C 125.87874742
1 C 125.04661693
1 C 122.97475788
1 C 123.59937382
1 C 125.16139122
1 C 124.73864001
1 C 123.42349457
1 C 125.38733391
2 C 125.69890209
2 C 125.56064272
2 C 123.83433943
2 C 124.6489834
2 C 123.29421225
2 C 125.24488558
2 C 123.753593
2 C 125.1478912
2 C 126.59647236
2 C 124.56548104
3 C 125.09941212
3 C 126.00783602
3 C 123.7285012
3 C 123.81351742
3 C 124.62018186
3 C 124.68909051
3 C 124.86764454
3 C 124.00137446
3 C 125.56161889
3 C 124.01149493
4 C 124.8987427
4 C 124.38445589
4 C 124.91319163
4 C 125.86522746
4 C 123.22694283
4 C 125.61937452
4 C 126.41012761
4 C 125.39699702
4 C 125.05360115
4 C 125.49297642
;
run;

/* I. One-Way ANOVA */

/* explore data */

/* Distribution of BP: histogram with a fitted normal curve, plus a probability plot */
proc univariate data = bp_drug;
   var BP;
   histogram / normal;
   probplot;
run;

/* Sort by the group variable used on the box plot's horizontal axis */
proc sort data = bp_drug;
   by drug;
run;

symbol color = salmon;

/* One box per level of DRUG */
title 'Boxplot Blood Pressure Drugs';
proc boxplot data = bp_drug;
   plot BP * Drug / cframe   = vligb
                    cboxes   = dagr
                    cboxfill = ywh;
run;

/* one-way ANOVA */

/* Line size and page size for listing output */
options ls = 75 ps = 45;

/* One-way ANOVA of BP on DRUG.
   HOVTEST asks for a homogeneity-of-variance test on the group means.
   OUTPUT writes data set CHECK, where R= names the residual variable (resid)
   and P= names the predicted-value variable (pred). */
title 'testing for equality of mean BP by Drug with GLM';
proc glm data = bp_drug;
   class  drug;
   model  BP = drug;
   means  drug / hovtest;
   output out = check
          r   = resid
          p   = pred;
run;
quit;   /* GLM is interactive and must be ended with QUIT */


/* now run gplot on the 'check' dataset created above*/
/* Axis definitions used by the residual plot below (HAXIS=/VAXIS= may be omitted
   if the defaults are acceptable) */
axis1 w=2 major=(w=2) minor=none offset=(10pct);
axis2 w=2 major=(w=2) minor=none;

/* Residuals against predicted values, with a reference line at zero.
   NOTE(review): no SYMBOL statement is issued here, so whatever SYMBOL1 settings
   are in effect from earlier steps in the session apply - confirm that is intended. */
title 'plot residuals vs predictors for drugs';
proc gplot data = check;
   plot resid * pred / haxis = axis1
                       vaxis = axis2
                       vref  = 0;
run;
quit;

/* Histogram and probability plot of the residuals.
   The NORMAL option on the PROC statement requests tests for normality.
   Disabled PROBPLOT options from the original: resid / mu=est sigma=est color=blue w=1 */
title;
proc univariate data = check normal;
   var resid;
   histogram resid / normal;
   probplot;
run;

/* compare means */ 
/* Same one-way model, now with Tukey-adjusted pairwise comparisons of the LS-means.
   PDIFF=ALL requests p-values for all pairwise differences.
   OUTPUT rewrites CHECK with residuals (resid) and predicted values (pred). */
title 'testing for equality of mean BP by Drug with GLM';
proc glm data = bp_drug;
   class   drug;
   model   BP = drug;
   lsmeans drug / pdiff = all adjust = tukey;
   output  out = check
           r   = resid
           p   = pred;
run;
quit;

/* II. Two-Way ANOVA */

/* means for combinations of disease and drug */
/* Mean, variance and standard deviation of BP for each disease-by-drug cell */
title 'Selected Descriptive Statistics for drug-disease combinations';
proc means data = bp_drug mean var std;
   class disease drug;
   var   BP;
run;

/* Means plot: BP (vertical) by drug (horizontal), one line per disease.
   Per the original note, the interpolation method supplies the s.e. bars. */
symbol  c=blue  w=2 interpol=std1mtj line=1;
symbol2 c=green w=2 interpol=std1mtj line=2;
symbol3 c=red   w=2 interpol=std1mtj line=3;

title 'Illustrating the Interaction Between Disease and Drug';
proc gplot data = bp_drug;
   plot BP * drug = disease;
run;
quit;

/* PROC GLM for ANOVA with interaction term */
/* Two-way ANOVA with both main effects and their interaction */
title  'Analyze the Effects of Drug and Disease';
title2 'Including Interaction';
proc glm data = bp_drug;
   class disease drug;
   model BP = disease drug disease*drug;   /* last term is the interaction */
run;
quit;

/* Tukey-adjusted comparisons among the disease-by-drug combinations */
title 'Multiple Comparisons Tests for Drug and Disease';
proc glm data = bp_drug;
   class   disease drug;
   model   BP = drug disease drug*disease;
   lsmeans disease*drug / adjust = tukey pdiff = all;
run;
quit;





/***************************************************************************************/
/*   SAMPLE GLM SYNTAX FOR FUTURE USE                                                  */
/***************************************************************************************/



/* Template: replace your.data, cat_var and cont_var with your own names */
options ls = 75 ps = 45;   /* line size and page size */

title 'testing for equality of means with GLM';
proc glm data = your.data;
   class cat_var;              /* categorical variable that groups the outcome */
   model cont_var = cat_var;   /* response variable = predictor(s) */

   /* MEANS computes the mean of the dependent variable for each level of the effect.
      HOVTEST is Levene's test for homogeneity (equality) of variances, one of the
      ANOVA assumptions (homoscedasticity). The null hypothesis is that the variances
      are equal, so you do not want to reject it: look for a large p-value under
      'Pr > F' in the output. */
   means cat_var / hovtest;

   /* PDIFF=ALL requests all pairwise p-values */
   lsmeans cat_var / pdiff = all adjust = tukey;

   /* CHECK receives residuals (R=) as resid and predicted values (P=) as pred */
   output out = check
          r   = resid
          p   = pred;
run;
quit;   /* GLM keeps running until QUIT is submitted */

/* Residual plot from the CHECK data set created above.
   The HAXIS=/VAXIS= options can be left out if the defaults are acceptable. */
axis1 w=2 major=(w=2) minor=none offset=(10pct);
axis2 w=2 major=(w=2) minor=none;

title 'plot residuals vs predictors for cereal';
proc gplot data = check;
   plot resid * pred / haxis = axis1
                       vaxis = axis2
                       vref  = 0;
run;
quit;


/* Histogram and probability plot of the residuals; the NORMAL option on the
   PROC statement requests tests for normality */
title;
proc univariate data = check normal;
   var resid;
   histogram / normal;
   probplot  / mu=est sigma=est color=blue w=1;
run;


/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 12*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* run to create the corr data set */
/* Build WORK.CORR from the in-stream records below.
   The values on those records appear to be separated by tab characters. List input
   splits on blanks by default, so EXPANDTABS is set to turn tabs into spaces before
   the line is scanned; it has no effect when the records contain spaces only. */
data corr;
infile cards expandtabs;
input age height outcome1 outcome2;
cards;
35	170	340	-340
45	165	341	-341
46	170	380	-380
16	156	390	-390
32	166	400	-400
16	157	410	-410
14	165	446	-446
16	152	455	-455
35	177	483	-483
33	158	510	-510
40	150	544	-544
28	165	550	-550
23	160	573	-573
52	150	577	-577
46	180	580	-580
29	173	600	-600
30	172	630	-630
21	163	655	-655
21	164	660	-660
20	189	662	-662
34	182	689	-689
43	184	690	-690
35	174	700	-700
39	177	720	-720
43	183	730	-730
37	175	765	-765
32	173	780	-780
24	173	790	-790
20	162	805	-805
25	180	810	-790
22	173	870	-710
25	171	945	-700
;
run;

/* example scatterplots  */

/* SYMBOL definitions are additive and persist for the session. The means plot earlier
   in this script sets interpol=std1mtj on SYMBOL1, which would turn these scatterplots
   into joined means with error bars. Cancel all SYMBOL definitions so that the raw
   points are drawn. */
goptions reset=symbol;

/* Three scatterplots from CORR; each PLOT request is vertical*horizontal */
proc gplot data=corr;
title1 'direct relationship?';
plot age*height;
run;
quit;


proc gplot data=corr;
title1 'indirect relationship?';
plot height*outcome2;
run;
quit;


proc gplot data=corr;
title1 'curvilinear relationship?';
plot age*outcome2;
run;
quit;



/* example correlations from corr data  */

/* Three PROC CORR runs, each correlating one VAR variable with one WITH variable */

proc corr data = corr rank;
   var  age;
   with height;
run;

proc corr data = corr rank;
   var  height;
   with outcome2;
run;

proc corr data = corr rank;
   var  age;
   with outcome2;
run;






/* Dow Jones and asthma.
   year   : calendar year
   dow    : yearly Dow Jones industrial average
   asthma : asthma hospitalizations per 10,000 0-17 year olds
   The in-stream records are TAB-separated. List input only treats blanks as
   delimiters, so EXPANDTABS is required for the values to be read correctly when
   the tabs are not converted to blanks by the editor (batch, EG, SAS Studio). */

data dow;
   infile datalines expandtabs;   /* turn tabs into blanks before INPUT parses the line */
   input year dow asthma;
   datalines;
1980	963.99	21
1981	875	22.2
1982	1066.54	25.7
1983	1258.64	23.7
1984	1211.57	25.5
1985	1546.67	25.4
1986	1895.95	27.3
1987	1938.83	25.9
1988	2168.57	28.5
1989	2753.2	27.9
1990	2633.66	28.4
1991	3168.83	30.6
1992	3301.11	30.8
1993	3754.09	25.4
1994	3834.44	26.8
1995	5117.12	32.7
1996	6448.27	29.8
1997	7908.25	32.4
1998	9181.4	24.4
1999	11497.12	28.2
2000	10786.85	29.6
;
run;

/* Scatterplot of the Dow Jones average (vertical axis) against the asthma
   hospitalization rate (horizontal axis), drawn with star markers of height 2. */
symbol1 h=2.0 v=star;
proc gplot data=dow;
   plot dow*asthma;
run;
quit;   /* GPLOT supports run-groups; end it explicitly like the other GPLOT steps in this file */

/* Correlation between the Dow Jones average and the asthma hospitalization rate */
proc corr data=dow rank;
   var  dow;
   with asthma;
run;

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 13*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* Build the FITNESS data set (31 subjects) from in-stream records.
   Name and Gender are character; every other field is numeric.
   The LENGTH statement reserves 9 bytes for Name so it is not cut to the
   8-byte default that list input gives character variables. */

data fitness;
   length Name $ 9;
   input Name $
         Gender $
         Runtime
         Age
         Weight
         Oxygen_Consumption
         Run_Pulse
         Rest_Pulse
         Maximum_Pulse
         Performance;
   datalines;
Donna    F  8.17  42  68.15  59.57   166   40   172  14
Gracie   F  8.63  38  81.87  60.06   170   48   186  13
Luanne   F  8.65  43  85.84  54.3    156   45   168  13
Mimi     F  8.92  50  70.87  54.63   146   48   155  11
Chris    M  8.95  49  81.42  49.16   180   44   185  11
Allen    M  9.22  38  89.02  49.87   178   55   180  12
Nancy    F  9.4   49  76.32  48.67   186   56   188  10
Patty    F  9.63  52  76.32  45.44   164   48   166  10
Suzanne  F  9.93  57  59.08  50.55   148   49   155   9
Teresa   F 10     51  77.91  46.67   162   48   168   9
Bob      M 10.07  40  75.07  45.31   185   62   185   9
Harriett F 10.08  49  73.37  50.39   168   67   168   9
Jane     F 10.13  44  73.03  50.54   168   45   168   9
Harold   M 10.25  48  91.63  46.77   162   48   164   9
Sammy    M 10.33  54  83.12  51.85   166   50   170   8
Buffy    F 10.47  52  73.71  45.79   186   59   188   8
Trent    M 10.5   52  82.78  47.47   170   53   172   8
Jackie   F 10.6   47  79.15  47.27   162   47   164   8
Ralph    M 10.85  43  81.19  49.09   162   64   170   7
Jack     M 10.95  51  69.63  40.84   168   57   172   7
Annie    F 11.08  51  67.25  45.12   172   48   172   7
Kate     F 11.12  45  66.45  44.75   176   51   176   7
Carl     M 11.17  54  79.38  46.08   156   62   165   7
Don      M 11.37  44  89.47  44.61   178   62   182   6
Effie    F 11.5   48  61.24  47.92   170   52   176   6
George   M 11.63  47  77.45  44.81   176   58   176   6
Iris     F 11.95  40  75.98  45.68   176   70   180   5
Mark     M 12.63  57  73.37  39.41   174   58   176   4
Steve    M 12.88  54  91.63  39.2    168   44   172   4
Vaughn   M 13.08  44  81.42  39.44   174   63   176   2
William  M 14.03  45  87.66  37.39   186   56   192   0
;
run;

/* Simple linear regression on the FITNESS data:
   response = oxygen_consumption, single predictor = performance */

title 'Simple Linear Regression of Oxygen Consumption and Performance';
proc reg data=fitness;
   model oxygen_consumption = performance;
run;
quit;

/* Predicted-values demonstration, step 1: list the PERFORMANCE values for which
   a prediction is wanted. The variable name must match the predictor's name in
   FITNESS. The trailing @@ holds the input line so that several values of the
   same variable can be read from one record. */

data need_predictions;
   input performance @@;
   datalines;
0 3 6 9 12
;
run;

/* Stack NEED_PREDICTIONS underneath FITNESS. The appended rows carry only
   PERFORMANCE; every other variable is missing on them. */
data predoxy;
   set fitness need_predictions;
run;

/* Fit the model on PREDOXY and list predicted oxygen consumption.
   P on the MODEL statement requests predicted values; ID prints PERFORMANCE
   beside each observation so the prediction rows can be recognized. */
title 'Oxygen_Consumption=Performance with Predicted Values';
proc reg data=predoxy;
   model oxygen_consumption = performance / p;
   id performance;
run;
quit;

/* Plot the fitted line with confidence and prediction limits. */

options ps=50 ls=76;                                             /* page size / line size for listing output */
goptions reset=all fontres=presentation ftext=swissb htext=1.5;

/* Global SYMBOL and TITLE statements are issued after GOPTIONS RESET=ALL.
   SYMBOL1 draws red dots; SYMBOL2-SYMBOL6 only set colors (red, blue, blue,
   green, green) for the remaining plot elements. */
symbol1 c=red v=dot;
symbol2 c=red;
symbol3 c=blue;
symbol4 c=blue;
symbol5 c=green;
symbol6 c=green;
title;                                                           /* no title on this output */

proc reg data=predoxy;                                           /* data set built for the prediction demo */
   model oxygen_consumption = performance / clm cli alpha=.05;   /* limits for the mean (CLM) and for an individual (CLI) */
   id name performance;
   plot oxygen_consumption*performance / conf pred;              /* add confidence and prediction bands to the plot */
run;
quit;

/* Multiple linear regression on the FITNESS data:
   oxygen_consumption modelled on two predictors, performance and runtime */
title 'Multiple Linear Regression fitness Data';
proc reg data=fitness;
   model oxygen_consumption = performance runtime;
run;
quit;




/* dummy variable coding */

/* Deliberate failing example: GENDER is read as a character variable ($) when
   FITNESS is created, and PROC REG only accepts numeric variables on the MODEL
   statement. Submitting this step is expected to produce an error in the log;
   it motivates the numeric dummy variables built in the steps that follow. */
proc reg data=fitness; /* reg doesn't work on categorical variables*/
   model oxygen_consumption=performance gender;
   title 'Multiple Linear Regression fitness Data';
run;
quit;

/* PROC REG needs numeric "dummy" (indicator) variables for categorical predictors.
   Collapse AGE into a three-level category:
      age_cat = 0   age under 40
      age_cat = 1   age 40 through 50
      age_cat = 2   age over 50
      age_cat = .   age missing
   Two fixes relative to the earlier version of this step:
     - the middle test was written `40 => age <= 50`; `=>` is an alternate spelling
       of `>=`, so it selected only age = 40 and sent ages 41-50 to missing;
     - missing AGE is tested first, because a SAS missing value compares lower
       than any number and would otherwise satisfy `age lt 40`. */

data fitness2;
   set fitness;
   if missing(age)            then age_cat = .;
   else if age lt 40          then age_cat = 0;
   else if 40 <= age <= 50    then age_cat = 1;
   else if age gt 50          then age_cat = 2;
   else                            age_cat = .;
run;


/* Add two 0/1 indicator variables for AGE_CAT to FITNESS2 (rewritten in place).
   A comparison in SAS evaluates to 1 when true and 0 when false, so each
   indicator is assigned directly from its comparison:
      dummy1 = 1 when age_cat is 1, otherwise 0
      dummy2 = 1 when age_cat is 2, otherwise 0
   Rows with any other AGE_CAT value (including missing) get 0 on both. */
data fitness2;
   set fitness2;
   dummy1 = (age_cat = 1);
   dummy2 = (age_cat = 2);
run;

/* Regression with the two age indicators. The labelled TEST statement (AGEDUM)
   tests jointly that the dummy1 and dummy2 coefficients are both zero. */
title 'Multiple Linear Regression fitness Data';
proc reg data=fitness2;
   model oxygen_consumption = performance dummy1 dummy2;
   agedum: test dummy1, dummy2;
run;
quit;

/* ...but why only 2? */

/* 
dummy example

(From Introduction to SAS.
UCLA: Academic Technology Services, Statistical Consulting Group.
http://www.ats.ucla.edu/stat/sas/faq/dummy.htm) 
*/

/* Small worked example: 9 observations, 3 per group, with one numeric score each */
data dummy;
   input id group score;
   datalines;
1 1 48
2 1 49
3 1 50
4 2 17
5 2 20
6 2 23
7 3 28
8 3 30
9 3 32
;
run;


/* Overall mean of SCORE across all groups */
proc means data=dummy;
   var score;
run;
 

/* Standard one-way ANOVA of SCORE by GROUP. GROUP must be named on the CLASS
   statement so it is treated as categorical; MEANS prints the per-group means. */
proc glm data=dummy;
   class group;
   model score = group;
   means group;
run;


/* Create three 0/1 indicator variables, one per level of GROUP.
   A comparison in SAS evaluates to 1 when true and 0 when false, so each
   indicator is assigned directly from its comparison. */
data dummy2;
   set dummy;
   group1 = (group = 1);
   group2 = (group = 2);
   group3 = (group = 3);
run;
 
/* Regress SCORE on the group1 and group2 indicators only; group3 is left out of the model */
proc reg data=dummy2;
   model score = group1 group2;
run;
/*  same as ANOVA */
/* group 3 omitted because referent (i.e intercept, when other groups set to 0):
			parameter est for intercept(30) = group 3 mean from PROC MEANS
			parameter estimate for group 1 = mean of group1 - mean of group3 (49-30=19)
      		parameter estimate for group 2 = mean of group2 - mean of group3 (20-30=-10) */

/*
Ypred for group1 = 30 + 1 * 19 + 0 * -10 = 49
Ypred for group2 = 30 + 0 * 19 + 1 * -10 = 20
Ypred for group3 = 30 + 0 * 19 + 0 * -10 = 30

Ypred for group1: 1  0 
Ypred for group2: 0  1 
Ypred for group3: 0  0
*/






/*  note same F as from ANOVA (ANOVA and REG really essentially the same)

why did we leave out the third indicator variable? Because it is the intercept.

group3 is the referent category = the category when the other variables are zero = intercept
see that the parameter estimate for the intercept (30) is the same as the mean for group 3 from proc means
see that other parameter estimates also related to the mean of the referent category
e.g.  parameter estimate for group 1 = mean of group1 - mean of group3 (49-30=19)
      parameter estimate for group 2 = mean of group2 - mean of group3 (20-30=-10)

in summary
Ypred for group1 = 30 + 1 * 19 + 0 * -10 = 49
Ypred for group2 = 30 + 0 * 19 + 1 * -10 = 20
Ypred for group3 = 30 + 0 * 19 + 0 * -10 = 30 

In procs you will learn later, e.g. LOGISTIC, GENMOD (for Poisson) SAS has automated
options to do dummy coding where you essentially just put in coefficients for comparisons
so the above three just in terms of coefficients would be

Ypred for group1: 1  0 
Ypred for group2: 0  1 
Ypred for group3: 0  0 

*/ 
 

/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/* Chapter 14*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/

/* Build the FITNESS data set (31 subjects) from in-stream records so this
   chapter's code can be run on its own.
   Name and Gender are character; every other field is numeric.
   The LENGTH statement reserves 9 bytes for Name so it is not cut to the
   8-byte default that list input gives character variables. */
data fitness;
   length Name $ 9;
   input Name $
         Gender $
         Runtime
         Age
         Weight
         Oxygen_Consumption
         Run_Pulse
         Rest_Pulse
         Maximum_Pulse
         Performance;
   datalines;
Donna    F  8.17  42  68.15  59.57   166   40   172  14
Gracie   F  8.63  38  81.87  60.06   170   48   186  13
Luanne   F  8.65  43  85.84  54.3    156   45   168  13
Mimi     F  8.92  50  70.87  54.63   146   48   155  11
Chris    M  8.95  49  81.42  49.16   180   44   185  11
Allen    M  9.22  38  89.02  49.87   178   55   180  12
Nancy    F  9.4   49  76.32  48.67   186   56   188  10
Patty    F  9.63  52  76.32  45.44   164   48   166  10
Suzanne  F  9.93  57  59.08  50.55   148   49   155   9
Teresa   F 10     51  77.91  46.67   162   48   168   9
Bob      M 10.07  40  75.07  45.31   185   62   185   9
Harriett F 10.08  49  73.37  50.39   168   67   168   9
Jane     F 10.13  44  73.03  50.54   168   45   168   9
Harold   M 10.25  48  91.63  46.77   162   48   164   9
Sammy    M 10.33  54  83.12  51.85   166   50   170   8
Buffy    F 10.47  52  73.71  45.79   186   59   188   8
Trent    M 10.5   52  82.78  47.47   170   53   172   8
Jackie   F 10.6   47  79.15  47.27   162   47   164   8
Ralph    M 10.85  43  81.19  49.09   162   64   170   7
Jack     M 10.95  51  69.63  40.84   168   57   172   7
Annie    F 11.08  51  67.25  45.12   172   48   172   7
Kate     F 11.12  45  66.45  44.75   176   51   176   7
Carl     M 11.17  54  79.38  46.08   156   62   165   7
Don      M 11.37  44  89.47  44.61   178   62   182   6
Effie    F 11.5   48  61.24  47.92   170   52   176   6
George   M 11.63  47  77.45  44.81   176   58   176   6
Iris     F 11.95  40  75.98  45.68   176   70   180   5
Mark     M 12.63  57  73.37  39.41   174   58   176   4
Steve    M 12.88  54  91.63  39.2    168   44   172   4
Vaughn   M 13.08  44  81.42  39.44   174   63   176   2
William  M 14.03  45  87.66  37.39   186   56   192   0
;
run;

/* DEMONSTRATION: RESIDUAL PLOTS FOR FITNESS DATA
   Fits the model labelled PREDICT and draws three sets of diagnostic plots. */

options ps=50 ls=97;
goptions reset=all fontres=presentation ftext=swissb htext=1.5;

/* global statements, issued after GOPTIONS RESET=ALL */
symbol v=dot;
title 'PREDICT Model - Plots of Diagnostic Statistics';

proc reg data=fitness;
   PREDICT: model oxygen_consumption = runtime age run_pulse maximum_pulse;

   /* residuals against the predicted values and against each predictor */
   plot r.*(p. runtime age run_pulse maximum_pulse);

   /* studentized residuals by observation number (to identify observations),
      with horizontal reference lines at +/-2 and +/-3 */
   plot student.*obs. / vref=3 2 -2 -3
                        haxis=0 to 32 by 1;

   /* studentized residuals against normal quantiles (normal probability plot) */
   plot student.*nqq.;
run;
quit;


/* DEMONSTRATION: INFLUENTIAL OBSERVATIONS FITNESS DATA
   Refits the PREDICT model, prints residual (R) and influence (INFLUENCE)
   statistics, and saves two of them to data set CK4OUTLIERS so a later step can
   screen them automatically:
      rstud  = RSTUDENT (studentized residual)
      cooksd = Cook's D */

goptions reset=all;
title;                 /* no title on this output */

proc reg data=fitness;
   PREDICT: model oxygen_consumption = runtime age run_pulse maximum_pulse
                  / r influence;
   id name;            /* label each row with the subject's name so an outlier can be identified */
   output out=ck4outliers
          rstudent=rstud
          cookd=cooksd;
run;
quit;

/* MACRO FOR OUTLIERS */

/* Settings to adjust for your own data and model.
   NUMPARMS is defined here but is not referenced by the step below. */
%let numparms=5;  /* number of predictor variables + 1 */
%let numobs=31;   /* number of observations */
%let idvars=name; /* identification variable(s) to list */

/* Keep only observations of CK4OUTLIERS that exceed at least one cutoff:
      |rstud| > 3            -> rstud_i = 1
      cooksd  > 4/&numobs    -> cookd_i = 1
   sum_i counts how many cutoffs an observation exceeds. */
data influential;
   set ck4outliers;
   cutcookd = 4 / &numobs;

   if abs(rstud) > 3    then rstud_i = 1; else rstud_i = 0;
   if cooksd > cutcookd then cookd_i = 1; else cookd_i = 0;
   sum_i = rstud_i + cookd_i;

   if not (sum_i > 0) then delete;
run;

/* List the flagged observations: number of cutoffs exceeded, the ID variable(s),
   the two statistics, the Cook's D cutoff, and the individual flags */

title 'Observations that Exceed Suggested Cutoffs';
proc print data=influential;
   var sum_i
       &idvars
       cooksd rstud cutcookd
       cookd_i rstud_i;
run;


/* DEMONSTRATION: COLLINEARITY IN FITNESS DATA
   Full model with all seven predictors; VIF requests variance inflation
   factors as the collinearity diagnostic. */


title 'Collinearity -- Full Model';
proc reg data=fitness;
   FULLMODEL: model oxygen_consumption =
                    performance runtime age weight
                    run_pulse rest_pulse maximum_pulse
                    / vif;
run;
quit;


/* Same model with PERFORMANCE removed, to compare the variance inflation factors */
title 'Collinearity -- Performance Removed';
proc reg data=fitness;
   NOPERF: model oxygen_consumption =
                 runtime age weight
                 run_pulse rest_pulse maximum_pulse
                 / vif;
run;
quit;





/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/
/*LOGISTIC*/
/**********************************************************************
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
**********************************************************************/


/* The variable Low is used to determine whether the subject is a case (Low=1, low-birth-weight baby) or a control (Low=0, normal-weight baby). The dummy time variable Time takes the value 1 for cases and 2 for controls. */

/* from http://support.sas.com/documentation/cdl/en/statug/63033/HTML/default/viewer.htm#statug_phreg_sect036.htm */
/* Low-birth-weight data. Each input line carries two subjects of seven values
   each (id Age Low LWT Smoke HT UI); the trailing @@ holds the line so both
   are read. Time is derived as 2 - Low, i.e. 1 when Low=1 and 2 when Low=0. */
data LBW;
   input id Age Low LWT Smoke HT UI @@;
   Time = 2 - Low;
   datalines;
       25  16   1   130   0  0  0    143  16   0   110   0  0  0
      166  16   0   112   0  0  0    167  16   0   135   1  0  0
      189  16   0   135   1  0  0    206  16   0   170   0  0  0
      216  16   0    95   0  0  0     37  17   1   130   1  0  1
       45  17   1   110   1  0  0     68  17   1   120   1  0  0
       71  17   1   120   0  0  0     83  17   1   142   0  1  0
       93  17   0   103   0  0  0    113  17   0   122   1  0  0
      116  17   0   113   0  0  0    117  17   0   113   0  0  0
      147  17   0   119   0  0  0    148  17   0   119   0  0  0
      180  17   0   120   1  0  0     49  18   1   148   0  0  0
       50  18   1   110   1  0  0     89  18   0   107   1  0  1
      100  18   0   100   1  0  0    101  18   0   100   1  0  0
      132  18   0    90   1  0  1    133  18   0    90   1  0  1
      168  18   0   229   0  0  0    205  18   0   120   1  0  0
      208  18   0   120   0  0  0     23  19   1    91   1  0  1
       33  19   1   102   0  0  0     34  19   1   112   1  0  1
       85  19   0   182   0  0  1     96  19   0    95   0  0  0
       97  19   0   150   0  0  0    124  19   0   138   1  0  0
      129  19   0   189   0  0  0    135  19   0   132   0  0  0
      142  19   0   115   0  0  0    181  19   0   105   0  0  0
      187  19   0   235   1  1  0    192  19   0   147   1  0  0
      193  19   0   147   1  0  0    197  19   0   184   1  1  0
      224  19   0   120   1  0  0     27  20   1   150   1  0  0
       31  20   1   125   0  0  1     40  20   1   120   1  0  0
       44  20   1    80   1  0  1     47  20   1   109   0  0  0
       51  20   1   121   1  0  1     60  20   1   122   1  0  0
       76  20   1   105   0  0  0     87  20   0   105   1  0  0
      104  20   0   120   0  0  1    146  20   0   103   0  0  0
      155  20   0   169   0  0  1    160  20   0   141   0  0  1
      172  20   0   121   1  0  0    177  20   0   127   0  0  0
      201  20   0   120   0  0  0    211  20   0   170   1  0  0
      217  20   0   158   0  0  0     20  21   1   165   1  1  0
       28  21   1   200   0  0  1     30  21   1   103   0  0  0
       52  21   1   100   0  0  0     84  21   1   130   1  1  0
       88  21   0   108   1  0  1     91  21   0   124   0  0  0
      128  21   0   185   1  0  0    131  21   0   160   0  0  0
      144  21   0   110   1  0  1    186  21   0   134   0  0  0
      219  21   0   115   0  0  0     42  22   1   130   1  0  1
       67  22   1   130   1  0  0     92  22   0   118   0  0  0
       98  22   0    95   0  1  0    137  22   0    85   1  0  0
      138  22   0   120   0  1  0    140  22   0   130   1  0  0
      161  22   0   158   0  0  0    162  22   0   112   1  0  0
      174  22   0   131   0  0  0    184  22   0   125   0  0  0
      204  22   0   169   0  0  0    220  22   0   129   0  0  0
       17  23   1    97   0  0  1     59  23   1   187   1  0  0
       63  23   1   120   0  0  0     69  23   1   110   1  0  0
       82  23   1    94   1  0  0    130  23   0   130   0  0  0
      139  23   0   128   0  0  0    149  23   0   119   0  0  0
      164  23   0   115   1  0  0    173  23   0   190   0  0  0
      179  23   0   123   0  0  0    182  23   0   130   0  0  0
      200  23   0   110   0  0  0     18  24   1   128   0  0  0
       19  24   1   132   0  1  0     29  24   1   155   1  0  0
       36  24   1   138   0  0  0     61  24   1   105   1  0  0
      118  24   0    90   1  0  0    136  24   0   115   0  0  0
      150  24   0   110   0  0  0    156  24   0   115   0  0  0
      185  24   0   133   0  0  0    196  24   0   110   0  0  0
      199  24   0   110   0  0  0    225  24   0   116   0  0  0
       13  25   1   105   0  1  0     15  25   1    85   0  0  1
       24  25   1   115   0  0  0     26  25   1    92   1  0  0
       32  25   1    89   0  0  0     46  25   1   105   0  0  0
      103  25   0   118   1  0  0    111  25   0   120   0  0  1
      120  25   0   155   0  0  0    121  25   0   125   0  0  0
      169  25   0   140   0  0  0    188  25   0    95   1  0  1
      202  25   0   241   0  1  0    215  25   0   120   0  0  0
      221  25   0   130   0  0  0     35  26   1   117   1  0  0
       54  26   1    96   0  0  0     75  26   1   154   0  1  0
       77  26   1   190   1  0  0     95  26   0   113   1  0  0
      115  26   0   168   1  0  0    154  26   0   133   1  0  0
      218  26   0   160   0  0  0     16  27   1   150   0  0  0
       43  27   1   130   0  0  1    125  27   0   124   1  0  0
        4  28   1   120   1  0  1     79  28   1    95   1  0  0
      105  28   0   120   1  0  0    109  28   0   120   0  0  0
      112  28   0   167   0  0  0    151  28   0   140   0  0  0
      159  28   0   250   1  0  0    212  28   0   134   0  0  0
      214  28   0   130   0  0  0     10  29   1   130   0  0  1
       94  29   0   123   1  0  0    114  29   0   150   0  0  0
      123  29   0   140   1  0  0    190  29   0   135   0  0  0
      191  29   0   154   0  0  0    209  29   0   130   1  0  0
       65  30   1   142   1  0  0     99  30   0   107   0  0  1
      141  30   0    95   1  0  0    145  30   0   153   0  0  0
      176  30   0   110   0  0  0    195  30   0   137   0  0  0
      203  30   0   112   0  0  0     56  31   1   102   1  0  0
      107  31   0   100   0  0  1    126  31   0   215   1  0  0
      163  31   0   150   1  0  0    222  31   0   120   0  0  0
       22  32   1   105   1  0  0    106  32   0   121   0  0  0
      134  32   0   132   0  0  0    170  32   0   134   1  0  0
      175  32   0   170   0  0  0    207  32   0   186   0  0  0
;
run;

/* Mean of the 0/1 variable LOW within each level of SMOKE,
   i.e. the proportion of Low=1 subjects in each smoking group */
proc means data=lbw;
   class smoke;
   var low;
run;


/* Logistic regression of LOW on SMOKE.
   DESCENDING reverses the response ordering so that Low=1 is the modelled event;
   RL on the MODEL statement requests confidence limits for the odds ratio. */
proc logistic data=lbw descending;
   model low = smoke / rl;
run;
quit;