OPTIONS LINESIZE=70;

/* Read the raw disease records from the external file into the       */
/* data set disease, one observation per input line.                  */
DATA disease;
    INFILE 'c:\SASFiles\logistic_disease.dat';
    INPUT id age socist sector disease savings;

    /* Recode sector value 2 to 0 so sector can serve as an indicator  */
    /* variable; every other value is kept exactly as read.            */
    IF sector EQ 2 THEN sector = 0;
RUN;
TITLE '1. Logistic Regression with sector';

/* Model 1: logistic regression of disease on sector only.            */
/* DESCENDING reverses the response profile so that value 1 is the    */
/* first ordered value and 0 is the second; the model therefore       */
/* predicts the probability of observing disease = 1.                 */
PROC LOGISTIC DESCENDING DATA=disease;
    MODEL disease = sector;

    /* Write the input data plus the estimated probability (phat)     */
    /* to the data set prob1.                                         */
    OUTPUT PREDICTED=phat OUT=prob1;
RUN;
/* Create disease2: every observation of disease plus two indicator   */
/* variables encoding the three-category variable socist.             */
/*     socist = 1  ->  soc1 = 1, soc2 = 0                              */
/*     socist = 2  ->  soc1 = 0, soc2 = 1                              */
/*     socist = 3  ->  soc1 = 0, soc2 = 0                              */
DATA disease2;
    SET disease; /* open the disease file for processing */
    SELECT (socist);
        WHEN (1) DO;
            soc1 = 1;
            soc2 = 0;
        END;
        WHEN (2) DO;
            soc1 = 0;
            soc2 = 1;
        END;
        WHEN (3) DO;
            soc1 = 0;
            soc2 = 0;
        END;
        OTHERWISE DO;
            /* A SELECT group with no OTHERWISE raises an error and    */
            /* stops the step when no WHEN clause matches (for         */
            /* example when socist is missing). Keep the observation,  */
            /* leave both indicators missing, and flag it in the log.  */
            soc1 = .;
            soc2 = .;
            PUTLOG 'WARNING: unexpected socist value, soc1/soc2 set to missing: ' id= socist=;
        END;
    END;
RUN;
TITLE '2. Logistic Regression with sector and socioeconomic status all categorical variables';

/* Model 2: disease on sector and the two socist indicators.          */
PROC LOGISTIC DESCENDING DATA=disease2;
    /* SCALE=NONE together with AGGREGATE is used when all            */
    /* explanatory variables are categorical:                         */
    /*   - SCALE produces the goodness-of-fit statistics;             */
    /*   - AGGREGATE treats each unique combination of explanatory    */
    /*     variable values as a distinct group when computing them.   */
    /* Author's note: no goodness-of-fit test is available once an    */
    /* interaction term is included (the model is then saturated).    */
    MODEL disease = sector soc1 soc2 / AGGREGATE SCALE=NONE;

    /* Joint test of the significance of the socist variable          */
    /* (both indicator coefficients equal to zero).                   */
    socist: TEST soc1=0, soc2=0;

    /* Save the input data and fitted probabilities (phat) in prob2.  */
    OUTPUT PREDICTED=phat OUT=prob2;
RUN;
TITLE '3. Logistic Regression with sector and socioeconomic status and savings a continuous variable';

/* Model 3: forward selection with sector, soc1 and soc2 forced into  */
/* the model and savings as the candidate continuous variable.        */
PROC LOGISTIC DATA=disease2 DESCENDING;
    MODEL disease = sector soc1 soc2 savings / SELECTION=FORWARD INCLUDE=3 DETAILS LACKFIT INFLUENCE IPLOTS;
    /* LACKFIT produces the Hosmer and Lemeshow goodness-of-fit test   */
    /* INCLUDE=3 requests that the first three terms listed in the     */
    /*   MODEL statement be included in each fitted model              */
    /* DETAILS prints the Analysis of Variables Not in the Model table */
    /* INFLUENCE requests regression diagnostics                       */
    /* IPLOTS plots residuals versus observation index number to check */
    /*   for unusual patterns or outliers                              */

    /* Write to prob3 rather than prob2 so that the predictions saved  */
    /* from the second model are not overwritten by this model.        */
    OUTPUT OUT=prob3 PREDICTED=phat;
RUN;
TITLE '4. Logistic Regression with CATMOD';

/* Model 4: the same sector + socist model fitted with PROC CATMOD.   */
/* DATA=disease2 is named explicitly; without it CATMOD would read    */
/* the most recently created data set, which at this point is the     */
/* OUTPUT data set of the preceding PROC LOGISTIC step.               */
PROC CATMOD DATA=disease2 ORDER=DATA;
    MODEL disease = sector socist;
    /* This procedure sets up the indicator variables automatically   */
    /* The Likelihood ratio in the ML ANOVA output is the              */
    /*   goodness-of-fit test                                          */
    /* Not good for continuous explanatory variables with many         */
    /*   distinct values                                               */
    /* Not an efficient way to estimate a logistic regression          */
RUN;
QUIT; /* CATMOD is interactive: RUN executes the model, QUIT ends the procedure */