BookmarkSubscribeRSS Feed
tina_ting
Fluorite | Level 6

Hello SAS experts,

 

I am working on single-group, two-stage interrupted time series analysis and have written SAS code for both the continuous outcome (using PROC MIXED) and the binary outcome (using a Poisson model). I have included detailed comments in the code and have accounted for potential confounders. I have also attached the resulting plots and output tables, as well as a sample of the original dataset for reference.

 

Could you please review the PROC MIXED version and the Poisson version of the code, along with the generated graphs and results, and let me know if they are correct? I would greatly appreciate any suggestions for optimization.

 

Thank you very much for your help!

 

/*********************************************
Step 1: Single-group, two-stage ITS (continuous Y) — PROC MIXED version
   variables:
     - id             : Subject identifier
     - MO             : Month (1–12)
     - time 1           : (equal to MO)
     - Y              : Continuous outcome variable (each person’s annual observation)
     - intervention1  : Indicator for first intervention (0/1), equals 1 from MO=4 onward
     - intervention2  : Indicator for second intervention (0/1), equals 1 from MO=6 onward
     - city           : Binary indicator for location (0/1)
     - count_gr       : Outpatient visit count group (1/2/3)
*********************************************/

proc mixed data=ITS.COST method=ml;
  /* (1) Specify categorical variables */
  class id city count_gr;
  /* (2) Main model: time trend, two intervention effects and interactions, and covariates */
  model COST = /* change variable placeholder */
        time1
        intervention1 time1*intervention1
        intervention2 time1*intervention2
        /* Covariates */
        city count_gr
        / solution;
  /* (3) Specify repeated measures structure (multiple yearly observations per subject): unstructured covariance */
  repeated / subject=id type=un;

  /* (4) Save model results to its_model_continuous for subsequent PROC PLM scoring */
  store out=its_model_continuous;

  /* (5) Estimate statements: segmented regression estimates of slopes and level changes */
  estimate 'Pre slope' time1 1 / cl e;
  estimate 'Post1 slope' time1 1 time1*intervention1 1 / cl e;
  estimate 'Gap1 (1st level shift)' intervention1 1 time1*intervention1 1 / cl e;
  estimate 'Post2 slope' time1 1 time1*intervention1 1 time1*intervention2 1 / cl e;
  estimate 'Gap2 (2nd level shift)' intervention2 1 time1*intervention2 1 / cl e;
run;

/*********************************************
Step 2: Use PROC PLM to obtain predicted values and standard errors for each time point
*********************************************/
proc plm restore=its_model_continuous;
  score data=ITS.COST out=pred_panel_cont  
        predicted=pred_value
        stderr=StdErrPred;
run;

/*********************************************
Step 3: Aggregate average predicted values and 95% confidence intervals for each time point
*********************************************/
proc summary data=pred_panel_cont nway;
  class time1;
  var pred_value StdErrPred;
  output out=avg_pred_cont mean= / autoname;
run;

data avg_pred_cont;
  set avg_pred_cont;
  /* If after summary, variable names are pred_value_Mean and StdErrPred_Mean */
  pred = pred_value_Mean;
  se   = StdErrPred_Mean;
  lcl  = pred - 1.96 * se;
  ucl  = pred + 1.96 * se;
run;
/*********************************************
Step 4: Plot ITS predicted values (continuous Y → predicted values and 95% CI)
*********************************************/
ods graphics on / attrpriority=none;
proc sgplot data=avg_pred_cont;
  /* Confidence interval band */
  band x=time1 lower=lcl upper=ucl
       / fillattrs=(color=gray transparency=0.9)
         legendlabel="95% Confidence Interval";
  /* Predicted values line */
  series x=time1 y=pred
         / markers
           lineattrs=(thickness=2 color=blue)
           markerattrs=(symbol=circlefilled size=8 color=blue)
           legendlabel="Predicted Value";
  /* Add intervention point reference lines (first intervention time1=3, second intervention time1=5) */
  refline 3 / axis=x lineattrs=(pattern=shortdash color=black)
               label="First Intervention" labelattrs=(weight=bold size=9);
  refline 5 / axis=x lineattrs=(pattern=shortdash color=black)
               label="Second Intervention" labelattrs=(weight=bold size=9);
  xaxis label="Time (Month)"
        values=(1 to 12 by 1)   /* Force display of 1,2,...,12 */
        valuesdisplay=("1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12")
        integer;   /* Display only integer tick values */
  yaxis label="Total Cost";
  keylegend / position=bottom location=outside across=1;
run;

DatasetITS.COST

tina_ting_0-1749038102865.png

tina_ting_1-1749038113965.pngtina_ting_2-1749038117314.pngtina_ting_3-1749038119664.pngtina_ting_4-1749038128851.png

/*********************************************
Single-group two-stage ITS (binary Y) — Poisson model version
*********************************************/

/* Step 1: Aggregate → annual PIM_count, N_person */
proc sql;
  create table ITS.POLY_agg as
  select MO,
    /* Cumulative number of POLY events */
    sum(POLY) as POLY_count, 
    /* Total number of people in that year (distinct id) */
    count(distinct id) as N_person,
    /* 1. For binary categorical variables, use mean() to get proportion (0/1 only needs one variable) */
    mean(city)                         as prop_city,       /* Proportion of urban vs. non-urban */
    /* 4. Proportions for count_gr (outpatient visit count groups 1, 2, 3) */
    sum(case when count_gr = 1 then 1 else 0 end) / count(distinct id)    as prop_count1,
    sum(case when count_gr = 2 then 1 else 0 end) / count(distinct id)    as prop_count2,
    sum(case when count_gr = 3 then 1 else 0 end) / count(distinct id)    as prop_count3
  from VAR.POLY
  group by MO
  order by MO
  ;
quit;

/* Step 2: Add time, intervention indicators, and offset */
data ITS.POLY_poisson;
  set ITS.POLY_agg;
  time = MO;                    
  if MO >= 4 then intervention1 = 1;  else intervention1 = 0;
  if MO >= 6 then intervention2 = 1;  else intervention2 = 0;
  lnN = log(N_person);                     /* Poisson offset */
run;

/* Step 3: ITS model estimation */
proc genmod data=ITS.POLY_poisson;
  model POLY_count =
        time 
        intervention1 time*intervention1
        intervention2 time*intervention2
        /* Control covariates: binary categorical variables (0/1 feed in as proportions) */
        prop_city            /* Proportion of urban vs. non-urban (city=1) */
        /* Use prop_count1, prop_count2; count_gr=0 is reference */
        prop_count1          /* Proportion with count_gr=1 */
        prop_count2          /* Proportion with count_gr=2 */
        / dist=poisson link=log offset=lnN;

  /* ITS estimates for slopes/level changes in each stage (adjust as needed) */
  estimate 'PRE slope'                 time 1 / exp;
  estimate 'POST1 slope'              time 1 time*intervention1 1 / exp;
  estimate 'GAP1 (1st level shift)'   intervention1 1 / exp;
  estimate 'POST2 slope'              time 1 time*intervention1 1 time*intervention2 1 / exp;
  estimate 'GAP2 (2nd level shift)'   intervention2 1 / exp;
  output out=pred1 pred=predcount lower=lcl upper=ucl;
run;

/* Step 4a: Calculate “rate” in pred1, If pred1 already contains POLY_count and N_person, process as follows:*/
data pred1_rate;
  set pred1;
  /* Calculate rate = event count ÷ population */
  rate = POLY_count / N_person;
  format rate percent7.2;
run;
/* Step 4b: Plot “rate” line and scatter with SGPLOT */
proc sgplot data=pred1_rate noautolegend;
  /* (1) Rate line */
  series x=time y=rate
         / lineattrs=(thickness=2 color=blue)
           name="RateLine";
  /* (2) Rate scatter points + data labels */
  scatter x=time y=rate
          / markerattrs=(symbol=CircleFilled color=blue size=8)
            datalabel=rate
            datalabelattrs=(size=9 color=black)
            name="RatePts";
  /* (3) Intervention time points (first at time=4; second at time=6) */
  refline 4 6 / axis=x  lineattrs=(pattern=shortdash color=black);
  /* (4) Axis settings */
  xaxis label="Time (Year)"
        values=(1 to 12 by 1)
        valuesdisplay=("1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12")
        integer;
  yaxis label="POLY Rate";
  /* (5) Legend */
  keylegend "RatePts"  / title="Observed Rate (Points/Labels)" position=topright across=1;
  keylegend "RateLine" / title="Observed Rate (Line)"         position=topright across=1;
run;

DatasetVAR.POLY

tina_ting_5-1749038163824.png

DatasetITS.POLY_agg

tina_ting_6-1749038172499.png

DatasetITS.POLY_poisson

tina_ting_7-1749038182357.png

tina_ting_8-1749038188647.png

tina_ting_9-1749038194469.png

tina_ting_10-1749038199830.png

 

 

hackathon24-white-horiz.png

The 2025 SAS Hackathon Kicks Off on June 11!

Watch the live Hackathon Kickoff to get all the essential information about the SAS Hackathon—including how to join, how to participate, and expert tips for success.

YouTube LinkedIn

What is Bayesian Analysis?

Learn the difference between classical and Bayesian statistical approaches and see a few PROC examples to perform Bayesian analysis in this video.

Find more tutorials on the SAS Users YouTube channel.

SAS Training: Just a Click Away

 Ready to level-up your skills? Choose your own adventure.

Browse our catalog!

Discussion stats
  • 0 replies
  • 463 views
  • 0 likes
  • 1 in conversation