/************************************************************************************;
This set of macros can be used to compute Range of Influence Statistics as
described in

   Fay, M.P. (2002). Measuring a binary response's range of influence in
   logistic regression. The American Statistician, 56, 5-9.

Written by Carolyn Anderson
Last Modified 2015

Usage notes:
 - The response variable should equal 0/1
 - Basic description of the macro and how to use it:

   %macro RangeOfInfluence(indata=, rangedata=, samplesize=, obs=, Y=, linpred=, discrete=);

where:

indata    = the data set that is being analyzed
rangedata = data set that is output. It includes the parameter estimates and
            log-likelihood obtained after changing the response of each line of
            data (one at a time), together with the log-likelihood and intercept
            of the model fit to the unaltered data. This data set includes the
            range statistics for the log-likelihood and the intercept, but not
            for the betas (because I don't know how many there are or what the
            names of the corresponding variables are). The deltas (range of
            influence statistics) can be computed for the betas in a data step
            after running the macro....see examples.

            These variables will always be in this data set:

            NAME             Description
            ---------------------------------------------------------------------
            obs_changed     = which observed response was changed for the
                              corresponding line of data.
            _STATUS_        = will indicate whether the model converged or not
                              when the response changes for obs_changed.
            Intercept       = estimated value of the intercept with the response
                              for obs_changed switched.
            ...........       The next variables will be the estimated parameters
                              for those in your model. See examples below for how
                              to compute range of influence statistics for these.
            ............
            total_ll        = log-likelihood of unaltered data.
            total_int       = the estimated intercept for the model fit to the
                              unaltered data.
            delta_ll        = range of influence statistics for the log-likelihood.
            delta_intercept = range of influence statistics for the intercept.
samplesize= number of observations. Optional: when it is omitted the macro uses
            the sample size that proc logistic reports for the unaltered data.
obs       = name of the identification variable (like an id). Macro assumes
            this runs from 1 to the sample size
Y         = name of the response variable
linpred   = list of explanatory variables in the model
discrete  = list of variables in linpred that are discrete and should be
            included in the class statement of proc logistic (i.e., program
            will create dummy variables for you). May be left empty.

*******************************************************************************************
Examples: Three examples are included to illustrate use for different
          number and types of explanatory variables;
*/

/*************** Macros used in Range Of Influence Main macro *************/

/*--------------------------------------------------------------------------
  change: copy &indata to &outdata with the 0/1 response flipped (1 - value)
          on the row(s) where &obs equals &obschanged.

    indata     = input data set
    outdata    = output data set (replaced)
    obschanged = value of the identification variable whose response is flipped
    obs        = name of the identification variable
    response   = name of the response variable. Optional; when omitted the
                 macro falls back to the macro variable Y of the calling
                 scope, which is what earlier versions of this macro relied on.
  --------------------------------------------------------------------------*/
%macro change(indata=,outdata=,obschanged=,obs=,response=);
   %if %length(&response)=0 %then %let response=&Y;
   data &outdata;
      set &indata;
      if &obs=&obschanged then &response=1-&response;
   run;
%mend;

/*--------------------------------------------------------------------------
  addObs: add the variable obs_changed (set to &obschanged) to every row of
          the data set &inout, in place.
  --------------------------------------------------------------------------*/
%macro addObs(inout=,obschanged=);
   data &inout;
      set &inout;
      obs_changed=&obschanged;
   run;
%mend;

/*--------------------------------------------------------------------------
  outupdate: append the rows of &data_added to the data set &inout, in place.
  --------------------------------------------------------------------------*/
%macro outupdate(inout=,data_added=);
   data &inout;
      set &inout &data_added;
   run;
%mend;

/***************************************************************************
  Main MACRO: RangeOfInfluence

  Fits the logistic model to the unaltered data, then refits it once per
  observation with that observation's response flipped, and collects the
  estimates from every refit in &rangedata together with

     total_int, total_ll         estimates from the unaltered fit
     delta_intercept, delta_ll   refit value minus unaltered value

  Work data sets created along the way: all_data, samplesize, Ncheck,
  all_expanded, now, tmp.

  Comments inside the macro use the slash-star form on purpose: in a
  statement-style comment the macro processor still resolves macro
  references, which produced "not resolved" warnings.
 ***************************************************************************/
%macro RangeOfInfluence(indata=, rangedata=, samplesize=, obs=, Y=, linpred=, discrete=);
   /* keep the work variables out of the global symbol table */
   %local n loop;

   /* Model fit to unaltered data (printed, so the estimates can be read off) */
   proc logistic data=&indata descending outest=all_data;
      %if %length(&discrete) %then %do;
      class &discrete;
      %end;
      model &Y = &linpred;
      ods output NObs=samplesize;
   run;

   /* Sample size from the "observations used" row of the NObs table.
      NOTE(review): SumFreqsUsed is the column the original code read;
      confirm it is populated when no FREQ statement is used. */
   data Ncheck;
      set samplesize;
      if Label="Number of Observations Read" then delete;
      call symputx('n', SumFreqsUsed);
   run;

   /* An explicit samplesize= always wins over the derived value */
   %if %length(&samplesize) %then %let n=&samplesize;

   %if %length(&n)=0 %then %let n=.;
   %if &n=. %then %do;
      %put ERROR: RangeOfInfluence could not determine the sample size. Supply samplesize= in the call.;
      %return;
   %end;

   /* Repeat the unaltered estimates n+1 times so that they line up row by
      row with &rangedata (one seed row plus one row per observation) */
   data all_expanded;
      set all_data;
      do i=1 to (&n+1);
         total_int = intercept;
         total_ll  = _LNLIKE_;
         output;
      end;
      keep total_int total_ll;
   run;

   /* Seed row; removed again in the last step (obs_changed < 1) */
   data &rangedata;
      obs_changed=0;
   run;

   /* Loop through the observations: flip one response, refit, store */
   %do loop=1 %to &n;
      %change(indata=&indata, outdata=now, obschanged=&loop, obs=&obs, response=&Y);

      proc logistic data=now descending outest=tmp noprint;
         %if %length(&discrete) %then %do;
         class &discrete;
         %end;
         model &Y = &linpred;
      run;

      %addObs(inout=tmp, obschanged=&loop);
      %outupdate(inout=&rangedata, data_added=tmp);
   %end;

   /* Compute deltas for intercept and loglike....the deltas for the betas
      have to be computed by the user afterwards (see the examples) */
   data &rangedata;
      merge &rangedata all_expanded;   /* one-to-one merge, no BY variable */
      if obs_changed < 1 then delete;
      delta_intercept = intercept - total_int;
      delta_ll        = _LNLIKE_ - total_ll;
      drop _TYPE_ _LINK_ _NAME_;
   run;
%mend;
/** End macro definition *****************************************************/

/* Example Usage */
options ls=80 nocenter nodate;

data esr;
   title 'ESR Data';
   input id fibrin globulin response @@;
   n=1;
datalines;
1 2.52 38 0 2 2.56 31 0 3 2.19 33 0 4 2.18 31 0
5 3.41 37 0 6 2.46 36 0 7 3.22 38 0 8 2.21 37 0
9 3.15 39 0 10 2.60 41 0 11 2.29 36 0 12 2.35 29 0
13 5.06 37 1 14 3.34 32 1 15 2.38 37 1 16 3.15 36 0
17 3.53 46 1 18 2.68 34 0 19 2.60 38 0 20 2.23 37 0
21 2.88 30 0 22 2.65 46 0 23 2.09 44 1 24 2.28 36 0
25 2.67 39 0 26 2.29 31 0 27 2.15 31 0 28 2.54 28 0
29 3.93 32 1 30 3.34 30 0 31 2.99 36 0 32 3.32 35 0
;

proc logistic data=esr;
   model response = fibrin globulin / lackfit;
run;

/*****************************************************************************
  Example 1: One numerical explanatory variable
 *****************************************************************************/
title 'Example 1: One numerical explainatory variable';

%RangeOfInfluence(indata=esr, rangedata=range, obs=id, Y=response, linpred=fibrin);
run;

/* Computing range of influence statistics for regression parameters */
data rangestat;
   set range;
   /* <--- put in values from model w/o changes. This will be in the output */
   delta_b = fibrin - ( 2.0995);
run;

/* Printing out range of influence statistics ordered by, in this case,
   regression coefficient */
proc sort data=rangestat;
   by delta_b;
proc print data=rangestat;
   var obs_changed delta_intercept delta_b delta_ll;
run;

/* Plot of range of influence statistics for intercept by coefficient */
goptions reset=(axis, legend, pattern, symbol, title, footnote) norotate
         hpos=0 vpos=0 htext=2.5 ftext=swiss ctext= target= gaccess= gsfmode= ;
goptions device=WIN ctext=blue graphrc interpol=join;
axis1 color=black width=2.0 label=('ROI for Slope of Fibrin') ;
axis2 color=black width=2.0 label=(angle=90 'ROI for Intercept') ;
symbol value=circle height=1.5 color=blue i=none;

proc gplot data=WORK.rangestat;
   plot delta_intercept * delta_b / href=0 vref=0 haxis=axis1 vaxis=axis2 frame ;
run;

/*****************************************************************************
  Example 2: Two numerical explanatory variables
 *****************************************************************************/
title 'Example 2: Two numerical explainatory variables';

%RangeOfInfluence(indata=esr, rangedata=range2, samplesize=32, obs=id, Y=response,
                  linpred=fibrin globulin );
run;

data rangestat2;
   set range2;
   /* <--- put in values from model w/o changes */
   delta_bf = fibrin   - (1.9104 );
   delta_bg = globulin - (0.1558 );
run;

proc sort data=rangestat2;
   by delta_bf;
proc print data=rangestat2;
   var obs_changed delta_intercept delta_ll delta_bf delta_bg;
run;

/*****************************************************************************
  Example 3: with a discrete variable and a numerical explanatory variable.
  Arbitrary values were added to create a discrete variable.
 *****************************************************************************/
options ls=80 nocenter nodate;

data esr2;
   title 'ESR Data';
   input id fibrin globulin response egdiscrete @@;
   n=1;
datalines;
1 2.52 38 0 1 2 2.56 31 0 2 3 2.19 33 0 3 4 2.18 31 0 1
5 3.41 37 0 1 6 2.46 36 0 2 7 3.22 38 0 3 8 2.21 37 0 2
9 3.15 39 0 1 10 2.60 41 0 2 11 2.29 36 0 3 12 2.35 29 0 3
13 5.06 37 1 1 14 3.34 32 1 2 15 2.38 37 1 3 16 3.15 36 0 1
17 3.53 46 1 1 18 2.68 34 0 2 19 2.60 38 0 3 20 2.23 37 0 2
21 2.88 30 0 1 22 2.65 46 0 2 23 2.09 44 1 3 24 2.28 36 0 3
25 2.67 39 0 1 26 2.29 31 0 2 27 2.15 31 0 3 28 2.54 28 0 1
29 3.93 32 1 1 30 3.34 30 0 2 31 2.99 36 0 3 32 3.32 35 0 2
;

title 'Example 3: with a discrete variable and a numerical explanatory variable';

%RangeOfInfluence(indata=esr2, rangedata=range3, samplesize=32, obs=id, Y=response,
                  linpred=fibrin egdiscrete, discrete=egdiscrete);
run;

data rangestat3;
   set range3;
   delta_bf  = fibrin - (2.3338 );
   delta_eg1 = egdiscrete1 - (-0.3810);
   delta_eg2 = egdiscrete2 - (-0.6579);
run;

proc print data=rangestat3;
   var obs_changed delta_intercept delta_ll delta_bf delta_eg1 delta_eg2;
run;