/* Graphical Exploratory Data Analysis for NELS n=23 data set */ OPTIONS nocenter nodate; */ You may need to run schoool23_data.sas and make changes to libnames and file names */ */ Plot response by homew for each school with regression line drawn in and display on template; data school23; set school23; homework=homew+1-1; * if race=5 then delete; hsq=homework*homework; if race=3 then black=1; else black=0; if race=1 then racec='Asian'; else if race=2 then racec='Hispanic'; else if race=3 then racec='Black'; else if race=4 then racec='White'; else racec=' American'; if race=4 then whitec='White'; else whitec='Not White'; if sex=1 then gendNativeer='Male'; else gender='Female'; run; proc freq data=school23; tables race black /nopercent norow nocol; run; /* Individual math by school mean centered SES */ proc sort data=school23; by school; proc means data=school23 mean ;*noprint; class school; var ses homework math; output out=meanses mean=gmeanses gmeanhome meanmath; run; data school23c; merge school23 meanses; by school; if school=. then delete; cses = ses - gmeanses; chomework = homework-gmeanhome; if gmeanhome<= 1.6 then grphome=1; else if gmeanhome>1.6 and gmeanhome<=2 then grphome=2; else if gmeanhome>2 then grphome=3; run; */ Quick check on whether I can fit a complex model ; proc mixed data=school23c method=ml covtest noclprint ic empirical; class race sex school public; model math= homework cSES race sex public race*cses gmeanses /solution ddfm=satterth; random intercept homework /subject=school type=un; title 'Test model '; run; /*************** Set Graphics Output Format ****************/ ods graphics / imagefmt=pdf; proc greplay nofs igout=ses1 gout=ses1 tc=mytemps; template t2x2; tplay 1:1 2:2 ; run; /*...........Here goes the exploratory analyses.....................*/ /************** Produce panel of within school regressions. This produces png files */ ods graphics / imagefmt=pdf; * Linear regression with confidence limits for mean; title 'Math by Homwork: Overall'; proc sgplot data=school23c; reg x=homework y=math / CLM ; run; * Spline with confidence limits for mean; title 'Math by Homwork: Overall (spline)'; proc sgplot data=school23c; PBSPLINE x=homework y=math / CLM ; run; * Panel plots of individual schools: Linear regression; title 'Math by Homework: Linear regression'; proc sgpanel data=school23c; panelby school /columns=3 rows=4; reg x=homework y=math ; run; * Panel plots of individual schools: Cubic regression; title 'Math by Homework: Cubic'; proc sgpanel data=school23c; panelby school /columns=3 rows=4; loess x=homework y=math / interpolation=cubic; run; * Panel plots of individual schools: Splines; title 'Math by Homework: Splines'; proc sgpanel data=school23c; panelby school /columns=3 rows=4; pbspline x=homework y=math ; run; /* Ugly i=join plot */ goptions reset=(axis, legend, pattern, symbol, title, footnote) norotate hpos=0 vpos=0 htext=2 ftext=swiss ctext=black target= gaccess= gsfmode= colors= csymbol= ; goptions device=WIN ctext=blue graphrc interpol=join; goptions device=win; symbol i=join value=none ; axis1 label=('Time Spent Doing Homework') order=0 to 7 minor=none; axis2 label=none minor=none; proc gplot data=school23 uniform gout=work.test1; plot MATH * HOMEWork=school/ haxis=axis1 vaxis=axis2 nolegend frame; title justify=center height=3 'Math by Homework'; run; * Plot means per school per homework ; proc sort data=school23 ; * Step 1: sort the data; by school homework; run; proc means data=school23 noprint; * Step 2: create data set with mean math by school & homework; by school homework; var math; output out=meansby mean=meanmath; run; * Joint means for each school; * Step 3: plot the means; title 'Math by Homework: Join Points'; proc sgplot data=meansby; series x=homework y=meanmath / group=school MARKERATTRS=(size=.00) ; keylegend 'none' ; run; * Linear regressions all in one; title 'Math by Homework: Linear Regressions'; proc sgplot data=school23; reg x=homework y=math / group=school MARKERATTRS=(size=.00) ; keylegend 'none' ; run; /******************** Race ***********************************/ * Join mean points for Math x Homeworek = white ; proc sort data=school23c; by whitec homework; run; proc means data=school23c noprint; by whitec homework; var math; output out=whitemean mean=meanmath; run; title 'Marginal Distribution: Means Joined'; proc sgplot data=whitemean; series x=homework y=meanmath / group=whitec MARKERATTRS=(size=.00) ; keylegend / location=inside position=topleft; run; title 'Marginal Distribution (regression): white'; proc sgplot data=school23; reg x=homework y=math / group=white MARKERATTRS=(size=.00); run; * All Races; proc sort data=school23c; by racec homework; run; proc means data=school23c noprint; by racec homework; var math; output out=racemeans mean=meanmath; run; * For all races: Joint means; title1 'Marginal Distribution (regression): race'; proc sgplot data=racemeans; series x=homework y=meanmath / group=racec MARKERATTRS=(size=.00) ; keylegend / location=inside position=topleft; run; * For all races; title1 'Marginal Distribution (regression): race'; proc sgplot data=school23; reg x=homework y=math / group=racec MARKERATTRS=(size=.00) ; keylegend / location=inside position=topleft; run; /********************** Gender **********************/ proc sort data=school23c; by gender homework; run; proc means data=school23c noprint; by gender homework; var math; output out=bygender mean=meanmath; run; title 'Marginal Distribution: gender'; proc sgplot data=bygender; series x=homework y=meanmath / group=gender markerattrs=(size=.00); yaxis min=30 max=70; keylegend / location=inside position=topleft; run; title 'Linear regression: gender'; proc sgplot data=school23c; reg x=homew y=math / group=gender markerattrs=(size=.00); yaxis min=30 max=70; keylegend / location=inside position=topleft; run; /**************************** SES *************************/ title 'School specific: math by SES'; proc sgpanel data=school23c; panelby school /columns=3 rows=4; reg x=ses y=math ; run; title 'School specific (Splines): math by SES'; proc sgpanel data=school23c; panelby school /columns=3 rows=4; pbspline x=ses y=math ; run; title 'School specific (Cubic): math by SES'; proc sgpanel data=school23c; panelby school /columns=3 rows=4; loess x=homework y=math / interpolation=cubic; run; title 'SES: linear regressions'; proc sgplot data=school23c; reg x=ses y=math / group=school markerattrs=(size=.00); keylegend 'none'; run; /* Where to put cut-points so groups are about same size */ proc freq data=school23c; table ses / list; run; /* 10 percentiles, n~50 */ data school23g; set school23c; if ses< -1.10 then gses=1; else if ses>= -1.10 and ses< -0.80 then gses=2; else if ses>= -0.80 and ses< -0.52 then gses=3; else if ses>= -0.52 and ses< -0.30 then gses=4; else if ses>= -0.30 and ses< -0.12 then gses=5; else if ses>= -0.12 and ses< 0.15 then gses=6; else if ses>= 0.15 and ses< 0.55 then gses=7; else if ses>= 0.55 and ses< 0.85 then gses=8; else if ses>= 0.85 and ses< 1.22 then gses=9; else if ses>=1.22 then gses=10; run; title 'Check whether grouping on ses is OK'; proc freq data=school23g; tables gses; run; proc sort data=school23g; by gses; run; proc means data=school23g noprint; by gses; var ses math; output out=gmeanses mean=gmeanses meanmath; run; proc sort data=gmeanses; by gmeanses; run; title 'Marginal Distribution (grouped SES): Join'; proc sgplot data=gmeanses; series x=gmeanses y=meanmath ; yaxis min=30 max=70; run; title 'Marginal Distribution (grouped SES): Linear Reg.'; proc sgplot data=gmeanses; reg x=gmeanses y=meanmath; yaxis min=30 max=70; run; /******* Math x SES = sector */ proc sort data=school23g; by gses public; run; proc means data=school23g noprint; by gses public; var ses math; output out=gmeanses2 mean=gmeanses meanmath; run; title 'Marginal Distribution (grouped SES): Sector'; proc sgplot data=gmeanses2; series x=gmeanses y=meanmath / group=public; yaxis min=30 max=70; keylegend / location=inside position=topcenter; run; title 'Linear Regression: Math x SES = public'; proc sgplot data=gmeanses2; reg x=gmeanses y=meanmath / group=public; yaxis min=30 max=70; keylegend / location=inside position=topcenter; run; /******************* Group Mean Centered SES *******************/ title 'School specific: math by cSES'; proc sgpanel data=school23c; panelby school /columns=3 rows=4; reg x=cses y=math ; run; title 'School specific (Splines): math by cSES'; proc sgpanel data=school23c; panelby school /columns=3 rows=4; pbspline x=cses y=math ; run; title 'SES: linear regressions'; proc sgplot data=school23c; reg x=cses y=math / group=school markerattrs=(size=.00); keylegend 'none'; run; /******************** Homework & Sector ***********************/ proc sort data=school23c; by homew public; run; proc means data=school23c noprint; by homew public; var math; output out=meanmath2 mean=meanmath; title 'Homework & Sector (Public) Effects on Math'; proc sgplot data=meanmath2; series x=homew y=meanmath / group=public; yaxis min=30 max=70; keylegend / location=inside position=topcenter; run; title 'OLS: Homework & Sector (Public) Effects on Math'; proc sgplot data=meanmath2; reg x=homew y=meanmath / group=public markerattrs=(size=.00); yaxis min=30 max=70; keylegend / location=inside position=topcenter; run; /* R2 and R2meta: Group Specific Exploring of Preliminary Fixed Effects Structure*/ * Preliminaries; proc sort data=school23reg; by school; * Get sample sizes for schools; proc means data=school23reg noprint; by school; var math; output out=nj n=nj; run; /* PROC REG doesn't let you enter interactions are, cses*white, so must create these in data step. These are ones that will be used in various models */ data school23reg; set school23c; male= sex-1; csesWhite = cses*white; homewmale= homew*male; homewWhite = homew*white; csesmale = cses*male; homewCses = homew*cses; whitemale = white*male; run; * SIMPLEST PRELIMINARY MODEL; * Regressions for each school and output Rsq, SS and MS to data sets; proc reg data=school23reg ; by school; model math = homew cses white sex csesWhite ; ods output ANOVA=ANOVA1 FitStatistics=Fitstat1; run; ** R^2 meta **************************; proc sort data=anova1; by source; proc means data=anova1 sum; by source; var SS; output out=sss sum=SS; run; * Merge in sample sizes and only keep Rsq from Fitstat; data Rsq1data; merge nj Fitstat1; by school; R2meta1= 20950.08/ 40329.40; * taken from means output; Rsq1 = input(cValue2,8.5); * cValue2 is character so this changes it to numeric; if Label2='R-Square' then output; keep nj Rsq1 R2meta1; run; * Now we can plot it all; goptions reset=(axis, legend, pattern, symbol, title, footnote) norotate hpos=0 vpos=0 htext=2 ftext=swiss ctext= target= gaccess= gsfmode= ; goptions device=WIN ctext=blue graphrc interpol= colors= csymbol=; symbol1 value=dot interpol=none color=blue; symbol2 value=none interpol=rl line=1 color=red; axis1 color=black width=2.0 style=1 label=(height=2 "Group Sample Size"); axis2 color=black width=2.0 style=1 label=(height=2 angle=90 "R^2 for each group"); legend1 position=(top inside right) frame value=('Rsq' 'R2meta=.52') label=none; proc gplot data=Rsq1data; plot Rsq1*nj R2meta1*nj / overlay haxis=axis1 vaxis=axis2 legend=legend1; run; /******** Next most complext model ************/ proc reg data=school23reg ; by school; model math = homew cses white sex csesWhite homewmale homewwhite; ods output ANOVA=ANOVA2 FitStatistics=Fitstat2; run; ** R^2 meta **************************; proc sort data=anova2; by source; proc means data=anova2 sum; by source; var SS; output out=sss sum=SS; run; * Merge in sample sizes and only keep Rsq from Fitstat; data Rsq2data; merge nj Fitstat2; by school; R2meta2= 22221.76/ 40329.40; * taken from means output; Rsq2 = input(cValue2,8.5); * cValue2 is character so this changes it to numeric; if Label2='R-Square' then output; keep nj Rsq2 R2meta2; run; * Now we can plot it all (keep all goptions as is except legend); legend2 position=(top inside right) frame value=('Rsq' 'R2meta=.55') label=none; proc gplot data=Rsq2data; plot Rsq2*nj R2meta2*nj / overlay haxis=axis1 vaxis=axis2 legend=legend2; run; /********** All 2-way interactions************/ proc reg data=school23reg ; by school; model math = homew cses white sex csesWhite homewmale homewwhite csesmale whitemale homewCses; ods output ANOVA=ANOVA3 FitStatistics=Fitstat3; run; ** R^2 meta **************************; proc sort data=anova3; by source; proc means data=anova3 sum; by source; var SS; output out=sss sum=SS; run; * Merge in sample sizes and only keep Rsq from Fitstat; data Rsq3data; merge nj Fitstat3; by school; R2meta3= 24263.99/ 40329.40; * taken from means output; Rsq3 = input(cValue2,8.5); * cValue2 is character so this changes it to numeric; if Label2='R-Square' then output; keep nj Rsq3 R2meta3; run; * Now we can plot it all (keep all goptions as is except legend); legend3 position=(top inside right) frame value=('Rsq' 'R2meta=.60') label=none; proc gplot data=Rsq3data; plot Rsq3*nj R2meta3*nj / overlay haxis=axis1 vaxis=axis2 legend=legend3; run; /*********** Comparing R2 for different models *************/ data Rsqall; merge Rsq1data Rsq2data Rsq3data; run; goptions reset=(axis, legend, pattern, symbol, title, footnote) norotate hpos=0 vpos=0 htext=2 ftext=swiss ctext= target= gaccess= gsfmode= ; goptions device=WIN ctext=blue graphrc interpol= colors= csymbol=; symbol1 value=dot interpol=none color=blue; symbol2 value=none interpol=rl line=1 color=red; axis1 color=black width=2.0 style=1 label=(height=2 "R2 for Simplest Model") order=0 to 1 by .2; axis2 color=black width=2.0 style=1 label=(height=2 angle=90 "R For (Probable?) Preliminary Model") order=0 to 1 by .2; axis3 color=black width=2.0 style=1 label=(height=2 angle=90 "R For Model Complex Model") order=0 to 1 by .2; axis4 color=black width=2.0 style=1 label=(height=2 "R For (Probable?) Preliminary Model") order=0 to 1 by .2; proc gplot data=Rsqall; plot Rsq2*Rsq1 / overlay haxis=axis1 vaxis=axis2 ; run; proc gplot data=Rsqall; plot Rsq3*Rsq1 / overlay haxis=axis1 vaxis=axis3 ; run; proc gplot data=Rsqall; plot Rsq3*Rsq2 / overlay haxis=axis4 vaxis=axis3 ; run; /******************* End 2010 ********************************/