In: Statistics and Probability
IN SAS:
a. Examine this SAS data set including the variable labels and attributes. Add a comment to your program that notes the sort order of the variables in this data set.
b. Create a data set that has one observation for each sixth grader.
Here, Fam_id = Family_id
Std_id = Student_id
Sch = School Code
DOB = Date of birth for all the grade students
DOB6th = Date of birth of the 6th grade student for a given Family_id
Agg_Diff = Age difference in years between 6th grade student and his/her siblings for a given Family_id
Output should look like this :
Fam_id Sdt_ID Sch Grade DOB DOB6th Age_Diff Y_sib O_sib 90021 103699 4th 07/24/2004 01/26/2002 2.49 90021 127945 RG 6th 01/26/2002 01/26/2002 0 2 0 90021 149229 2nd 10/28/2005 01/26/2002 3.75 90053 109831 RC 6th 08/27/2002 08/27/2002 0 1 1 90053 122779 5th 08/28/2003 08/27/2002 1 90053 124617 8th 05/07/2000 08/27/2002 -2.31 90097 145616 4th 06/06/2004 12/20/2001 2.46 90097 164264 RC 6th 12/20/2001 12/20/2001 0 1 0 90112 147688 7th 10/11/2000 02/23/2002 -1.37 90112 171989 9th 06/27/1999 02/23/2002 -2.66 90112 197925 RG 6th 02/23/2002 02/23/2002 0 0 2
where, Y_sib = number of younger siblings of the sixth grader
O_sib = number of older siblings of the sixth grader
DATA TEMP;
INPUT Fam_id Sdt_ID Sch $ Grade $ DOB DOB6th Age_Diff;
INFORMAT DOB DOB6th mmddyy8.;
FORMAT DOB DOB6th mmddyy10.;
CARDS;
90021 103699 . 4th 07/24/2004 01/26/2002 2.49
90021 127945 RG 6th 01/26/2002 01/26/2002 0
90021 149229 . 2nd 10/28/2005 01/26/2002 3.75
90053 109831 RC 6th 08/27/2002 08/27/2002 0
90053 122779 . 5th 08/28/2003 08/27/2002 1
90053 124617 . 8th 05/07/2000 08/27/2002 -2.31
90097 145616 . 4th 06/06/2004 12/20/2001 2.46
90097 164264 RC 6th 12/20/2001 12/20/2001 0
90112 147688 . 7th 10/11/2000 02/23/2002 -1.37
90112 171989 . 9th 06/27/1999 02/23/2002 -2.66
90112 197925 RG 6th 02/23/2002 02/23/2002 0
;
RUN;
PROC SQL;
CREATE TABLE TEMP_CAL AS
Select
COALESCE(A.Fam_id,B.Fam_id) as Fam_id, /*SAS Functions are also
applicable in PROC SQL queries including normal SQL
functions*/
CASE WHEN A.Old_Student_Count=. THEN 0
ELSE A.Old_Student_Count
END as Old_Student_Count,
CASE WHEN B.Young_Student_Count=. THEN 0
ELSE B.Young_Student_Count
END as Young_Student_Count
FROM
(
Select
Fam_id,
Count(Distinct Sdt_ID) as Old_Student_Count
FROM
TEMP
WHERE
Age_Diff<0
GROUP BY Fam_id
) A
FULL JOIN
(
Select
Fam_id,
Count(Distinct Sdt_ID) as Young_Student_Count
FROM
TEMP
WHERE
Age_Diff>0
GROUP BY Fam_id
) B
ON A.Fam_id=B.Fam_id
;
QUIT;
DATA TEMP_FINAL;
IF _n_=1 THEN DO;
IF 0 THEN SET TEMP_CAL;
Declare Hash h1(dataset:'TEMP_CAL');
h1.definekey('Fam_id');
h1.definedata('Young_Student_Count','Old_Student_Count');
h1.definedone();
END;
SET TEMP;
h1.find(key:Fam_id);
IF Grade NE '6th' THEN DO;
Young_Student_Count=.;
Old_Student_Count=.;
END;
RUN;
PROC PRINT DATA=TEMP_FINAL;
TITLE 'Final Table';
RUN;