본문 바로가기

STATISTICS/SAS

비교분석 코드 예제

  1. 가설(귀무가설, 대립가설)을 세운다.
  2. 유의수준 $\alpha$를 정한다.
  3. 검정통계량을 결정한다. (예: 일표본 t 검정)
    $t = \frac{\bar{X}-\mu_0}{S/\sqrt{n}} \sim t_{n-1}$
  4. 관측된 자료에 대한 p값을 계산한다.
  5. p값과 $\alpha$를 비교하여 귀무가설의 기각 여부를 결정한다. (p값 < $\alpha$이면 귀무가설 기각)

모수적 검정

일표본 t 검정

/* One-sample t-test of rent against a hypothesised mean of 38. */
TITLE "One sample t-test for rent around KU";

/* Read the 15 rent values; the trailing @@ holds the input record so that
   several observations are read from one data line. */
data kurent;
    input rent @@;
datalines;
35 45 40 37 38 42 44 42 38 40 36 44 39 41 36
;
run;

/* H0= sets the null-hypothesis mean (0 when omitted).
   The original note says MU0 can be used in place of H0 --
   NOTE(review): confirm against the PROC TTEST documentation. */
proc ttest data=kurent h0=38;
    var rent;
run;

모비율 검정

/* Assign the libref that holds the example data sets. */
LIBNAME ex 'c:\work\mysasdat';

/* One-sample proportion test on side_effect.
   LEVEL='1' selects the category whose proportion is tested,
   P=0.05 is the null proportion, and ALPHA=.1 sets the level used for
   the confidence limits.
   The quotes around 1 must be plain ASCII apostrophes: typographic quotes
   pasted from a word processor or web page are not valid SAS string
   delimiters and make the step fail to parse. */
PROC FREQ DATA = ex.ibuprofen;
    TABLES side_effect /
        BINOMIAL(LEVEL='1' P=0.05) ALPHA=.1;
RUN;

독립 이표본 t-검정

TITLE "Two sample t-test to compare mpg between American and Japanese cars";

/* Independent two-sample t-test of mpg between two origin groups.
   The WHERE filter keeps only origin codes 1 and 3 so that CLASS sees
   exactly two levels (presumably American and Japanese cars, per the
   title -- the coding of origin is not shown here).
   The ex libref must already be assigned. */
proc ttest data=ex.cars;
    where origin in (1, 3);
    class origin;
    var mpg;
run;

두 모비율 검정

/* Pre-summarised 2x2 table: one record per gender/drinker cell with its
   cell frequency in count. */
data binge;
    input gender $ drinker $ count;
datalines;
men yes 1630
men no 5550
women yes 1684
women no 8232
;
run;

/* WEIGHT makes PROC FREQ treat count as the cell frequency;
   CHISQ requests the chi-square tests on the gender-by-drinker table,
   and the NO* options suppress the percentage columns in the printout. */
proc freq data=binge;
    weight count;
    tables gender*drinker / chisq nopercent norow nocol;
run;

대응짝 t-검정

두 변수의 차이를 이용하며, 그 차이가 정규분포를 따른다고 가정한다.

/* Paired t-test on the per-observation difference height - armlet.
   The ex libref must already be assigned. */
proc ttest data=ex.body;
    paired height*armlet;
run;

비모수적 검정

부호검정, 부호순위검정

/* Location tests for rent against MU0=38. PROC UNIVARIATE reports the
   sign test and the signed-rank test together with Student's t;
   NORMAL adds tests of normality and PLOT adds descriptive plots.
   Uses the kurent data set, which must already exist in WORK. */
proc univariate data=kurent mu0=38 normal plot;
    var rent;
run;

윌콕슨 순위합검정

TITLE 'Rank Sum Test';

/* Wrist measurements by gender; the trailing @@ lets several
   gender/wrist pairs be read from each data line. */
data body3;
    input gender $ wrist @@;
datalines;
M 16.5 M 17 M 16.9 M 16.6 M 18.0
F 13.5 F 15.0 F 14.0 F 14.5 F 14.5 F 13.9
;
run;

/* Wilcoxon rank-sum test of wrist between the two gender groups;
   EXACT additionally requests the exact p-value for the Wilcoxon test. */
proc npar1way data=body3 wilcoxon;
    class gender;
    var wrist;
    exact wilcoxon;
run;

연관성 분석

상관성 분석

TITLE "Correlation Analysis";

/* Each observation is an (age, income, expense) triple; the trailing @@
   lets several triples be read from each data line. */
data student;
    input age income expense @@;
datalines;
25 170 67 28 177 62 20 165 53 16 150 48 
19 160 58 21 160 59 22 173 60 16 169 57
20 169 70 19 170 71 20 179 63 26 180 75
23 174 82 16 179 60 25 189 82 17 169 74 
30 180 77
;
run;

/* Optional scatter plots, left disabled:
PROC GPLOT DATA=student;
    PLOT age*(income expense) income*expense;
RUN; QUIT;
*/

/* Correlation matrix of all three variables with the default statistics. */
proc corr data=student;
    var age income expense;
run;

/* Pearson, Spearman and Kendall coefficients for age and income. */
proc corr data=student pearson spearman kendall;
    var age income;
run;

카이제곱 검정

/* Build the 3x4 age-by-beverage table from the cell counts listed row by
   row in the data lines. */
DATA soft_drink;
    /* Without an explicit length, beverage takes its length from the first
       DO-list value ('coke', 4 bytes), and 'pepsi', 'fanta' and 'others'
       would be stored truncated as 'peps', 'fant' and 'othe'. */
    LENGTH beverage $ 6;
    DO age='20대', '30대', '40대';
        DO beverage='coke', 'pepsi', 'fanta', 'others';
            INPUT count @@;   /* @@ holds the line so all cells are read in turn */
            OUTPUT;
        END;
    END;
CARDS;
10 14 24 12 13 9 0 8 12 8 0 10
; 
RUN;

/* Chi-square test of association between age and beverage.
   WEIGHT makes PROC FREQ treat count as the cell frequency.
   FISHER: Fisher's exact test is produced automatically for 2x2 tables,
   but must be requested explicitly once there are more categories. */
PROC FREQ DATA=soft_drink;
    TABLES age*beverage / CHISQ NOCOL NOPERCENT EXPECTED FISHER;
    WEIGHT count;
RUN;

분산분석

일원분류분산분석

/* Nitrogen measurements for six strains (A-F), five observations each;
   the trailing @@ lets several strain/nitrogen pairs be read per line. */
DATA clover;
    INPUT strain $ nitrogen @@;
CARDS;
A 19.4 A 32.6 A 27.0 A 32.1 A 33.0
B 17.7 B 24.8 B 27.9 B 25.2 B 24.3
C 17.0 C 19.4 C 9.1 C 11.9 C 15.8
D 20.7 D 21.0 D 20.5 D 18.8 D 18.6
E 14.3 E 14.4 E 11.8 E 11.6 E 14.2
F 17.3 F 19.4 F 19.1 F 16.9 F 20.8
;
RUN;

/* One-way ANOVA of nitrogen on strain, followed by Duncan and Tukey
   multiple comparisons of the strain means at ALPHA=0.10. */
PROC GLM DATA=clover;
    CLASS strain;
    MODEL nitrogen=strain;
    MEANS strain / DUNCAN TUKEY ALPHA=0.10;
RUN;
/* PROC GLM is interactive: RUN executes the statements but leaves the
   procedure active, so end it explicitly. */
QUIT;

이원분류분산분석

TITLE "Two-way ANOVA";

/* Balanced 3x3 layout (city size by design) with three replicates per
   cell; the sales values are read in nested-loop order from the data lines. */
DATA sales;
    /* Without an explicit length, city takes its length from the first
       DO-list value ('Large', 5 bytes) and 'Middle' would be stored
       truncated as 'Middl'. */
    LENGTH city $ 6;
    DO city= 'Large', 'Middle', 'Small';
        DO design='A','B','C';
            DO rep=1,2,3;
                INPUT sales @@;   /* @@ holds the line across iterations */
                OUTPUT;
            END;
        END;
    END;
CARDS;
23 20 21 22 19 20 19 18 21
22 20 19 24 25 22 20 19 22
18 18 16 21 23 20 20 22 24
;
RUN;

/* Two-way ANOVA with interaction, plus the marginal and cell means. */
PROC ANOVA DATA=sales;
    CLASS city design;
    MODEL sales=city design city*design;
    MEANS city design city*design;
RUN;
/* PROC ANOVA is interactive: RUN executes the statements but leaves the
   procedure active, so end it explicitly. */
QUIT;

'STATISTICS > SAS' 카테고리의 다른 글

Code _ Linear Regression  (0) 2020.03.09
Plots  (0) 2020.03.05
SAS Basic  (0) 2020.03.05