본문 바로가기

STATISTICS/SAS

Code _ Linear Regression

단순회귀

/* Build the REPAIR data set from inline records using list input.
   Each record supplies two numeric values: MINUTES, then UNITS.
   Fourteen records follow the DATALINES statement. */
data repair;
    input minutes units;
    datalines;
23 1
29 2
49 3
64 4
74 4
87 5
96 6
97 6
109 7
119 8
149 9
145 9
154 10
166 10
;
run;

/* Correlation between MINUTES and UNITS.
   DATA= is given explicitly so the step does not depend on REPAIR
   happening to be the most recently created data set. */
PROC CORR DATA=repair;
    VAR minutes units;
RUN;

/* Simple linear regression of MINUTES on UNITS.
   DATA= names the input explicitly, and RUN; supplies the step
   boundary that was missing, so the MODEL statement is executed. */
PROC REG DATA=repair;
    MODEL minutes = units;
RUN;

다중회귀

Model-Selection Methods

  • Full Model Fitted (NONE)
  • Forward Selection (FORWARD)
    • SLENTRY = level (default: 0.50)
  • Backward Elimination (BACKWARD)
    • SLSTAY = level (default: 0.10)
  • Stepwise (STEPWISE)
    • SLENTRY / SLSTAY = level (default: 0.15 / 0.15)
  • Maximum R² Improvement (MAXR)
  • Minimum R² Improvement (MINR)
  • R² Selection (RSQUARE)
  • Adjusted R² Selection (ADJRSQ)
  • Mallows’ Cp Selection (CP)
/* Full model: regress Y on all five predictors, no variable selection. */
proc reg data=performance;
    model y = x1-x5;   /* numbered range list: x1 x2 x3 x4 x5 */
run;

/* Subset selection ranked by adjusted R-square; AIC is also reported
   for each candidate model.
   Fix: the PROC option was misspelled "DATAa=", which is a syntax error. */
PROC REG DATA=performance ;
    MODEL y=x1 x2 x3 x4 x5/
        SELECTION=adjrsq aic;
RUN;

/* Subset selection ranked by Mallows' Cp. */
proc reg data=performance;
    model y = x1-x5
        / selection=cp
          best=10;      /* BEST=10: keep the ten best models */
run;

등분산성

/* Rewrite WORKERS in place, adding two derived columns:
     LOGY = natural log of Y
     X2   = X squared
   The statement order is kept so the new columns are appended in the
   same order as before. */
data Workers;
    set Workers;
    logy = log(y);
    x2   = x**2;
run;

/* Regress the log-transformed response on X and its square. */
PROC REG DATA=Workers;
    MODEL logy = x x2;
RUN;

다중공선성

/* Multicollinearity diagnostics for the EEO model:
     TOL    - tolerance of each estimate
     VIF    - variance inflation factors
     COLLIN - collinearity analysis of the predictors */
proc reg data=EEO;
    model achv = fam peer school
        / tol
          vif
          collin;
run;

영향력

/* Fit Y on X1 through X4 and request per-observation influence
   statistics with the INFLUENCE option. */
proc reg data=NYRivers;
    model y = x1-x4
        / influence;
run;
</de_style_placeholder>

/* Turn on ODS Graphics so PROC REG can draw the diagnostic plots. */
ODS GRAPHICS ON;

/* Same model as the INFLUENCE run, now requesting the influence plots:
   Cook's D, RStudent by leverage, DFFITS and DFBETAS.
   PLOTS(LABEL) labels flagged points; the ID statement supplies RIVER
   as the label text. */
proc reg data=NYRivers
         plots(label)=(CooksD
                       RStudentByLeverage
                       DFFITS
                       DFBETAS);
    id river;
    model y = x1-x4;
run;

질적설명변수

S = 급료
X = 경력 연수
E = 교육수준 (1=고졸, 2=대졸, 3=대학원졸)
M = 관리(1=관리책임자, 0=기타)

E_i1 = 1 (i번째 개체가 고졸 ), 0 (그 외)
E_i2 = 1 (i번째 개체가 대졸 ), 0 (그 외)

/* Add 0/1 indicator variables for the levels of E to SALARY:
     E1 = 1 when E equals 1, otherwise 0
     E2 = 1 when E equals 2, otherwise 0
   A SAS comparison evaluates to 1 (true) or 0 (false), so any other
   value of E, including missing, gives 0 in both indicators. */
data salary;
    set salary;
    e1 = (e = 1);
    e2 = (e = 2);
run;

/* One-way frequency tables to check the indicators E1 and E2 against E. */
proc freq data=salary;
    tables e;
    tables e1;
    tables e2;
run;

/* Number the six (E, M) combinations 1 through 6 in CATEGORY.
   The conditions are mutually exclusive, so an IF / ELSE IF chain gives
   the same result as independent IF statements. Rows matching none of
   the combinations keep CATEGORY missing. */
data salary;
    set salary;
    category = .;
    if      e = 1 and m = 0 then category = 1;
    else if e = 1 and m = 1 then category = 2;
    else if e = 2 and m = 0 then category = 3;
    else if e = 2 and m = 1 then category = 4;
    else if e = 3 and m = 0 then category = 5;
    else if e = 3 and m = 1 then category = 6;
run;

가법모형

/* Additive model: S on X, the indicators E1 and E2, and M, with no
   interaction terms.
   VAR lists CATEGORY so it can be used in PLOT even though it is not
   in the model. The PLOT statement draws studentized residuals against
   X and against CATEGORY. */
proc reg data=salary;
    var category;
    model s = x e1 e2 m;
    plot student.*x
         student.*category;
run;

/* Add the products of each indicator with M to SALARY:
     E1M = E1 * M
     E2M = E2 * M */
data salary;
    set salary;
    e1m = e1 * m;
    e2m = e2 * m;
run;

승법모형

/* Interaction terms for the multiplicative model:
     E1M = E1 * M
     E2M = E2 * M */
data salary;
    set salary;
    e1m = e1 * m;
    e2m = e2 * m;
run;

/* Multiplicative model: the additive terms plus the interactions
   E1M and E2M.
   VAR lists CATEGORY so it can be used in PLOT even though it is not
   in the model. The PLOT statement draws studentized residuals against
   X and against CATEGORY. */
proc reg data=salary;
    var category;
    model s = x e1 e2 m e1m e2m;
    plot student.*x
         student.*category;
run;

두 집단 비교

/* Add RACETEST, the product of RACE and TEST, to EMPLOYMENT. */
data employment;
    set employment;
    racetest = race * test;
run;

/* Regress JPERF on TEST, RACE and their product RACETEST, then plot
   studentized residuals against TEST. */
proc reg data=employment;
    model jperf = test race racetest;
    plot student.*test;
run;

'STATISTICS > SAS' 카테고리의 다른 글

비교분석 코드 예제  (0) 2020.03.06
Plots  (0) 2020.03.05
SAS Basic  (0) 2020.03.05