supersasmacro
• 博客等级：
• 博客积分：0
• 博客访问：513,864
• 关注人气：503
• 获赠金笔：0支
• 赠出金笔：0支
• 荣誉徽章：

## sas缺失值missing data详解

(2009-08-10 22:14:16)

### 教育

sas缺失值missing data详解

1 SAS的缺失值

SAS的缺失值分为两类：一类是数值型的缺失值，用(.)表示；另一类是字符型的缺失值，用('')或者(' ')表示。例：

/* Column input: charmiss is read from column 1 and nummiss from column 3. */
/* A blank in either column produces a missing value for that variable.    */
data miss1;
    input charmiss $ 1 nummiss 3;
    /* The data lines must keep their column positions and must not be
       separated by blank lines: with column input a blank line is read
       as a record in which both variables are missing. */
    cards;
A 1
  3
D 4
;
run;

proc print data=miss1;
run;

Obs    charmiss    nummiss

1

.

3

4

/* Read special numeric missing values.                                      */
/* The MISSING statement declares that the characters n, a, r and _ found in */
/* a numeric field stand for the special missing values .N, .A, .R and ._    */
/* instead of being flagged as invalid data.                                 */
data miss2;
    missing n a r _;
    /* Column input: charmiss from column 1, nummiss from columns 3-4. */
    input charmiss $ 1 nummiss 3-4;
    /* Data lines are column-aligned; blank lines between them would be
       read as extra all-missing records. */
    cards;
A -1
A .
B r
  3
D 0
  a
E 6
F n
G _
;
run;

proc print data=miss2;
run;

Obs    charmiss    nummiss

-1

.

R

3

0

A

6

N

_

/* Define a format that labels the ordinary missing value and each special */
/* missing value; non-missing numbers are not listed and print unchanged.  */
proc format;
    value spec
        .  = 'Missing'
        ._ = 'Illegible'
        .R = 'Refused'
        .N = 'Not Done'
        .A = 'Absent'
    ;
run;

/* Print miss2 with the labels applied to nummiss. */
proc print data=miss2;
    format nummiss spec.;
    var charmiss nummiss;
run;

Obs    charmiss     nummiss

-1

Missing

Refused

3

D                0

Absent

6

Not Done

Illegible

2 缺失值的引用

/* Sort every observation by nummiss and write the result to miss3.      */
/* Numeric missing values sort before all numbers, in the order          */
/* ._ , . , .A ... .Z, followed by negative numbers, zero and positives. */
proc sort out=miss3 data=miss2;
    by nummiss;
run;

proc print data=miss3;
run;

Obs    charmiss    nummiss

_

.

A

N

R

-1

0

3

6

/* Drop only the ordinary missing value (.) before sorting.               */
/* Special missing values (._ and .A-.Z) are not equal to . and are kept. */
proc sort data=miss2 out=miss3;
    where nummiss ne .;
    by nummiss;
run;

proc print data=miss3;
run;

Obs    charmiss    nummiss

_

A

N

R

-1

0

3

6

/* Exclude the ordinary missing value and each special missing value */
/* named in the list, leaving only the listed-free observations.     */
proc sort data=miss2(where=(nummiss not in (., ._, .r, .a, .n)))
          out=miss3;
    by nummiss;
run;

proc print data=miss3;
run;

Obs    charmiss    nummiss

-1

0

3

6

/* Keep only observations whose nummiss is missing.                      */
/* .Z is the largest missing value, so "<= .z" matches every missing one. */
proc sort data=miss2(where=(nummiss <= .z)) out=miss3;
    by nummiss;
run;

Obs    charmiss    nummiss

_

.

A

N

R

/* Keep only observations whose nummiss is not missing.                  */
/* Every number is greater than .Z, the largest missing value.           */
proc sort data=miss2(where=(nummiss > .z)) out=miss3;
    by nummiss;
run;

Obs    charmiss    nummiss

-1

0

3

6

/* Keep observations whose character variable charmiss is not blank, */
/* then sort them by nummiss.                                        */
proc sort data=miss2 out=miss3;
    where charmiss ne '';
    by nummiss;
run;

Obs    charmiss    nummiss

_

.

N

R

-1

0

6

/* Two equivalent WHERE operators that keep only non-missing nummiss values. */
/* IS NOT MISSING and IS NOT NULL both reject the ordinary and the special   */
/* numeric missing values.                                                   */
proc sort data=miss2(where=(nummiss is not missing)) out=miss3;
    by nummiss;
run;

proc sort data=miss2(where=(nummiss is not null)) out=miss3;
    by nummiss;
run;  /* step boundary added: the second sort had no RUN statement */

Obs    charmiss    nummiss

-1

0

3

6

3 MISSING和NMISS函数介绍

MISSING：可用于字符型和数值型变量，当变量值缺失时返回1，否则返回0。特殊缺失值（如 .A、.N、.R、._）同样按缺失处理。例如：

/* Keep only observations for which MISSING(nummiss) is true, */
/* then sort them by nummiss.                                  */
proc sort data=miss2 out=miss3;
    where missing(nummiss);
    by nummiss;
run;

Obs    charmiss    nummiss

_

.

A

N

R

NMISS：只用于数值型变量，返回一组变量的值中缺失值的个数。例：

/* Build a small data set of four scores per observation. */
data test;

/* n stores the DATA step iteration counter _N_ for each observation. */
n=_n_;

/* List input: four numeric values (score1-score4) are expected per observation. */
input score1 - score4;

/* NOTE(review): each data line below holds a single value although INPUT */
/* reads four variables; the original multi-column layout of these lines  */
/* appears to have been lost - confirm against the original post.         */
cards;

3

2

2

.

.

1

1

;

run;

/* Count, for each observation, how many of the four scores are missing. */
data countmiss;
    set test;
    miss_c = nmiss(score1, score2, score3, score4);
run;

/* Print the most recently created data set. */
proc print;
run;

Obs      score1    score2    score3    score4    miss_c

2

1

0

3

4

0

1

/* Same count as NMISS, built from MISSING(): each call returns 1 for a */
/* missing value and 0 otherwise, and SUM adds the four indicators.     */
data countmiss;
    set test;
    by n;
    miss_c = sum(
        missing(score1),
        missing(score2),
        missing(score3),
        missing(score4)
    );
run;

/* Total the scores only when none is missing; otherwise set a flag:   */
/* flag=1 when fewer than four scores are missing, flag=2 when all are. */
data test2;
    set countmiss;
    by n;
    select;
        when (miss_c = 0)  total = sum(of score1-score4);
        when (miss_c lt 4) flag = 1;
        when (miss_c = 4)  flag = 2;
        otherwise;  /* no action for any other count */
    end;
run;

Obs      score1    score2    score3    score4    miss_c    total    flag

1

1               1

.

1

2

.

1

4 merge和update缺失值数据

UPDATE：用新数据集的数据更新主表数据

MERGE：将两个数据集合并为一个数据集。

/* Master data set: one lab date per visit.                                 */
/* NOTE(review): the data lines in the post carried only the dates; the     */
/* visit numbers (and the two visits without a date in MISSDT2) are         */
/* reconstructed from the printed MERGE/UPDATE results - confirm against    */
/* the original article.                                                    */
data MISSDT1;
    input visit labdate $20.;
    cards;
1 01JAN2006
2 02JAN2006
3 03JAN2006
4 04JAN2006
5 05JAN2006
;
run;

/* Transaction data set: new lab dates for some visits.                     */
/* A single period in a $w. field is read as a blank (missing) value, so    */
/* visits 2 and 3 have a missing labdate.                                   */
data MISSDT2;
    input visit labdate $20.;
    cards;
1 01FEB2006
2 .
3 .
4 04FEB2006
5 05FEB2006
;
run;

/* Match-merge the two data sets by visit. For a variable present in both, */
/* the value from the data set listed last (missdt2) is kept, even when    */
/* that value is missing.                                                  */
data merged;
    merge missdt1 missdt2;
    by visit;
run;

Obs    visit     labdate

01FEB2006

.

.

04FEB2006

05FEB2006

/* Update the master (missdt1) with the transactions (missdt2) by visit.  */
/* By default a missing value in the transaction data set does not        */
/* replace the existing value in the master data set.                     */
data updated;
    update missdt1 missdt2;
    by visit;
run;

Obs    visit     labdate

01FEB2006

02JAN2006

03JAN2006

04FEB2006

05FEB2006

/* Same update, but UPDATEMODE=NOMISSINGCHECK lets missing values in the */
/* transaction data set overwrite the values in the master data set.     */
data updated;
    update missdt1 missdt2 updatemode=nomissingcheck;
    by visit;
run;

5 SAS中缺失值处理

/*
 * %missing(data)
 *
 * Replaces missing values in the data set named by &data, in place:
 *   - every missing numeric value (ordinary or special) becomes 0;
 *   - every blank character value becomes the character string '0'.
 *
 * Parameter:
 *   data - name of the data set to rewrite (it is both read and overwritten).
 */
%macro missing(data);

    data &data;
        set &data;

        /* The arrays are declared right after SET so that _numeric_ and
           _character_ cover only the variables that came from the input
           data set, not the loop index created below. */
        array _nums  {*} _numeric_;
        array _chars {*} $ _character_;

        do _i = 1 to dim(_nums);
            if missing(_nums[_i]) then _nums[_i] = 0;
        end;

        /* Assign a character literal: assigning the number 0 to a character
           variable goes through an implicit BEST12. conversion, which
           right-aligns the digit and truncates it to blanks in variables
           shorter than 12 bytes. */
        do _i = 1 to dim(_chars);
            if missing(_chars[_i]) then _chars[_i] = '0';
        end;

        drop _i;
    run;

%mend missing;

%missing(cx);

/* Keep only the observations in which all five scores are present. */
data AnalysisData;
    set RawData;
    array score {5} score1-score5;

    /* DELETE stops processing the current observation as soon as one
       missing score is found. */
    do _i = 1 to dim(score);
        if missing(score[_i]) then delete;
    end;

    /* The loop index is a work variable only; do not write it to the output. */
    drop _i;
run;

/* Alternative rule: discard an observation only when more than two of */
/* its five scores are missing.                                        */
data AnalysisData;
    set RawData;
    if nmiss(of score1-score5) > 2 then delete;
run;

6 缺失值处理要注意的地方

/* Demonstrates how missing values behave in arithmetic, in the SUM */
/* function and in comparisons. Results are written to the log.     */
data _null_;
    a = .;
    b = 0;
    c = -7;
    d = 99;

    /* The + operator propagates missing values: the result is missing. */
    add = a + b + c + d;
    put '缺失值与非缺失值相加,结果为缺失值 ' add= ;

    /* SUM ignores missing arguments and adds the non-missing ones. */
    sum = sum(a, b, c, d);
    put '缺失值与非缺失值用sum函数,结果为非缺失值之和:' sum= ;

    /* When every argument is missing, SUM returns a missing value. */
    summiss = sum(., a);
    put '缺失值求sum函数,结果为缺失值 ' summiss= ;

    /* Adding a literal 0 guarantees a non-missing result. */
    sumzero = sum(0, ., a);
    put '0和缺失值求sum函数,结果为0 ' sumzero= ;

    /* Check how a missing value compares with 0. */
    if a < 0 then
        put '缺失值小于0' ;
    else if a > 0 then
        put '缺失值大于0' ;
run;

0和缺失值求sum函数,结果为0 sumzero=0

MISSING! - Understanding and Making the Most of Missing Data

http://www2.sas.com/proceedings/sugi31/025-31.pdf

Tools of Miss-Calculation: Managing Missing Values with SAS

http://www2.sas.com/proceedings/forum2008/082-2008.pdf

Chapter 1 A Collection of Useful Tips

http://www.sas.com/service/doc/pubcat/chaps/55513.pdf

0