两种SAS代码实现变量的缺失值频数及占比

标签:
sas数据分析数据分析培训数据分析师数据挖掘 |
sas对缺失值的统计,可得出缺失值的频数及占比。以下为详细程序代码:
data tmp;
infile datalines delimiter=",";
length var1 $8.;
length var2 8.;
length var3 $8.;
length var4 8.;
input var1 $ var2 var3 $ var4
@@;
datalines;
A,1,B,2.,3,C,.C,.,.,3
;
run;
如图得到下列数据集
http://www.cda.cn/uploadfile/image/20170331/20170331202904_49210.png
然后统计数据集中缺失值和占比,先为字符型和数值型先分别设定一个format,然后直接对变量进行频数统计,再做一下简单处理,就可得到理想结果。
代码如下
proc format;
value num_f . = "0"
low-high = "1" ;
value $char_f " " = "0"
other = "1" ;
run;
ods output onewayfreqs=tables;
proc freq data= tmp ;
tables _all_ / missing;
format _numeric_ num_f. _character_ $char_f.;
run;
ods output close;
数据集如下:
http://www.cda.cn/uploadfile/image/20170331/20170331202930_47294.png
data miss;
length variable $50;
set tables;
variable = scan(Table,2,"“");
value = max(of F_:);
if value = 0;
keep variable frequency percent;
label variable = "缺失变量名" frequency = "缺失频数" percent = %nrstr("%缺失占比");
run;
http://www.cda.cn/uploadfile/image/20170331/20170331202949_96736.png
以上为第一种方法;
下面用数组的方法进行实现。
data tmp11;
set tmp;
array arr1{*} _NUMERIC_ ;
array arr2{*} _CHARACTER_ ;
length variable $50;
do i = 1 to dim(arr1);
if missing(arr1(i)) then do;
variable =vname(arr1(i));
output;
end;
end;
do j = 1to dim(arr2);
if missing(arr2(j)) then do;
variable = vname(arr2(j));
output;
end;
end;
keep variable;
run;
proc sql noprint;
select count(*) into : N from tmp;
create table miss as
select variable label = "缺失变量名",count(*) as frequency label = "缺失频数",
input(compress(put(calculated frequency / &N.,percent10.2),"%"),best32.) as percent label = %nrstr("%缺失占比")数据分析师培训
from tmp11
group by variable;
quit;