SAS中的HASH语句及其常见应用
(2014-03-27 19:55:16)http://blog.csdn.net/yugao1986/article/details/6929892
首先,使用HASH对象有以下几个优点:
• Key lookup occurs in memory, avoiding costly disk
access.
• When a key lookup occurs, only a small subset of the records are
searched.
• The key and data parts of a record can consist of more than one
value, removing the need to format and concatenate values to
construct the key and data parts.
• The hash object allocates memory as records are added. That is,
the hash object only allocates as much memory as it needs and the
number of records that can be stored is only limited by the amount
of memory available to SAS.
• When loading a hash object from a data set, the data set need not
be sorted or indexed.
下面介绍如何定义hash对象
The hash table methods are the functions it can perform, namely:
1.HASH OBJECT LOAD AND LOOKUP
/* Lookup table: one row per participant with gender and assigned treatment. */
/* It is loaded into a hash object by the RESULTS step.                      */
data participants;
  input name $ gender :$1. treatment $;
  datalines;
John M Placebo
Ronald M Drug-A
Barbara F Drug-B
Alice F Drug-A
;
/* Weigh-in data: each input line holds a date followed by four        */
/* (name, weight) pairs. One observation is written per pair; the loop */
/* counter i is dropped from the output data set.                      */
data weight(drop=i);
  input date :DATE9. @;        /* trailing @ holds the line for the pairs */
  do i = 1 to 4;
    input name $ weight @;
    output;
  end;
  datalines;
05May2006 Barbara 125 Alice 130 Ronald 170 John 160
04Jun2006 Barbara 122 Alice 133 Ronald 168 John 155
;
/* Join WEIGHT to PARTICIPANTS through a hash lookup keyed on NAME. */
data results;
  /* Host variables for the hash key/data must exist in the PDV. */
  length name treatment $ 8 gender $ 1;

  /* Build and load the hash only once, on the first iteration. */
  if _N_ = 1 then do;
    declare hash h(dataset: 'participants');
    h.defineKey('name');
    h.defineData('gender', 'treatment');
    h.defineDone();
  end;

  set weight;

  /* find() returns 0 when NAME is in the hash; GENDER and TREATMENT */
  /* are then filled in. Rows without a match are not output.        */
  if h.find() = 0 then output;
run;
/* List the joined rows, showing DATE with the DATE9. format. */
proc print data=results;
  format date DATE9.;
  var date name gender weight treatment;
run;
2.ADD, REPLACE, AND OUTPUT
/* One row per goal: the scoring player and when it was scored.   */
/* The & modifier lets WHEN contain single embedded blanks, e.g.  */
/* "1st 01:24".                                                   */
data goals;
  input player $ when & $9.;
  datalines;
Hill 1st 01:24
Jones 1st 09:43
Santos 1st 12:45
Santos 2nd 00:42
Santos 2nd 03:46
Jones 2nd 11:15
;
/* Collapse GOALS to one row per player, concatenating the goal times */
/* into GOALS_LIST, and write the result with the hash OUTPUT method. */
data _null_;
  length goals_list $ 64;

  /* Create the (initially empty) hash once. */
  if _N_ = 1 then do;
    declare hash h();
    h.defineKey('player');
    h.defineData('player', 'goals_list');
    h.defineDone();
  end;

  set goals end=done;

  if h.find() ^= 0 then do;
    /* First goal seen for this player: start a new list. */
    goals_list = when;
    h.add();
  end;
  else do;
    /* find() loaded the player's current list; append this goal. */
    goals_list = trim(goals_list) || ', ' || when;
    h.replace();
  end;

  /* After the last input row, dump the hash to a data set. */
  if done then h.output(dataset: 'goal_summary');
run;
/* Show the per-player summary written by the hash OUTPUT method. */
proc print data=goal_summary;
run;
HASH的应用
1.HITER: HASH ITERATOR OBJECT
/* Ten (k, sat) pairs in arbitrary key order, used by the hash */
/* iterator example.                                           */
data sample;
  input k sat;
  cards;
185 01
971 02
400 03
260 04
922 05
970 06
543 07
532 08
050 09
067 10
;
run;
/* Load SAMPLE into a hash ordered ascending by K, then print the  */
/* entries to the log first in ascending and then in descending    */
/* order using a hash iterator.                                    */
data _null_;
  /* Never executed; only defines K and SAT in the PDV at compile time. */
  if 0 then set sample;

  dcl hash hh (dataset: 'sample', hashexp: 8, ordered: 'a');
  dcl hiter hi ('hh');
  hh.DefineKey ('k');
  hh.DefineData ('sat', 'k');
  hh.DefineDone ();

  /* BY 0 keeps the loop going until the iterator call returns non-zero. */
  do rc = hi.first() by 0 while (rc = 0);
    put k= z3. +1 sat= z2.;
    rc = hi.next();
  end;

  put 13 * '-';

  do rc = hi.last() by 0 while (rc = 0);
    put k= z3. +1 sat= z2.;
    rc = hi.prev();
  end;

  stop;
run;
/* Sort two parallel temporary arrays by the values of A, dropping   */
/* duplicate keys, by passing them through an ordered hash object.   */
data _null_;
  array a (-100000 : 100000) _temporary_;
  array b (-100000 : 100000) _temporary_;

  /* Fill A with pseudo-random integers and B with the element index. */
  do j = lbound(a) to hbound(a);
    a (j) = ceil (ranuni (1) * 1e5);
    b (j) = j;
  end;

  length ka 8 sb 8;
  declare hash hh (hashexp: 0, ordered: 'a');
  declare hiter hi ('hh');
  hh.DefineKey ('ka');
  hh.DefineData ('ka', 'sb');
  hh.DefineDone ();

  /* Insert each key once; later occurrences of a key are skipped, */
  /* so the satellite value of the first occurrence is kept.       */
  do j = lbound(a) to hbound(a);
    ka = a(j);
    if hh.check () = 0 then continue;
    sb = b(j);
    n_unique + 1;
    hh.add ();
  end;

  * sort ascending ;
  /* Only as many leading elements as there are hash entries are */
  /* overwritten; the rest of A and B keep their earlier values. */
  rc = hi.first();
  do j = lbound(a) by 1 while (rc = 0);
    a (j) = ka;
    b (j) = sb;
    rc = hi.next();
  end;

  * sort descending ;
  rc = hi.last();
  do j = lbound(a) by 1 while (rc = 0);
    a (j) = ka;
    b (j) = sb;
    rc = hi.prev();
  end;

  stop;
run;
2.Summarizing Without Summary
/* Test data: for each K1 from 1e6 down to 1, with K2 as its        */
/* zero-padded character form, write between 1 and 6 rows with NUM  */
/* counting up from 1.                                              */
data input;
  do k1 = 1e6 to 1 by -1;
    k2 = put(k1, z7.);
    do num = 1 to ceil (ranuni(1) * 6);
      output;
    end;
  end;
run;
/* Reference result: sum NUM within each (K1, K2) combination.    */
/* NWAY keeps only the full class combination; drop=_: removes    */
/* the automatic variables whose names start with an underscore.  */
proc summary data = input nway;
  class k1 k2;
  var num;
  output out = summ_sum (drop = _:) sum = sum;
run;
/* Same aggregation done with a hash object: accumulate SUM per */
/* (K1, K2) key and write the table to HASH_SUM at the end.     */
data _null_;
  /* Never executed; only defines the INPUT variables in the PDV. */
  if 0 then set input;

  dcl hash hh (hashexp: 16);
  hh.definekey ('k1', 'k2');
  hh.definedata ('k1', 'k2', 'sum');
  hh.definedone ();

  do until (eof);
    set input end = eof;
    /* Key not in the hash yet: start its total at zero. Otherwise */
    /* find() has loaded the running total into SUM.               */
    if hh.find () ne 0 then sum = 0;
    sum + num;
    hh.replace ();
  end;

  rc = hh.output (dataset: 'hash_sum');
run;
3.SPLITTING A SAS FILE DYNAMICALLY USING THE .OUTPUT() METHOD
/* Transaction sample (id, transaction id, amount), entered in ID order. */
data sample;
  input id transid amt;
  cards;
1 11 40
1 11 26
1 12 97
2 13 5
2 13 7
2 14 22
3 14 1
4 15 43
4 15 81
5 11 86
5 11 85
;
run;
/* Build two macro variables from the ID values found in SAMPLE:       */
/*   DSLIST   - output data set names:   OUT1 OUT2 ...                 */
/*   WHENLIST - routing clauses:         WHEN (1) OUTPUT OUT1; ...     */
/* DISTINCT is applied to both queries so that each ID contributes a   */
/* single name and a single WHEN clause rather than one per input row. */
proc sql noprint;
  select distinct 'OUT' || put (id, best.-l)
    into : dslist
    separated by ' '
    from sample;

  select distinct 'WHEN (' || put (id, best.-l) || ') OUTPUT OUT' || put (id, best.-l)
    into : whenlist
    separated by ';'
    from sample;
quit;
/* Sort SAMPLE in place by ID, then TRANSID, then AMT. */
proc sort data = sample;
  by id transid amt;
run;
/* Conventional split: the output data set names and the WHEN clauses */
/* are supplied by the macro variables DSLIST and WHENLIST.           */
data &dslist;
  set sample;
  select (id);
    &whenlist;
    otherwise;   /* rows matching no WHEN clause are not written */
  end;
run;
/* Dynamic split of SAMPLE, which must be sorted by ID: each BY group */
/* is loaded into a hash and written to its own data set OUT<id>.     */
data _null_;
  /* The hash is declared without an _N_ = 1 guard, so the declaration */
  /* is executed again for every BY group.                             */
  dcl hash hid (ordered: 'a');
  /* _N_ is part of the key so rows with equal id/transid/amt still */
  /* get distinct keys.                                             */
  hid.definekey ('id', 'transid', 'amt', '_n_');
  hid.definedata ('id', 'transid', 'amt');
  hid.definedone ();

  /* Read one whole BY group per DATA step iteration. */
  do _n_ = 1 by 1 until (last.id);
    set sample;
    by id;
    hid.add();
  end;

  /* The data set name is built at run time from the current ID. */
  hid.output (dataset: 'OUT' || put (id, best.-l));
run;
/* The same transaction sample, this time in no particular order. */
data sample;
  input id transid amt;
  cards;
5 11 86
2 14 22
1 12 97
3 14 1
4 15 43
2 13 5
2 13 7
1 11 40
4 15 81
5 11 85
1 11 26
;
run;
/* Dynamic split of unsorted input using a hash of hashes: HOH maps  */
/* each ID to its own hash HH, which collects that ID's rows. At the */
/* end every inner hash is written to a data set named out<id>.      */
data _null_;
  dcl hash hoh (ordered: 'a');
  dcl hiter hih ('hoh');
  hoh.definekey ('id');
  hoh.definedata ('id', 'hh');
  hoh.definedone ();

  /* Declare HH as a hash-typed variable only; instances are created */
  /* on demand with _NEW_ below.                                     */
  dcl hash hh ();

  do _n_ = 1 by 1 until (eof);
    set sample end = eof;

    /* First row for this ID: create its inner hash and register it. */
    /* Otherwise find() has already pointed HH at the existing one.  */
    if hoh.find () ne 0 then do;
      hh = _new_ hash (ordered: 'a');
      /* _N_ in the key keeps rows with the same id/transid apart. */
      hh.definekey ('id', 'transid', '_n_');
      hh.definedata ('id', 'transid', 'amt');
      hh.definedone ();
      hoh.replace ();
    end;

    hh.replace();
  end;

  /* Walk the outer hash; each step loads ID and HH for one group. */
  /* NEXT is used for the initial call as well as for advancing.   */
  do rc = hih.next() by 0 while (rc = 0);
    hh.output (dataset: 'out' || put (id, best.-L));
    rc = hih.next();
  end;

  stop;
run;
声明:本文主要摘录自《Hash Component Objects: Dynamic Data Storage and Table Look-Up》和《Data Step Hash Objects as Programming Tools》。
相关文献:
《How to Implement the SAS® DATA Step Hash Object》
《An Introduction to SAS® Hash Programming Techniques》
《Getting Started with the DATA Step Hash Object》

加载中…