在hadoop&hive编程中15个实用perl脚本
(2014-12-25 15:15:40)
标签:
股票 |
分类: 数据处理 |
##获取系统时间
##获取N天前日期
##获取月末日期
##获取月初日期
##获取上月末日期
##获取上月初日期
##两个日期之差
##建立hive表
##truncate hive表
##运行hadoop命令
##运行hive命令
##运行hive命令并返回命令执行结果
##设置hive命令运行基础header
##设置hive压缩参数
##清理hdfs
#!/usr/bin/perl -w
#——————————————————-
# author wangli.lee
# create 2012-08-01
# type youni
#——————————————————-
package
cmn;
use strict;
use warnings;
use POSIX;
## Get the current system time.
# Returns the local time formatted as "YYYY-MM-DD HH:MM:SS" (also prints it).
# Fix: the pasted sprintf format ("u-u-u u:u:u") had its '%' specifiers
# stripped and used curly quotes, so the original could not compile/run.
sub getSystemTime
{
    my @time = (localtime)[5, 4, 3, 2, 1, 0];    # year, mon, mday, hour, min, sec
    $time[0] += 1900;                            # localtime year is years since 1900
    $time[1] += 1;                               # localtime month is 0-based
    my $nowtime = sprintf("%04d-%02d-%02d %02d:%02d:%02d", @time);
    print("${nowtime}\n");
    return ${nowtime};
}
## Get the date N days before a given date.
# $date:   "yyyy-mm-dd"
# $offset: number of days to go back (negative moves forward)
# Returns "yyyy-mm-dd".
# Fix: restored the mangled quotes/format, and let mktime() normalize an
# out-of-range mday instead of subtracting 86400*offset seconds — the
# seconds arithmetic returns the wrong date across DST transitions.
sub getOffsetDay {
    my ($date, $offset) = @_;
    my ($year, $month, $day) = split '-', $date;
    my ($d, $m, $y) =
        (localtime(mktime(0, 0, 0, $day - $offset, $month - 1, $year - 1900)))[3, 4, 5];
    my $time_start = sprintf "%04d-%02d-%02d", $y + 1900, $m + 1, $d;
    return $time_start;
}
## Get the last day of the month containing a given date.
# $date: "yyyy-mm-dd"
# Returns "yyyy-mm-dd" of the month's final day (handles leap years).
# Fix: the original probed day 28/29/30/31 one by one with mangled quotes
# and stripped sprintf formats.  mktime() normalizes mday 0 of the *next*
# month to the last day of the current month, which gives the same answer
# in one step.
sub getMonthEnd {
    my ($date) = @_;
    my ($year, $month, $day) = split '-', $date;
    # mon is 0-based, so passing $month selects the next month; mday 0
    # then rolls back to the final day of $month (December rolls into
    # January of the next year correctly).
    my ($d, $m, $y) =
        (localtime(mktime(0, 0, 0, 0, $month, $year - 1900)))[3, 4, 5];
    my $time_start = sprintf "%04d-%02d-%02d", $y + 1900, $m + 1, $d;
    return $time_start;
}
## Get the first day of the month containing a given date.
# $date: "yyyy-mm-dd"
# Returns "yyyy-mm-01".
# Fix: the pasted code used an en-dash for subtraction ($day – 1) and a
# stripped sprintf format.  Since the result is always day 1 of the same
# year/month, the mktime round-trip is unnecessary.
sub getMonthBegin {
    my ($date) = @_;
    my ($year, $month, $day) = split '-', $date;
    my $time_start = sprintf "%04d-%02d-01", $year, $month;
    return $time_start;
}
## Get the last day of the month BEFORE the one containing a given date.
# $date: "yyyy-mm-dd"
# Returns "yyyy-mm-dd" of the previous month's final day.
# Fix: restored mangled quotes/format; uses mktime mday-0 normalization
# (mday 0 of the current month is the last day of the previous month)
# instead of DST-unsafe 86400*day subtraction.
sub getLastMonthEnd {
    my ($date) = @_;
    my ($year, $month, $day) = split '-', $date;
    my ($d, $m, $y) =
        (localtime(mktime(0, 0, 0, 0, $month - 1, $year - 1900)))[3, 4, 5];
    my $time_start = sprintf "%04d-%02d-%02d", $y + 1900, $m + 1, $d;
    return $time_start;
}
## Get the first day of the month BEFORE the one containing a given date.
# $date: "yyyy-mm-dd"
# Returns "yyyy-mm-01" of the previous month.
# Fix: restored mangled quotes/format; mday 0 of the current month
# normalizes to the previous month (year rollover included), then the
# day is fixed to 1.
sub getLastMonthBegin {
    my ($date) = @_;
    my ($year, $month, $day) = split '-', $date;
    my ($d, $m, $y) =
        (localtime(mktime(0, 0, 0, 0, $month - 1, $year - 1900)))[3, 4, 5];
    my $time_start = sprintf "%04d-%02d-01", $y + 1900, $m + 1;
    return $time_start;
}
## Difference in whole days between two dates.
# $startdate, $run_date: "yyyy-mm-dd"
# Returns $run_date - $startdate in days (negative if $run_date is earlier).
# Fix: the pasted subtraction used an en-dash, which is a syntax error.
# Also rounds the quotient: across a DST transition a "day" is 23 or 25
# hours, so the raw division is not an integer.
sub getDateMinus {
    my ($startdate, $run_date) = @_;
    my ($year, $month, $day) = split '-', $startdate;
    my $startdate_time = mktime(0, 0, 0, $day, $month - 1, $year - 1900);
    ($year, $month, $day) = split '-', $run_date;
    my $rundate_time = mktime(0, 0, 0, $day, $month - 1, $year - 1900);
    my $day_mius = floor(($rundate_time - $startdate_time) / 86400 + 0.5);
    return $day_mius;
}
## Build a Hive DDL script that (re)creates a partitioned RCFILE table.
# $tn:   table name
# $col:  arrayref of column names (all typed "string")
# $part: arrayref of partition column names (all typed "string")
# Returns the multi-statement HiveQL text; caller runs it via RunHiveCMD.
# Fix: restored curly quotes in the join() calls; the field/line
# terminators were mangled by the paste — reconstructed as Hive's usual
# '\001' and '\n' literals (escaped so qq() does not interpolate them).
# NOTE(review): original showed '01' / an interpolated newline — confirm
# the intended delimiters against an existing table's DDL.
sub createTable
{
    my ($tn, $col, $part) = @_;
    my $c = join(' string,', @$col) . " string";
    my $p = join(' string,', @$part) . " string";
    my $table_desc = qq(
use p_sdo_data_etl;
set hive.business.name='youni';
drop table $tn;
CREATE TABLE IF NOT EXISTS $tn(
$c
) partitioned by ($p)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\001'
LINES TERMINATED BY '\\n' STORED AS RCFILE
location '/group/p_sdo_data/p_sdo_data_etl/youni/$tn/';
);
    return $table_desc;
}
## Build a HiveQL script that "truncates" one partition of a table by
## overwriting it with rows selected from an empty partition.
# $table_name: Hive table in p_sdo_data_etl
# $clean_date: partition value (yyyy-mm-dd) to wipe
# Returns the HiveQL text; caller runs it.
# How it works: `desc` output is split on "deserializer" to count the
# table's columns, then a matching "1,1,...,1" select list is built so the
# INSERT OVERWRITE schema lines up.  Selecting from pt='1900-00-00'
# (a partition assumed to be empty) yields zero rows, emptying the target.
# Fix: restored curly quotes in the backtick command, the ',1' literal
# and the '1900-00-00' literal — the pasted version could not compile.
sub cleanTable {
    my ($table_name, $clean_date) = @_;    ## yyyy-mm-dd
    my $table_info_result = `hive -S -e "use p_sdo_data_etl;desc $table_name";`;
    my @fields = split /deserializer/, $table_info_result;
    print $#fields;
    my $columns = 1;
    for (2 .. $#fields) { $columns = $columns . ',1'; }
    print $columns;
    my $sql = qq(
use p_sdo_data_etl;
set hive.business.name='youni';
insert overwrite table $table_name partition (pt = '$clean_date')
select $columns
from $table_name
where pt = '1900-00-00'
);
    return $sql;
}
## Run a hadoop shell command after sourcing the environment profiles.
# $cmd:         shell command text to execute
# $exit_if_err: "Y" to terminate the whole script on a non-zero exit
# Fix: restored curly quotes in the print and the eq "Y" comparison.
# NOTE(review): system() passes the multi-line script to /bin/sh; `source`
# is a bash-ism — confirm /bin/sh is bash on the target hosts.
sub Runhadoopcmd {
    my ($cmd, $exit_if_err) = @_;
    my $command = qq(
source /etc/profile
source /etc/bashrc
${cmd}
);
    print "hadoop command: " . $command . "\n";
    # system() returns non-zero on failure; only abort when asked to.
    if (system($command) and $exit_if_err eq "Y") { print "analy error\n"; exit(1); }
}
## Run a HiveQL string via the hive CLI; exits the script on failure.
# $s:           HiveQL text (interpolated into a double-quoted -e argument)
# $exit_if_err: accepted but unused — failure always exits with code 3.
# Fix: restored %ENV key quotes and the --hiveconf flags (the paste turned
# the double hyphens into en-dashes, which hive would reject).
sub RunHiveCMD {
    my ($s, $exit_if_err) = @_;
    my $hadoop_home = $ENV{'HADOOP_HOME'};
    my $java_home   = $ENV{'JAVA_HOME'};
    my $hive_home   = $ENV{'HIVE_HOME'};
    my $commandHive = qq(
export JAVA_HOME=${java_home}
export HADOOP_HOME=${hadoop_home}
export HIVE_HOME=${hive_home}
export PATH=\$JAVA_HOME/bin:\$HADOOP_HOME/bin:\$HIVE_HOME/bin:\$PATH
$hive_home/bin/hive --hiveconf user.group=p_sdo_data --hiveconf mapred.job.queue.name=cug_p_sdo_data --hiveconf mapred.fairscheduler.pool=cug_p_youni_sdo_data -e "$s"
);
    print "hive command: " . $commandHive . "\n";
    if (system($commandHive)) {
        print "map to hive table error\n";
        exit(3);
    }
}
## Run a HiveQL string in silent mode (-S) and return its stdout.
# $s:           HiveQL text (interpolated into a double-quoted -e argument)
# $exit_if_err: accepted but unused — no exit-on-failure here, unlike
#               RunHiveCMD; the caller inspects the captured output.
# Returns the command's stdout as one string.
# Fix: restored %ENV key quotes and the --hiveconf double hyphens that the
# paste turned into en-dashes.
sub RunHiveCMD_S {
    my ($s, $exit_if_err) = @_;
    my $hadoop_home = $ENV{'HADOOP_HOME'};
    my $java_home   = $ENV{'JAVA_HOME'};
    my $hive_home   = $ENV{'HIVE_HOME'};
    my $commandHive = qq(
export JAVA_HOME=${java_home}
export HADOOP_HOME=${hadoop_home}
export HIVE_HOME=${hive_home}
export PATH=\$JAVA_HOME/bin:\$HADOOP_HOME/bin:\$HIVE_HOME/bin:\$PATH
$hive_home/bin/hive -S --hiveconf user.group=p_sdo_data --hiveconf mapred.job.queue.name=cug_p_sdo_data --hiveconf mapred.fairscheduler.pool=cug_p_youni_sdo_data -e "$s"
);
    my $rlt;
    print "hive command: " . $commandHive . "\n";
    $rlt = `$commandHive`;    # backticks capture stdout for the caller
    return $rlt;
}
## Standard HiveQL prologue: select the ETL database, tag the job, and
## enable parallel stage execution.  Prepend to generated scripts.
# Returns the header text.
# Fix: restored the curly quotes around 'youni' that the paste introduced.
sub basicHeader {
    my $sql_header = qq(
use p_sdo_data_etl;
set hive.business.name='youni';
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=8;
);
    return $sql_header;
}
## HiveQL header enabling output/intermediate compression:
## Gzip for final job output, LZO for map output and intermediate data.
# Returns the header text; append after basicHeader() in generated scripts.
# Fix: the paste re-wrapped several `set` statements mid-token; the
# statements are restored to one line each (Hive tolerates the whitespace,
# but the source should read cleanly).
sub compressHeader {
    my $sql_header = qq(
set mapred.output.compress = true;
set mapred.output.compression.codec = org.apache.hadoop.io.compress.GzipCodec;
set mapred.output.compression.type = BLOCK;
set mapred.compress.map.output = true;
set mapred.map.output.compression.codec = org.apache.hadoop.io.compress.LzoCodec;
set hive.exec.compress.output = true;
set hive.exec.compress.intermediate = true;
set hive.intermediate.compression.codec = org.apache.hadoop.io.compress.LzoCodec;
);
    return $sql_header;
}
## Clean HDFS data for one table/date by delegating to an external script.
# $table_name:  table whose HDFS data is cleaned
# $clean_date:  partition date (yyyy-mm-dd) passed to the script
# Returns the script's stdout.
# Fix: restored curly quotes in the print and rejoined the command line the
# paste had wrapped mid-invocation.
sub cleanHDFS {
    my ($table_name, $clean_date) = @_;    ## yyyy-mm-dd
    my $commandhdfs = qq(
source /etc/profile
source /etc/bashrc
/home/horae/shell-horae/hivesql/youni/hdfs_oper.sh "$table_name" "$clean_date"
);
    my $rlt;
    print "hive command: " . $commandhdfs . "\n";
    $rlt = `$commandhdfs`;
    return $rlt;
}
1;