
15 Practical Perl Scripts for Hadoop & Hive Programming

(2014-12-25 15:15:40)
Tags: Stocks

Category: Data Processing

## Get the system time
## Get the date N days ago
## Get the last day of the month
## Get the first day of the month
## Get the last day of the previous month
## Get the first day of the previous month
## Difference between two dates
## Create a Hive table
## Truncate a Hive table
## Run a Hadoop command
## Run a Hive command
## Run a Hive command and return its output
## Set the basic header for Hive commands
## Set Hive compression parameters
## Clean up HDFS

#!/usr/bin/perl -w
#-------------------------------------------------------
# author wangli.lee
# create 2012-08-01
# type   youni
#-------------------------------------------------------

package cmn;
use strict;
use warnings;
use POSIX;

## Get the system time
sub getSystemTime
{
    my @time = (localtime)[5,4,3,2,1,0];

    $time[0] += 1900;
    $time[1] += 1;

    my $nowtime = sprintf("%04u-%02u-%02u %02u:%02u:%02u", @time);

    print("${nowtime}\n");

    return ${nowtime};
}
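A quick usage sketch (the timestamp shown is illustrative):

# Hypothetical usage: prints the current timestamp and returns it,
# e.g. "2014-12-25 15:15:40".
my $now = cmn::getSystemTime();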

## Get the date N days ago
sub getOffsetDay {
    my ($date, $offset) = @_;    ## yyyy-mm-dd
    my ($year, $month, $day) = split '-', $date;
    my $start = mktime(0,0,0, $day, $month-1, $year-1900);
    ($day, $month, $year) = (localtime($start - 86400*${offset}))[3,4,5];
    my $time_start = sprintf "%04d-%02d-%02d", $year+1900, $month+1, $day;
    return $time_start;
}
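Usage sketch (dates are illustrative):

# Hypothetical usage: step back N days from a yyyy-mm-dd date.
my $yesterday = cmn::getOffsetDay('2014-12-25', 1);   # '2014-12-24'
my $last_week = cmn::getOffsetDay('2014-12-25', 7);   # '2014-12-18'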

## Get the last day of the month
sub getMonthEnd {
    my ($date) = @_;    ## yyyy-mm-dd
    my $time_start = '1900-01-01';
    my ($year, $month, $day) = split '-', $date;

    ## Probe candidate month ends: the first day whose following day
    ## falls in a different month is the last day of this month.
    foreach my $d (28, 29, 30, 31) {
        my $start_com = mktime(0,0,0, $d, $month-1, $year-1900);
        my ($day_com, $month_com, $year_com) = (localtime($start_com + 86400*1))[3,4,5];
        if (($month-1) != $month_com) {
            $time_start = sprintf "%04d-%02d-%02d", $year, $month, $d;
            return $time_start;
        }
    }

    return $time_start;
}
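A usage sketch, including a leap-year February:

# Hypothetical usage of the month-end probe.
my $eom  = cmn::getMonthEnd('2014-02-10');   # '2014-02-28'
my $leap = cmn::getMonthEnd('2012-02-10');   # '2012-02-29'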

## Get the first day of the month
sub getMonthBegin {
    my ($date) = @_;    ## yyyy-mm-dd
    my ($year, $month, $day) = split '-', $date;
    my $offset = $day - 1;
    my $start = mktime(0,0,0, $day, $month-1, $year-1900);
    ($day, $month, $year) = (localtime($start - 86400*${offset}))[3,4,5];
    my $time_start = sprintf "%04d-%02d-%02d", $year+1900, $month+1, 1;
    return $time_start;
}

## Get the last day of the previous month
sub getLastMonthEnd {
    my ($date) = @_;    ## yyyy-mm-dd
    my ($year, $month, $day) = split '-', $date;
    my $offset = $day;
    my $start = mktime(0,0,0, $day, $month-1, $year-1900);
    ($day, $month, $year) = (localtime($start - 86400*${offset}))[3,4,5];
    my $time_start = sprintf "%04d-%02d-%02d", $year+1900, $month+1, $day;
    return $time_start;
}

## Get the first day of the previous month
sub getLastMonthBegin {
    my ($date) = @_;    ## yyyy-mm-dd
    my ($year, $month, $day) = split '-', $date;
    my $offset = $day;
    my $start = mktime(0,0,0, $day, $month-1, $year-1900);
    ($day, $month, $year) = (localtime($start - 86400*${offset}))[3,4,5];
    my $time_start = sprintf "%04d-%02d-%02d", $year+1900, $month+1, 1;
    return $time_start;
}
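A combined usage sketch for the three helpers above (dates illustrative):

my $bom = cmn::getMonthBegin('2014-12-25');      # '2014-12-01'
my $lme = cmn::getLastMonthEnd('2014-12-25');    # '2014-11-30'
my $lmb = cmn::getLastMonthBegin('2014-12-25');  # '2014-11-01'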

## Difference between two dates, in days
sub getDateMinus {
    my ($startdate, $run_date) = @_;    ## yyyy-mm-dd

    my ($year, $month, $day) = split '-', $startdate;
    my $startdate_time = mktime(0,0,0, $day, $month-1, $year-1900);

    ($year, $month, $day) = split '-', $run_date;
    my $rundate_time = mktime(0,0,0, $day, $month-1, $year-1900);

    my $day_minus = ($rundate_time - $startdate_time) / 86400;

    return $day_minus;
}
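Usage sketch; note that because mktime works in local time, the result can be fractional when a DST change falls between the two midnights:

my $days = cmn::getDateMinus('2014-12-01', '2014-12-25');   # 24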

## Create a Hive table
sub createTable
{
    my ($tn, $col, $part) = @_;
    my $c = join(' string,', @$col) . " string";
    my $p = join(' string,', @$part) . " string";

    #print $c."\n";
    #print $p."\n";

    my $table_desc = qq(

use p_sdo_data_etl;
set hive.business.name='youni';

drop table $tn;

CREATE TABLE IF NOT EXISTS $tn(
$c
) partitioned by ($p)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\\001'
LINES TERMINATED BY '\\n' STORED AS RCFILE
location '/group/p_sdo_data/p_sdo_data_etl/youni/$tn/';

);

    return $table_desc;
}
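A usage sketch; the table name youni_event_log and its columns are hypothetical, and RunHiveCMD is defined further below:

# Hypothetical usage: build DDL for a two-column table partitioned by pt.
my @cols  = ('uid', 'event');
my @parts = ('pt');
my $ddl = cmn::createTable('youni_event_log', \@cols, \@parts);
cmn::RunHiveCMD($ddl, 'Y');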

## Truncate a Hive table partition
sub cleanTable {

    my ($table_name, $clean_date) = @_;    ## yyyy-mm-dd

    my $table_info_result = `hive -S -e "use p_sdo_data_etl;desc $table_name";`;

    ## Each column of the table shows up as one "from deserializer" line.
    my @fields = split /deserializer/, $table_info_result;

    print $#fields;

    my $columns = 1;

    for (2..$#fields)
    { $columns = $columns . ',1'; }

    print $columns;

    ## The WHERE clause matches no rows, so the INSERT OVERWRITE
    ## rewrites the partition with an empty result set.
    my $sql = qq(
use p_sdo_data_etl;
set hive.business.name='youni';

insert overwrite table $table_name partition (pt = '$clean_date')
select $columns
from $table_name
where pt = '1900-00-00'

);

    return $sql;
}
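Usage sketch, again with the hypothetical table name:

# Empties the pt='2014-12-25' partition of the (hypothetical) table.
my $truncate_sql = cmn::cleanTable('youni_event_log', '2014-12-25');
cmn::RunHiveCMD($truncate_sql, 'Y');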

## Run a Hadoop command
sub Runhadoopcmd {
    my ($cmd, $exit_if_err) = @_;

    my $command = qq(

source /etc/profile
source /etc/bashrc

${cmd}

);
    print "hadoop command: " . $command . "\n";
    if (system($command) and $exit_if_err eq "Y") { print "analysis error\n"; exit(1); }
}
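Usage sketch (the path is the warehouse directory used by createTable above):

# List the warehouse directory; exit the script on failure because of the 'Y' flag.
cmn::Runhadoopcmd('hadoop fs -ls /group/p_sdo_data/p_sdo_data_etl/youni/', 'Y');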

## Run a Hive command
sub RunHiveCMD {
    my ($s, $exit_if_err) = @_;

    my $hadoop_home = $ENV{'HADOOP_HOME'};
    my $java_home   = $ENV{'JAVA_HOME'};
    my $hive_home   = $ENV{'HIVE_HOME'};

    my $commandHive = qq(
export JAVA_HOME=${java_home}
export HADOOP_HOME=${hadoop_home}
export HIVE_HOME=${hive_home}
export PATH=\$JAVA_HOME/bin:\$HADOOP_HOME/bin:\$HIVE_HOME/bin:\$PATH
$hive_home/bin/hive --hiveconf user.group=p_sdo_data --hiveconf mapred.job.queue.name=cug_p_sdo_data --hiveconf mapred.fairscheduler.pool=cug_p_youni_sdo_data -e "$s"
);

    print "hive command: " . $commandHive . "\n";
    if (system($commandHive)) {
        print "map to hive table error\n";
        exit(3);
    }
}
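Usage sketch:

# Run an arbitrary HiveQL statement; the sub exits the script on failure.
cmn::RunHiveCMD('use p_sdo_data_etl; show tables;', 'Y');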

## Run a Hive command and return its output
sub RunHiveCMD_S {
    my ($s, $exit_if_err) = @_;

    my $hadoop_home = $ENV{'HADOOP_HOME'};
    my $java_home   = $ENV{'JAVA_HOME'};
    my $hive_home   = $ENV{'HIVE_HOME'};

    my $commandHive = qq(
export JAVA_HOME=${java_home}
export HADOOP_HOME=${hadoop_home}
export HIVE_HOME=${hive_home}
export PATH=\$JAVA_HOME/bin:\$HADOOP_HOME/bin:\$HIVE_HOME/bin:\$PATH
$hive_home/bin/hive -S --hiveconf user.group=p_sdo_data --hiveconf mapred.job.queue.name=cug_p_sdo_data --hiveconf mapred.fairscheduler.pool=cug_p_youni_sdo_data -e "$s"
);

    my $rlt;

    print "hive command: " . $commandHive . "\n";

    $rlt = `$commandHive`;

    return $rlt;
}
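Usage sketch with the hypothetical table from earlier; the -S (silent) flag keeps Hive's own logging out of the captured output:

my $cnt = cmn::RunHiveCMD_S("select count(1) from youni_event_log where pt='2014-12-25';", 'Y');
chomp $cnt;
print "row count: $cnt\n";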

## Set the basic header for Hive commands
sub basicHeader {
    my $sql_header = qq(

use p_sdo_data_etl;
set hive.business.name='youni';
set hive.exec.parallel=true;
set hive.exec.parallel.thread.number=8;

);

    return $sql_header;
}

## Set Hive compression parameters
sub compressHeader {
    my $sql_header = qq(

set mapred.output.compress = true;
set mapred.output.compression.codec = org.apache.hadoop.io.compress.GzipCodec;
set mapred.output.compression.type = BLOCK;

set mapred.compress.map.output = true;
set mapred.map.output.compression.codec = org.apache.hadoop.io.compress.LzoCodec;

set hive.exec.compress.output = true;
set hive.exec.compress.intermediate = true;
set hive.intermediate.compression.codec = org.apache.hadoop.io.compress.LzoCodec;

);

    return $sql_header;
}
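The two headers are meant to be prepended to job SQL before it is handed to RunHiveCMD; a sketch combining them (table and column names hypothetical):

my $sql = cmn::basicHeader() . cmn::compressHeader() . qq(
insert overwrite table youni_event_log partition (pt='2014-12-25')
select uid, event from youni_event_src where pt='2014-12-25';
);
cmn::RunHiveCMD($sql, 'Y');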

## Clean up HDFS
sub cleanHDFS {

    my ($table_name, $clean_date) = @_;    ## yyyy-mm-dd

    my $commandhdfs = qq(
source /etc/profile
source /etc/bashrc
/home/horae/shell-horae/hivesql/youni/hdfs_oper.sh "$table_name" "$clean_date"
);

    my $rlt;

    print "hdfs command: " . $commandhdfs . "\n";

    $rlt = `$commandhdfs`;

    return $rlt;
}

1;
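A minimal driver sketch, assuming the listing above is saved as cmn.pm next to the driver:

#!/usr/bin/perl -w
use strict;
use warnings;
require 'cmn.pm';

# Derive yesterday's partition key from the system clock and report it.
my $today     = substr(cmn::getSystemTime(), 0, 10);
my $yesterday = cmn::getOffsetDay($today, 1);
print "processing partition pt=$yesterday\n";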
