Last active
April 20, 2020 16:30
-
-
Save userid/fecee8535dcebf3197b7347c123a653c to your computer and use it in GitHub Desktop.
# 10.10.10.41 15 07 * * * /home/odps/src/download.pl >>/home/log/odps/odps_to_clickhouse.log 2>&1 # 10.10.14.33 24 9 * * * /home/odps/ods_ac_serverconfig/src/crontab_download_generate.pl >> /home/log/odps/odps_download.log 2>>/home/log/odps/odps_download_err.log
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
## | |
## | |
## | |
use 5.012; | |
$| = 1; | |
use POSIX qw(strftime); | |
chdir '/home/odps/ods_ac_serverconfig/'; | |
my $date = strftime("%Y%m%d", localtime(time()-86400)); | |
my $sql = "tunnel download cbods.ods_ac_serverconfig/requestdate='$date' -fd '|' data/serverconfig/serverconfig.$date.txt;"; | |
say $sql; | |
say qx{ /opt/odps/bin/odpscmd -e "$sql"}; | |
my $downloadfile = "data/serverconfig/serverconfig.$date.txt"; | |
die("file download error") unless -s $downloadfile; | |
###_---- 开始解析 | |
open my $f_user, "+>", "huge/user.csv"; | |
open my $f_device, "+>", "huge/device.csv"; | |
open my $f_udevice, "+>", "huge/udevice.csv"; | |
open my $f_used, "+>", "huge/used.csv"; | |
open my $f_contains, "+>", "huge/contains.csv"; | |
sub get_uuid_time($){ | |
use POSIX qw(strftime); | |
my $uuid= shift || 'ef802820-46b3-11e2-bf3a-47ef6b3e28e2'; | |
$uuid =~ s/-//g; | |
my $timelow = hex substr( $uuid, 2 * 0, 2 * 4 ); | |
my $timemid = hex substr( $uuid, 2 * 4, 2 * 2 ); | |
my $version = hex substr( $uuid, 2 * 6, 1 ); | |
my $timehi = hex substr( $uuid, 2 * 6 + 1, 2 * 2 - 1 ); | |
my $time = ( $timehi * ( 2**16 ) + $timemid ) * ( 2**32 ) + $timelow; | |
my $epoc = int( $time / 10000000 ) - 12219292800; | |
#return strftime( "%Y%m%d", localtime($epoc) ) ; | |
return strftime( "%Y-%m-%d %H:%M:%S", localtime($epoc) ); | |
} | |
sub process($){ | |
my $l = shift; | |
chomp $l; | |
my @vals = split '\\|', $l ; | |
return if scalar @vals <5; | |
my $userid = $vals[0]; | |
my $type = $vals[1]; | |
my $model = $vals[2]; | |
$model=~s/,/_/; | |
my $mac = $vals[3]; | |
my $uuid = $vals[-1] || ''; | |
return if $userid eq ''; | |
return if $userid == 0; | |
my $date = get_uuid_time($uuid); | |
#say "$userid $model $mac $uuid $date"; | |
print $f_user "$userid\n"; | |
print $f_device "$mac,$model,$type\n"; | |
print $f_udevice "$uuid,$date,$type\n"; | |
print $f_used "$userid,$uuid\n"; | |
print $f_contains "$uuid,$mac\n"; | |
} | |
my $n = 0; | |
open my $fd,"<", $downloadfile; | |
while(<$fd>){ | |
process($_) if ( $_ ne ''); | |
say "processed $n lines." if $n++ % 1000 == 0; | |
} | |
close($fd); | |
close($f_user); | |
close $f_device; | |
close $f_udevice; | |
close $f_user; | |
close $f_contains; | |
die("解析文件出错" ) unless -s "huge/user.csv"; | |
#__----- 开始导入hugegraph | |
say qx {/home/dev/hugegraph-loader-0.7.0/bin/hugegraph-loader -g hugegraph -f huge/s.json -s huge/schema.groovy -h 10.10.14.33 --max-parse-errors 10000 --max-insert-errors 10000}; |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/perl -w | |
## | |
## | |
## | |
use 5.012; | |
$| = 1; | |
use POSIX qw(strftime); | |
chdir '/home/odps/'; | |
my $date = strftime("%Y%m%d", localtime(time()-86400*1)); | |
my $partition = strftime("%Y-%m-%d", localtime(time()-86400*1)); | |
my $sql = "tunnel download cbmds.mds_user_attribute_original_big_data_table/requestdate=${date} -fd '\x01' ./data/user_attribute_${date}.txt "; | |
say $sql; | |
say "=== 开始下载odps ==="; | |
say qx{ /opt/odps/bin/odpscmd -e "$sql"}; | |
my $downloadfile = "./data/user_attribute_${date}.txt"; | |
die("file download error") unless -s $downloadfile; | |
#__----- 开始导入 | |
# | |
say "=== 开始导入clickhouse ==="; | |
say qx{ cat $downloadfile |sed -e "s/['\\"{}\\r\\t]//g" -e 's/\$/\\x01${partition}/' | clickhouse-client --format_csv_delimiter='\x01' --query="INSERT INTO user_attribute FORMAT CSV" --input_format_allow_errors_num=10000000}; | |
#__----- 开始清理旧数据 | |
my $old_date = strftime("%Y%m%d", localtime(time()-86400*9)); | |
my $old_partition = strftime("%Y-%m-%d", localtime(time()-86400*9)); | |
say "=== drop table partition '$old_partition' ==="; | |
qx{rm ./data/user_attribute_${old_date}.txt }; | |
say qx{ clickhouse-client --query=" alter table user_attribute drop partition '$old_partition' "}; | |
say "=== Done ===\n,", `date`; |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment