Last active
January 17, 2023 04:03
-
-
Save kougazhang/8ebe21efb1eec036dd17f104b857b805 to your computer and use it in GitHub Desktop.
#shell #awk
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Remove dated HDFS directories that are older than the retention window.
# run command: hdfs dfs -ls /cdn-logs/<pvdName>/raw/*/*/*|awk -f ./clean.awk
#
# Input: one listing line per record; field 8 is the path. The 8th
# "/"-separated piece of that path is expected to look like YYYY-MM-DD.
# Requires gawk (mktime/systime are gawk extensions).
BEGIN {
    # retention window in seconds (25 days)
    remaining = 25 * 24 * 3600
}
{
    # Records without a path in field 8 carry nothing to inspect; skip them
    # instead of reporting an empty path as valid.
    if ($8 == "") {
        next
    }

    # $8 is the file path; the date is taken from its 8th component
    split($8, a, "/");
    # break the date into year / month / day
    split(a[8], b, "-");
    # build the "YYYY MM DD HH MM SS" datespec mktime expects (midnight)
    formatted = sprintf("%s %s %s 00 00 00", b[1], b[2], b[3]);
    # mktime returns -1 when the datespec is invalid
    dirTimestamp = mktime(formatted);
    # current timestamp
    current = systime();

    if (dirTimestamp > 0 && (current - dirTimestamp) > remaining) {
        print "expired", $8;
        # Single-quote the path for the shell (embedded ' becomes '\'') so
        # metacharacters in it are never interpreted by /bin/sh.
        quoted = $8;
        gsub(/'/, "'\\''", quoted);
        # delete the directory through awk's built-in system()
        system(sprintf("/usr/local/hadoop/bin/hdfs dfs -rm -r -skipTrash '%s'", quoted));
    } else {
        print "it's valid", $8;
    }
}
# v2
# run command: /usr/local/hadoop/bin/hdfs dfs -ls /cdn-logs/huawei/raw/*/*.gz|awk -v is_file=true -f /home/filex/clean.awk 2>&1 > /dev/null
#
# Variables that may be supplied with -v:
#   expire   retention window in days (defaults to 32)
#   is_file  when set, every expired entry is removed; when unset, only
#            entries whose path does not contain ".gz" are removed
# Requires gawk (mktime/systime are gawk extensions).
BEGIN {
    # retention window, in days, unless given on the command line
    if (expire == "") {
        expire = 32;
    }
    remaining = expire * 24 * 3600;
    print("expire is ", expire);
}
{
    filepath = $8;
    # split() returns the number of pieces, which indexes the last component
    nparts = split(filepath, a, "/");
    lastElements = a[nparts];
    # the last component is expected to start with YYYY-MM-DD
    split(lastElements, b, "-");
    formatted = sprintf("%s %s %s 00 00 00", b[1], b[2], b[3]);
    fileTimestamp = mktime(formatted);
    # mktime returns -1 for an invalid datespec: nothing to decide, skip
    if (fileTimestamp == -1) {
        next;
    }
    # current timestamp
    current = systime();
    # index() does a literal substring search; a regex ".gz" would also match
    # any character followed by "gz" and misclassify such paths as .gz files.
    if (is_file || index(filepath, ".gz") == 0) {
        if ((current - fileTimestamp) > remaining) {
            # Single-quote the path for the shell (embedded ' becomes '\'')
            # so metacharacters in it are never interpreted by /bin/sh.
            quoted = filepath;
            gsub(/'/, "'\\''", quoted);
            # delete the entry through awk's built-in system()
            system(sprintf("/usr/local/hadoop/bin/hdfs dfs -rm -r -skipTrash '%s'", quoted));
        }
    }
}
# Compute bandwidth: sum field 10 over the log lines whose field 4 matches
# 16/Jan/2023 20:20 through 20:24. Slashes inside the awk regex literal must
# be escaped, otherwise the regex ends at the first "/". "a+0" prints 0
# rather than an empty line when no line matches.
gzcat 21_00-yppcdn-jy.snmcoocaa.aisee.tv-12694923896623371723.gz | awk '$4 ~ /16\/Jan\/2023:20:2[0-4]/ {a+=$10} END {print a+0}'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
ruby 版