Created
September 13, 2016 03:35
-
-
Save chen206/294043c2c2a556da27245a70aadfaf8f to your computer and use it in GitHub Desktop.
Shell scripts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
0000ABC1111ABC2222,"id": "1001", "name": "Jack"
0000ABC1111ABC2222ABC3333,"id": "1002", "name": "Michael"
0000ABC1111ABC3333,"id": "1003", "name": "David"
0000ABC2222ABC3333,"id": "1001", "name": "Jack"
1111ABC2222ABC3333,"id": "1002", "name": "Michael"
0000ABC2222ABC3333,"id": "1003", "name": "David"
0000ABC1111,"id": "1003", "name": "David"
# Sum the number of "ABC" occurrences per id. Each input record is expected
# to look like the sample records: ABC-delimited text followed by
# `"id": "<id>"`. Prints one "<id> <total>" line per id; `sort -nr` then
# orders those lines by their leading numeric id, descending.
# Records that contain no `"id": "..."` field are skipped.
awk -F'ABC' '
  # With ABC as the field separator, NF-1 is the number of ABCs on the line.
  match($0, /"id": "[^"]*"/) {
    # Strip the 7-character `"id": "` prefix and the closing quote.
    id = substr($0, RSTART + 7, RLENGTH - 8)
    total[id] += NF - 1
  }
  END { for (id in total) print id, total[id] }
' infile | sort -nr
# Sort INFILE by the numeric value of column 2, descending. Columns are
# separated by ^A (\001); the separator is rewritten to a tab on output.
# -k2,2 limits the sort key to column 2 only instead of "column 2 to end of line".
sort -t $'\001' -k2,2 -rn INFILE | tr '\001' '\t'
# For every regular file in the current directory, print "<filename> <sum>",
# where <sum> is the total of column 2 (0 for an empty file).
for filename in *; do
  # Skip directories and the literal "*" left behind when nothing matches.
  [ -f "$filename" ] || continue
  # The "./" prefix stops awk from reading a name containing "=" as a
  # variable assignment or a name of "-" as stdin; it is stripped again
  # before printing so the output shows the bare filename.
  awk 'BEGIN { sum = 0 }
       { sum += $2 }
       END { name = ARGV[1]; sub(/^\.\//, "", name); print name, sum }' "./$filename"
done
# Set the input and output field separators: read ^A (\001)-separated
# columns from infile and print the first three separated by tabs.
awk 'BEGIN { FS = "\001"; OFS = "\t" } { print $1, $2, $3 }' infile
# Print the lines of file1 that do not appear in file2.
#   -f file2  take the lines of file2 as the patterns
#   -F        treat each pattern as a literal string, not a regular
#             expression, so data containing . * [ etc. matches only itself
#   -x        a pattern must match the whole line
#   -v        keep only the lines that match no pattern
grep -F -v -x -f file2 file1
# Sum column 3 grouped by the value of column 2. Columns are separated by
# ^A (\001). Prints one "<key>, <sum>" line per distinct column-2 value, in
# awk's unspecified array traversal order.
awk -F '\001' '
  { total[$2] += $3 }
  END { for (key in total) print key ", " total[key] }
' infile
# Copy lines 100 through 1000 of infile into outfile.
# `1000q` makes sed stop after line 1000 instead of reading the rest of the file.
sed -n '100,1000p;1000q' infile > outfile
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment