Last active
June 9, 2016 11:10
-
-
Save disq/63f8fdc603976597df02 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/awk -f | |
# init() - reset the per-group accumulator `last_data`.
#
# last_data[i] is a subarray whose KEYS are the distinct values collected for
# field i of the current id group (arrays of arrays: requires gawk).
# The extra parameter `i` is only a local loop counter; callers pass nothing.
function init(    i) {
    # Wipe the whole accumulator first. Clearing only indices 0..49 would
    # leave values from the previous group behind in any subarray that was
    # created for a field past index 49.
    delete last_data

    # Pre-create empty subarrays for the first 50 field slots.
    for (i = 0; i < 50; i++) {
        last_data[i][""] = ""        # assignment forces last_data[i] to be an array
        split("", last_data[i])      # ...which is then emptied again
    }
}
# dump() - write the accumulated group as one output record.
#
# Emits fields 1..maxnf separated by FS; when a field collected several
# distinct values they are joined with CONCAT_SEPARATOR. The record is
# terminated with RS. Reads the globals last_data, maxnf, CONCAT_SEPARATOR,
# FS and RS. The extra parameters are locals; callers pass nothing.
function dump(    i, key, valcnt) {
    for (i = 1; i <= maxnf; i++) {
        # Always print through "%s": using data (or a separator) as the
        # printf format string breaks as soon as it contains a '%'.
        if (i > 1) printf "%s", FS               # separate each field
        valcnt = 0
        for (key in last_data[i]) {              # every distinct value of field i
            if (valcnt > 0) printf "%s", CONCAT_SEPARATOR
            printf "%s", key
            valcnt++
        }
    }
    printf "%s", RS                              # terminate the record
}
# One-time setup before any input is read.
BEGIN {
    split("", last_data)          # ensure last_data is an array before init() touches it
    maxnf = 0                     # widest record (field count) seen so far
    last_id = "_invalid"          # placeholder id: no group has been started yet
    CONCAT_SEPARATOR = "|||"      # joins multiple distinct values of one field
    init()
}
# Per-record rule: consecutive records sharing the same first field form a
# group. When the first field changes, the finished group is dumped and the
# accumulator is reset; then this record's fields are merged into the group.
{
    if (NF > maxnf) maxnf = NF

    if (last_id != ("" $1)) {     # "" $1 forces a string comparison
        # Flush the previous group. Test NR rather than comparing last_id to
        # the "_invalid" placeholder, so that a group whose id is literally
        # "_invalid" is still written out when the next group begins.
        if (NR > 1) dump()
        last_id = "" $1
        init()
    }

    # Each distinct value of a field is stored as a key of that field's
    # subarray; the assignment is idempotent, so no membership test is needed.
    for (i = 1; i <= NF; i++)
        last_data[i][$i] = 1
}
# Flush the final group. Skip this when no record was read at all, so that
# empty input does not produce a lone record separator.
END {
    if (NR > 0) dump()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment