Last active
March 7, 2024 11:49
-
-
Save psu/1aa92ed7da71bdc3755753ef3ef8c82a to your computer and use it in GitHub Desktop.
Miller - Translate values in CSV file with mapping ("old" to "new")
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /bin/zsh - | |
######################################################################## | |
# Miller - Translate values in CSV file with mapping ("old" to "new") # | |
# Pontus Sundén 2024 # | |
######################################################################## | |
# get input from fancy command line flags | |
zmodload zsh/zutil | |
zparseopts -A ARGUMENTS -input: -map: -column: | |
input=${ARGUMENTS[--input]:-input.csv} # data file | |
raw_map=${ARGUMENTS[--mapping]:-[]} # csv file with two columns in this order: "old, new" (only order is important) | |
column=${ARGUMENTS[--column]:-column} # name of columns to map on | |
separator=${ARGUMENTS[--separator]:-,} # the separator | |
# broken down: | |
# input=${ARGUMENTS[--input]:-input.csv} | |
# two things happening: | |
# set variable from command line flag: input=$ARGUMENTS[--input] | |
# use default value if empty: input=${VARIABLE:-default string} | |
# utility file name | |
mapping="_mapping_used_$raw_map.mlr" | |
# utility file content | |
mlr='begin {\n' | |
mlr+='@separator = "'$separator'";\n' | |
mlr+='@column = "'$column'";\n' | |
mlr+='@mapping = '$(mlr --c2j filter 'is_not_empty($[[[1]]])' then put 'old=$[[[1]]]; new=$[[[2]]]; unset $[[[1]]]; unset $[[[1]]]; $old=old; $new=new' "$raw_map") | |
mlr+=';\n}' | |
printf "$mlr" > "$mapping" | |
# main script doing the mapping | |
# loading variables from utility file: | |
# @column : the column to map on | |
# @separator : the separator to use if the field is multi-value | |
# @mapping : an array of objects with two elements: old and new, eg. {"old": "old_value", "new": "new value"} | |
mlr --csv \ | |
put -f "$mapping" -e ' | |
updated = []; | |
for ( n,old_value in splita($[@column], @separator) ) { | |
new_value = old_value; | |
for ( mapping_pair in @mapping ) { | |
if (old_value == mapping_pair.old) { | |
new_value = mapping_pair.new; | |
} | |
} | |
updated = append(updated, new_value); | |
} | |
$[@column] = joinv(updated, @separator); | |
' "$input" |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment