MrDOS · April 13, 2018 15:09
diff --git a/mock.awk b/mock.awk
 # In retaliation to https://blog.theodo.fr/2018/03/regex-warrior/.
 #
 # Usage:
 #
 #     awk -v FS=';' -v OFS=';' -f mock.awk <mock.csv

 # Filter 1: skip every data row whose 7th field is exactly the string
 # "false". NR is the running record number and the header is record 1, so
 # testing NR != 1 keeps the header out of this filter.
 (NR != 1) && ($7 == "false") {
    next
 }

 # awk patterns can check for regex matches on a field. We use that to remove
 # users with old IDs: data rows whose 2nd field is exactly five decimal
 # digits.
 #
 # The five digits are spelled out instead of written as `[0-9]{5}`: interval
 # expressions are not honoured by every awk (older mawk releases, and gawk
 # before 4.0 unless run with --re-interval), and on those the braces are
 # taken literally, so the rule would silently never match.
 NR > 1 && $2 ~ /^[0-9][0-9][0-9][0-9][0-9]$/ {
    next;
 }

 # No pattern means “just do this action for each record”. Here: remove the
 # 6th column (“admin”) and make the “url” field (column 5) relative.
 {
    # Only records that really have a 6th field are touched. Without this
    # guard a short record would lose the wrong (last) field, and a blank
    # line (NF == 0) would push NF negative, which gawk treats as fatal.
    if (NF >= 6) {
        # Shift all subsequent column values to the left by one...
        for (i = 6; i < NF; i++) {
            $i = $(i + 1);
        }
        # ...then decrement the number of fields in the record so the
        # duplicated last value disappears and `print` does the right thing.
        NF--;
    }

    # Strip the leading “scheme://host/” from the URL. The pattern is anchored
    # at the start of the field and the scheme may not contain “:” or “/”, so
    # a second “://” further along (e.g. inside a query string) is left
    # untouched instead of being swallowed by a greedy match. Because the
    # regex literal is delimited by forward slashes, every slash in the
    # pattern is escaped, including those inside the bracket expressions.
    sub(/^[^:\/]*:\/\/[^\/]+\//, "", $5);
 }

 # The name column has to become two columns. Rather than shifting every later
 # field one place to the right to make room, we take the shortcut the sed
 # original took: put the output field separator inside the first field and
 # let `print` emit it as if it were a real column boundary.
 #
 # That takes two rules: this one rewrites the header, and a separate rule
 # handles the data rows.
 #
 # The separator comes from the `OFS` variable instead of a hard-coded
 # semicolon, so the output separator can still be changed later.
 NR == 1 {
    $1 = ("first name" OFS "last name")
 }

 # Data rows: split the name at its first space by splicing OFS in place of
 # that space. The splice uses index()/substr() rather than sub(), because
 # sub() gives “&” and backslashes in its replacement text a special meaning,
 # which would corrupt the output for an OFS containing either of them.
 # A name without a space is left unchanged.
 NR > 1 {
    space = index($1, " ");
    if (space > 0) {
        $1 = substr($1, 1, space - 1) OFS substr($1, space + 1);
    }
 }

 # Emit the (possibly rewritten) record. A bare `print` would do the same;
 # $0 is simply spelled out here.
 {
    print $0
 }
	# In retaliation to https://blog.theodo.fr/2018/03/regex-warrior/.
	#
	# Usage:
	#
	# awk -v FS=';' -v OFS=';' -f mock.awk <mock.csv

	# Filter 1: skip every data row whose 7th field is exactly the string
	# "false". NR is the running record number and the header is record 1, so
	# testing NR != 1 keeps the header out of this filter.
	(NR != 1) && ($7 == "false") {
		next
	}

	# awk patterns can check for regex matches on a field. We use that to remove
	# users with old IDs: data rows whose 2nd field is exactly five decimal
	# digits.
	#
	# The five digits are spelled out instead of written as `[0-9]{5}`: interval
	# expressions are not honoured by every awk (older mawk releases, and gawk
	# before 4.0 unless run with --re-interval), and on those the braces are
	# taken literally, so the rule would silently never match.
	NR > 1 && $2 ~ /^[0-9][0-9][0-9][0-9][0-9]$/ {
		next;
	}

	# No pattern means “just do this action for each record”. Here: remove the
	# 6th column (“admin”) and make the “url” field (column 5) relative.
	{
		# Only records that really have a 6th field are touched. Without this
		# guard a short record would lose the wrong (last) field, and a blank
		# line (NF == 0) would push NF negative, which gawk treats as fatal.
		if (NF >= 6) {
			# Shift all subsequent column values to the left by one...
			for (i = 6; i < NF; i++) {
				$i = $(i + 1);
			}
			# ...then decrement the number of fields in the record so the
			# duplicated last value disappears and `print` does the right thing.
			NF--;
		}

		# Strip the leading “scheme://host/” from the URL. The pattern is anchored
		# at the start of the field and the scheme may not contain “:” or “/”, so
		# a second “://” further along (e.g. inside a query string) is left
		# untouched instead of being swallowed by a greedy match. Because the
		# regex literal is delimited by forward slashes, every slash in the
		# pattern is escaped, including those inside the bracket expressions.
		sub(/^[^:\/]*:\/\/[^\/]+\//, "", $5);
	}

	# The name column has to become two columns. Rather than shifting every later
	# field one place to the right to make room, we take the shortcut the sed
	# original took: put the output field separator inside the first field and
	# let `print` emit it as if it were a real column boundary.
	#
	# That takes two rules: this one rewrites the header, and a separate rule
	# handles the data rows.
	#
	# The separator comes from the `OFS` variable instead of a hard-coded
	# semicolon, so the output separator can still be changed later.
	NR == 1 {
		$1 = ("first name" OFS "last name")
	}

	# Data rows: split the name at its first space by splicing OFS in place of
	# that space. The splice uses index()/substr() rather than sub(), because
	# sub() gives “&” and backslashes in its replacement text a special meaning,
	# which would corrupt the output for an OFS containing either of them.
	# A name without a space is left unchanged.
	NR > 1 {
		space = index($1, " ");
		if (space > 0) {
			$1 = substr($1, 1, space - 1) OFS substr($1, space + 1);
		}
	}

	# Emit the (possibly rewritten) record. A bare `print` would do the same;
	# $0 is simply spelled out here.
	{
		print $0
	}