Last active
October 4, 2022 15:45
-
-
Save ernstki/ed98dfaa1bc718bad1e15f810825c814 to your computer and use it in GitHub Desktop.
Truncate a field to an arbitrary length with awk
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env awk -f | |
## | |
## Truncate a string to an arbitrary length, with an ellipsis in the middle
## | |
## Author: Kevin Ernst <ernstki -at- mail.uc.edu> | |
## Date: 4 October 2022 | |
## License: WTFPL | |
## Source: https://gist.github.com/ernstki/ed98dfaa1bc718bad1e15f810825c814 | |
## | |
## Set the default maximum output width.
##
## `-v VAR=value` assignments are applied before BEGIN runs, so the default
## is only supplied when the caller has not already chosen a width, e.g.
##
##     awk -v MAXW=20 -f trunc.awk /dev/null
BEGIN {
    if (MAXW == "")
        MAXW = 10
}
## trunc(s) -- shorten `s` to at most MAXW characters by eliding its middle.
##
##   s     the string to shorten
##   MAXW  (global) maximum width of the result, as counted by length()
##
## Returns `s` unchanged when it already fits. Otherwise returns the first
## `head` characters of `s`, a single "…", and the last `tail` characters,
## with head + 1 + tail == MAXW. Returns "" when MAXW is less than 1.
##
## The split favours the tail: head = MAXW/2 - 2, tail = MAXW/2 + 1 for even
## MAXW. Worked example with MAXW=10:
##
##     "I love applejacks"    (len=17)  ->  "I l" "…" "ejacks"
##     "I love apple jacks"   (len=18)  ->  "I l" "…" " jacks"
##
## NOTE(review): widths are measured with length(), so the "…" only counts
## as one character in an awk/locale that is multibyte-aware -- confirm for
## the target awk.
##
## Names after the wide gap in the parameter list are locals, not arguments.
function trunc(s,    len,width,middle,excess,head,tail,cutstart,cutstop) {
    len = length(s)
    if (len <= MAXW)
        return s

    # Work with whole numbers only: substr() with fractional arguments is
    # rounded differently by different awk implementations.
    width = int(MAXW)
    if (width < 1)
        return ""    # no room even for the ellipsis

    # Characters kept on each side of the ellipsis. Clamp `head` so tiny
    # widths (1..3) still produce exactly `width` characters.
    head = int(width / 2) - 2
    if (head < 0)
        head = 0
    tail = width - 1 - head

    # Diagnostic trace, kept so the demo output stays the same.
    middle = int(len / 2)
    excess = len - width
    printf "len=%d; middle=%d; excess=%d\n", len, middle, excess

    cutstart = head + 1          # first character replaced by the ellipsis
    cutstop  = len - tail + 1    # first character kept after the ellipsis
    printf "cutstart=%d; cutstop=%d\n\n", cutstart, cutstop

    # the naive alternative would just chop the end:
    #   return substr(s, 1, width-1) "…"
    return substr(s, 1, head) "…" substr(s, cutstop)
}  # trunc(s)
## Demo driver: truncate a few sample strings and report whether each result
## came out exactly MAXW characters long.
##
## Lives in END, so awk must be given some input to consume first
## (e.g. /dev/null) before this runs.
END {
    n = 0
    as[++n] = "I love applejacks"
    as[++n] = "I love apple jacks"
    as[++n] = "I love apple jacks!"

    printf "MAXW=%d\n\n", MAXW

    # `for (a in as)` visits keys in an unspecified order; count through
    # the indices instead so the samples always print in the same order.
    for (a = 1; a <= n; a++) {
        m = trunc(as[a])
        len = length(m)
        printf "> %s (len=%d %s)\n\n", m, len, len==MAXW ? "✓" : "✗"
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example output
Requires GNU `gawk`, which is just `awk` if you're on Linux.
$ gawk -f trunc.awk /dev/null
Output:
Compare: