Created
September 18, 2023 21:26
-
-
Save hinell/ae068ac7d993e6387b993fc0283fc1d9 to your computer and use it in GitHub Desktop.
Remove Unicode symbols & emoji from a file; see hinell/dotfiles/bash-scripts for the up-to-date version
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env bash
# Title.......: unicode-emoji-remove.sh
# Summary.....: Unicode tool to strip emoji & icons from textual file
# Version.....: 1.0.0
# Created.....: September 18, 2023
# Authors.....: Alex A. Davronov <[email protected]> (2023-)
# Repository..: N/A
# Description.: Strip emoji & icons from textual file
# Thanks to Marc Durdin (https://stackoverflow.com/a/67495684)
# Usage.......: Use --help
set -u # unset vars are reported as error
# Name of this script without its directory part. The `--` keeps basename
# from treating a $0 that starts with a dash (e.g. "-bash") as an option.
COMMAND_NAME=$(basename -- "$0")
# Print the script version to stdout.
main-version(){ printf '%s\n' '1.0.0'; }
# Print usage information to stdout.
# Reads COMMAND_NAME for the program name shown in the usage line and falls
# back to the basename of $0 when that variable is unset or empty.
main-help(){
  # Plain heredoc (not <<-) with the text at column 0, so the output does not
  # depend on tab indentation surviving in the file.
  cat <<EOL
Usage: ${COMMAND_NAME:-${0##*/}} [OPTIONS] [FILE]
Strip/remove emojis from given input FILE;
use '-' (dash) to read from stdin
OPTIONS:
    -i              remove emoji in place (see sed -i)
    -v, --version   print script version
    -h, --help      print help
EOL
}
# ----------------------------------------------------------------main
main(){ | |
local SED=sed | |
local SED_ARGS=() | |
local FILE | |
test $# -eq 0 && { | |
main-help | |
return | |
} | |
while test $# -gt 0; do | |
case "${1}" in | |
(-v|--version) | |
main-version | |
return | |
;; | |
(-h|--help) | |
main-help | |
return | |
;; | |
(-i) | |
SED_ARGS+=(-i) | |
shift | |
;; | |
(-) | |
: # Read from stdin | |
break | |
;; | |
(*) | |
if test -f "${1}"; | |
then | |
FILE="${1}" | |
else | |
main-help | |
fi | |
break | |
;; | |
# switch.stub | |
esac | |
done; | |
local emoji=("\U1f300-\U1f5ff" | |
"\U1f900-\U1f9ff" | |
"\U1f600-\U1f64f" | |
"\U1f680-\U1f6ff" | |
"\U2600-\U26ff" | |
"\U2700-\U27bf" | |
"\U1f1e6-\U1f1ff" | |
"\U1f191-\U1f251" | |
"\U1f004\U1f0cf" | |
"\U1f170-\U1f171" | |
"\U1f17e-\U1f17f" | |
"\U1f18e\U3030\U2b50\U2b55" | |
"\U2934-\U2935\U2b05-\U2b07" | |
"\U2b1b-\U2b1c\U3297\U3299\U303d\U00a9\U00ae\U2122\U23f3\U24c2" | |
"\U23e9-\U23ef\U25b6" | |
"\U23f8-\U23fa" | |
) | |
LC_ALL=UTF-8 ${SED} ${SED_ARGS[@]} -e "s/[$(printf $emoji)]//g" $1 | |
} # main end | |
# Run main with the script's arguments, then drop the function definition.
main "${@}"
main_status=$?
unset -f main
# Hand main's status to the caller; without this the script always reported
# the (successful) status of `unset`. `return` works when the file is sourced,
# `exit` takes over when it is executed directly.
return "${main_status}" 2>/dev/null || exit "${main_status}"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Unfortunately, it removes non-English letters too.