Last active
January 25, 2022 20:02
-
-
Save icelander/9290acd29f722152a805ac41f3f62799 to your computer and use it in GitHub Desktop.
This script removes invalid JSON and posts longer than 16,383 characters from Mattermost bulk export files
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
##
# find_invalid_lines.sh
#
## About
#
# This script processes a Mattermost bulk export file (one JSON object per
# line) and filters out any line that is not valid JSON or whose post message
# is longer than the allowable 16383 characters.
#
## Usage
#
# The script accepts the path to the Mattermost bulk export file. Valid lines
# are written to STDOUT; error messages and the invalid lines themselves are
# written to STDERR. To save these to files use output redirection, for example:
#
# find_invalid_lines.sh import.jsonl 1> valid_import.jsonl 2>invalid_lines.jsonl
#
# Each error message is prefixed with >>>>> and is followed by the offending
# JSON on the next line.
#
# Example STDERR output:
#
# >>>>> Error on line 2: Expecting : delimiter: line 1 column 30 (char 29)
# {"type":"team","team":{"name"":"reiciendis-0","display_name":"minus","type":"O".....
#
# Longest post message (in characters) that is allowed through.
readonly max_length=16383

# jq and python are both invoked for every line of the export further down,
# so stop early with a clear message if either one is missing from PATH.
# `command -v` is the portable replacement for `which`.
for tool in jq python; do
  if ! command -v "$tool" >/dev/null 2>&1; then
    >&2 echo "Please install $tool"
    exit 1
  fi
done

# The first argument must name an existing regular file; "${1:-}" keeps the
# test well-defined when the script is called with no arguments at all.
if [[ ! -f "${1:-}" ]]; then
  >&2 echo "Please provide the path to the bulk import file, like this:"
  >&2 echo "find_invalid_lines.sh import.jsonl 1> valid_import.jsonl"
  exit 1
fi

# Resolve to an absolute path. The argument is quoted so paths containing
# spaces survive. realpath's own diagnostics are deliberately discarded: when
# it is unavailable or fails we fall back to the path exactly as given, which
# the -f test above has already shown to be readable as-is.
original_file=$(realpath -- "$1" 2>/dev/null) || original_file=$1
# Walk the export one line at a time. Lines that pass every check are copied
# to STDOUT unchanged; rejected lines go to STDERR, preceded by a ">>>>>"
# message explaining why.
#
# - `IFS= read -r` keeps backslashes and leading/trailing whitespace intact, so
#   JSON escapes such as \" and \n are not corrupted on the way through.
# - `|| [[ -n "$line" ]]` also processes a final line with no trailing newline.
# - Every expansion of $line is quoted and printed with printf so the data is
#   never word-split, glob-expanded or mistaken for an echo option.
line_num=0
while IFS= read -r line || [[ -n "$line" ]]; do
  line_num=$((line_num + 1))

  # json.tool exits non-zero on malformed JSON and explains why on stderr.
  # Only that diagnostic is captured; the pretty-printed copy is discarded.
  if ! error=$(printf '%s\n' "$line" | python -m json.tool 2>&1 >/dev/null); then
    >&2 printf '>>>>> Error on line %s: %s\n' "$line_num" "$error"
    >&2 printf '%s\n' "$line"
    continue
  fi

  # `.type?` yields nothing (rather than failing) when the top-level value is
  # not an object, so such lines are passed through like any other non-post.
  post_type=$(printf '%s\n' "$line" | jq -r '.type? // empty')

  case "$post_type" in
    post | direct_post)
      ;;
    *)
      # Only posts carry a message that needs a length check.
      printf '%s\n' "$line"
      continue
      ;;
  esac

  # Let jq count the characters of the decoded message itself. Measuring jq's
  # printed output with `wc -m` would also count the surrounding quotes, the
  # trailing newline and every escape backslash. A missing message counts as 0.
  message_length=$(printf '%s\n' "$line" \
    | jq -r --arg t "$post_type" '(.[$t].message? // "") | tostring | length')

  if (( message_length > max_length )); then
    # Report to STDERR and move on to the next line; the remaining lines of
    # the export still have to be checked and emitted.
    >&2 printf '>>>>> Error on line %s - Maximum length exceeded (%s chars/%s max)\n' \
      "$line_num" "$message_length" "$max_length"
    >&2 printf '%s\n' "$line"
    continue
  fi

  printf '%s\n' "$line"
done < "$original_file"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment