Skip to content

Instantly share code, notes, and snippets.

@louisje
Last active February 16, 2016 05:42
Show Gist options
  • Save louisje/d6df924303011592a018 to your computer and use it in GitHub Desktop.
Save louisje/d6df924303011592a018 to your computer and use it in GitHub Desktop.
#!/bin/bash
#
# <pre>
#
# Copyright (c) 2016 Louis Jeng <[email protected]>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to
# deal in the Software without restriction, including without limitation the
# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
# sell copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.
#
# </pre>
#
# <p>Split a Parse.com exported JSON file into chunks
# of at most 2 GiB each (the default size limit).</p>
#
# prerequisites:
# <code>
# $ sudo apt-get install jq pv bc
# </code>
#
# @author Louis Jeng <[email protected]>
# @since 2016-01-20
#
# Abort with a usage message when the mandatory source file argument is
# missing. The optional third argument "line" makes <size_limit> count lines
# instead of bytes. Diagnostics go to stderr so they never mix with data.
if [[ -z "$1" ]]; then
  echo "Usage: $0 <source_file> [ <size_limit> [ line ] ]" >&2
  exit 1
fi
# Defaults and command-line parsing.
#
#   $1  source file (a trailing ".json" is stripped to build output names)
#   $2  optional size limit; must be a positive decimal integer
#   $3  optional literal "line": interpret the size limit as a line count
#
# size_padding reserves room for the {"results":[ ... ]} wrapper that is put
# around every chunk later in the script: 12 bytes for the opening, 2 for the
# closing, minus the final newline that gets replaced (12 + 2 - 1 = 13).
size_padding="13"
size_limit=$(( 2 * 2 ** 30 ))
source_file="$1"
split_prefix="split-${source_file%.json}"
# Initialise explicitly so a variable inherited from the environment cannot
# silently switch the script into line mode.
line_limit=""
if [[ -n "$2" ]]; then
  # A pure pattern match avoids arithmetic evaluation of untrusted input and
  # still accepts leading zeros.
  if [[ "$2" =~ ^0*[1-9][0-9]*$ ]]; then
    size_limit="$2"
    if [[ "$3" == "line" ]]; then
      line_limit="yes"
      # No wrapper overhead to subtract when the limit counts lines.
      size_padding="0"
    fi
  else
    # Keep the original best-effort behaviour (fall back to the default) but
    # say so clearly.
    echo "ignoring invalid size limit '$2', using ${size_limit}" >&2
  fi
fi
# Guard clause: nothing can be done unless the source is an existing
# regular file.
[[ -f "${source_file}" ]] || {
  echo "${source_file} not found"
  exit 1
}
# Remove leftovers from a previous run: the intermediate "split-" chunks and
# the final re-wrapped chunks. The variable part is quoted so that names with
# spaces are not word-split into unrelated rm operands; the glob stays outside
# the quotes so it still expands. "--" stops option parsing for names that
# start with a dash.
rm -fv -- "${split_prefix}".*.json 2>/dev/null
rm -fv -- "${source_file%.json}".*.json 2>/dev/null
# Step 0: stream the export, turn it into one compact JSON object per line,
# and cut that stream into numbered chunk files "${split_prefix}.NNN.json".
echo "> [ step 0 ] processing ${source_file}"
echo "> limit size = ${size_limit}"
if [[ -n "${line_limit}" ]]; then
  echo "> limit in line"
  split_mode="-l"   # limit counts lines per chunk
else
  split_mode="-C"   # limit counts bytes per chunk, whole lines only
fi
# bc reads the operands as plain decimal numbers, so a limit given with
# leading zeros is not misread as octal.
chunk_limit=$(echo "${size_limit} - ${size_padding}" | bc)
# The sed expressions run in order on every line:
#   1. drop the opening '{ "results": [' from the first line
#   2. drop the closing '] }' from the last line
#   3. drop the comma that separates top-level objects
#   4. strip leading indentation
# Whitespace after ':' is deliberately NOT touched here: a textual
# substitution would also hit '\": ' inside string values and alter the data,
# and "jq -c" already emits compact output.
# With all newlines removed, jq sees a stream of concatenated objects and
# prints each one on its own line.
pv "${source_file}" \
  | sed -e '1s/^{ "results": \[//' \
        -e '$s/\] }$//' \
        -e 's/^ },$/ }/' \
        -e 's/^ *//' \
  | tr -d "\n" \
  | jq -c . \
  | split "${split_mode}" "${chunk_limit}" \
      -d -a 3 -u --verbose --additional-suffix=".json" - "${split_prefix}."
# Re-wrap every chunk produced by split as {"results":[ ... ]} and write it
# to the chunk's name without the leading "split-".
#
# Count the chunks with a glob instead of parsing ls output. The -e test
# skips the literal pattern that the shell leaves behind when nothing matches.
steps=0
for candidate in "${split_prefix}".*.json; do
  if [[ -e "${candidate}" ]]; then
    steps=$((steps + 1))
  fi
done
if [[ "${steps}" -gt 0 ]]; then
  # Chunks are numbered from 000 with three decimal digits.
  for ((i = 0; i < steps; i++)); do
    split_file="${split_prefix}.$(printf "%03d" "$i").json"
    echo "> [ $((i + 1)) of ${steps} ] processing ${split_file}"
    if [[ -f "${split_file}" ]]; then
      # Join the one-object-per-line records with commas, prepend the array
      # opening, and turn the trailing comma into the closing brackets.
      # The redirect target is quoted so names with spaces do not cause an
      # ambiguous redirect.
      pv "${split_file}" | tr "\n" "," \
        | sed -e 's/^/{"results":[/' -e 's/,$/]}/' \
        > "${split_file#split-}"
    fi
  done
fi
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment