A solution for loading the multiple CSV files of a BigQuery export on Google Cloud Storage into one MongoDB collection.
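For context, a multi-file CSV export like the one this script consumes is typically produced with `bq extract` and a wildcard URI, which makes BigQuery shard large tables across several files. A minimal sketch follows; the bucket, dataset, and table names are hypothetical:

# hypothetical example: export a large BigQuery table to sharded CSV files on GCS
# the wildcard (*) tells BigQuery to split the export across multiple files
bq extract --destination_format CSV 'my_dataset.my_table' 'gs://my-bucket/exports/my_table_*.csv'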
#!/bin/bash
# Requirements: script should be run on the relevant mongo instance
# Input: <google storage path> <db_name> <collection_name>
# TL;DR: loads multiple csv files into one mongo collection
# Author: Tal Peretz

# init vars
gs_path=$1
db_name=$2
collection_name=$3
temp_csv_name=partial_csv
files=$(gsutil ls "$gs_path") # get all partial csv files
progress_file=gs2mongo_finished_files_list.txt

# touch helper files
touch "$progress_file"
touch "$temp_csv_name"

# iterate over files in gs_path, skipping any that were already imported
for f in $files
do
    if grep -Fxq "$f" "$progress_file"
    then
        echo "$f is already imported to mongo"
    else
        gsutil cp "$f" "$temp_csv_name" # copy partial file locally
        mongoimport -d "$db_name" -c "$collection_name" --type csv --file "$temp_csv_name" --headerline # import partial file to mongo
        echo "finished importing $f to $collection_name"
        echo "$f" >> "$progress_file" # record progress so reruns skip this file
        rm -f "$temp_csv_name"
    fi
done
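Assuming the script is saved as gs2mongo.sh (the filename, bucket path, and database names below are hypothetical), a run on the mongo instance might look like:

# args: <google storage path> <db_name> <collection_name>
bash gs2mongo.sh gs://my-bucket/exports/ analytics events

Because each finished file is appended to gs2mongo_finished_files_list.txt, an interrupted run can simply be restarted and will resume with the files that were not yet imported.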