Created
February 8, 2019 20:30
-
-
Save gxercavins/5537b902b51d1ca5466e7da5f92932c1 to your computer and use it in GitHub Desktop.
Test for Stack Overflow question 53404579
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash
#
# Generates a batch of small CSV files, uploads them to a GCS bucket and then
# runs filenames.py with the DataflowRunner.
#
# Usage: ./run_on_gcp.sh project-id bucket-name
#   project-id   - value passed to the job as --project
#   bucket-name  - GCS bucket name, without the gs:// prefix
#
# Side effects: writes generate_files.py and input*.csv into the current
# directory. The generated input files are left in place afterwards.

# Abort on the first failure so a broken generation or upload step never
# goes on to launch the Dataflow job.
set -euo pipefail

if [[ "$#" -ne 2 ]]; then
  # Usage errors go to stderr and yield a non-zero status so callers can
  # detect the failure.
  echo "Please specify Project ID and GCS Bucket Name (without gs:// prefix)" >&2
  echo "Usage: ./run_on_gcp.sh project-id bucket-name" >&2
  exit 1
fi

export PROJECT=$1
export BUCKET=$2

# Number of input files to generate: input00000.csv .. input29999.csv.
readonly NUM_FILES=30000

# Overwrite (rather than append to) the generator so that re-running this
# script does not duplicate its contents. The quoted delimiter keeps the
# shell from expanding anything inside the Python source.
cat <<'EOT' > generate_files.py
import random

products = ['BigQuery', 'Dataflow', 'Dataproc', 'ML Engine', 'Composer', 'Dialogflow', 'Dataprep', 'Datalab', 'Vision API']

for i in range(100):
    print(str(i) + ',' + random.choice(products))
EOT

echo "Creating ${NUM_FILES} files with 100 rows..."
for ((i = 0; i < NUM_FILES; i++)); do
  # Zero-pad the index to five digits to keep the original file naming.
  printf -v input_file 'input%05d.csv' "$i"
  python generate_files.py > "$input_file"
done

echo "Uploading input* files to GCS bucket..."
gsutil -mq cp input* "gs://${BUCKET}/products/"
# rm input*

echo "Executing Dataflow job..."
python filenames.py \
  --runner DataflowRunner \
  --project "$PROJECT" \
  --staging_location "gs://${BUCKET}/staging" \
  --temp_location "gs://${BUCKET}/temp"
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment