Created
May 31, 2016 06:36
-
-
Save aouyang1/e749e9cef9ef9e9ca81050229a0b449f to your computer and use it in GitHub Desktop.
Pull public transactions from Venmo
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/bin/python | |
| import sys | |
| import os | |
| import requests | |
| import json | |
| from pprint import pprint | |
| from urllib.parse import urlparse, parse_qs | |
| import boto3 | |
# Module-wide HTTP session shared by every request in this script.
sess = requests.Session()

# One adapter, configured with max_retries=5, is mounted for both URL schemes.
adapter = requests.adapters.HTTPAdapter(max_retries=5)
for _scheme in ('http://', 'https://'):
    sess.mount(_scheme, adapter)
def _upload_batch(s3_client, batch_file, s3_bucket):
    """Close *batch_file* and upload it to *s3_bucket*, keyed by its local path."""
    # Close first: the upload reads the file from disk, so any records still
    # sitting in the write buffer would otherwise be missing from the object.
    batch_file.close()
    print(batch_file.name)
    s3_client.upload_file(batch_file.name, s3_bucket, batch_file.name)


def collect_to_s3(limit=20, page="https://venmo.com/api/v5/public?", s3_bucket="venmo-json", batch_factor=200, folder="/data"):
    """Page through the Venmo public feed and ship the records to S3 in batches.

    Each response's ``data`` items are written as one JSON document per line
    to a local file named ``venmo_<until>.json``, where ``<until>`` is the
    ``until`` query parameter of the next-page URL seen when the file is
    opened. Once a file holds at least ``batch_factor`` records it is closed
    and uploaded, and the next page starts a new file. Paging stops when a
    response has an empty ``data`` list; a partially filled final file is
    uploaded as well.

    Args:
        limit: Value appended to every request as the ``limit`` parameter.
        page: URL of the first page to fetch; later pages come from
            ``paging.next`` in each response.
        s3_bucket: Name of the destination S3 bucket.
        batch_factor: Minimum number of records per file before it is
            uploaded and a new file is started.
        folder: Local directory for the batch files (created if missing).
            The full local path is also used as the S3 object key.

    Raises:
        KeyError: If the AWS credential environment variables are unset, or a
            non-empty response lacks ``paging.next`` or its ``until`` value.
        requests.HTTPError: If a request returns an error status.
    """
    s3_client = boto3.client(
        's3',
        aws_access_key_id=os.environ["AWS_ACCESS_KEY_ID"],
        aws_secret_access_key=os.environ["AWS_SECRET_ACCESS_KEY"],
    )
    os.makedirs(folder, exist_ok=True)

    records_in_batch = 0
    f_venmo = None
    try:
        while True:
            # NOTE(review): "&limit=" is appended verbatim, so `page` is
            # expected to already contain a "?" — confirm for custom URLs.
            response = sess.get(page + "&limit={}".format(limit))
            response.raise_for_status()
            r = response.json()

            records = r["data"]
            if not records:
                # Nothing left to read: stop instead of indexing an empty list.
                break

            page = r["paging"]["next"]

            if f_venmo is None:
                until_ts = str(parse_qs(urlparse(page).query)["until"][0])
                fname = "venmo_{}.json".format(until_ts)
                f_venmo = open(folder + "/" + fname, "w")

            f_venmo.writelines("{}\n".format(json.dumps(item)) for item in records)
            records_in_batch += len(records)
            print("created_time: {}, num_records: {}, next_page: {}".format(records[-1]["created_time"], len(records), page))

            # Rotate on a threshold rather than an exact multiple so a short
            # page cannot knock the counter out of step and stop rotation.
            if records_in_batch >= batch_factor:
                _upload_batch(s3_client, f_venmo, s3_bucket)
                f_venmo = None
                records_in_batch = 0

        if f_venmo is not None:
            # Flush the final, partially filled batch.
            _upload_batch(s3_client, f_venmo, s3_bucket)
            f_venmo = None
    finally:
        # On an error the current batch stays on disk (not uploaded), but the
        # handle must not leak.
        if f_venmo is not None:
            f_venmo.close()
if __name__ == "__main__":
    # Command line: <start_page_url> <output_folder>
    # Any further arguments are ignored.
    args = sys.argv[1:]
    if len(args) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("usage: {} <start_page_url> <output_folder>".format(sys.argv[0]))
    page, folder = args[:2]
    collect_to_s3(limit=1200, page=page, folder=folder)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment