Last active
October 2, 2017 19:23
-
-
Save chrislawlor/5c8458c4e02dfcd7b045acb11d24cd84 to your computer and use it in GitHub Desktop.
Foursquare category update to PostgreSQL table.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Download the category list from Foursquare, and upsert it into | |
a flat table in a PostgreSQL database. | |
See https://developer.foursquare.com/docs/venues/categories | |
1. Create the table with the following DDL statement: | |
CREATE TABLE foursquare_categories | |
( | |
id TEXT NOT NULL | |
CONSTRAINT foursquare_categories_pkey | |
PRIMARY KEY | |
CONSTRAINT foursquare_categories_parent_fk | |
REFERENCES foursquare_categories | |
ON DELETE CASCADE, | |
name TEXT, | |
plural_name TEXT, | |
short_name TEXT, | |
icon TEXT, | |
parent TEXT | |
); | |
2. Install dependencies: | |
pip install sqlalchemy requests psycopg2 | |
3. Set the following environment variables: | |
FOURSQUARE_CLIENT_ID | |
FOURSQUARE_CLIENT_SECRET | |
FOURSQUARE_VERSION (optional, see https://developer.foursquare.com/overview/versioning) | |
DATABASE_URL (e.g postgresql://username:password@hostname:port/db) | |
4. Run `python foursquare_category_update.py`, via weekly cron job | |
Written for Python 3.6, though it should be trivial to adapt to older versions. | |
""" | |
import os | |
from urllib.parse import urlencode, urljoin | |
import requests | |
from sqlalchemy import text as sa_text, create_engine | |
from sqlalchemy.orm import sessionmaker, scoped_session | |
from sqlalchemy.exc import (DataError, IntegrityError, OperationalError, | |
ProgrammingError) | |
FOURSQUARE_CLIENT_ID = os.environ.get('FOURSQUARE_CLIENT_ID') | |
FOURSQUARE_CLIENT_SECRET = os.environ.get('FOURSQUARE_CLIENT_SECRET') | |
FOURSQUARE_VERSION = os.environ.get('FOURSQUARE_VERSION', '20170823') | |
DATABASE_URL = os.environ.get('DATABASE_URL') | |
BASE = 'https://api.foursquare.com/v2/' | |
class FoursquareError(Exception): | |
pass | |
# NOTE: The `get_session` and `get_foursquare_url` functions are | |
# more decomposed than they need to be for the purposes of this script, | |
# to allow them to be used as utility functions elsewhere. | |
def get_session(): | |
""" | |
Returns a session with Foursquare API request parameters set. | |
""" | |
s = requests.Session() | |
s.params = dict( | |
client_id=FOURSQUARE_CLIENT_ID, | |
client_secret=FOURSQUARE_CLIENT_SECRET, | |
v=FOURSQUARE_VERSION | |
) | |
return s | |
def get_foursquare_url(endpoint, **params): | |
""" | |
Given an andpoint, and query string parameters as | |
keyword arguments, return a URL suitable for submission | |
to the Foursquare API. | |
Does NOT automatically add authentication parameters, | |
use the session returned by `foursquare.get_session`. | |
""" | |
path = urljoin(BASE, endpoint) | |
qs = urlencode(params) | |
return f'{path}?{qs}' | |
def _get_categories(): | |
""" | |
Returns the JSON-formatted nested list of Foursquare | |
categories. | |
""" | |
s = get_session() | |
url = get_foursquare_url('venues/categories') | |
resp = s.get(url) | |
if resp.status_code == 200: | |
return resp.json()['response']['categories'] | |
raise FoursquareError(resp.text) | |
def _bulk_upsert_categories(categories): | |
""" | |
Do a bulk upsert of the given categories. | |
""" | |
# Get a SQLAlchemy session | |
engine = create_engine(DATABASE_URL) | |
session = scoped_session(sessionmaker(bind=engine)) | |
# Convert the category list of dicts into a dict of lists, | |
# keyed by column name | |
values = defaultdict(list) | |
for c in categories: | |
for attr in ('id', 'name', 'plural_name', 'short_name', | |
'icon', 'parent'): | |
values[attr].append(c[attr]) | |
# Upsert the categories, all in one query! | |
q = sa_text(""" | |
insert into foursquare_categories( | |
id, name, plural_name, short_name, icon, parent | |
) | |
select | |
unnest(:id) | |
, unnest(:name) | |
, unnest(:plural_name) | |
, unnest(:short_name) | |
, unnest(:icon) | |
, unnest(:parent) | |
on conflict (id) do update set | |
name=excluded.name | |
, plural_name=excluded.plural_name | |
, short_name=excluded.short_name | |
, icon=excluded.icon | |
, parent=excluded.parent | |
""").bindparams(**values) | |
try: | |
session.execute(q) | |
session.commit() | |
except (ProgrammingError, DataError, OperationalError, IntegrityError): | |
session.rollback() | |
raise | |
def update_categories(): | |
""" | |
Updates the Foursquare categories table in the Data Science | |
DB, with a fresh list from the Foursquare API. | |
Foursquare recommends running this weekly. | |
""" | |
category_db_list = [] | |
# Flatten the list with a simple recursive function | |
def transform_categories(categories, parent=None): | |
for c in categories: | |
category_db_list.append(dict( | |
id=c['id'], | |
name=c['name'], | |
plural_name=c['pluralName'], | |
short_name=c['shortName'], | |
icon=f"{c['icon']['prefix']}{c['icon']['suffix']}", | |
parent=parent)) | |
if c['categories']: | |
transform_categories(c['categories'], parent=c['id']) | |
transform_categories(_get_categories()) | |
_bulk_upsert_categories(category_db_list) | |
if __name__ == '__main__': | |
update_categories() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment