Last active
April 22, 2016 07:00
-
-
Save khalib/2f2bc6e5c4485de41399e187b0919938 to your computer and use it in GitHub Desktop.
Python script that imports JSON game data from the MLB Gameday API to a MS SQL database.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| """ | |
| JSON data importer to a MS SQL database. | |
| NOTE: This code has not been tested and is for the sole purpose of showing example code | |
| for a simple data import workflow using a non-optimized design without error handling. | |
| """ | |
| import json | |
| import urllib | |
| from os import getenv | |
| # Non-native Python package to connect to a MS SQL database. | |
| import pymssql | |
| # MLB Gameday Data - November 1st, 2010 - The day that changed SF Giants history forever!!! | |
| JSON_SOURCE = 'http://gd2.mlb.com/components/game/mlb/year_2010/month_11/day_01/linescore.json' | |
| class Game: | |
| """ | |
| Data object abstraction to account for if the source changes the JSON data structure. | |
| """ | |
| def __init__(self): | |
| # Initialize data object properties. | |
| self.game_id = None | |
| self.description = None | |
| self.home_team = None | |
| self.home_team_city = None | |
| self.home_team_runs = 0 | |
| self.away_team = None | |
| self.away_team_city = None | |
| self.away_team_runs = 0 | |
| def json_load(self, data): | |
| """ | |
| Load the JSON data into the data object. | |
| :param data: the JSON data | |
| :return: | |
| """ | |
| game_data = data['data']['game'] | |
| self.game_id = game_data['id'] | |
| self.description = game_data['description'] | |
| self.home_team = game_data['home_team_name'] | |
| self.home_team_city = game_data['home_team_city'] | |
| self.home_team_runs = int(game_data['home_team_runs']) | |
| self.away_team = game_data['away_team_name'] | |
| self.away_team_city = game_data['away_team_city'] | |
| self.away_team_runs = int(game_data['away_team_runs']) | |
| def db_load(self, cursor): | |
| """ | |
| Load the database record into the data object. | |
| :param cursor: the database cursor with the data to be loaded. | |
| """ | |
| if cursor: | |
| self.id = cursor['id'] | |
| self.game_id = cursor['game_id'] | |
| self.description = cursor['description'] | |
| self.home_team = cursor['home_team'] | |
| self.home_team_city = cursor['home_team_city'] | |
| self.home_team_runs = cursor['home_team_runs'] | |
| self.away_team = cursor['away_team'] | |
| self.away_team_city = cursor['away_team_city'] | |
| self.away_team_runs = cursor['away_team_runs'] | |
| def save(self): | |
| """ | |
| Saves/updates the game record into the database. | |
| :return: tuple (updated, game) | |
| updated: Boolean whether or not the game was updated (otherwise created) | |
| game: the Game object created/updated. | |
| """ | |
| # Connect to the database. | |
| connection = DatabaseUtil.connect() | |
| cursor = connection.cursor() | |
| updated = False | |
| # Query for the game database record. | |
| game = Game.get_by_game_id(self.game_id) | |
| if game is None: | |
| # Record does not exist - create. | |
| cursor.execute(""" | |
| INSERT INTO games ( | |
| game_id, | |
| description, | |
| home_team, | |
| home_team_city, | |
| home_team_runs, | |
| away_team, | |
| away_team_city, | |
| away_team_runs | |
| ) | |
| VALUES (%s, %s, %s, %s, %s, %s) | |
| """, ( | |
| game.game_id, | |
| game.description, | |
| game.home_team, | |
| game.home_team_city, | |
| game.home_team_runs, | |
| game.away_team, | |
| game.away_team_city, | |
| game.away_team_runs, | |
| ) | |
| ) | |
| else: | |
| # Record s - update. | |
| updated = True | |
| cursor.execute(""" | |
| UPDATE games | |
| SET | |
| description=%s, | |
| home_team=%s, | |
| home_team_city=%s, | |
| home_team_runs=%s, | |
| away_team=%s, | |
| away_team_city=%s, | |
| away_team_runs=%s, | |
| WHERE game_id=%s | |
| """, ( | |
| game.description, | |
| game.home_team, | |
| game.home_team_city, | |
| game.home_team_runs, | |
| game.away_team, | |
| game.away_team_city, | |
| game.away_team_runs, | |
| game.game_id, | |
| ) | |
| ) | |
| connection.close() | |
| # Refresh the game object data. | |
| game_new = Game.get_by_game_id(self.game_id) | |
| return (updated, game_new) | |
| @staticmethod | |
| def get_by_game_id(game_id): | |
| """ | |
| Gets a game record by game_id. | |
| :param game_id: string - the game ID as defined by the MLB Gameday API data. | |
| :return: game data object. | |
| """ | |
| # Connect to the database. | |
| connection = DatabaseUtil.connect() | |
| cursor = connection.cursor() | |
| cursor.execute("SELECT id FROM games WHERE game_id=%s", game_id) | |
| row = cursor.fetchone() | |
| # Load the data into the Game data object. | |
| game = Game() | |
| game.db_load(row) | |
| connection.close() | |
| return game | |
| class DatabaseUtil: | |
| """ | |
| Utility class that connects with an MS SQL database. | |
| Denormalized data structure to simplify this demo: | |
| CREATE TABLE games ( | |
| id INT NOT NULL, | |
| game_id VARCHAR(64), | |
| description VARCHAR(127), | |
| home_team VARCHAR(64), | |
| home_team_city VARCHAR(64), | |
| home_team_runs INT, | |
| away_team VARCHAR(64), | |
| away_team_city VARCHAR(64), | |
| away_team_runs INT, | |
| PRIMARY KEY(id) | |
| ) | |
| """ | |
| @staticmethod | |
| def connect(): | |
| """ | |
| Sets up a connection to the database. | |
| """ | |
| database = 'mlb_gameday' | |
| server = getenv('MSSQL_SERVER') | |
| user = getenv('MSSQL_USERNAME') | |
| password = getenv('MSSQL_PASSWORD') | |
| connection = pymssql.connect(server, user, password, database) | |
| return connection | |
| class ImportUtil: | |
| """ | |
| JSON data import utility. | |
| """ | |
| @staticmethod | |
| def fetch_json(source): | |
| """ | |
| Gets the JSON data from the provided source URL. | |
| :return: the JSON data. | |
| """ | |
| # Read the JSON data from a remote source. | |
| response = urllib.urlopen(source) | |
| data = json.loads(response.read()) | |
| return data | |
| if __name__ == '__main__': | |
| """ | |
| Main function that runs the import process. | |
| """ | |
| # Get the JSON data. | |
| data = ImportUtil.fetch_json(JSON_SOURCE) | |
| # Load the JSON data into the Game object. | |
| game = Game() | |
| game.json_load(data) | |
| # Create/update the game data to the database. | |
| game.save() |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| /* | |
| Import query directly into a MS SQL database mirroring the data structure of the JSON file. | |
| The JSON file is read locally and imported into the database. | |
| */ | |
| SELECT game.* | |
| FROM OPENROWSET (BULK 'C:\MLB\linescore.json', SINGLE_CLOB) | |
| CROSS APPLY OPENJSON(BulkColumn) | |
| WITH( | |
| game_id nvarchar(100), | |
| description nvarchar(100), | |
| home_team_name nvarchar(100), | |
| home_team_city nvarchar(100), | |
| home_team_runs int, | |
| away_team_name nvarchar(100), | |
| away_team_city nvarchar(100), | |
| away_team_runs int) AS game |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment