Skip to content

Instantly share code, notes, and snippets.

@khalib
Last active April 22, 2016 07:00
Show Gist options
  • Select an option

  • Save khalib/2f2bc6e5c4485de41399e187b0919938 to your computer and use it in GitHub Desktop.

Select an option

Save khalib/2f2bc6e5c4485de41399e187b0919938 to your computer and use it in GitHub Desktop.
Python script that imports JSON game data from the MLB Gameday API to a MS SQL database.
"""
JSON data importer to a MS SQL database.
NOTE: This code has not been tested and is for the sole purpose of showing example code
for a simple data import workflow using a non-optimized design without error handling.
"""
import json
import urllib
from os import getenv
# Non-native Python package to connect to a MS SQL database.
import pymssql
# MLB Gameday Data - November 1st, 2010 - The day that changed SF Giants history forever!!!
JSON_SOURCE = 'http://gd2.mlb.com/components/game/mlb/year_2010/month_11/day_01/linescore.json'
class Game:
"""
Data object abstraction to account for if the source changes the JSON data structure.
"""
def __init__(self):
# Initialize data object properties.
self.game_id = None
self.description = None
self.home_team = None
self.home_team_city = None
self.home_team_runs = 0
self.away_team = None
self.away_team_city = None
self.away_team_runs = 0
def json_load(self, data):
"""
Load the JSON data into the data object.
:param data: the JSON data
:return:
"""
game_data = data['data']['game']
self.game_id = game_data['id']
self.description = game_data['description']
self.home_team = game_data['home_team_name']
self.home_team_city = game_data['home_team_city']
self.home_team_runs = int(game_data['home_team_runs'])
self.away_team = game_data['away_team_name']
self.away_team_city = game_data['away_team_city']
self.away_team_runs = int(game_data['away_team_runs'])
def db_load(self, cursor):
"""
Load the database record into the data object.
:param cursor: the database cursor with the data to be loaded.
"""
if cursor:
self.id = cursor['id']
self.game_id = cursor['game_id']
self.description = cursor['description']
self.home_team = cursor['home_team']
self.home_team_city = cursor['home_team_city']
self.home_team_runs = cursor['home_team_runs']
self.away_team = cursor['away_team']
self.away_team_city = cursor['away_team_city']
self.away_team_runs = cursor['away_team_runs']
def save(self):
"""
Saves/updates the game record into the database.
:return: tuple (updated, game)
updated: Boolean whether or not the game was updated (otherwise created)
game: the Game object created/updated.
"""
# Connect to the database.
connection = DatabaseUtil.connect()
cursor = connection.cursor()
updated = False
# Query for the game database record.
game = Game.get_by_game_id(self.game_id)
if game is None:
# Record does not exist - create.
cursor.execute("""
INSERT INTO games (
game_id,
description,
home_team,
home_team_city,
home_team_runs,
away_team,
away_team_city,
away_team_runs
)
VALUES (%s, %s, %s, %s, %s, %s)
""", (
game.game_id,
game.description,
game.home_team,
game.home_team_city,
game.home_team_runs,
game.away_team,
game.away_team_city,
game.away_team_runs,
)
)
else:
# Record s - update.
updated = True
cursor.execute("""
UPDATE games
SET
description=%s,
home_team=%s,
home_team_city=%s,
home_team_runs=%s,
away_team=%s,
away_team_city=%s,
away_team_runs=%s,
WHERE game_id=%s
""", (
game.description,
game.home_team,
game.home_team_city,
game.home_team_runs,
game.away_team,
game.away_team_city,
game.away_team_runs,
game.game_id,
)
)
connection.close()
# Refresh the game object data.
game_new = Game.get_by_game_id(self.game_id)
return (updated, game_new)
@staticmethod
def get_by_game_id(game_id):
"""
Gets a game record by game_id.
:param game_id: string - the game ID as defined by the MLB Gameday API data.
:return: game data object.
"""
# Connect to the database.
connection = DatabaseUtil.connect()
cursor = connection.cursor()
cursor.execute("SELECT id FROM games WHERE game_id=%s", game_id)
row = cursor.fetchone()
# Load the data into the Game data object.
game = Game()
game.db_load(row)
connection.close()
return game
class DatabaseUtil:
"""
Utility class that connects with an MS SQL database.
Denormalized data structure to simplify this demo:
CREATE TABLE games (
id INT NOT NULL,
game_id VARCHAR(64),
description VARCHAR(127),
home_team VARCHAR(64),
home_team_city VARCHAR(64),
home_team_runs INT,
away_team VARCHAR(64),
away_team_city VARCHAR(64),
away_team_runs INT,
PRIMARY KEY(id)
)
"""
@staticmethod
def connect():
"""
Sets up a connection to the database.
"""
database = 'mlb_gameday'
server = getenv('MSSQL_SERVER')
user = getenv('MSSQL_USERNAME')
password = getenv('MSSQL_PASSWORD')
connection = pymssql.connect(server, user, password, database)
return connection
class ImportUtil:
"""
JSON data import utility.
"""
@staticmethod
def fetch_json(source):
"""
Gets the JSON data from the provided source URL.
:return: the JSON data.
"""
# Read the JSON data from a remote source.
response = urllib.urlopen(source)
data = json.loads(response.read())
return data
if __name__ == '__main__':
"""
Main function that runs the import process.
"""
# Get the JSON data.
data = ImportUtil.fetch_json(JSON_SOURCE)
# Load the JSON data into the Game object.
game = Game()
game.json_load(data)
# Create/update the game data to the database.
game.save()
/*
Import query directly into a MS SQL database mirroring the data structure of the JSON file.
The JSON file is read locally and imported into the database.
*/
SELECT game.*
FROM OPENROWSET (BULK 'C:\MLB\linescore.json', SINGLE_CLOB)
CROSS APPLY OPENJSON(BulkColumn)
WITH(
game_id nvarchar(100),
description nvarchar(100),
home_team_name nvarchar(100),
home_team_city nvarchar(100),
home_team_runs int,
away_team_name nvarchar(100),
away_team_city nvarchar(100),
away_team_runs int) AS game
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment