Created
August 1, 2017 09:30
-
-
Save saahil1292/f2676149da4abf8169a54de73fcb1542 to your computer and use it in GitHub Desktop.
Exploratory Data Analysis of Game of Thrones Dataset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Future Imports | |
from __future__ import absolute_import | |
from __future__ import print_function | |
from __future__ import division | |
from __future__ import unicode_literals | |
# Data Cleaning | |
from pandas import read_csv | |
import numpy as np | |
# Data Visualization | |
import matplotlib.pyplot as plt | |
%matplotlib inline | |
# Time | |
import time | |
def timeit(method):
    '''Decorator that measures the wall-clock execution time of ``method``.

    The elapsed time is printed, unless the caller passes a ``log_time``
    dict as a keyword argument, in which case it is stored in that dict.

    Parameter
    ---------
    method: function
        The callable to time.

    Return
    ---------
    function
        A wrapper that calls ``method``, reports the elapsed time in
        milliseconds and returns the result of ``method`` unchanged.
    '''
    # Imported locally so the decorator needs no extra module-level import.
    import functools

    @functools.wraps(method)  # keep the wrapped function's __name__ / __doc__
    def timed(*args, **kw):
        ts = time.time()
        result = method(*args, **kw)
        elapsed_ms = (time.time() - ts) * 1000
        if 'log_time' in kw:
            # NOTE: 'log_time' and 'log_name' are forwarded to ``method`` as
            # well, so the wrapped function has to accept them (e.g. **kw).
            name = kw.get('log_name', method.__name__.upper())
            kw['log_time'][name] = int(elapsed_ms)
        else:
            print('%r %2.2f ms' % (method.__name__, elapsed_ms))
        return result
    return timed
@timeit
def dataframe(name):
    '''Load one of the known CSV datasets into a dataframe.

    Parameters
    ----------
    name: string
        Dataset name without extension; one of 'battles',
        'character-deaths' or 'character-predictions'. The file is read
        from './<name>.csv'.

    Return
    ----------
    dataframe
        The parsed CSV, or None when ``name`` is not a known dataset.

    Raises
    ----------
    IOError
        If the CSV file cannot be read.
    '''
    filepath = './' + name + '.csv'
    if name not in ('battles', 'character-deaths', 'character-predictions'):
        # Unknown names are ignored rather than read from disk.
        return None
    try:
        return read_csv(filepath)
    except IOError:
        # Raise a real exception object; raising a bare string is itself
        # a TypeError and would hide the actual problem.
        raise IOError('File does not exist: %s' % filepath)
# Load the three datasets from the working directory.
battles = dataframe('battles')
character_deaths = dataframe('character-deaths')
character_predictions = dataframe('character-predictions')

# Keep only the battle columns used by the analysis below.
battles = battles[[
    'name',
    'year',
    'battle_number',
    'attacker_king',
    'defender_king',
    'attacker_outcome',
    'major_death',
    'major_capture',
    'attacker_size',
    'defender_size',
]]

# Non-null entries per column, grouped by the attacker's outcome.
battles.groupby('attacker_outcome').count()
#Plots | |
@timeit
def plots(dataframe):
    ''' Creates bar charts summarising the battles data

    Parameter
    ---------
    dataframe: dataframe
        Battles data. The columns attacker_outcome, attacker_king,
        defender_king, attacker_size, defender_size and battle_number
        are read. Note that inside this function the parameter name
        shadows the module-level ``dataframe`` loader.

    Return
    ---------
    None
    '''
    army_sizes = ['attacker_size', 'defender_size']

    # Total army sizes, split by how the battle ended for the attacker.
    dataframe.groupby('attacker_outcome')[army_sizes].sum().plot(kind='bar')
    plt.xlabel('')

    # Total army sizes per attacking king.
    dataframe.groupby('attacker_king')[army_sizes].sum().plot(kind='bar')
    plt.xlabel('')
    plt.ylabel('Army Size')

    # Wins per attacking king: restrict to battles the attacker won so the
    # bars match the 'Wins' label instead of counting every outcome.
    # Assumes a win is encoded as 'win', mirroring the 'loss' value used
    # below -- TODO confirm against the data.
    attacker_wins = dataframe[dataframe.attacker_outcome == 'win']
    (attacker_wins.groupby('attacker_king')[['attacker_outcome']]
     .count().plot(kind='barh'))
    plt.ylabel('')
    plt.xlabel('Wins')

    # Wins per defending king: an attacker loss is a defender win.
    defender_wins = dataframe[dataframe.attacker_outcome == 'loss']
    (defender_wins.groupby('defender_king')[['defender_king']]
     .count().plot(kind='barh'))
    plt.xlabel('wins')
    plt.ylabel('')

    # Battles started per attacking king. Rows are counted rather than
    # summing battle_number, which is presumably an ordinal identifier --
    # TODO confirm.
    (dataframe.groupby('attacker_king')[['battle_number']]
     .count().plot(kind='barh'))
    plt.xlabel('Battle Count')
    plt.ylabel('')
# Render all battle charts.
plots(battles)

# Names of the battles in which Joffrey/Tommen Baratheon was the attacker.
battles.loc[battles.attacker_king == 'Joffrey/Tommen Baratheon', ['name']]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment