Created
April 7, 2019 01:16
-
-
Save jrjames83/c33b115a56211773124ab74d5cbcc3d2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd\n", | |
| "from collections import defaultdict\n", | |
| "import numpy as np" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "class AirlineAggregator:\n", | |
| " \n", | |
| " def __init__(self, filepath):\n", | |
| " self.filepath = filepath\n", | |
| " self.cols = ['CARRIER_DELAY', 'WEATHER_DELAY', 'NAS_DELAY', \n", | |
| " 'SECURITY_DELAY', 'LATE_AIRCRAFT_DELAY']\n", | |
| " self.df = pd.read_csv(self.filepath)\n", | |
| " self.df = self.df.fillna(0)\n", | |
| "\n", | |
| " @property\n", | |
| " def airline_conditions_dict(self):\n", | |
| " \"\"\" \n", | |
| " create a dictionary whose keys are the airlines\n", | |
| " and whose values are a list of dictionaries by delay type\n", | |
| " \"\"\"\n", | |
| " airline_conditions = defaultdict(list)\n", | |
| " ### CAREFUL - note the use of sample to speed up compute\n", | |
| " for index, row in self.df.sample(n=20000).iterrows():\n", | |
| " airline = row.AIRLINE\n", | |
| " values = [ {col:getattr(row, col)} for col in self.cols]\n", | |
| " airline_conditions[airline].append(values)\n", | |
| " return airline_conditions\n", | |
| " \n", | |
| " def compute_metrics(self, airline_key, delay_type, aggregation_type):\n", | |
| " \"\"\"\n", | |
| " the compute_metrics method takes an airline, delay type\n", | |
| " and aggregation type, then computes the required metrics\n", | |
| " \"\"\"\n", | |
| " rows = self.airline_conditions_dict.get(airline_key)\n", | |
| " rows = [x[0].get(delay_type) for x in rows]\n", | |
| " \n", | |
| " if aggregation_type == 'sum':\n", | |
| " return sum(rows)\n", | |
| " elif aggregation_type == 'mean':\n", | |
| " return np.mean([x for x in rows if x > 0]) # avoid the zero skewing the mean\n", | |
| " elif aggregation_type == 'count':\n", | |
| " return np.count_nonzero(rows)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "a = AirlineAggregator('/Users/jeff/Downloads/Jan2018.csv') # Initialize an object with the filepath" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "226" | |
| ] | |
| }, | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "a.compute_metrics('AA', 'CARRIER_DELAY', 'count') # number of sampled AA flights with a nonzero carrier delay" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "8885.0" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "a.compute_metrics('AA', 'CARRIER_DELAY', 'sum') # total carrier-delay minutes across the sampled AA flights" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "42.767441860465119" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "a.compute_metrics('AA', 'CARRIER_DELAY', 'mean') # mean carrier delay in minutes, counting only flights with a delay > 0" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[[{'CARRIER_DELAY': 0.0},\n", | |
| " {'WEATHER_DELAY': 0.0},\n", | |
| " {'NAS_DELAY': 0.0},\n", | |
| " {'SECURITY_DELAY': 0.0},\n", | |
| " {'LATE_AIRCRAFT_DELAY': 0.0}],\n", | |
| " [{'CARRIER_DELAY': 45.0},\n", | |
| " {'WEATHER_DELAY': 12.0},\n", | |
| " {'NAS_DELAY': 0.0},\n", | |
| " {'SECURITY_DELAY': 0.0},\n", | |
| " {'LATE_AIRCRAFT_DELAY': 118.0}],\n", | |
| " [{'CARRIER_DELAY': 0.0},\n", | |
| " {'WEATHER_DELAY': 0.0},\n", | |
| " {'NAS_DELAY': 0.0},\n", | |
| " {'SECURITY_DELAY': 0.0},\n", | |
| " {'LATE_AIRCRAFT_DELAY': 0.0}],\n", | |
| " [{'CARRIER_DELAY': 27.0},\n", | |
| " {'WEATHER_DELAY': 0.0},\n", | |
| " {'NAS_DELAY': 0.0},\n", | |
| " {'SECURITY_DELAY': 0.0},\n", | |
| " {'LATE_AIRCRAFT_DELAY': 0.0}],\n", | |
| " [{'CARRIER_DELAY': 0.0},\n", | |
| " {'WEATHER_DELAY': 0.0},\n", | |
| " {'NAS_DELAY': 0.0},\n", | |
| " {'SECURITY_DELAY': 0.0},\n", | |
| " {'LATE_AIRCRAFT_DELAY': 0.0}]]" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "a.airline_conditions_dict['AA'][:5] # access to the raw data for arbitrary computation" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.6.8" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment