Skip to content

Instantly share code, notes, and snippets.

@jrjames83
Created April 7, 2019 01:16
Show Gist options
  • Save jrjames83/c33b115a56211773124ab74d5cbcc3d2 to your computer and use it in GitHub Desktop.
Save jrjames83/c33b115a56211773124ab74d5cbcc3d2 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"from collections import defaultdict\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"class AirlineAggregator:\n",
"    \"\"\"Load a flight-delay CSV and aggregate delay minutes per airline.\n",
"\n",
"    Missing values are replaced with 0 on load. The per-airline rows are\n",
"    built from one random sample of the data and cached, so every metric\n",
"    computed by an instance is based on the same sample.\n",
"    \"\"\"\n",
"\n",
"    def __init__(self, filepath, sample_size=20000):\n",
"        \"\"\"\n",
"        filepath: path of the CSV file; it must contain an AIRLINE column\n",
"            and the delay columns listed in self.cols\n",
"        sample_size: maximum number of rows to sample (None uses all rows)\n",
"        \"\"\"\n",
"        self.filepath = filepath\n",
"        self.sample_size = sample_size\n",
"        self.cols = ['CARRIER_DELAY', 'WEATHER_DELAY', 'NAS_DELAY',\n",
"                     'SECURITY_DELAY', 'LATE_AIRCRAFT_DELAY']\n",
"        self.df = pd.read_csv(self.filepath)\n",
"        self.df = self.df.fillna(0)\n",
"        self._airline_conditions = None  # filled on first property access\n",
"\n",
"    @property\n",
"    def airline_conditions_dict(self):\n",
"        \"\"\"\n",
"        create a dictionary whose keys are the airlines\n",
"        and whose values are a list of rows, each row being a list of\n",
"        single-key {delay type: minutes} dictionaries in self.cols order\n",
"        \"\"\"\n",
"        if self._airline_conditions is not None:\n",
"            return self._airline_conditions\n",
"        airline_conditions = defaultdict(list)\n",
"        ### CAREFUL - note the use of sample to speed up compute\n",
"        # never ask for more rows than exist, sample() would raise\n",
"        n_rows = len(self.df)\n",
"        if self.sample_size is not None:\n",
"            n_rows = min(self.sample_size, n_rows)\n",
"        sample = self.df.sample(n=n_rows)\n",
"        records = sample[['AIRLINE'] + self.cols].itertuples(index=False, name=None)\n",
"        for airline, *delays in records:\n",
"            values = [{col: delay} for col, delay in zip(self.cols, delays)]\n",
"            airline_conditions[airline].append(values)\n",
"        # keep the sample so sum, mean and count all describe the same rows\n",
"        self._airline_conditions = airline_conditions\n",
"        return airline_conditions\n",
"\n",
"    def compute_metrics(self, airline_key, delay_type, aggregation_type):\n",
"        \"\"\"\n",
"        the compute_metrics method takes an airline, delay type\n",
"        and aggregation type ('sum', 'mean' or 'count'), then computes\n",
"        the required metric; it raises ValueError for an unknown delay\n",
"        type or aggregation type\n",
"        \"\"\"\n",
"        if delay_type not in self.cols:\n",
"            raise ValueError('unknown delay type: %r' % (delay_type,))\n",
"        if aggregation_type not in ('sum', 'mean', 'count'):\n",
"            raise ValueError('unknown aggregation type: %r' % (aggregation_type,))\n",
"        # each row lists its single-key dicts in self.cols order\n",
"        position = self.cols.index(delay_type)\n",
"        # an airline missing from the sample simply has no rows\n",
"        rows = self.airline_conditions_dict.get(airline_key, [])\n",
"        rows = [x[position][delay_type] for x in rows]\n",
"\n",
"        if aggregation_type == 'sum':\n",
"            return sum(rows)\n",
"        if aggregation_type == 'mean':\n",
"            delayed = [x for x in rows if x > 0]  # avoid the zero skewing the mean\n",
"            # np.mean of an empty list warns, so return NaN directly\n",
"            return np.mean(delayed) if delayed else np.nan\n",
"        return np.count_nonzero(rows)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"a = AirlineAggregator('/Users/jeff/Downloads/Jan2018.csv') # Initialize an object with the filepath"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"226"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.compute_metrics('AA', 'CARRIER_DELAY', 'count') # flights having that delay"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"8885.0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.compute_metrics('AA', 'CARRIER_DELAY', 'sum') # mins delayed"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"42.767441860465119"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.compute_metrics('AA', 'CARRIER_DELAY', 'mean') # avg delayed"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[[{'CARRIER_DELAY': 0.0},\n",
" {'WEATHER_DELAY': 0.0},\n",
" {'NAS_DELAY': 0.0},\n",
" {'SECURITY_DELAY': 0.0},\n",
" {'LATE_AIRCRAFT_DELAY': 0.0}],\n",
" [{'CARRIER_DELAY': 45.0},\n",
" {'WEATHER_DELAY': 12.0},\n",
" {'NAS_DELAY': 0.0},\n",
" {'SECURITY_DELAY': 0.0},\n",
" {'LATE_AIRCRAFT_DELAY': 118.0}],\n",
" [{'CARRIER_DELAY': 0.0},\n",
" {'WEATHER_DELAY': 0.0},\n",
" {'NAS_DELAY': 0.0},\n",
" {'SECURITY_DELAY': 0.0},\n",
" {'LATE_AIRCRAFT_DELAY': 0.0}],\n",
" [{'CARRIER_DELAY': 27.0},\n",
" {'WEATHER_DELAY': 0.0},\n",
" {'NAS_DELAY': 0.0},\n",
" {'SECURITY_DELAY': 0.0},\n",
" {'LATE_AIRCRAFT_DELAY': 0.0}],\n",
" [{'CARRIER_DELAY': 0.0},\n",
" {'WEATHER_DELAY': 0.0},\n",
" {'NAS_DELAY': 0.0},\n",
" {'SECURITY_DELAY': 0.0},\n",
" {'LATE_AIRCRAFT_DELAY': 0.0}]]"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"a.airline_conditions_dict['AA'][:5] # access to the raw data for arbitrary computation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment