Skip to content

Instantly share code, notes, and snippets.

@titipata
Created February 23, 2017 21:34
Show Gist options
  • Save titipata/2b1808e70d925287a28fd3fefbc1ee19 to your computer and use it in GitHub Desktop.
Save titipata/2b1808e70d925287a28fd3fefbc1ee19 to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import scipy.sparse as sp"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"A = sp.csr_matrix([[0,1], [0, 1]])"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[0, 1],\n",
" [0, 1]], dtype=int64)"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"A.toarray()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import pandas as pd # convention\n",
"df = pd.DataFrame([{'a': 2, 'b': 3},\n",
" {'a': 1, 'b': 2}])"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df2 = pd.DataFrame(A.toarray(), columns=['c1', 'c2'])"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[0, 1],\n",
" [0, 1]], dtype=int64)"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.array(df2)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(2, 2)"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.shape"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 3\n",
"1 1 2"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"a 3\n",
"b 5\n",
"dtype: int64"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.sum()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.000000</td>\n",
" <td>2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>1.500000</td>\n",
" <td>2.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>0.707107</td>\n",
" <td>0.707107</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000000</td>\n",
" <td>2.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>1.250000</td>\n",
" <td>2.250000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>1.500000</td>\n",
" <td>2.500000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>1.750000</td>\n",
" <td>2.750000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>2.000000</td>\n",
" <td>3.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"count 2.000000 2.000000\n",
"mean 1.500000 2.500000\n",
"std 0.707107 0.707107\n",
"min 1.000000 2.000000\n",
"25% 1.250000 2.250000\n",
"50% 1.500000 2.500000\n",
"75% 1.750000 2.750000\n",
"max 2.000000 3.000000"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" b\n",
"0 3\n",
"1 2"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[['b']]"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df = pd.DataFrame([[2, 3], [1, 2]], columns=['a', 'b'])"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2 3\n",
"1 1 2"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"food = pd.read_csv('Food_Inspections.csv')"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Inspection ID</th>\n",
" <th>DBA Name</th>\n",
" <th>AKA Name</th>\n",
" <th>License #</th>\n",
" <th>Facility Type</th>\n",
" <th>Risk</th>\n",
" <th>Address</th>\n",
" <th>City</th>\n",
" <th>State</th>\n",
" <th>Zip</th>\n",
" <th>Inspection Date</th>\n",
" <th>Inspection Type</th>\n",
" <th>Results</th>\n",
" <th>Violations</th>\n",
" <th>Latitude</th>\n",
" <th>Longitude</th>\n",
" <th>Location</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1989361</td>\n",
" <td>BARRY'S BOOTCAMP</td>\n",
" <td>BARRY'S BOOTCAMP</td>\n",
" <td>2511955.0</td>\n",
" <td>Restaurant</td>\n",
" <td>Risk 2 (Medium)</td>\n",
" <td>1933 N HALSTED ST</td>\n",
" <td>CHICAGO</td>\n",
" <td>IL</td>\n",
" <td>60614.0</td>\n",
" <td>02/17/2017</td>\n",
" <td>License</td>\n",
" <td>Pass</td>\n",
" <td>34. FLOORS: CONSTRUCTED PER CODE, CLEANED, GOO...</td>\n",
" <td>41.916967</td>\n",
" <td>-87.648306</td>\n",
" <td>(41.91696663455583, -87.6483064098177)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1989356</td>\n",
" <td>PIZZA NOVA</td>\n",
" <td>PIZZA NOVA</td>\n",
" <td>5236.0</td>\n",
" <td>Restaurant</td>\n",
" <td>Risk 1 (High)</td>\n",
" <td>1842 W 18TH ST</td>\n",
" <td>CHICAGO</td>\n",
" <td>IL</td>\n",
" <td>60608.0</td>\n",
" <td>02/17/2017</td>\n",
" <td>Complaint</td>\n",
" <td>Pass w/ Conditions</td>\n",
" <td>3. POTENTIALLY HAZARDOUS FOOD MEETS TEMPERATUR...</td>\n",
" <td>41.857811</td>\n",
" <td>-87.672822</td>\n",
" <td>(41.8578111476272, -87.67282193683475)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Inspection ID DBA Name AKA Name License # Facility Type \\\n",
"0 1989361 BARRY'S BOOTCAMP BARRY'S BOOTCAMP 2511955.0 Restaurant \n",
"1 1989356 PIZZA NOVA PIZZA NOVA 5236.0 Restaurant \n",
"\n",
" Risk Address City State Zip \\\n",
"0 Risk 2 (Medium) 1933 N HALSTED ST CHICAGO IL 60614.0 \n",
"1 Risk 1 (High) 1842 W 18TH ST CHICAGO IL 60608.0 \n",
"\n",
" Inspection Date Inspection Type Results \\\n",
"0 02/17/2017 License Pass \n",
"1 02/17/2017 Complaint Pass w/ Conditions \n",
"\n",
" Violations Latitude Longitude \\\n",
"0 34. FLOORS: CONSTRUCTED PER CODE, CLEANED, GOO... 41.916967 -87.648306 \n",
"1 3. POTENTIALLY HAZARDOUS FOOD MEETS TEMPERATUR... 41.857811 -87.672822 \n",
"\n",
" Location \n",
"0 (41.91696663455583, -87.6483064098177) \n",
"1 (41.8578111476272, -87.67282193683475) "
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"food.head(n=2)"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"60614"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"int(food.Zip.iloc[0])"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Inspection ID', 'DBA Name', 'AKA Name', 'License #', 'Facility Type',\n",
" 'Risk', 'Address', 'City', 'State', 'Zip', 'Inspection Date',\n",
" 'Inspection Type', 'Results', 'Violations', 'Latitude', 'Longitude',\n",
" 'Location'],\n",
" dtype='object')"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"food.columns"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Write both frames into a single workbook, one sheet each. The context manager\n",
"# saves and closes the file on exit (ExcelWriter.save() was removed in pandas 2.0).\n",
"with pd.ExcelWriter('example_sheets.xlsx') as writer:\n",
"    df.to_excel(writer, sheet_name='sheet1')\n",
"    df2.to_excel(writer, sheet_name='sheet2')"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[2, 3],\n",
" [1, 2]])"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"np.array(df)"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array([[2, 3],\n",
" [1, 2]])"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.values  # same ndarray as the removed DataFrame.as_matrix() (gone since pandas 1.0)"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" 0 1\n",
"0 2 3\n",
"1 1 2"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Round-trip through a bare ndarray: column labels are lost and become 0, 1.\n",
"pd.DataFrame(df.values)  # .values replaces the removed DataFrame.as_matrix()"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import scipy.io as sio"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df1 = pd.DataFrame([{'a': 2, 'b': 3}, {'a': 1, 'b': 2}])\n",
"df2 = pd.DataFrame([{'a': 4, 'b': 6}, {'a': 3.5, 'b': 10}])\n",
"df_concat = pd.concat((df1, df2), axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2.0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>4.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3.5</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2.0 3\n",
"1 1.0 2\n",
"0 4.0 6\n",
"1 3.5 10"
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_concat"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df1 = pd.DataFrame([{'a': 2, 'b': 3}, {'a': 1, 'b': 2}])\n",
"df2 = pd.DataFrame([{'c': 4, 'd': 6}, {'c': 3.5, 'd': 10}])\n",
"df_concat = pd.concat((df1, df2), axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3.5</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c d\n",
"0 2 3 4.0 6\n",
"1 1 2 3.5 10"
]
},
"execution_count": 45,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_concat"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# fillna returns a new frame; bind it to df_fillna, the name the cells below display.\n",
"df_fillna = df_concat.fillna('')"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"df_concat.fillna('', inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3.5</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c d\n",
"0 2 3 4.0 6\n",
"1 1 2 3.5 10"
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_concat"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" <th>c</th>\n",
" <th>d</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4.0</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3.5</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b c d\n",
"0 2 3 4.0 6\n",
"1 1 2 3.5 10"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_fillna"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{'a': 2.0, 'b': 3.0, 'c': 4.0, 'd': 6.0}"
]
},
"execution_count": 51,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dict(df_fillna.iloc[0])"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(144304, 17)"
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"food.shape"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 Risk 2 (Medium)\n",
"1 Risk 1 (High)\n",
"2 Risk 1 (High)\n",
"Name: Risk, dtype: object"
]
},
"execution_count": 63,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"food['Risk'].iloc[0:3] # same as food.Risk"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DBA Name</th>\n",
" <th>Risk</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>BARRY'S BOOTCAMP</td>\n",
" <td>Risk 2 (Medium)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>PIZZA NOVA</td>\n",
" <td>Risk 1 (High)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>NEW JB-ONE</td>\n",
" <td>Risk 1 (High)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>TWO WAY LOUNE</td>\n",
" <td>Risk 3 (Low)</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CAPO'S</td>\n",
" <td>Risk 2 (Medium)</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" DBA Name Risk\n",
"0 BARRY'S BOOTCAMP Risk 2 (Medium)\n",
"1 PIZZA NOVA Risk 1 (High)\n",
"2 NEW JB-ONE Risk 1 (High)\n",
"3 TWO WAY LOUNE Risk 3 (Low)\n",
"4 CAPO'S Risk 2 (Medium)"
]
},
"execution_count": 58,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"food.iloc[0:101][['DBA Name', 'Risk']].head()"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{'AKA Name': 'PIZZA NOVA',\n",
" 'Address': '1842 W 18TH ST ',\n",
" 'City': 'CHICAGO',\n",
" 'DBA Name': 'PIZZA NOVA',\n",
" 'Facility Type': 'Restaurant',\n",
" 'Inspection Date': '02/17/2017',\n",
" 'Inspection ID': 1989356,\n",
" 'Inspection Type': 'Complaint',\n",
" 'Latitude': 41.857811147627203,\n",
" 'License #': 5236.0,\n",
" 'Location': '(41.8578111476272, -87.67282193683475)',\n",
" 'Longitude': -87.672821936834751,\n",
" 'Results': 'Pass w/ Conditions',\n",
" 'Risk': 'Risk 1 (High)',\n",
" 'State': 'IL',\n",
" 'Violations': \"3. POTENTIALLY HAZARDOUS FOOD MEETS TEMPERATURE REQUIREMENT DURING STORAGE, PREPARATION DISPLAY AND SERVICE - Comments: OBSERVED POTENTIALLY HAZARDOUS FOODS AT IMPROPER TEMPERATURES IN THE FOOD PREP AREA. OBSERVED SLABS OF RIBS SITTING ON THE FOOD PREP TABLE AT 59F. MANAGEMENT VOLUNTARILY DISCARDED APPROXIMATELY 30# OF FOOD WORTH $139.\\nCRITICAL VIOLATION 7-38-005A. | 32. FOOD AND NON-FOOD CONTACT SURFACES PROPERLY DESIGNED, CONSTRUCTED AND MAINTAINED - Comments: PAINT OR SEAL THE WOOD 2X4'S THAT RUN ALONG THE BASEMENT CEILING IN FRONT OF THE 3 COMPARTMENT SINK. | 33. FOOD AND NON-FOOD CONTACT EQUIPMENT UTENSILS CLEAN, FREE OF ABRASIVE DETERGENTS - Comments: CLEAN THE GREASE BUILDUP ON THE VENTILATION HOOD. | 36. LIGHTING: REQUIRED MINIMUM FOOT-CANDLES OF LIGHT PROVIDED, FIXTURES SHIELDED - Comments: NO LIGHT SHIELDS PROVIDED FOR THE LIGHT FIXTURES IN THE BASEMENT IN FRONT OF THE 3 COMPARTMENT SINK.\",\n",
" 'Zip': 60608.0}"
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dict(food.iloc[1])"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df1 = pd.DataFrame([{'a': 2, 'b': 3}, {'a': 1, 'b': 2}])\n",
"df2 = pd.DataFrame([{'a': 2, 'b': 3}, {'a': 3.5, 'b': 10}])\n",
"df_concat = pd.concat((df1, df2), axis=0)"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"4"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(df_concat.index)"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"2"
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(np.unique(df_concat.index))"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df_concat.drop_duplicates(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2.0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3.5</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2.0 3\n",
"1 1.0 2\n",
"1 3.5 10"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_concat"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"a 1.0\n",
"b 2.0\n",
"Name: 1, dtype: float64"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_concat.iloc[1]"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>3.5</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"1 1.0 2\n",
"1 3.5 10"
]
},
"execution_count": 81,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_concat.loc[1]"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df_concat.reset_index(drop=True, inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>a</th>\n",
" <th>b</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>2.0</td>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1.0</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3.5</td>\n",
" <td>10</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" a b\n",
"0 2.0 3\n",
"1 1.0 2\n",
"2 3.5 10"
]
},
"execution_count": 77,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_concat"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## **Food inspection in Chicago**\n",
"\n",
"- Which restaurants are the worst?\n",
"- Where is the worst place to go eat?\n",
"- Which violations does this worst place commit?"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"fail_example = dict(food.iloc[1000])"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"13"
]
},
"execution_count": 96,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(fail_example['Violations'].split('|'))"
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"{'AKA Name': 'REGGAE ISLAND JERK CHICKEN',\n",
" 'Address': '5101 S HALSTED ST ',\n",
" 'City': 'CHICAGO',\n",
" 'DBA Name': 'REGGAE ISLAND JERK CHICKEN, INC',\n",
" 'Facility Type': 'Restaurant',\n",
" 'Inspection Date': '01/27/2017',\n",
" 'Inspection ID': 1982229,\n",
" 'Inspection Type': 'Canvass',\n",
" 'Latitude': 41.801540128340719,\n",
" 'License #': 2423096.0,\n",
" 'Location': '(41.80154012834072, -87.64519698905924)',\n",
" 'Longitude': -87.645196989059244,\n",
" 'Results': 'Fail',\n",
" 'Risk': 'Risk 1 (High)',\n",
" 'State': 'IL',\n",
" 'Violations': '8. SANITIZING RINSE FOR EQUIPMENT AND UTENSILS: CLEAN, PROPER TEMPERATURE, CONCENTRATION, EXPOSURE TIME - Comments: OBSERVED EMPLOYEE WASHING MULTI-USE CONTAINERS, KNIVES, TONGS, SPOONS, ETC IN 3-COMPARTMENT SINK WITH NO SANITIZER AND SINK NOT SET UP. INSTRUCTED MANAGER TO PROPERLY SET UP 3-COMPARTMENT SINK TO WASH, RINSE, AND SANITIZE FOOD EQUIPMENT AND UTENSILS. CRITICAL VIOLATION 7-38-030 | 11. ADEQUATE NUMBER, CONVENIENT, ACCESSIBLE, DESIGNED, AND MAINTAINED - Comments: NO HOT OR TEMPERED WATER AT WASHBOWL IN WASHROOM. INSTRUCTED MANGER TO PROVIDE HOT OR TEMPERED(85F) WATER AT WASHBOWL FOR PROPER HAND WASHING. CRITICAL VIOLATION 7-38-030 | 22. DISH MACHINES: PROVIDED WITH ACCURATE THERMOMETERS, CHEMICAL TEST KITS AND SUITABLE GAUGE COCK - Comments: NO SANITIZER TEST KIT AVAILABLE DURING INSPECTION. INSTRUCTED MANAGER TO PROVIDE TEST KIT FOR PROPER SANITIZER CONCENTRATION. SERIOUS VIOLATION 7-38-030 | 30. FOOD IN ORIGINAL CONTAINER, PROPERLY LABELED: CUSTOMER ADVISORY POSTED AS NEEDED - Comments: LABEL AND DATE ALL PREPARED FOOD IN COOLERS. | 32. FOOD AND NON-FOOD CONTACT SURFACES PROPERLY DESIGNED, CONSTRUCTED AND MAINTAINED - Comments: PROVIDE ADEQUATE SHELVING FOR CLEAN FOOD EQUIPMENT/UTENSIL STORAGE. | 33. FOOD AND NON-FOOD CONTACT EQUIPMENT UTENSILS CLEAN, FREE OF ABRASIVE DETERGENTS - Comments: CLEAN INTERIOR AND EXTERIOR OF ALL COOKING EQUIPMENT, PREP TABLES, COOLERS, FREEZERS, SINKS, SHELVES AND FRONT COUNTER. | 34. FLOORS: CONSTRUCTED PER CODE, CLEANED, GOOD REPAIR, COVING INSTALLED, DUST-LESS CLEANING METHODS USED - Comments: CLEAN FLOORS UNDER ALL EQUIPMENT, ALONG WALLS AND IN ALL CORNERS IN PREP, DISH WASHING AND STORAGE AREAS. | 35. WALLS, CEILINGS, ATTACHED EQUIPMENT CONSTRUCTED PER CODE: GOOD REPAIR, SURFACES CLEAN AND DUST-LESS CLEANING METHODS - Comments: REPLACE STAINED CEILING TILES IN PREP AREA. CLEAN WALLS AS NEEDED IN PREP AND DISH WASHING AREAS. CLEAN ALL LIGHT SHIELDS IN PREP AREA. | 36. 
LIGHTING: REQUIRED MINIMUM FOOT-CANDLES OF LIGHT PROVIDED, FIXTURES SHIELDED - Comments: REPLACE DAMAGED LIGHT SHIELD IN PREP AREA AND MISSING LIGHT SHIELD IN DISH WASHING AREA. | 40. REFRIGERATION AND METAL STEM THERMOMETERS PROVIDED AND CONSPICUOUS - Comments: PROVIDE THERMOMETER IN REACH-IN COOLER IN LOBBY. | 41. PREMISES MAINTAINED FREE OF LITTER, UNNECESSARY ARTICLES, CLEANING EQUIPMENT PROPERLY STORED - Comments: REMOVE ALL UNNECESSARY ARTICLES AND PROPERLY ORGANIZE PREP AND STORAGE AREAS. STORE MOP HEAD UPRIGHT TO PREVENT INSECT BREEDING. | 43. FOOD (ICE) DISPENSING UTENSILS, WASH CLOTHS PROPERLY STORED - Comments: PROVIDE LONG HANDLE UTENSILS FOR DISPENSING COOKED RICE AND PEAS. | 38. VENTILATION: ROOMS AND EQUIPMENT VENTED AS REQUIRED: PLUMBING: INSTALLED AND MAINTAINED - Comments: REPLACE MISSING HANDLES ON UTILITY SINK.',\n",
" 'Zip': 60609.0}"
]
},
"execution_count": 92,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dict(food.iloc[1000])"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import csv"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Read the CSV as a list of dict rows. The with-block closes the file handle;\n",
"# newline='' is what the csv module expects so that line breaks embedded in\n",
"# quoted fields are parsed correctly.\n",
"with open('Food_Inspections.csv', newline='') as csv_file:\n",
"    food_csv = list(csv.DictReader(csv_file))"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Inspection ID', 'DBA Name', 'AKA Name', 'License #', 'Facility Type',\n",
" 'Risk', 'Address', 'City', 'State', 'Zip', 'Inspection Date',\n",
" 'Inspection Type', 'Results', 'Violations', 'Latitude', 'Longitude',\n",
" 'Location'],\n",
" dtype='object')"
]
},
"execution_count": 104,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"food.columns"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"array(['Pass', 'Pass w/ Conditions', 'Out of Business', 'Fail', 'No Entry',\n",
" 'Not Ready', 'Business Not Located'], dtype=object)"
]
},
"execution_count": 107,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"food['Results'].unique()"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# Keep only failed inspections. Take an explicit copy so that later column\n",
"# assignments on `fail` modify an independent frame instead of a slice of `food`\n",
"# (which triggers SettingWithCopyWarning).\n",
"fail = food[food.Results == 'Fail'].copy()"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Number of failed inspections per business name, as a two-column frame.\n",
"fail_count = fail.groupby('DBA Name').size().reset_index(name='number')"
]
},
{
"cell_type": "code",
"execution_count": 118,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DBA Name</th>\n",
" <th>number</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10577</th>\n",
" <td>SUBWAY</td>\n",
" <td>235</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3321</th>\n",
" <td>DUNKIN DONUTS</td>\n",
" <td>154</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7401</th>\n",
" <td>MCDONALD'S</td>\n",
" <td>96</td>\n",
" </tr>\n",
" <tr>\n",
" <th>173</th>\n",
" <td>7-ELEVEN</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7414</th>\n",
" <td>MCDONALDS</td>\n",
" <td>42</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" DBA Name number\n",
"10577 SUBWAY 235\n",
"3321 DUNKIN DONUTS 154\n",
"7401 MCDONALD'S 96\n",
"173 7-ELEVEN 52\n",
"7414 MCDONALDS 42"
]
},
"execution_count": 118,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"fail_count.sort_values(by='number', ascending=False).head()"
]
},
{
"cell_type": "code",
"execution_count": 123,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/titipat/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" if __name__ == '__main__':\n"
]
}
],
"source": [
"# Normalise business names (drop apostrophes, upper-case) so that variants such as\n",
"# MCDONALD'S and MCDONALDS are counted together. assign() builds a new frame rather\n",
"# than writing into a slice of `food`, which avoids SettingWithCopyWarning.\n",
"fail = fail.assign(**{'DBA Name': fail['DBA Name'].str.replace(\"'\", '').str.upper()})"
]
},
{
"cell_type": "code",
"execution_count": 125,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Recount failed inspections per business name using the current `fail` frame.\n",
"fail_count = fail.groupby('DBA Name').size().reset_index(name='number')"
]
},
{
"cell_type": "code",
"execution_count": 128,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/titipat/anaconda3/lib/python3.6/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)\n",
" if __name__ == '__main__':\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>DBA Name</th>\n",
" <th>number</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10516</th>\n",
" <td>SUBWAY</td>\n",
" <td>260</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7338</th>\n",
" <td>MCDONALDS</td>\n",
" <td>185</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3306</th>\n",
" <td>DUNKIN DONUTS</td>\n",
" <td>169</td>\n",
" </tr>\n",
" <tr>\n",
" <th>169</th>\n",
" <td>7-ELEVEN</td>\n",
" <td>57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2371</th>\n",
" <td>CHIPOTLE MEXICAN GRILL</td>\n",
" <td>51</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" DBA Name number\n",
"10516 SUBWAY 260\n",
"7338 MCDONALDS 185\n",
"3306 DUNKIN DONUTS 169\n",
"169 7-ELEVEN 57\n",
"2371 CHIPOTLE MEXICAN GRILL 51"
]
},
"execution_count": 128,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# DataFrame.sort() is deprecated/removed; sort_values is the supported equivalent.\n",
"fail_count.sort_values(by='number', ascending=False).head()"
]
},
{
"cell_type": "code",
"execution_count": 137,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Address</th>\n",
" <th>0</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>785</th>\n",
" <td>11601 W TOUHY AVE</td>\n",
" <td>220</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5273</th>\n",
" <td>324 N LEAVITT ST</td>\n",
" <td>67</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7839</th>\n",
" <td>500 W MADISON ST</td>\n",
" <td>63</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3405</th>\n",
" <td>2300 S THROOP ST</td>\n",
" <td>44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>53</th>\n",
" <td>100 W RANDOLPH ST</td>\n",
" <td>40</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Address 0\n",
"785 11601 W TOUHY AVE 220\n",
"5273 324 N LEAVITT ST 67\n",
"7839 500 W MADISON ST 63\n",
"3405 2300 S THROOP ST 44\n",
"53 100 W RANDOLPH ST 40"
]
},
"execution_count": 137,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows of `fail` per address, largest counts first (the unnamed count column is labelled 0).\n",
"(\n",
"    fail.groupby('Address')\n",
"    .size()\n",
"    .reset_index()\n",
"    .sort_values(by=0, ascending=False)\n",
"    .head()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 139,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Rows of `fail` whose address starts with the given prefix. Take an explicit copy so that\n",
"# later modifications of `ohare` act on an independent frame rather than on a slice of `fail`\n",
"# (which is what raises SettingWithCopyWarning).\n",
"ohare = fail[fail.Address.str.startswith('11601 W TOUHY AVE')].copy()"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(221, 17)"
]
},
"execution_count": 141,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ohare.shape"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/titipat/anaconda3/lib/python3.6/site-packages/pandas/core/frame.py:2842: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n",
" downcast=downcast, **kwargs)\n"
]
}
],
"source": [
"# Replace missing values with '' by rebinding the result instead of using inplace=True:\n",
"# `ohare` was obtained by filtering `fail`, and filling such a frame in place can trigger\n",
"# SettingWithCopyWarning. Re-running this cell is harmless.\n",
"ohare = ohare.fillna('')"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(221, 17)"
]
},
"execution_count": 154,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"ohare.shape"
]
},
{
"cell_type": "code",
"execution_count": 155,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def find_violation(text):\n",
"    \"\"\"Split a '|'-separated violations string into a list of stripped entries.\n",
"\n",
"    Blank pieces are dropped, so an empty string yields [] instead of [''].\n",
"    Non-string input (e.g. NaN) also yields [].\n",
"    \"\"\"\n",
"    if not isinstance(text, str):\n",
"        return []\n",
"    # str.split always returns at least one item, so empties must be filtered explicitly.\n",
"    pieces = (piece.strip() for piece in text.split('|'))\n",
"    return [piece for piece in pieces if piece]"
]
},
{
"cell_type": "code",
"execution_count": 158,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# One list of violation entries per row of `ohare`. Non-string values (e.g. NaN) are skipped\n",
"# explicitly rather than hidden behind a bare `except: pass`, so real errors still surface.\n",
"all_violations_in_ohare = [\n",
"    find_violation(v) for v in ohare.Violations if isinstance(v, str)\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 163,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import itertools"
]
},
{
"cell_type": "code",
"execution_count": 164,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Flatten the per-row lists into a single flat list of violation strings.\n",
"all_violations_in_ohare_flatten = list(itertools.chain.from_iterable(all_violations_in_ohare))"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from collections import Counter"
]
},
{
"cell_type": "code",
"execution_count": 170,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def get_violation_number(text):\n",
"    \"\"\"Return the integer code that starts a violation entry, or 0 if there is none.\n",
"\n",
"    Reads the leading run of digits (after optional whitespace), e.g. '33. FOOD ...' -> 33.\n",
"    Non-string input and text without a leading number give 0.\n",
"    \"\"\"\n",
"    import re  # local import keeps this cell self-contained\n",
"\n",
"    if not isinstance(text, str):\n",
"        return 0\n",
"    # Match the whole leading number instead of a fixed 3-character slice, which would\n",
"    # truncate longer codes and glue digits across the dot (e.g. '1.5' -> 15).\n",
"    match = re.match(r'\\s*(\\d+)', text)\n",
"    return int(match.group(1)) if match else 0"
]
},
{
"cell_type": "code",
"execution_count": 172,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"# Leading violation code for every flattened entry, via get_violation_number.\n",
"all_violations_in_ohare_flatten_number = [\n",
"    get_violation_number(entry) for entry in all_violations_in_ohare_flatten\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"import operator\n",
"# (violation number, count) pairs sorted by ascending count, so the most frequent come last.\n",
"sorted_violations = sorted(Counter(all_violations_in_ohare_flatten_number).items(), key=operator.itemgetter(1))"
]
},
{
"cell_type": "code",
"execution_count": 187,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[(17, 1),\n",
" (19, 1),\n",
" (14, 2),\n",
" (28, 2),\n",
" (22, 3),\n",
" (26, 3),\n",
" (10, 3),\n",
" (9, 5),\n",
" (30, 5),\n",
" (12, 7),\n",
" (6, 8),\n",
" (42, 9),\n",
" (43, 10),\n",
" (45, 11),\n",
" (24, 13),\n",
" (21, 14),\n",
" (31, 16),\n",
" (8, 18),\n",
" (11, 19),\n",
" (2, 26),\n",
" (41, 26),\n",
" (3, 27),\n",
" (0, 30),\n",
" (36, 33),\n",
" (40, 40),\n",
" (29, 43),\n",
" (16, 52),\n",
" (38, 71),\n",
" (32, 79),\n",
" (18, 95),\n",
" (35, 99),\n",
" (34, 125),\n",
" (33, 130)]"
]
},
"execution_count": 187,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"sorted_violations"
]
},
{
"cell_type": "code",
"execution_count": 186,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"\"33. FOOD AND NON-FOOD CONTACT EQUIPMENT UTENSILS CLEAN, FREE OF ABRASIVE DETERGENTS - Comments: Observed dirt accumulation on the shelves in the walk in cooler. Instructed facility to clean and maintain.\\n\\nObserved food debris on the interiors of the microwaves. Instructed facility to clean and maintain.\\n\\nObserved the large, stand alone ice machine near Stanley's Prep to have slight dirt accumulation in the upper, interior portion. Instructed facility to clean, sanitize, and maintain.\""
]
},
"execution_count": 186,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First flattened entry whose text starts with '33.' (raises IndexError if there is none).\n",
"[a for a in all_violations_in_ohare_flatten if a.startswith('33.')][0]"
]
},
{
"cell_type": "code",
"execution_count": 188,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'34. FLOORS: CONSTRUCTED PER CODE, CLEANED, GOOD REPAIR, COVING INSTALLED, DUST-LESS CLEANING METHODS USED - Comments: Observed oil, dirt, and food debris on the floors under the line and around heavy equipment and fixtures. Instructed facility to clean and maintain.'"
]
},
"execution_count": 188,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First flattened entry whose text starts with '34.' (raises IndexError if there is none).\n",
"[a for a in all_violations_in_ohare_flatten if a.startswith('34.')][0]"
]
},
{
"cell_type": "code",
"execution_count": 189,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'35. WALLS, CEILINGS, ATTACHED EQUIPMENT CONSTRUCTED PER CODE: GOOD REPAIR, SURFACES CLEAN AND DUST-LESS CLEANING METHODS - Comments: Observed dirt and food debris accumulated on the walls behind the bar and around the ice machine and pop boxes. Instructed facility to clean and maintain all.'"
]
},
"execution_count": 189,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First flattened entry whose text starts with '35.' (raises IndexError if there is none).\n",
"[a for a in all_violations_in_ohare_flatten if a.startswith('35.')][0]"
]
},
{
"cell_type": "code",
"execution_count": 191,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'18. NO EVIDENCE OF RODENT OR INSECT OUTER OPENINGS PROTECTED/RODENT PROOFED, A WRITTEN LOG SHALL BE MAINTAINED AVAILABLE TO THE INSPECTORS - Comments: Observed 20 small flies throughout facility: on the walls, near the blender, near the cooler, and flying in the air. Instructed facility to remove all pests and maintain. Recommend to contact a licensed pest control provider for service. Serious citation issued 7-38-020.'"
]
},
"execution_count": 191,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Second flattened entry (index 1) whose text starts with '18.'; raises IndexError if fewer\n",
"# than two entries match.\n",
"[a for a in all_violations_in_ohare_flatten if a.startswith('18.')][1]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment