Last active
July 14, 2021 14:44
-
-
Save aflansburg/79740baa9b4a653797ec6df7f74e8ff9 to your computer and use it in GitHub Desktop.
Typing & Docstrings: Python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# function to iterate over specified variables and view their value counts | |
# add typing to help understand our function if reused elsewhere | |
from typing import List | |
def value_count_rep(columns: List[str], df: pd.DataFrame) -> None:
    '''
    Print a value-count report for each requested column of a DataFrame.

    Parameters:
        columns: Labels of the columns of ``df`` to report on, in the order
            the reports should be printed. An empty list prints nothing.
        df: DataFrame that contains every label listed in ``columns``.

    Returns: No return value. For each column(feature) a header with the
        column label and dtype, followed by the output of
        ``df[column].value_counts()``, is printed to stdout.
    '''
    for column in columns:
        # Look the column up once; both the dtype and the counts come from it.
        series = df[column]
        output = (
            f'{column} - {series.dtype}\n'
            '--------------------------------------\n'
            'Value|Count\n'
            '--------------------------------------\n'
            # No trailing ']' here: the original format string had a stray
            # bracket that was appended to the last line of every report.
            f'{series.value_counts()}\n'
            '======================================\n'
        )
        print(output)
# With the docstring in place, Shift+Tab in Jupyter Notebook shows the
# description of the function.
numerical_features = [
    'Age',
    'Experience',
    'Income',
    'ZIPCode',
    'Family',
    'CCAvg',
    'Mortgage',
]
# Print one value-count report per listed column.
value_count_rep(columns=numerical_features, df=data)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Age - int64 | |
-------------------------------------- | |
Value|Count | |
-------------------------------------- | |
35 151 | |
43 149 | |
52 145 | |
58 143 | |
54 143 | |
50 138 | |
41 136 | |
30 136 | |
56 135 | |
34 134 | |
39 133 | |
57 132 | |
59 132 | |
51 129 | |
60 127 | |
45 127 | |
46 127 | |
42 126 | |
31 125 | |
40 125 | |
55 125 | |
29 123 | |
62 123 | |
61 122 | |
44 121 | |
33 120 | |
32 120 | |
48 118 | |
49 115 | |
38 115 | |
47 113 | |
53 112 | |
63 108 | |
36 107 | |
37 106 | |
28 103 | |
27 91 | |
65 80 | |
64 78 | |
26 78 | |
25 53 | |
24 28 | |
66 24 | |
23 12 | |
67 12 | |
Name: Age, dtype: int64] | |
====================================== | |
Experience - int64 | |
-------------------------------------- | |
Value|Count | |
-------------------------------------- | |
32 154 | |
20 148 | |
9 147 | |
5 146 | |
23 144 | |
35 143 | |
25 142 | |
28 138 | |
18 137 | |
19 135 | |
26 134 | |
24 131 | |
3 129 | |
14 127 | |
16 127 | |
30 126 | |
34 125 | |
27 125 | |
17 125 | |
22 124 | |
29 124 | |
7 121 | |
15 119 | |
8 119 | |
6 119 | |
10 118 | |
33 117 | |
13 117 | |
11 116 | |
37 116 | |
36 114 | |
4 113 | |
21 113 | |
31 104 | |
12 102 | |
38 88 | |
2 85 | |
39 85 | |
1 74 | |
0 66 | |
40 57 | |
41 43 | |
-1 33 | |
-2 15 | |
42 8 | |
-3 4 | |
43 3 | |
Name: Experience, dtype: int64] | |
====================================== | |
Income - int64 | |
-------------------------------------- | |
Value|Count | |
-------------------------------------- | |
44 85 | |
38 84 | |
81 83 | |
41 82 | |
39 81 | |
.. | |
202 2 | |
189 2 | |
203 2 | |
218 1 | |
224 1 | |
Name: Income, Length: 162, dtype: int64] | |
====================================== | |
ZIPCode - int64 | |
-------------------------------------- | |
Value|Count | |
-------------------------------------- | |
94720 169 | |
94305 127 | |
95616 116 | |
90095 71 | |
93106 57 | |
... | |
94087 1 | |
96145 1 | |
90068 1 | |
92694 1 | |
94404 1 | |
Name: ZIPCode, Length: 467, dtype: int64] | |
====================================== | |
Family - int64 | |
-------------------------------------- | |
Value|Count | |
-------------------------------------- | |
1 1472 | |
2 1296 | |
4 1222 | |
3 1010 | |
Name: Family, dtype: int64] | |
====================================== | |
CCAvg - float64 | |
-------------------------------------- | |
Value|Count | |
-------------------------------------- | |
0.30 241 | |
1.00 231 | |
0.20 204 | |
2.00 188 | |
0.80 187 | |
... | |
5.33 1 | |
8.20 1 | |
3.67 1 | |
9.30 1 | |
8.90 1 | |
Name: CCAvg, Length: 108, dtype: int64] | |
====================================== | |
Mortgage - int64 | |
-------------------------------------- | |
Value|Count | |
-------------------------------------- | |
0 3462 | |
98 17 | |
89 16 | |
91 16 | |
83 16 | |
... | |
206 1 | |
210 1 | |
258 1 | |
278 1 | |
635 1 | |
Name: Mortgage, Length: 347, dtype: int64] | |
====================================== |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# function to iterate over specified variables and view their value counts | |
# add typing to help understand our function if reused elsewhere | |
from typing import List | |
import numpy as np | |
# our alias - Vector is a List of column labels (strings): value_count_rep
# uses each element to index the DataFrame, and callers pass names like 'Age'
Vector = List[str]


def value_count_rep(columns: Vector, df: pd.DataFrame) -> None:
    '''
    Print a value-count report for each requested column of a DataFrame.

    Parameters:
        columns: Labels of the columns of ``df`` to report on, in the order
            the reports should be printed. An empty list prints nothing.
        df: DataFrame that contains every label listed in ``columns``.

    Returns: No return value. For each column(feature) a header with the
        column label and dtype, followed by the output of
        ``df[column].value_counts()``, is printed to stdout.
    '''
    for column in columns:
        # Look the column up once; both the dtype and the counts come from it.
        series = df[column]
        output = (
            f'{column} - {series.dtype}\n'
            '--------------------------------------\n'
            'Value|Count\n'
            '--------------------------------------\n'
            # No trailing ']' here: the original format string had a stray
            # bracket that was appended to the last line of every report.
            f'{series.value_counts()}\n'
            '======================================\n'
        )
        print(output)
# With the docstring in place, Shift+Tab in Jupyter Notebook shows the
# description of the function.
numerical_features = [
    'Age',
    'Experience',
    'Income',
    'ZIPCode',
    'Family',
    'CCAvg',
    'Mortgage',
]
# Print one value-count report per listed column.
value_count_rep(columns=numerical_features, df=data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment