Last active
July 1, 2019 10:13
-
-
Save Miladiouss/8429337b78490a1f1c7591a8ab5e1e45 to your computer and use it in GitHub Desktop.
Calculates and appends ordinality to a DataFrame
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def appendOrdinality(df, columns, ascending, strictlyIncreasing=False):
    """
    Append absolute and relative ordinality to a dataframe, in place.

    Ordinality is the position of a row in the sorted dataframe.
    ``abs_ordinality`` is the integer rank, starting at 0, and ``ordinality``
    is that rank divided by the number of rows.

    The dataframe is modified in place: it is sorted by ``columns``, its index
    is reset (the old index is dropped), and the columns ``ordinality`` and
    ``abs_ordinality`` are added or overwritten. Nothing is returned.

    df:
        Pandas DataFrame
    columns:
        List of column names to be used for sorting (e.g. ['prob_1'])
    ascending:
        List of True/False sort directions, one per entry in ``columns``
    strictlyIncreasing:
        If True, the rank increments on every row, even if two consecutive
        rows are the same. If False, rows whose sort keys all equal those of
        the row above share its rank.

    A comparison that involves a missing value never counts as a change, so
    such a row keeps the rank of the row above it unless another sort column
    differs.

    === Example and Module Test ===
    rs = np.random.RandomState(9)
    dfTest = pd.DataFrame(columns=['prob_0', 'prob_1'])
    dfTest['prob_0'] = rs.randint(0, 10, 10) / 10
    dfTest['prob_1'] = rs.randint(0, 10, 10) / 10
    appendOrdinality(dfTest, ['prob_1', 'prob_0'], [False, True])
    dfTest
    """
    # Sort by the requested keys; later keys break ties left by earlier ones.
    df.sort_values(columns, ascending=ascending, inplace=True)
    df.reset_index(inplace=True, drop=True)

    n = len(df)

    # The first row always has rank 0; every later row adds one step whenever
    # it counts as "different" from the row above it.
    abs_ordinality = np.zeros(n, dtype=np.int64)

    if strictlyIncreasing:
        abs_ordinality[:] = np.arange(n)
    else:
        # changed[k] is True when row k + 1 differs from row k in any sort
        # column. max() keeps the length valid for an empty dataframe.
        changed = np.zeros(max(n - 1, 0), dtype=bool)
        # Loop over the (few) sort columns rather than the rows. Comparing
        # column by column also avoids upcasting mixed dtypes to one array.
        for name in columns:
            col = df[name]
            values = col.to_numpy()
            present = col.notna().to_numpy()
            # `!=` works for any comparable dtype, not only numbers. The
            # not-null masks keep missing values from counting as a change.
            changed |= (values[1:] != values[:-1]) & present[1:] & present[:-1]
        abs_ordinality[1:] = np.cumsum(changed)

    # Guard the normalisation so an empty dataframe yields empty columns
    # instead of dividing by zero.
    if n:
        ordinality = abs_ordinality / n
    else:
        ordinality = abs_ordinality.astype(float)

    # Assign 'ordinality' first so the resulting column order stays the same.
    df['ordinality'] = ordinality
    df['abs_ordinality'] = abs_ordinality
# Module smoke test: build a small reproducible frame and rank it by
# prob_1 (descending), breaking ties with prob_0 (ascending).
rs = np.random.RandomState(9)
# prob_0 is drawn before prob_1 so the seeded stream is consumed in the same order.
dfTest = pd.DataFrame({
    'prob_0': rs.randint(0, 10, 10) / 10,
    'prob_1': rs.randint(0, 10, 10) / 10,
})
appendOrdinality(dfTest, columns=['prob_1', 'prob_0'], ascending=[False, True])
# A bare expression so a notebook cell displays the ranked frame.
dfTest
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment