Skip to content

Instantly share code, notes, and snippets.

@breeko
Created March 24, 2018 02:42
Show Gist options
  • Select an option

  • Save breeko/8b72784d75e01a676bd3986abf5548db to your computer and use it in GitHub Desktop.

Select an option

Save breeko/8b72784d75e01a676bd3986abf5548db to your computer and use it in GitHub Desktop.
# Reshape the long table `df` (one row per KEY/GRADE observation) into a wide
# table `df_rest`: one row per KEY, with integer columns 0..max_num_ratings-1
# holding that key's GRADE values in their original row order.
max_num_ratings = int(df.groupby("KEY").size().max())
columns = ["KEY", *range(max_num_ratings)]

# Collect plain dict rows and build the frame once; concatenating a one-row
# frame per key re-copies the accumulated frame on every iteration.
wide_rows = []
for key, grades in df.groupby("KEY", sort=False)["GRADE"]:
    # Only the positions this key actually has are filled in. Missing
    # positions are deliberately left absent so they become NaN in the frame
    # (a string placeholder such as "NA" would not be recognised by null
    # checks applied to these columns later on).
    row = {"KEY": key}
    row.update(enumerate(grades))
    wide_rows.append(row)

# NOTE: groupby skips rows whose KEY is missing; such rows could not match
# any key by equality and therefore never contributed a grade.
df_rest = pd.DataFrame(wide_rows, columns=columns)
# Stack every pair of consecutive grades (column c, column c + 1) from the
# wide table `df_rest` into a two-column frame `df_rolls`:
# column 1 = earlier grade, column 2 = the grade that immediately followed it.
pair_frames = []
# The grade columns are labelled 0..max_num_ratings-1, so the adjacent pairs
# start at 0..max_num_ratings-2, i.e. range(max_num_ratings - 1). Stopping one
# earlier would silently drop the final pair of columns.
for c1 in range(max_num_ratings - 1):
    c2 = c1 + 1
    # Keep only the keys that have a grade in both positions.
    has_both = df_rest[c1].notna() & df_rest[c2].notna()
    pair = df_rest.loc[has_both, [c1, c2]]
    pair.columns = [1, 2]
    pair_frames.append(pair)

# Concatenate once at the end instead of growing the frame inside the loop.
if pair_frames:
    df_rolls = pd.concat(pair_frames, ignore_index=True)
else:
    # No key has more than one grade: keep the expected (empty) schema.
    df_rolls = pd.DataFrame(columns=[1, 2])
# Build the transition-rate matrix from the (earlier, later) grade pairs in
# `df_rolls`: each column is a "from" state, each row a "to" state, and a cell
# holds the share of pairs starting in the column state that ended in the row
# state. Columns for states that never occur as a starting grade stay at 0.
states = ["A", "B", "C", "P", "Z"]
n_states = len(states)
df_roll_rates = pd.DataFrame(
    np.zeros((n_states, n_states)), index=states, columns=states
)

for origin in states:
    starts_here = df_rolls[1] == origin
    origin_total = starts_here.sum()
    if origin_total <= 0:
        # Nothing starts in this state; avoid dividing by zero.
        continue
    for target in states:
        matched = (starts_here & (df_rolls[2] == target)).sum()
        df_roll_rates.loc[target, origin] = matched / origin_total

df_roll_rates.index.name = "to"
df_roll_rates.columns.name = "from"

# Bare expression: shows the rates as percentages when run as the last
# statement of an interactive/notebook cell.
df_roll_rates * 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment