Skip to content

Instantly share code, notes, and snippets.

@pashri
Created February 5, 2025 09:52
Show Gist options
  • Save pashri/1347256ee59a9d719b41892b97ab9b21 to your computer and use it in GitHub Desktop.
Save pashri/1347256ee59a9d719b41892b97ab9b21 to your computer and use it in GitHub Desktop.
Determine the relationship between two columns (1:1, 1:M, M:1, M:M)
import pandas as pd
def determine_relationship(frame: pd.DataFrame, col1: str, col2: str) -> str:
    """Determine the relationship between two columns (1:1, 1:M, M:1, M:M).

    The label reads ``col1:col2``. A side is "M" when a single value on the
    other side is paired with more than one distinct value of it.

    Missing values are not counted: ``groupby`` drops NaN keys and
    ``nunique`` skips NaN values by default. A frame with no usable pairs
    (empty, or one column entirely missing) is reported as "1:1", since no
    value is ever paired with more than one partner.

    Args:
        frame: Table containing both columns.
        col1: Name of the left-hand column of the relationship.
        col2: Name of the right-hand column of the relationship.

    Returns:
        One of "1:1", "1:M", "M:1" or "M:M".
    """
    # Number of distinct col1 values seen with each col2 value, and vice versa.
    col1_per_col2 = frame.groupby(col2, observed=True)[col1].nunique()
    col2_per_col1 = frame.groupby(col1, observed=True)[col2].nunique()

    # Ask "does any group exceed one?" instead of comparing max() to 1:
    # max() is NaN for an empty result and 0 when every partner is missing,
    # and either value would fall through every branch to a bogus "M:M".
    many_col1 = bool((col1_per_col2 > 1).any())
    many_col2 = bool((col2_per_col1 > 1).any())

    if many_col1 and many_col2:
        return "M:M"
    if many_col1:
        return "M:1"
    if many_col2:
        return "1:M"
    return "1:1"
# Usage example. `...` is only a placeholder: assign a real DataFrame that has
# columns 'a' and 'b' before running, since Ellipsis itself has no `.pipe`.
data: pd.DataFrame = ...
# `pipe` passes `data` as the first argument, i.e. this is equivalent to
# determine_relationship(data, 'a', 'b').
relationship = data.pipe(determine_relationship, 'a', 'b')
print(relationship)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment