Last active
February 25, 2026 00:25
-
-
Save allthesignals/bb9c1d9e25cb5005ae2d21621d6bd83a to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np
import pandas as pd
from k_means_constrained import KMeansConstrained
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# NOTE(review): `df` must already be loaded before this point (step 1 — the
# data-loading code — is not shown in this snippet). It is assumed to contain
# the columns 'Contract', 'Domain', 'Portfolio', and 'Department'.

# 2. Vectorize Categories (One-Hot Encoding)
hierarchy_cols = ['Contract', 'Domain', 'Portfolio', 'Department']
encoder = OneHotEncoder(sparse_output=False)
encoded_features = encoder.fit_transform(df[hierarchy_cols])
feature_names = encoder.get_feature_names_out()

# 3. Apply Hierarchy Weights
# Each one-hot column is scaled by its source column's weight so the distance
# metric prioritizes Contract > Domain > Portfolio > Department.
weights = {
    'Contract': 10.0,   # Top priority
    'Domain': 7.0,      # Medium priority # todo - need supragroupings
    'Portfolio': 3.0,   # Medium priority # todo - need supragroupings
    'Department': 1.0,  # Lowest priority
}
weighted_features = encoded_features.copy()
for i, col in enumerate(feature_names):
    for category, weight in weights.items():
        # Match on the "<column>_" prefix produced by get_feature_names_out().
        # A plain substring test (`category in col`) would double-weight any
        # column whose encoded *value* contains another category's name
        # (e.g. "Domain_Contracting" also contains "Contract").
        if col.startswith(category + '_'):
            weighted_features[:, i] *= weight
            break  # each feature originates from exactly one source column

# 4. Run Constrained K-Means
# size_min=15 and size_max=20 enforce the required group sizes.
n_clusters = max(1, len(df) // 17)  # aim for ~17 members per group
# Fail fast with a clear message if the row count cannot be partitioned into
# n_clusters groups of 15-20 (e.g. 21 rows -> 1 cluster but size_max is 20).
if not (n_clusters * 15 <= len(df) <= n_clusters * 20):
    raise ValueError(
        f"{len(df)} rows cannot be split into {n_clusters} groups of 15-20 members"
    )
clf = KMeansConstrained(
    n_clusters=n_clusters,
    size_min=15,
    size_max=20,
    random_state=42,
)
df['cluster'] = clf.fit_predict(weighted_features)

# 5. Output Results
print(df.groupby('cluster').size())  # Verify sizes are between 15-20
print(df.sort_values('cluster').head(20))
df.sort_values('cluster').to_csv('clustered.csv', index=False)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment