Created
July 25, 2023 14:26
-
-
Save kdmsnr/ed71e9dbbf228b3a03cca99183484ac2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build teams from randomly generated members.
#
# Members are clustered by their attributes with K-means, then the members of
# each cluster are dealt out round-robin across the teams, so that members who
# ended up in the same cluster are spread over different teams.
import random

import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder

# Allowed values for the categorical attributes.
genders = ["Male", "Female"]
blood_types = ["A", "B", "O", "AB"]

# Generate 50 members; each has [gender, blood type, integer from 1 to 10].
members = [
    {"name": f"member{i}",
     "attributes": [random.choice(genders), random.choice(blood_types), random.randint(1, 10)]}
    for i in range(50)
]

# One-hot encode the two categorical attributes.
# The dense/sparse switch was renamed from `sparse` to `sparse_output` in
# scikit-learn 1.2 and the old name was removed in 1.4, so neither keyword is
# passed here; the (default) sparse result is densified explicitly instead,
# which works on every version.
encoder = OneHotEncoder(categories=[genders, blood_types])
encoded_attributes = encoder.fit_transform(
    [(member["attributes"][0], member["attributes"][1]) for member in members]
)
if hasattr(encoded_attributes, "toarray"):
    encoded_attributes = encoded_attributes.toarray()

# Append the numeric attribute as one extra column.
# NOTE: this column is not rescaled, so its 1-10 range weighs more heavily in
# the K-means distance than the 0/1 one-hot columns.
attributes = np.concatenate(
    [encoded_attributes, np.array([[member["attributes"][2]] for member in members])],
    axis=1,
)

# Maximum number of members per team.
max_members_per_team = 5

# Number of clusters (and of teams): member count divided by the team size,
# rounded UP. Rounding down would leave fewer seats than members whenever the
# count is not a multiple of the team size, and the assignment loop below
# would then never find a free team.
n_clusters = (len(members) + max_members_per_team - 1) // max_members_per_team

# Run K-means. n_init is pinned because its default changed from 10 to 'auto'
# in scikit-learn 1.4 (and omitting it warns on 1.2/1.3).
kmeans = KMeans(n_clusters=n_clusters, n_init=10, random_state=0).fit(attributes)

# Cluster index assigned to each member, in the same order as `members`.
labels = kmeans.labels_

# Group the members by cluster and record the cluster on each member.
clusters = [[] for _ in range(n_clusters)]
for member, label in zip(members, labels):
    label = int(label)  # plain int rather than a NumPy scalar
    clusters[label].append(member)
    member["cluster"] = label

# Walk the clusters in order and deal their members out to the teams
# round-robin, skipping teams that are already full.
teams = [[] for _ in range(n_clusters)]
team_index = 0
for cluster in clusters:
    for member in cluster:
        # Total capacity >= number of members, so a free team always exists.
        while len(teams[team_index]) >= max_members_per_team:
            team_index = (team_index + 1) % n_clusters
        teams[team_index].append(member)
        team_index = (team_index + 1) % n_clusters

# Print the resulting teams.
for i, team in enumerate(teams, 1):
    print(f"Team {i}:")
    for member in team:
        print(f" {member['name']} (Attributes: {', '.join(map(str, member['attributes']))}, Cluster: {member['cluster']})")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment