Created
December 4, 2023 15:13
-
-
Save benob/924dfe90a9a62faf0fbcdd8e3db7ae77 to your computer and use it in GitHub Desktop.
Find connected components in speaker-conversation graphs
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import sys | |
from collections import defaultdict | |
# Bipartite speaker/conversation graph, stored as two adjacency maps:
#   speakers[user_id]      -> ids of the conversations that user appears in
#   conversations[conv_id] -> user_ids of the speakers of that conversation
speakers = defaultdict(list)
conversations = defaultdict(list)

# Every command-line argument is the path of a JSON metadata file. Only its
# 'id' field and the 'user_id' of each entry under 'speakers' are read.
for filename in sys.argv[1:]:
    # JSON is exchanged as UTF-8; decode explicitly rather than relying on
    # the platform's locale encoding.
    with open(filename, encoding='utf-8') as fp:
        metadata = json.load(fp)
    # The file is already closed here; only the parsed metadata is needed.
    for speaker in metadata['speakers']:
        speakers[speaker['user_id']].append(metadata['id'])
        conversations[metadata['id']].append(speaker['user_id'])

# parts[color] -> set of conversation ids collected under that color.
parts = defaultdict(set)
# speaker_colors[user_id] -> color assigned to that speaker.
speaker_colors = {}
def mark_reachable(target, color):
    """Color every speaker reachable from *target* with *color*.

    Two speakers are adjacent when they appear in the same conversation.
    Starting from ``target``, every speaker that is reachable and not yet
    colored is recorded in the module-level ``speaker_colors`` mapping, and
    every conversation of those speakers is added to ``parts[color]``.

    If ``target`` already has a color the call does nothing (in particular
    it does not create ``parts[color]``).

    The traversal uses an explicit stack instead of recursion so that a
    large connected component cannot exceed the interpreter's recursion
    limit.

    Args:
        target: user id of the speaker to start from.
        color: label to assign to the whole reachable component.
    """
    if target in speaker_colors:
        return

    # Speakers are colored when they are pushed, so each one is pushed (and
    # expanded) at most once.
    speaker_colors[target] = color
    pending = [target]
    while pending:
        current = pending.pop()
        # .get() avoids inserting keys into the defaultdicts, which would
        # break a caller that is iterating over `speakers`.
        for conversation in speakers.get(current, ()):
            parts[color].add(conversation)
            for speaker in conversations.get(conversation, ()):
                if speaker not in speaker_colors:
                    speaker_colors[speaker] = color
                    pending.append(speaker)
# Offer each speaker's position in `speakers` as a candidate color.
# mark_reachable() does nothing for a speaker that already has a color, so
# only the first speaker met in each connected component starts a new part;
# the colors actually used are therefore unique but not contiguous.
for color, target in enumerate(speakers):
    mark_reachable(target, color)

# TODO: drop conversations to cut cliques

# Report the parts from largest to smallest, one per line:
#   <rank> <number of conversations> <conversation ids...>
# Ids are converted to str so non-string ids (e.g. JSON integers) can be
# joined, and sorted so the output does not depend on set iteration order.
for i, part in enumerate(sorted(parts.values(), key=len, reverse=True)):
    print(i, len(part), ' '.join(sorted(map(str, part))))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment