Skip to content

Instantly share code, notes, and snippets.

@djinn
Created October 28, 2025 07:40
Show Gist options
  • Save djinn/53addcce5a9ed0602fde99c7fb678ed6 to your computer and use it in GitHub Desktop.
Save djinn/53addcce5a9ed0602fde99c7fb678ed6 to your computer and use it in GitHub Desktop.
Guess Who with heuristics
"""
Optimal Decision Tree for Guess Who Character Identification
Uses information theory to minimize average decision depth
"""
import math
from typing import List, Dict, Tuple, Any
# Character database with attributes as arrays
CHARACTERS = [
{
"name": "AMY",
"gender": "F",
"eyewear": True,
"headgear": False,
"facial_hair": False,
"hair_color": "black",
"highlights": True,
"earrings": True,
"hobby": "art",
},
{
"name": "DAVID",
"gender": "M",
"eyewear": False,
"headgear": True,
"facial_hair": False,
"hair_color": "blonde",
"highlights": False,
"earrings": False,
"hobby": "baseball",
},
{
"name": "LEO",
"gender": "M",
"eyewear": False,
"headgear": False,
"facial_hair": True,
"hair_color": "gray",
"highlights": False,
"earrings": False,
"hobby": "gardening",
},
{
"name": "GABE",
"gender": "M",
"eyewear": False,
"headgear": False,
"facial_hair": False,
"hair_color": "black",
"highlights": False,
"earrings": False,
"hobby": "soccer",
},
{
"name": "KATIE",
"gender": "F",
"eyewear": False,
"headgear": True,
"facial_hair": False,
"hair_color": "blonde",
"highlights": False,
"earrings": False,
"hobby": "basketball",
},
{
"name": "OLIVIA",
"gender": "F",
"eyewear": False,
"headgear": False,
"facial_hair": False,
"hair_color": "black",
"highlights": True,
"earrings": True,
"hobby": "art",
},
{
"name": "JORDAN",
"gender": "M",
"eyewear": False,
"headgear": False,
"facial_hair": False,
"hair_color": "orange",
"highlights": False,
"earrings": False,
"hobby": "football",
},
{
"name": "CARMEN",
"gender": "F",
"eyewear": False,
"headgear": False,
"facial_hair": False,
"hair_color": "black",
"highlights": False,
"earrings": True,
"hobby": "music",
},
{
"name": "LAURA",
"gender": "F",
"eyewear": False,
"headgear": False,
"facial_hair": False,
"hair_color": "black",
"highlights": False,
"earrings": True,
"hobby": "guitar",
},
{
"name": "JOE",
"gender": "M",
"eyewear": False,
"headgear": False,
"facial_hair": False,
"hair_color": "black",
"highlights": False,
"earrings": False,
"hobby": "baseball",
},
{
"name": "MIKE",
"gender": "M",
"eyewear": False,
"headgear": True,
"facial_hair": False,
"hair_color": "brown",
"highlights": False,
"earrings": False,
"hobby": "science",
},
{
"name": "AL",
"gender": "M",
"eyewear": True,
"headgear": False,
"facial_hair": True,
"hair_color": "white",
"highlights": False,
"earrings": False,
"hobby": "science",
},
{
"name": "DANIEL",
"gender": "M",
"eyewear": False,
"headgear": False,
"facial_hair": True,
"hair_color": "brown",
"highlights": False,
"earrings": False,
"hobby": "cooking",
},
{
"name": "SOPHIA",
"gender": "F",
"eyewear": False,
"headgear": False,
"facial_hair": False,
"hair_color": "red",
"highlights": False,
"earrings": False,
"hobby": "cooking",
},
{
"name": "NICK",
"gender": "M",
"eyewear": False,
"headgear": False,
"facial_hair": True,
"hair_color": "blonde",
"highlights": False,
"earrings": False,
"hobby": "music",
},
{
"name": "LILY",
"gender": "F",
"eyewear": False,
"headgear": True,
"facial_hair": False,
"hair_color": "blue",
"highlights": True,
"earrings": False,
"hobby": "fashion",
},
{
"name": "LIZ",
"gender": "F",
"eyewear": True,
"headgear": False,
"facial_hair": False,
"hair_color": "gray",
"highlights": False,
"earrings": False,
"hobby": "teaching",
},
{
"name": "MIA",
"gender": "F",
"eyewear": False,
"headgear": False,
"facial_hair": False,
"hair_color": "black",
"highlights": False,
"earrings": False,
"hobby": "science",
},
{
"name": "EMMA",
"gender": "F",
"eyewear": False,
"headgear": False,
"facial_hair": False,
"hair_color": "red",
"highlights": False,
"earrings": False,
"hobby": "animals",
},
{
"name": "RACHEL",
"gender": "F",
"eyewear": True,
"headgear": False,
"facial_hair": False,
"hair_color": "brown",
"highlights": False,
"earrings": False,
"hobby": "drums",
},
{
"name": "BEN",
"gender": "M",
"eyewear": True,
"headgear": False,
"facial_hair": True,
"hair_color": "black",
"highlights": False,
"earrings": False,
"hobby": "art",
},
{
"name": "ERIC",
"gender": "M",
"eyewear": False,
"headgear": False,
"facial_hair": True,
"hair_color": "black",
"highlights": False,
"earrings": False,
"hobby": "music",
},
{
"name": "FARAH",
"gender": "F",
"eyewear": False,
"headgear": False,
"facial_hair": False,
"hair_color": "black",
"highlights": False,
"earrings": False,
"hobby": "cooking",
},
{
"name": "SAM",
"gender": "M",
"eyewear": False,
"headgear": True,
"facial_hair": False,
"hair_color": "black",
"highlights": False,
"earrings": False,
"hobby": "cooking",
},
]
def calculate_entropy(characters: List[Dict]) -> float:
"""Calculate Shannon entropy of a character set"""
if len(characters) <= 1:
return 0.0
return math.log2(len(characters))
def calculate_information_gain(characters: List[Dict], attribute: str) -> float:
"""
Calculate information gain for splitting on an attribute.
Uses entropy-based calculation to find optimal splits.
"""
if len(characters) <= 1:
return 0.0
initial_entropy = calculate_entropy(characters)
# Split characters by attribute value
splits = {}
for char in characters:
value = char.get(attribute)
if value not in splits:
splits[value] = []
splits[value].append(char)
# Calculate weighted entropy after split
weighted_entropy = 0.0
for subset in splits.values():
probability = len(subset) / len(characters)
weighted_entropy += probability * calculate_entropy(subset)
return initial_entropy - weighted_entropy
def find_best_attribute(
characters: List[Dict], used_attributes: set
) -> Tuple[str, float]:
"""
Find the attribute that provides maximum information gain.
This minimizes the expected number of questions.
"""
available_attributes = [
"gender",
"eyewear",
"headgear",
"facial_hair",
"hair_color",
"highlights",
"earrings",
"hobby",
]
best_attribute = None
best_gain = -1.0
for attr in available_attributes:
if attr not in used_attributes:
gain = calculate_information_gain(characters, attr)
if gain > best_gain:
best_gain = gain
best_attribute = attr
return best_attribute, best_gain
def analyze_sample_characters():
"""
Peek at 5 representative characters to understand attribute distributions.
Sample selection: diverse across key attributes for optimal tree construction.
"""
# Select 5 diverse characters strategically
sample_indices = [0, 6, 11, 14, 19] # AMY, JORDAN, AL, NICK, RACHEL
sample = [CHARACTERS[i] for i in sample_indices]
print("=" * 70)
print("SAMPLING 5 CHARACTERS FOR ATTRIBUTE ANALYSIS")
print("=" * 70)
for char in sample:
print(f"\n{char['name']}:")
for key, value in char.items():
if key != "name":
print(f" {key:15} = {value}")
print("\n" + "=" * 70)
print("ATTRIBUTE INFORMATION GAIN ANALYSIS (on sample)")
print("=" * 70)
for attr in [
"gender",
"eyewear",
"headgear",
"facial_hair",
"hair_color",
"highlights",
]:
gain = calculate_information_gain(sample, attr)
print(f"{attr:15} : Information Gain = {gain:.4f}")
print("\n" + "=" * 70)
print("FULL DATASET OPTIMAL SPLIT SEQUENCE")
print("=" * 70)
# Build optimal decision sequence
remaining = CHARACTERS.copy()
used = set()
depth = 0
while len(remaining) > 1 and depth < 8:
best_attr, gain = find_best_attribute(remaining, used)
if best_attr is None:
break
splits = {}
for char in remaining:
value = char[best_attr]
if value not in splits:
splits[value] = []
splits[value].append(char)
print(f"\nDepth {depth}: Split on '{best_attr}' (gain={gain:.4f})")
for value, subset in splits.items():
names = [c["name"] for c in subset]
print(
f" {best_attr}={value}: {len(subset)} chars -> {names[:5]}{'...' if len(names) > 5 else ''}"
)
used.add(best_attr)
remaining = max(splits.values(), key=len)
depth += 1
def identify_character_optimal():
"""
Interactive identification using information-theoretic optimal ordering.
Dynamically recalculates best question at each step.
"""
print("\n" + "=" * 70)
print("OPTIMAL CHARACTER IDENTIFICATION")
print("=" * 70)
print("Think of a character! I'll identify them with minimal questions.\n")
remaining = CHARACTERS.copy()
used_attributes = set()
question_count = 0
while len(remaining) > 1:
# Find optimal next question
best_attr, gain = find_best_attribute(remaining, used_attributes)
if best_attr is None:
break
# Get unique values for this attribute in remaining characters
values = sorted(set(char[best_attr] for char in remaining))
# Format question based on attribute type
if best_attr == "gender":
question = "Is the character male? (yes/no)"
response = input(f"Q{question_count + 1}: {question} ").strip().lower()
value = "M" if response in ["yes", "y"] else "F"
elif best_attr in [
"eyewear",
"headgear",
"facial_hair",
"highlights",
"earrings",
]:
attr_name = best_attr.replace("_", " ")
question = f"Does the character have {attr_name}? (yes/no)"
response = input(f"Q{question_count + 1}: {question} ").strip().lower()
value = True if response in ["yes", "y"] else False
else:
question = f"What is the character's {best_attr}?"
print(f"Q{question_count + 1}: {question}")
print(f" Options: {', '.join(str(v) for v in values)}")
value = input(" Your answer: ").strip().lower()
# Filter remaining characters
remaining = [char for char in remaining if char[best_attr] == value]
used_attributes.add(best_attr)
question_count += 1
print(f" → {len(remaining)} character(s) remaining")
if len(remaining) <= 3:
names = [c["name"] for c in remaining]
print(f" → Candidates: {', '.join(names)}")
if len(remaining) == 1:
print(f"\n{'=' * 70}")
print(f"✓ IDENTIFIED: {remaining[0]['name']} (in {question_count} questions)")
print(f"{'=' * 70}")
return remaining[0]["name"]
elif len(remaining) == 0:
print("\n✗ No matching character found. Please check your answers.")
else:
print(f"\n✓ Narrowed to: {', '.join(c['name'] for c in remaining)}")
def calculate_average_depth():
"""Calculate theoretical average decision depth for optimal tree"""
total_depth = 0
for target in CHARACTERS:
remaining = CHARACTERS.copy()
used = set()
depth = 0
while len(remaining) > 1 and depth < 10:
best_attr, _ = find_best_attribute(remaining, used)
if best_attr is None:
break
target_value = target[best_attr]
remaining = [c for c in remaining if c[best_attr] == target_value]
used.add(best_attr)
depth += 1
total_depth += depth
avg_depth = total_depth / len(CHARACTERS)
optimal_depth = math.log2(len(CHARACTERS))
print(f"\n{'=' * 70}")
print("DECISION TREE PERFORMANCE METRICS")
print(f"{'=' * 70}")
print(f"Total characters: {len(CHARACTERS)}")
print(f"Theoretical optimal depth: {optimal_depth:.2f} questions")
print(f"Actual average depth: {avg_depth:.2f} questions")
print(f"Efficiency: {(optimal_depth / avg_depth) * 100:.1f}%")
print(f"{'=' * 70}\n")
if __name__ == "__main__":
print("\nINFORMATION-THEORETIC GUESS WHO SOLVER\n")
# Phase 1: Sample analysis
analyze_sample_characters()
# Phase 2: Performance metrics
calculate_average_depth()
# Phase 3: Interactive identification
while True:
choice = (
input("\nWould you like to identify a character? (yes/no): ")
.strip()
.lower()
)
if choice in ["yes", "y"]:
identify_character_optimal()
else:
print("\nThanks for playing!")
break
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment