Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save EatZeBaby/46dd6249086c4475241d79004620ff55 to your computer and use it in GitHub Desktop.
Save EatZeBaby/46dd6249086c4475241d79004620ff55 to your computer and use it in GitHub Desktop.
import pandas as pd
import numpy as np
from faker import Faker
from datetime import datetime, timedelta
from faker import Faker
import random
from faker.providers import BaseProvider
# Cost-model knobs consumed by calculate_financials to vary the effective
# cost of handling a single ticket.
cost_variability = {
    "hourly_rate_range": (35, 65),      # $/hour span across agent experience levels
    "peak_hour_multiplier": {
        "morning": 1.25,    # 8-11am
        "afternoon": 1.0,   # 11am-3pm
        "evening": 1.15,    # 3-7pm
        "night": 1.4        # 7pm-8am
    },
    # Per-product (min, max) bounds for a uniformly-drawn complexity multiplier.
    "complexity_factors": {
        "Hardware": (0.8, 2.0),
        "Software": (0.9, 1.5),
        "Service": (0.7, 1.8),
        "Training": (0.4, 1.3),
        "Other": (0.2, 1.2),
    },
    "overtime_penalty": 1.5,    # 50% extra applied beyond 8 resolution-time units
    "weekend_surcharge": 1.3    # 30% premium on weekend-created tickets
}
# Progressive AI performance degradation (applied to billing cases only).
# Used by the acceptance / first-time-resolution / resolution-time logic to
# simulate a worsening AI model from April 2025 onwards.
progressive_ai_degradation = {
    "april_2025": {
        "resolution_time_multiplier": 1.80,  # 80% longer resolution time
        "ai_acceptance_drop": 0.15,          # 15% relative drop in acceptance
        "first_resolution_drop": 0.20        # 20% relative drop in FTR
    },
    "may_2025": {
        "resolution_time_multiplier": 3.5,   # 3.5x resolution time
        "ai_acceptance_drop": 0.25,          # 25% relative drop in acceptance
        "first_resolution_drop": 0.35        # 35% relative drop in FTR
    }
}
# Per-product-group financial parameters used by calculate_financials:
# SLA penalty scaling, premium upsell odds, customer-lifetime-value
# distribution (mean/std in $) and a resolution-cost scaler.
product_impacts = {
    "Hardware": {
        "sla_penalty_multiplier": 1.5,  # 50% higher penalties
        "upsell_probability": 0.15,
        "clv_base": 7000,
        "clv_std": 2000,
        "cost_multiplier": 1.2  # 20% higher resolution costs
    },
    "Software": {
        "sla_penalty_multiplier": 1.0,
        "upsell_probability": 0.25,
        "clv_base": 5000,
        "clv_std": 1500,
        "cost_multiplier": 1.0
    },
    "Service": {
        "sla_penalty_multiplier": 0.8,  # 20% lower penalties
        "upsell_probability": 0.35,
        "clv_base": 10000,
        "clv_std": 3000,
        "cost_multiplier": 0.9
    },
    "Training": {
        "sla_penalty_multiplier": 0.7,  # 30% lower penalties
        "upsell_probability": 0.45,
        "clv_base": 10000,
        "clv_std": 3000,
        "cost_multiplier": 0.9
    },
    "Other": {
        "sla_penalty_multiplier": 1,  # baseline (no penalty adjustment)
        "upsell_probability": 0.5,
        "clv_base": 10000,
        "clv_std": 3000,
        "cost_multiplier": 1
    }
}
# Countries with approximate populations (in millions), used as sampling
# weights. NOTE(review): despite the name, the list also contains Australia,
# Canada, Japan and the United States — confirm whether that is intentional.
european_countries = [
    ("Albania", 2.8),
    ("Andorra", 0.077),
    ("Armenia", 3.0),
    ("Austria", 8.9),
    ("Azerbaijan", 10.0),
    ("Belarus", 9.4),
    ("Belgium", 11.5),
    ("Bosnia and Herzegovina", 3.3),
    ("Bulgaria", 6.9),
    ("Croatia", 4.0),
    ("Cyprus", 1.2),
    ("Czech Republic", 10.7),
    ("Denmark", 5.8),
    ("Estonia", 1.3),
    ("Finland", 5.5),
    ("France", 67.4),
    ("Australia", 26.4),
    ("Canada", 40.1),
    ("Japan", 124.1),
    ("United States",340),
    ("Georgia", 3.7),
    ("Germany", 83.2),
    ("Greece", 10.4),
    ("Hungary", 9.7),
    ("Iceland", 0.36),
    ("Ireland", 4.9),
    ("Italy", 59.3),
    ("Kazakhstan", 18.8),
    ("Kosovo", 1.8),
    ("Latvia", 1.9),
    ("Liechtenstein", 0.038),
    ("Lithuania", 2.8),
    ("Luxembourg", 0.63),
    ("Malta", 0.51),
    ("Moldova", 2.6),
    ("Monaco", 0.039),
    ("Montenegro", 0.62),
    ("Netherlands", 17.5),
    ("North Macedonia", 2.1),
    ("Norway", 5.4),
    ("Poland", 37.9),
    ("Portugal", 10.3),
    ("Romania", 19.2),
    #("Russia", 144.1),
    ("San Marino", 0.034),
    ("Serbia", 6.7),
    ("Slovakia", 5.4),
    ("Slovenia", 2.1),
    ("Spain", 47.3),
    ("Sweden", 10.3),
    ("Switzerland", 8.6),
    ("Turkey", 84.3),
    ("Ukraine", 41.4),
    ("United Kingdom", 67.2),
    ("Vatican City", 0.0008)
]
# Unzip into the parallel name/weight tuples used by the Faker provider below.
countries, populations = zip(*european_countries)
# Custom Faker provider: draws a country name with probability proportional
# to its population weight.
class EuropeanCountryProvider(BaseProvider):
    """Faker provider exposing a population-weighted country generator."""

    def european_country(self):
        # random.choices returns a one-element list by default (k=1);
        # unpack it to hand back the bare country name.
        (picked,) = random.choices(countries, weights=populations)
        return picked
# Initialize Faker
fake = Faker()
# Register the custom country provider so fake.european_country() works
fake.add_provider(EuropeanCountryProvider)
# NOTE(review): only NumPy's RNG is seeded here; the `random` module and
# Faker keep their own unseeded state, so runs are not fully reproducible —
# confirm whether that is intended.
np.random.seed(42)
# --- Configuration ---
n_tickets = 24472            # total synthetic tickets to generate
ai_ratio = 0.7               # 70% AI-assisted tickets
n_agents = 16                # 8 AI, 8 human
premium_ratio = 0.3          # 30% Premium support tickets
base_hourly_cost = 45        # average agent hourly rate ($)
ai_cost_multiplier = 0.4     # AI is 60% cheaper to operate
premium_upsell_rate = 0.25   # 25% upsell chance on premium tickets (appears unused below)
# AI performance degradation knobs. Only "billing_csat_impact" is read in
# this file (by calculate_csat_score); the rest appear superseded by
# progressive_ai_degradation above.
ai_performance_degradation = {
    "ai_acceptance_drop": 0.15,        # 15% drop in AI suggestion acceptance
    "billing_csat_impact": -0.8,       # 0.8 point CSAT drop for billing cases
    "time_resolution_increase": 1.25,  # 25% increase in resolution time
    "first_resolution_drop": 0.20      # 20% drop in first-time resolution rate
}
# --- Create Agents ---
# Half the pool is AI-assisted (name suffixed with " (AI)"), half human.
ai_agents = [fake.unique.name() + " (AI)" for _ in range(n_agents//2)]
human_agents = [fake.unique.name() for _ in range(n_agents//2)]
all_agents = ai_agents + human_agents
# --- Generate Unified Dataset ---
# Column schema for the unified ticket dataset; each column accumulates one
# value per generated ticket inside the main loop below.
_TICKET_COLUMNS = [
    "status",
    "ticket_id",
    "priority",
    "source",
    "topic",
    "agent_group",
    "agent_name",
    "created_time",
    "expected_sla_to_resolve",
    "expected_sla_to_first_response",
    "first_response_time",
    "sla_for_first_response",
    "resolution_time",
    "sla_for_resolution",
    "close_time",
    "agent_interactions",         # interaction quality scores (1-5)
    "interaction_notes",          # textual notes
    "survey_results",
    "product_group",
    "support_level",
    "country",
    "latitude",
    "longitude",
    "call_transcript",
    "compliance_greeting",
    "compliance_farewell",
    "compliance_data_leak",
    "call_sentiment_score",
    "compliance_score",
    "ai_suggestion_accepted",     # only for AI-assisted agents
    "first_time_resolution",      # bool: resolved on first contact
    "csat_score",                 # customer satisfaction score (1-10)
    "resolution_attempt_number",  # attempts to resolve (1 = first time)
]
# One independent empty list per column, in schema order.
data = {column: [] for column in _TICKET_COLUMNS}
# Quality note templates: "good" notes are sampled for AI-assisted and
# premium tickets, "bad" notes for human/basic tickets (see main loop).
good_notes = [
    "Used AI suggestions to quickly identify root cause",
    "Automated troubleshooting steps applied",
    "Knowledge base article referenced for standard solution",
    "Escalated to specialist with detailed context",
    "Proactive follow-up scheduled"
]
bad_notes = [
    "Customer required multiple explanations",
    "Had to consult supervisor for guidance",
    "Solution required manual research",
    "Back-and-forth communication needed",
    "Documentation was unclear"
]
# Compliance phrase lists matched (by substring) against each generated call
# transcript. FIX: the original lists did not contain the phrases that
# generate_transcript actually emits, so farewell compliance was always
# False, the "Good morning" greeting never counted as compliant, and the
# compliant SSN-verification line was mis-flagged as a data leak. The
# emitted phrases are added; original entries are kept for compatibility.
compliance_rules = {
    "greeting": ["Hello", "Good [morning/afternoon]", "Thank you for calling",
                 "Good morning"],
    "farewell": ["Have a nice day", "Thank you for your time",
                 "Have a wonderful day", "Goodbye"],
    "data_protection_phrases": ["Let me verify your identity", "For security purposes",
                                "for verification"],
    "forbidden_phrases": ["I'm angry", "That's stupid", "I don't care"]
}
def generate_transcript(is_ai):
    """Generate a synthetic call transcript with compliance markers.

    AI-assisted calls always follow the compliant script; human-only calls
    randomly miss the greeting (~20%), identity verification (~10%) and the
    farewell (~15%), and may show frustration (~15%).
    """
    transcript = []
    # --- Greeting ---
    if is_ai or np.random.rand() < 0.8:  # AI always compliant; humans 80% of the time
        transcript.append(np.random.choice([
            "Hello! Thank you for calling [Insurance Co]. How can I help you today?",
            "Good morning! You've reached [Insurance Co]. May I have your policy number?"
        ]))
    else:
        if np.random.rand() < 0.2:  # 20% of the non-compliant 20% are rude (~4% overall)
            transcript.append("Yeah, what do you need?")
        else:
            transcript.append("Hi, [Insurance Co], policy number?")
    # --- Main Conversation ---
    transcript.append(f"Customer: {fake.sentence()}")
    # AI agents surface a knowledge-base suggestion and echo it verbatim
    if is_ai:
        transcript.append("System: Suggested response - 'Let me verify that in our system'")
        transcript.append("Agent: Let me verify that in our system...")
    else:
        transcript.append("Agent: Umm, let me check...")
    # --- Data Protection ---
    if is_ai or np.random.rand() < 0.9:
        transcript.append("Agent: Could you confirm the last 4 digits of your SSN for verification?")
    else:
        if np.random.rand() < 0.1:  # 10% of the remaining 10% leak the full SSN (~1% overall)
            transcript.append("Agent: So your full SSN is 123-45-6789 right?")
    # --- Sentiment Control ---
    if not is_ai and np.random.rand() < 0.15:  # 15% frustration risk (humans only)
        transcript.append("Agent: *sigh* I already explained this...")
    # --- Farewell ---
    if is_ai or np.random.rand() < 0.85:
        transcript.append(np.random.choice([
            "Thank you for calling [Insurance Co]. Have a wonderful day!",
            "We appreciate your business. Goodbye!"
        ]))
    else:
        transcript.append("Bye")
    # NOTE(review): downstream compliance checks do plain substring matching
    # on this joined string; the phrase lists in compliance_rules must match
    # the exact wording emitted above to detect compliance correctly.
    return "\n".join(transcript)
def calculate_financials(row):
    """Compute per-ticket financial metrics for one DataFrame row.

    Reads: product_group, agent_group, agent_name, priority, support_level,
    survey_results, created_time, resolution_time, sla_for_resolution.

    Returns a pd.Series of
    [operational_cost, sla_penalty_cost, upsell_revenue, customer_clv, clv_risk]
    matching the column assignment at the call site.

    FIX: removed a dead base_cost/cost_per_ticket computation whose result
    was never used, and a duplicate product_group lookup. Replaced built-in
    hash() with zlib.crc32 for the experience bucket: str hashing is
    randomized per process (PYTHONHASHSEED), so agent rates were not
    reproducible across runs.
    """
    import zlib  # local import: only needed for the deterministic name hash

    product = row['product_group']
    params = product_impacts[product]

    # --- SLA Penalties ---
    if row['sla_for_resolution'] == "Breached SLA":
        base_penalty = 150 if row['priority'] == "High" else 75
        sla_penalty = base_penalty * params['sla_penalty_multiplier']
    else:
        sla_penalty = 0

    # --- Upsell Revenue (premium tickets only, product-dependent odds) ---
    if row['support_level'] == "Premium" and np.random.rand() < params['upsell_probability']:
        upsell_revenue = np.random.choice([99, 199, 299]) * (1.5 if product == "Service" else 1.0)
    else:
        upsell_revenue = 0

    # --- CLV: normal draw around the product's base; risk only when unhappy ---
    clv_base = np.random.normal(params['clv_base'], params['clv_std'])
    clv_risk = clv_base * (0.02 if row['survey_results'] < 3 else 0) * (1.2 if product == "Service" else 1.0)

    # --- Dynamic hourly rate by agent experience (stable 0-9 bucket) ---
    agent_exp = zlib.crc32(row['agent_name'].encode("utf-8")) % 10
    base_rate = np.linspace(*cost_variability["hourly_rate_range"], 10)[agent_exp]

    # --- Time-of-day premium ---
    hour = row['created_time'].hour
    if 8 <= hour < 11:
        time_mult = cost_variability["peak_hour_multiplier"]["morning"]
    elif 11 <= hour < 15:
        time_mult = 1.0
    elif 15 <= hour < 19:
        time_mult = cost_variability["peak_hour_multiplier"]["evening"]
    else:
        time_mult = cost_variability["peak_hour_multiplier"]["night"]

    # --- Product complexity (uniform within product-specific bounds) ---
    complexity = np.random.uniform(*cost_variability["complexity_factors"][product])

    # --- Weekend / overtime ---
    is_weekend = row['created_time'].weekday() >= 5
    overtime = 1.0 + max(0, row['resolution_time'] - 8) / 24 * cost_variability["overtime_penalty"]

    # --- Agent-type cost variance ---
    if row['agent_group'] == "AI-Assisted":
        cost_mult = ai_cost_multiplier * (0.9 + np.random.normal(0, 0.1))  # 10% AI variance
    else:
        cost_mult = 1.0 * (0.8 + np.random.normal(0, 0.2))  # 20% human variance

    # --- Final Calculation ---
    # NOTE(review): resolution_time is divided by 60 here (minutes?) but the
    # main loop treats it as hours when computing close_time — confirm units.
    raw_cost = base_rate * (row['resolution_time'] / 60)
    final_cost = raw_cost * time_mult * complexity * overtime * cost_mult
    if is_weekend:
        final_cost *= cost_variability["weekend_surcharge"]

    return pd.Series([
        final_cost,
        sla_penalty,
        upsell_revenue,
        clv_base,
        clv_risk
    ])
# Decide whether a ticket was resolved on the first contact.
def calculate_first_time_resolution(topic, agent_group, ai_accepted=None, created_time=None):
    """Stochastically decide if a ticket is resolved on first contact.

    Starts from a 75% baseline, scales by topic, AI-acceptance and (for
    billing cases handled by AI from April 2025) the progressive
    degradation schedule, then draws a single uniform sample.
    """
    TOPIC_FACTORS = {
        "Feature request": 0.9,
        "Bug report": 0.8,
        "Other": 0.85,
        "Billing Cases": 0.8,  # normal performance before April 2025
        "Pricing and licensing": 0.7
    }
    # 75% baseline scaled by the topic-specific factor (0.8 if unknown topic).
    prob = 0.75 * TOPIC_FACTORS.get(topic, 0.8)

    # AI assistance: boost when the suggestion was accepted, penalty otherwise.
    if agent_group == "AI-Assisted":
        prob *= 1.1 if ai_accepted else 0.8

    # Progressive degradation applies only to AI-handled billing cases.
    if topic == "Billing Cases" and created_time and agent_group == "AI-Assisted":
        if created_time >= datetime(2025, 5, 1):
            prob *= (1 - progressive_ai_degradation["may_2025"]["first_resolution_drop"])
        elif created_time >= datetime(2025, 4, 1):
            prob *= (1 - progressive_ai_degradation["april_2025"]["first_resolution_drop"])

    return np.random.rand() < prob
# Compute a customer satisfaction score for one ticket.
def calculate_csat_score(topic, first_time_resolution, agent_group, survey_results, created_time=None):
    """Return a CSAT score on a 1-10 scale (stochastic).

    Builds the score additively from a 7.2 baseline: billing penalty from
    April 2025, first-time-resolution bonus/penalty, a small AI bump, a
    survey correlation term, and Gaussian noise — then clamps to [1, 10].
    """
    score = 7.2  # population-average baseline

    # Billing cases take a fixed CSAT hit from April 2025 onwards.
    if topic == "Billing Cases" and created_time and created_time >= datetime(2025, 4, 1):
        score += ai_performance_degradation["billing_csat_impact"]

    # Reward first-contact resolution, penalise repeat contacts.
    score += 0.8 if first_time_resolution else -0.5

    # Slight advantage for AI-assisted handling.
    if agent_group == "AI-Assisted":
        score += 0.3

    # Correlate with the 1-5 survey result (centred on 3).
    score += (survey_results - 3) * 0.4

    # Gaussian noise, then clamp into the valid range.
    score += np.random.normal(0, 0.5)
    return np.clip(score, 1, 10)
# --- Main generation loop: one iteration per synthetic ticket ---
for i in range(n_tickets):
    # --- Common Fields ---
    # NOTE(review): with n_tickets=24472 the ids exceed 9999, so :04d does
    # not keep them fixed-width — confirm intent.
    ticket_id = f"{1000+i:04d}"
    created_time = fake.date_time_between(start_date=datetime(2024,6,1), end_date=datetime(2025,5,30))
    is_premium = np.random.rand() < premium_ratio
    # Topic selection (uniform over the five topics)
    topic = np.random.choice(["Feature request","Bug report","Other","Billing Cases","Pricing and licensing"])
    # Assign agent: ~70% of tickets go to the AI-assisted pool
    if np.random.rand() < ai_ratio:
        agent_name = np.random.choice(ai_agents)
        agent_group = "AI-Assisted"
    else:
        agent_name = np.random.choice(human_agents)
        agent_group = "Human Only"
    is_ai = agent_group == "AI-Assisted"
    # AI suggestion acceptance (only meaningful for AI-assisted agents)
    if is_ai:
        base_acceptance_rate = 0.85  # 85% baseline acceptance
        # Progressive degradation applies ONLY to billing cases
        if topic == "Billing Cases":
            if created_time >= datetime(2025, 5, 1):  # May 2025 onwards
                degraded_acceptance_rate = base_acceptance_rate * (1 - progressive_ai_degradation["may_2025"]["ai_acceptance_drop"])
            elif created_time >= datetime(2025, 4, 1):  # April 2025
                degraded_acceptance_rate = base_acceptance_rate * (1 - progressive_ai_degradation["april_2025"]["ai_acceptance_drop"])
            else:
                degraded_acceptance_rate = base_acceptance_rate  # normal before April 2025
        else:
            degraded_acceptance_rate = base_acceptance_rate  # non-billing unaffected
        ai_suggestion_accepted = np.random.rand() < degraded_acceptance_rate
    else:
        ai_suggestion_accepted = None  # N/A for human-only agents
    # First-time resolution and, if repeated, how many attempts it took
    first_time_resolution = calculate_first_time_resolution(topic, agent_group, ai_suggestion_accepted, created_time)
    resolution_attempts = 1 if first_time_resolution else np.random.choice([2, 3, 4], p=[0.6, 0.3, 0.1])
    # --- Performance Logic (time-based degradation) ---
    # Resolution times; billing degradation applies only from April 2025
    if is_ai:
        base_res_time = max(2, np.random.normal(3.5, 0.5))
        # Progressive performance degradation ONLY for billing cases
        if topic == "Billing Cases":
            if created_time >= datetime(2025, 5, 1):  # May 2025 onwards
                base_res_time *= progressive_ai_degradation["may_2025"]["resolution_time_multiplier"]
            elif created_time >= datetime(2025, 4, 1):  # April 2025
                base_res_time *= progressive_ai_degradation["april_2025"]["resolution_time_multiplier"]
            # Before April 2025: no degradation
        # Factor in AI acceptance
        if ai_suggestion_accepted:
            base_res_time *= 0.9  # 10% faster when AI accepted
        else:
            base_res_time *= 1.3  # 30% slower when AI rejected
    else:
        base_res_time = max(2, np.random.normal(6, 1))
    # Multiple attempts stretch resolution time
    if not first_time_resolution:
        base_res_time *= resolution_attempts * 0.8
    if is_premium:
        res_time = base_res_time * 0.6  # premium tickets get faster handling
        support_level = "Premium"
    else:
        res_time = base_res_time
        support_level = "Basic"
    # First response time: AI responds much faster than humans
    frt = max(0.1, np.random.normal(0.5, 0.2)) if is_ai else max(0.5, np.random.normal(2, 0.5))
    # --- Interaction Quality ---
    if is_ai and is_premium:
        # Best notes (AI + Premium)
        note_quality = np.random.choice([4, 5])  # 4-5/5
        notes = np.random.choice(good_notes, size=2, replace=False)
    elif is_ai:
        # Good notes (AI only), modulated by suggestion acceptance
        if ai_suggestion_accepted:
            note_quality = np.random.choice([3, 4])  # 3-4/5
            notes = [np.random.choice(good_notes)]
        else:
            note_quality = np.random.choice([2, 3])  # 2-3/5 when AI rejected
            notes = [np.random.choice(bad_notes)]
    elif is_premium:
        # Medium notes (Premium only)
        note_quality = np.random.choice([2, 3])  # 2-3/5
        notes = [np.random.choice(good_notes + bad_notes)]
    else:
        # Poor notes (Human + Basic)
        note_quality = np.random.choice([1, 2])  # 1-2/5
        notes = np.random.choice(bad_notes, size=2, replace=False)
    # Format notes
    formatted_notes = " | ".join(notes)
    # Survey results: AI-assisted skews high (3-5); human-only spans 1-4
    survey = np.random.randint(3, 6) if is_ai else np.random.randint(1, 5)
    # CSAT derived from topic, FTR, agent type, survey and creation date
    csat_score = calculate_csat_score(topic, first_time_resolution, agent_group, survey, created_time)
    # --- Populate Data ---
    data["ticket_id"].append(ticket_id)
    # NOTE(review): status is drawn independently of everything else, yet a
    # close_time is always recorded below — even for "Open" tickets.
    data["status"].append(np.random.choice(["Resolved","Closed","In progress","Open"], p=[0.3, 0.2, 0.4,0.1]))
    data["priority"].append(np.random.choice(["Low", "Medium", "High"], p=[0.5, 0.3, 0.2]))
    data["source"].append(np.random.choice(["Phone", "Email", "Chat"],p=[0.15, 0.55, 0.3]))
    data["topic"].append(topic)
    data["agent_group"].append(agent_group)
    data["agent_name"].append(agent_name)
    data["created_time"].append(created_time)
    data["expected_sla_to_resolve"].append(np.random.randint(4, 24))
    data["expected_sla_to_first_response"].append(np.random.randint(1, 4))
    data["first_response_time"].append(frt)
    data["sla_for_first_response"].append("Within SLA" if frt <= data["expected_sla_to_first_response"][-1] else "Breached SLA")
    data["resolution_time"].append(res_time)
    data["sla_for_resolution"].append("Within SLA" if res_time <= data["expected_sla_to_resolve"][-1] else "Breached SLA")
    data["close_time"].append(created_time + timedelta(hours=res_time))
    data["agent_interactions"].append(note_quality)  # quality score 1-5
    data["interaction_notes"].append(formatted_notes)  # text notes
    data["survey_results"].append(survey)
    # NOTE(review): product_group is sampled independently of topic/agent.
    data["product_group"].append(np.random.choice(["Hardware", "Software", "Training","Service","Other"],p=[0.1, 0.15, 0.45, 0.25, 0.05]))
    data["support_level"].append(support_level)
    data["country"].append(fake.european_country())
    # NOTE(review): latitude/longitude are independent random coordinates,
    # unrelated to the sampled country.
    data["latitude"].append(fake.latitude())
    data["longitude"].append(fake.longitude())
    # NEW FIELDS
    data["ai_suggestion_accepted"].append(ai_suggestion_accepted)
    data["first_time_resolution"].append(first_time_resolution)
    data["csat_score"].append(round(csat_score, 1))
    data["resolution_attempt_number"].append(resolution_attempts)
    # --- Transcript generation + compliance analysis ---
    transcript = generate_transcript(is_ai)
    # Substring matching against the compliance phrase lists
    compliance_greeting = any(phrase in transcript for phrase in compliance_rules["greeting"])
    compliance_farewell = any(phrase in transcript for phrase in compliance_rules["farewell"])
    # Leak = SSN mentioned without any recognised data-protection phrase.
    # NOTE(review): this depends on compliance_rules containing the exact
    # wording generate_transcript emits — verify the phrase lists match.
    data_leak_risk = any(ssn in transcript for ssn in ["SSN", "social security"]) and not any(
        phrase in transcript for phrase in compliance_rules["data_protection_phrases"])
    sentiment_score = np.clip(np.random.normal(0.8 if is_ai else 0.4, 0.2), 0, 1)  # 0-1 scale
    # Add to data dictionary
    data["call_transcript"].append(transcript)
    data["compliance_greeting"].append(compliance_greeting)
    data["compliance_farewell"].append(compliance_farewell)
    data["compliance_data_leak"].append(data_leak_risk)
    data["call_sentiment_score"].append(sentiment_score)
    # Weighted compliance score: greeting 30%, farewell 20%, no-leak 40%,
    # sentiment 10% (booleans coerce to 0/1 in the arithmetic)
    data["compliance_score"].append(
        0.3*compliance_greeting +
        0.2*compliance_farewell +
        0.4*(not data_leak_risk) +
        0.1*sentiment_score
    )
# Assemble the master DataFrame from the accumulated columns.
customer_review = pd.DataFrame(data)
# calculate_financials returns one Series per row in this column order;
# row-wise apply is slow but runs once per generation.
customer_review[[
    'operational_cost',
    'sla_penalty_cost',
    'upsell_revenue',
    'customer_clv',
    'clv_risk'
]] = customer_review.apply(calculate_financials, axis=1)
# --- Add Churn Risk ---
# Unhappy customers (low survey score) whose ticket also breached its
# resolution SLA carry an elevated churn probability.
# FIX: np.random.uniform must be drawn per-row (size=len(...)). The original
# scalar draws assigned the SAME single value to every matching row instead
# of an independent sample per ticket.
_n_rows = len(customer_review)
customer_review['churn_risk'] = np.where(
    (customer_review['survey_results'] < 3) &
    (customer_review['sla_for_resolution'] == "Breached SLA"),
    np.random.uniform(0.15, 0.4, size=_n_rows),  # 15-40% churn risk
    np.random.uniform(0.01, 0.1, size=_n_rows)   # 1-10% baseline
)
# --- SLA fact table: per-ticket response/resolution metrics ---
sla = customer_review[[
    "ticket_id", "expected_sla_to_resolve", "expected_sla_to_first_response",
    "first_response_time", "sla_for_first_response", "resolution_time",
    "sla_for_resolution", "survey_results", "sla_penalty_cost","clv_risk",
    "interaction_notes", "first_time_resolution", "csat_score", "resolution_attempt_number"
]].copy()
# --- Tickets table: descriptive, geographic and compliance columns ---
tickets = customer_review[[
    "ticket_id", "status", "priority", "source", "topic",
    "created_time", "close_time", "product_group",
    "support_level", "country", "latitude", "longitude","operational_cost","call_transcript","compliance_greeting",
    "compliance_data_leak",
    "call_sentiment_score", "compliance_score", "csat_score", "first_time_resolution"
]].copy()
# --- Agents table: ticket-to-agent assignment + interaction quality ---
agents = customer_review[[
    "ticket_id", "agent_group", "agent_name", "agent_interactions", "ai_suggestion_accepted"
]].copy()
# Stringify datetimes so the parquet schema stays simple and portable.
tickets["created_time"] = tickets["created_time"].astype(str)
tickets["close_time"] = tickets["close_time"].astype(str)
# --- Save to Parquet (the ./datasets directory must already exist) ---
customer_review.to_parquet("./datasets/customer_review_axr.parquet", engine="pyarrow")
tickets.to_parquet("./datasets/tickets_axr.parquet", engine="pyarrow")
sla.to_parquet("./datasets/sla_axr.parquet", engine="pyarrow")
agents.to_parquet("./datasets/agents_axr.parquet", engine="pyarrow")
# --- Verification: summary statistics split around the April 2025
# billing-degradation cutover.
# FIX: the original printed the "Performance Metrics Summary" banner twice
# (copy-paste duplication); print it once.
print("\nPerformance Metrics Summary:")
print("="*50)

# Split metrics by time period for comparison
pre_april_2025 = customer_review[customer_review['created_time'] < datetime(2025, 4, 1)]
post_april_2025 = customer_review[customer_review['created_time'] >= datetime(2025, 4, 1)]

print("\nAI Suggestion Acceptance Rate:")
if len(pre_april_2025[pre_april_2025['agent_group'] == 'AI-Assisted']) > 0:
    pre_acceptance = pre_april_2025[pre_april_2025['agent_group'] == 'AI-Assisted']['ai_suggestion_accepted'].mean()
    print(f"Before April 2025: {pre_acceptance:.2%}")
if len(post_april_2025[post_april_2025['agent_group'] == 'AI-Assisted']) > 0:
    post_acceptance = post_april_2025[post_april_2025['agent_group'] == 'AI-Assisted']['ai_suggestion_accepted'].mean()
    print(f"April 2025 onwards: {post_acceptance:.2%}")

print("\nFirst-Time Resolution Rate:")
print("Before April 2025:")
if len(pre_april_2025) > 0:
    print(pre_april_2025.groupby('agent_group')['first_time_resolution'].mean())
print("April 2025 onwards:")
if len(post_april_2025) > 0:
    print(post_april_2025.groupby('agent_group')['first_time_resolution'].mean())

print("\nAverage Resolution Time by Agent Group:")
print("Before April 2025:")
if len(pre_april_2025) > 0:
    print(pre_april_2025.groupby('agent_group')['resolution_time'].mean())
print("April 2025 onwards:")
if len(post_april_2025) > 0:
    print(post_april_2025.groupby('agent_group')['resolution_time'].mean())

print("\nCSAT Score for Billing Cases:")
pre_billing = pre_april_2025[pre_april_2025['topic'] == 'Billing Cases']
post_billing = post_april_2025[post_april_2025['topic'] == 'Billing Cases']
if len(pre_billing) > 0:
    print(f"Before April 2025: {pre_billing['csat_score'].mean():.1f}")
if len(post_billing) > 0:
    print(f"April 2025 onwards: {post_billing['csat_score'].mean():.1f}")

print("\nOverall CSAT Score by Agent Group:")
print("Before April 2025:")
if len(pre_april_2025) > 0:
    print(pre_april_2025.groupby('agent_group')['csat_score'].mean())
print("April 2025 onwards:")
if len(post_april_2025) > 0:
    print(post_april_2025.groupby('agent_group')['csat_score'].mean())

print("\nInteraction Quality by Agent Type and Support Level:")
print(agents.merge(tickets[["ticket_id", "support_level"]], on="ticket_id")
      .groupby(["agent_group", "support_level"])
      ["agent_interactions"].mean())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment