import os
import random
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from faker import Faker
from faker.providers import BaseProvider
cost_variability = {
    "hourly_rate_range": (35, 65),   # Agent experience levels
    "peak_hour_multiplier": {
        "morning": 1.25,   # 8-11am
        "afternoon": 1.0,  # 11am-3pm
        "evening": 1.15,   # 3-7pm
        "night": 1.4       # 7pm-8am
    },
    "complexity_factors": {
        "Hardware": (0.8, 2.0),   # (min, max) multiplier
        "Software": (0.9, 1.5),
        "Service": (0.7, 1.8),
        "Training": (0.4, 1.3),
        "Other": (0.2, 1.2),
    },
    "overtime_penalty": 1.5,    # 50% extra after 8h
    "weekend_surcharge": 1.3    # 30% weekend premium
}
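# Illustrative sanity check (assumption: a worst-case combination, not part of the
# generator itself): the most expensive agent handling a maximally complex Hardware
# ticket at night on a weekend combines to roughly 65 * 1.4 * 2.0 * 1.3 ≈ 236.6 per
# hour of resolution time, before any overtime penalty.
_example_worst_case_rate = (
    cost_variability["hourly_rate_range"][1]
    * cost_variability["peak_hour_multiplier"]["night"]
    * cost_variability["complexity_factors"]["Hardware"][1]
    * cost_variability["weekend_surcharge"]
)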
# Progressive AI performance degradation configuration (billing cases only)
progressive_ai_degradation = {
    "april_2025": {
        "resolution_time_multiplier": 1.80,  # 80% increase in resolution time
        "ai_acceptance_drop": 0.15,          # 15% drop in acceptance
        "first_resolution_drop": 0.20        # 20% drop in FTR
    },
    "may_2025": {
        "resolution_time_multiplier": 3.5,   # 250% increase in resolution time
        "ai_acceptance_drop": 0.25,          # 25% drop in acceptance
        "first_resolution_drop": 0.35        # 35% drop in FTR
    }
}
# Product-level financial impact configuration
product_impacts = {
    "Hardware": {
        "sla_penalty_multiplier": 1.5,   # 50% higher penalties
        "upsell_probability": 0.15,
        "clv_base": 7000,
        "clv_std": 2000,
        "cost_multiplier": 1.2           # 20% higher resolution costs
    },
    "Software": {
        "sla_penalty_multiplier": 1.0,
        "upsell_probability": 0.25,
        "clv_base": 5000,
        "clv_std": 1500,
        "cost_multiplier": 1.0
    },
    "Service": {
        "sla_penalty_multiplier": 0.8,   # 20% lower penalties
        "upsell_probability": 0.35,
        "clv_base": 10000,
        "clv_std": 3000,
        "cost_multiplier": 0.9
    },
    "Training": {
        "sla_penalty_multiplier": 0.7,   # 30% lower penalties
        "upsell_probability": 0.45,
        "clv_base": 10000,
        "clv_std": 3000,
        "cost_multiplier": 0.9
    },
    "Other": {
        "sla_penalty_multiplier": 1,     # no penalty adjustment
        "upsell_probability": 0.5,
        "clv_base": 10000,
        "clv_std": 3000,
        "cost_multiplier": 1
    }
}
# Countries (mostly European, plus a few other major markets) with approximate populations in millions
european_countries = [
    ("Albania", 2.8),
    ("Andorra", 0.077),
    ("Armenia", 3.0),
    ("Austria", 8.9),
    ("Azerbaijan", 10.0),
    ("Belarus", 9.4),
    ("Belgium", 11.5),
    ("Bosnia and Herzegovina", 3.3),
    ("Bulgaria", 6.9),
    ("Croatia", 4.0),
    ("Cyprus", 1.2),
    ("Czech Republic", 10.7),
    ("Denmark", 5.8),
    ("Estonia", 1.3),
    ("Finland", 5.5),
    ("France", 67.4),
    ("Australia", 26.4),
    ("Canada", 40.1),
    ("Japan", 124.1),
    ("United States", 340),
    ("Georgia", 3.7),
    ("Germany", 83.2),
    ("Greece", 10.4),
    ("Hungary", 9.7),
    ("Iceland", 0.36),
    ("Ireland", 4.9),
    ("Italy", 59.3),
    ("Kazakhstan", 18.8),
    ("Kosovo", 1.8),
    ("Latvia", 1.9),
    ("Liechtenstein", 0.038),
    ("Lithuania", 2.8),
    ("Luxembourg", 0.63),
    ("Malta", 0.51),
    ("Moldova", 2.6),
    ("Monaco", 0.039),
    ("Montenegro", 0.62),
    ("Netherlands", 17.5),
    ("North Macedonia", 2.1),
    ("Norway", 5.4),
    ("Poland", 37.9),
    ("Portugal", 10.3),
    ("Romania", 19.2),
    # ("Russia", 144.1),
    ("San Marino", 0.034),
    ("Serbia", 6.7),
    ("Slovakia", 5.4),
    ("Slovenia", 2.1),
    ("Spain", 47.3),
    ("Sweden", 10.3),
    ("Switzerland", 8.6),
    ("Turkey", 84.3),
    ("Ukraine", 41.4),
    ("United Kingdom", 67.2),
    ("Vatican City", 0.0008)
]
# Extract countries and populations
countries, populations = zip(*european_countries)

# Custom provider for generating countries based on population distribution
class EuropeanCountryProvider(BaseProvider):
    def european_country(self):
        return random.choices(countries, weights=populations, k=1)[0]

# Initialize Faker
fake = Faker()
# Add the custom provider to Faker
fake.add_provider(EuropeanCountryProvider)
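# Quick illustration (assumption: for inspection only, not used downstream): the
# provider draws country names weighted by population, so large markets such as
# "United States" or "Germany" dominate while microstates like "Vatican City" are rare.
_sample_countries = [fake.european_country() for _ in range(3)]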
# Seed the random number generators for reproducible output
np.random.seed(42)
random.seed(42)
Faker.seed(42)
# --- Configuration ---
n_tickets = 24472
ai_ratio = 0.7              # 70% AI-assisted tickets
n_agents = 16               # 8 AI, 8 human
premium_ratio = 0.3         # 30% Premium support tickets
base_hourly_cost = 45       # Average agent hourly rate
ai_cost_multiplier = 0.4    # AI is 60% cheaper to operate
premium_upsell_rate = 0.25  # 25% upsell chance on premium tickets

# NEW: AI performance degradation configuration
ai_performance_degradation = {
    "ai_acceptance_drop": 0.15,        # 15% drop in AI suggestion acceptance
    "billing_csat_impact": -0.8,       # 0.8 point drop for billing cases
    "time_resolution_increase": 1.25,  # 25% increase in resolution time
    "first_resolution_drop": 0.20      # 20% drop in first-time resolution rate
}
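# Note: of the keys above, only "billing_csat_impact" is read later (in
# calculate_csat_score); the resolution-time and acceptance drops are driven by the
# month-specific progressive_ai_degradation settings instead.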
# --- Create Agents ---
ai_agents = [fake.unique.name() + " (AI)" for _ in range(n_agents // 2)]
human_agents = [fake.unique.name() for _ in range(n_agents // 2)]
all_agents = ai_agents + human_agents

# --- Generate Unified Dataset ---
data = {
    "status": [],
    "ticket_id": [],
    "priority": [],
    "source": [],
    "topic": [],
    "agent_group": [],
    "agent_name": [],
    "created_time": [],
    "expected_sla_to_resolve": [],
    "expected_sla_to_first_response": [],
    "first_response_time": [],
    "sla_for_first_response": [],
    "resolution_time": [],
    "sla_for_resolution": [],
    "close_time": [],
    "agent_interactions": [],   # Interaction quality scores (1-5)
    "interaction_notes": [],    # New: textual notes
    "survey_results": [],
    "product_group": [],
    "support_level": [],
    "country": [],
    "latitude": [],
    "longitude": [],
    "call_transcript": [],
    "compliance_greeting": [],
    "compliance_farewell": [],
    "compliance_data_leak": [],
    "call_sentiment_score": [],
    "compliance_score": [],
    # NEW FIELDS
    "ai_suggestion_accepted": [],    # Only for AI-assisted agents
    "first_time_resolution": [],     # Boolean: resolved on first contact
    "csat_score": [],                # Customer satisfaction score (1-10)
    "resolution_attempt_number": []  # How many attempts to resolve (1 = first time)
}
# Quality note templates
good_notes = [
    "Used AI suggestions to quickly identify root cause",
    "Automated troubleshooting steps applied",
    "Knowledge base article referenced for standard solution",
    "Escalated to specialist with detailed context",
    "Proactive follow-up scheduled"
]
bad_notes = [
    "Customer required multiple explanations",
    "Had to consult supervisor for guidance",
    "Solution required manual research",
    "Back-and-forth communication needed",
    "Documentation was unclear"
]
# Compliance phrase lists, substring-matched against the generated call transcripts
compliance_rules = {
    "greeting": ["Hello", "Good morning", "Thank you for calling"],
    "farewell": ["Have a wonderful day", "We appreciate your business", "Thank you for your time"],
    "data_protection_phrases": ["confirm the last 4 digits", "for verification"],
    "forbidden_phrases": ["I'm angry", "That's stupid", "I don't care"]
}
def generate_transcript(is_ai):
    """Generate a synthetic call transcript with compliance markers"""
    transcript = []

    # --- Greeting ---
    if is_ai or np.random.rand() < 0.8:  # AI agents always greet correctly; humans ~80% of the time
        transcript.append(np.random.choice([
            "Hello! Thank you for calling [Insurance Co]. How can I help you today?",
            "Good morning! You've reached [Insurance Co]. May I have your policy number?"
        ]))
    else:
        if np.random.rand() < 0.2:  # 20% blunt, non-compliant greeting
            transcript.append("Yeah, what do you need?")
        else:
            transcript.append("Hi, [Insurance Co], policy number?")

    # --- Main Conversation ---
    transcript.append(f"Customer: {fake.sentence()}")

    # AI agents reference the knowledge base
    if is_ai:
        transcript.append("System: Suggested response - 'Let me verify that in our system'")
        transcript.append("Agent: Let me verify that in our system...")
    else:
        transcript.append("Agent: Umm, let me check...")

    # --- Data Protection ---
    if is_ai or np.random.rand() < 0.9:
        transcript.append("Agent: Could you confirm the last 4 digits of your SSN for verification?")
    else:
        if np.random.rand() < 0.1:  # 10% data leak risk
            transcript.append("Agent: So your full SSN is 123-45-6789 right?")

    # --- Sentiment Control ---
    if not is_ai and np.random.rand() < 0.15:  # 15% frustration risk
        transcript.append("Agent: *sigh* I already explained this...")

    # --- Farewell ---
    if is_ai or np.random.rand() < 0.85:
        transcript.append(np.random.choice([
            "Thank you for calling [Insurance Co]. Have a wonderful day!",
            "We appreciate your business. Goodbye!"
        ]))
    else:
        transcript.append("Bye")

    return "\n".join(transcript)
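# The returned transcript is a newline-joined sequence of turns: a greeting, a
# "Customer: ..." line, one or two agent lines (AI-assisted agents echo a system
# suggestion), an optional identity-verification prompt, and a farewell. The compliance
# checks inside the generation loop below simply look for the compliance_rules phrases
# within this string.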
def calculate_financials(row):
    product = row['product_group']
    params = product_impacts[product]

    # Simple operational cost estimate (kept for reference; superseded by the dynamic cost model below)
    base_cost = base_hourly_cost * row['resolution_time']  # resolution_time is in hours
    if row['agent_group'] == "AI-Assisted":
        cost_multiplier = ai_cost_multiplier
    else:
        cost_multiplier = 1.0
    cost_per_ticket = base_cost * cost_multiplier * params['cost_multiplier']

    # SLA Penalties
    if row['sla_for_resolution'] == "Breached SLA":
        base_penalty = 150 if row['priority'] == "High" else 75
        sla_penalty = base_penalty * params['sla_penalty_multiplier']
    else:
        sla_penalty = 0

    # Upsell Revenue
    if row['support_level'] == "Premium" and np.random.rand() < params['upsell_probability']:
        upsell_revenue = np.random.choice([99, 199, 299]) * (1.5 if product == "Service" else 1.0)
    else:
        upsell_revenue = 0

    # CLV Calculations
    clv_base = np.random.normal(params['clv_base'], params['clv_std'])
    clv_risk = clv_base * (0.02 if row['survey_results'] < 3 else 0) * (1.2 if product == "Service" else 1.0)

    # --- Dynamic Hourly Rate ---
    agent_exp = hash(row['agent_name']) % 10  # 0-9 experience level (varies across runs unless PYTHONHASHSEED is fixed)
    base_rate = np.linspace(*cost_variability["hourly_rate_range"], 10)[agent_exp]

    # --- Time-of-Day Premium ---
    hour = row['created_time'].hour
    if 8 <= hour < 11:
        time_mult = cost_variability["peak_hour_multiplier"]["morning"]
    elif 11 <= hour < 15:
        time_mult = 1.0
    elif 15 <= hour < 19:
        time_mult = cost_variability["peak_hour_multiplier"]["evening"]
    else:
        time_mult = cost_variability["peak_hour_multiplier"]["night"]

    # --- Product Complexity ---
    complexity = np.random.uniform(*cost_variability["complexity_factors"][product])

    # --- Weekend/Overtime Costs ---
    is_weekend = row['created_time'].weekday() >= 5
    overtime = 1.0 + max(0, row['resolution_time'] - 8) / 24 * cost_variability["overtime_penalty"]

    # --- AI Cost Logic ---
    if row['agent_group'] == "AI-Assisted":
        cost_mult = ai_cost_multiplier * (0.9 + np.random.normal(0, 0.1))  # 10% AI variance
    else:
        cost_mult = 1.0 * (0.8 + np.random.normal(0, 0.2))  # 20% human variance

    # --- Final Calculation ---
    raw_cost = base_rate * row['resolution_time']  # resolution_time is in hours
    final_cost = raw_cost * time_mult * complexity * overtime * cost_mult
    if is_weekend:
        final_cost *= cost_variability["weekend_surcharge"]

    return pd.Series([
        final_cost,
        sla_penalty,
        upsell_revenue,
        clv_base,
        clv_risk
    ])
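# The returned Series is positional: it is later unpacked (via DataFrame.apply) into the
# columns operational_cost, sla_penalty_cost, upsell_revenue, customer_clv and clv_risk,
# so the order of the five values must stay in sync with that column list.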
# NEW: Function to determine if an issue is resolved on the first attempt
def calculate_first_time_resolution(topic, agent_group, ai_accepted=None, created_time=None):
    """Calculate if a ticket is resolved on first contact"""
    base_ftr_rate = 0.75  # 75% baseline first-time resolution

    # Topic impact on FTR
    topic_multipliers = {
        "Feature request": 0.9,
        "Bug report": 0.8,
        "Other": 0.85,
        "Billing Cases": 0.8,  # Normal performance before April 2025
        "Pricing and licensing": 0.7
    }
    ftr_prob = base_ftr_rate * topic_multipliers.get(topic, 0.8)

    # AI assistance impact
    if agent_group == "AI-Assisted":
        if ai_accepted:
            ftr_prob *= 1.1  # 10% boost when the AI suggestion is accepted
        else:
            ftr_prob *= 0.8  # 20% penalty when the AI suggestion is rejected

    # Apply progressive performance degradation ONLY to billing cases
    if topic == "Billing Cases" and created_time and agent_group == "AI-Assisted":
        if created_time >= datetime(2025, 5, 1):    # May 2025
            ftr_prob *= (1 - progressive_ai_degradation["may_2025"]["first_resolution_drop"])
        elif created_time >= datetime(2025, 4, 1):  # April 2025
            ftr_prob *= (1 - progressive_ai_degradation["april_2025"]["first_resolution_drop"])

    return np.random.rand() < ftr_prob
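# Worked example (illustrative, ignoring the random draw): a May 2025 AI-assisted
# "Billing Cases" ticket where the suggestion was accepted resolves on first contact
# with probability 0.75 * 0.8 * 1.1 * (1 - 0.35) ≈ 0.43, versus ≈ 0.66 before April 2025.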
# NEW: Function to calculate CSAT score
def calculate_csat_score(topic, first_time_resolution, agent_group, survey_results, created_time=None):
    """Calculate Customer Satisfaction (CSAT) score, 1-10"""
    base_csat = 7.2  # Average CSAT score

    # Topic-specific impact (billing cases are hit hard ONLY from April 2025)
    if topic == "Billing Cases" and created_time and created_time >= datetime(2025, 4, 1):
        base_csat += ai_performance_degradation["billing_csat_impact"]

    # First-time resolution bonus
    if first_time_resolution:
        base_csat += 0.8
    else:
        base_csat -= 0.5

    # Agent type impact
    if agent_group == "AI-Assisted":
        base_csat += 0.3  # Slight AI advantage

    # Correlation with survey results
    csat = base_csat + (survey_results - 3) * 0.4

    # Add some randomness and constrain to 1-10
    csat += np.random.normal(0, 0.5)
    return np.clip(csat, 1, 10)
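# Worked example (illustrative, before the ±N(0, 0.5) noise and clipping): an April 2025
# billing case resolved on first contact by an AI-assisted agent with a survey score of 4
# lands at 7.2 - 0.8 + 0.8 + 0.3 + (4 - 3) * 0.4 = 7.9.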
for i in range(n_tickets):
    # --- Common Fields ---
    ticket_id = f"{1000 + i:04d}"
    created_time = fake.date_time_between(start_date=datetime(2024, 6, 1), end_date=datetime(2025, 5, 30))
    is_premium = np.random.rand() < premium_ratio

    # Topic selection (keeping original distribution)
    topic = np.random.choice(["Feature request", "Bug report", "Other", "Billing Cases", "Pricing and licensing"])

    # Assign agent
    if np.random.rand() < ai_ratio:
        agent_name = np.random.choice(ai_agents)
        agent_group = "AI-Assisted"
    else:
        agent_name = np.random.choice(human_agents)
        agent_group = "Human Only"
    is_ai = agent_group == "AI-Assisted"

    # NEW: AI suggestion acceptance (only for AI-assisted agents)
    if is_ai:
        base_acceptance_rate = 0.85  # 85% baseline acceptance
        # Apply progressive degradation ONLY to billing cases
        if topic == "Billing Cases":
            if created_time >= datetime(2025, 5, 1):    # May 2025
                degraded_acceptance_rate = base_acceptance_rate * (1 - progressive_ai_degradation["may_2025"]["ai_acceptance_drop"])
            elif created_time >= datetime(2025, 4, 1):  # April 2025
                degraded_acceptance_rate = base_acceptance_rate * (1 - progressive_ai_degradation["april_2025"]["ai_acceptance_drop"])
            else:
                degraded_acceptance_rate = base_acceptance_rate  # Normal performance before April 2025
        else:
            degraded_acceptance_rate = base_acceptance_rate  # Normal performance for non-billing cases
        ai_suggestion_accepted = np.random.rand() < degraded_acceptance_rate
    else:
        ai_suggestion_accepted = None  # N/A for human-only agents

    # NEW: Calculate first-time resolution
    first_time_resolution = calculate_first_time_resolution(topic, agent_group, ai_suggestion_accepted, created_time)
    resolution_attempts = 1 if first_time_resolution else np.random.choice([2, 3, 4], p=[0.6, 0.3, 0.1])

    # --- Performance Logic (UPDATED with time-based degradation) ---
    # Resolution times - apply the increase ONLY from April 2025
    if is_ai:
        base_res_time = max(2, np.random.normal(3.5, 0.5))
        # Apply progressive performance degradation ONLY to billing cases
        if topic == "Billing Cases":
            if created_time >= datetime(2025, 5, 1):    # May 2025
                base_res_time *= progressive_ai_degradation["may_2025"]["resolution_time_multiplier"]
            elif created_time >= datetime(2025, 4, 1):  # April 2025
                base_res_time *= progressive_ai_degradation["april_2025"]["resolution_time_multiplier"]
            # Before April 2025: no degradation
        # Factor in AI acceptance
        if ai_suggestion_accepted:
            base_res_time *= 0.9  # 10% faster when the AI suggestion is accepted
        else:
            base_res_time *= 1.3  # 30% slower when the AI suggestion is rejected
    else:
        base_res_time = max(2, np.random.normal(6, 1))

    # Multiple attempts increase resolution time
    if not first_time_resolution:
        base_res_time *= resolution_attempts * 0.8

    if is_premium:
        res_time = base_res_time * 0.6
        support_level = "Premium"
    else:
        res_time = base_res_time
        support_level = "Basic"

    # First response time
    frt = max(0.1, np.random.normal(0.5, 0.2)) if is_ai else max(0.5, np.random.normal(2, 0.5))

    # --- Interaction Quality ---
    if is_ai and is_premium:
        # Best notes (AI + Premium)
        note_quality = np.random.choice([4, 5])  # 4-5/5
        notes = np.random.choice(good_notes, size=2, replace=False)
    elif is_ai:
        # Good notes (AI only) - but factor in acceptance
        if ai_suggestion_accepted:
            note_quality = np.random.choice([3, 4])  # 3-4/5
            notes = [np.random.choice(good_notes)]
        else:
            note_quality = np.random.choice([2, 3])  # 2-3/5 when the AI suggestion is rejected
            notes = [np.random.choice(bad_notes)]
    elif is_premium:
        # Medium notes (Premium only)
        note_quality = np.random.choice([2, 3])  # 2-3/5
        notes = [np.random.choice(good_notes + bad_notes)]
    else:
        # Poor notes (Human + Basic)
        note_quality = np.random.choice([1, 2])  # 1-2/5
        notes = np.random.choice(bad_notes, size=2, replace=False)

    # Format notes
    formatted_notes = " | ".join(notes)

    # Other metrics
    survey = np.random.randint(3, 6) if is_ai else np.random.randint(1, 5)

    # NEW: Calculate CSAT score
    csat_score = calculate_csat_score(topic, first_time_resolution, agent_group, survey, created_time)

    # --- Populate Data ---
    data["ticket_id"].append(ticket_id)
    data["status"].append(np.random.choice(["Resolved", "Closed", "In progress", "Open"], p=[0.3, 0.2, 0.4, 0.1]))
    data["priority"].append(np.random.choice(["Low", "Medium", "High"], p=[0.5, 0.3, 0.2]))
    data["source"].append(np.random.choice(["Phone", "Email", "Chat"], p=[0.15, 0.55, 0.3]))
    data["topic"].append(topic)
    data["agent_group"].append(agent_group)
    data["agent_name"].append(agent_name)
    data["created_time"].append(created_time)
    data["expected_sla_to_resolve"].append(np.random.randint(4, 24))
    data["expected_sla_to_first_response"].append(np.random.randint(1, 4))
    data["first_response_time"].append(frt)
    data["sla_for_first_response"].append("Within SLA" if frt <= data["expected_sla_to_first_response"][-1] else "Breached SLA")
    data["resolution_time"].append(res_time)
    data["sla_for_resolution"].append("Within SLA" if res_time <= data["expected_sla_to_resolve"][-1] else "Breached SLA")
    data["close_time"].append(created_time + timedelta(hours=res_time))
    data["agent_interactions"].append(note_quality)    # Quality score 1-5
    data["interaction_notes"].append(formatted_notes)  # Text notes
    data["survey_results"].append(survey)
    data["product_group"].append(np.random.choice(["Hardware", "Software", "Training", "Service", "Other"], p=[0.1, 0.15, 0.45, 0.25, 0.05]))
    data["support_level"].append(support_level)
    data["country"].append(fake.european_country())
    data["latitude"].append(fake.latitude())
    data["longitude"].append(fake.longitude())

    # NEW FIELDS
    data["ai_suggestion_accepted"].append(ai_suggestion_accepted)
    data["first_time_resolution"].append(first_time_resolution)
    data["csat_score"].append(round(csat_score, 1))
    data["resolution_attempt_number"].append(resolution_attempts)

    transcript = generate_transcript(is_ai)

    # AI Analysis Results
    compliance_greeting = any(phrase in transcript for phrase in compliance_rules["greeting"])
    compliance_farewell = any(phrase in transcript for phrase in compliance_rules["farewell"])
    data_leak_risk = any(term in transcript for term in ["SSN", "social security"]) and not any(
        phrase in transcript for phrase in compliance_rules["data_protection_phrases"])
    sentiment_score = np.clip(np.random.normal(0.8 if is_ai else 0.4, 0.2), 0, 1)  # 0-1 scale

    # Add to data dictionary
    data["call_transcript"].append(transcript)
    data["compliance_greeting"].append(compliance_greeting)
    data["compliance_farewell"].append(compliance_farewell)
    data["compliance_data_leak"].append(data_leak_risk)
    data["call_sentiment_score"].append(sentiment_score)
    data["compliance_score"].append(
        0.3 * compliance_greeting +
        0.2 * compliance_farewell +
        0.4 * (not data_leak_risk) +
        0.1 * sentiment_score
    )
customer_review = pd.DataFrame(data)
customer_review[[
    'operational_cost',
    'sla_penalty_cost',
    'upsell_revenue',
    'customer_clv',
    'clv_risk'
]] = customer_review.apply(calculate_financials, axis=1)
# --- Add Churn Risk ---
n_rows = len(customer_review)
customer_review['churn_risk'] = np.where(
    (customer_review['survey_results'] < 3) &
    (customer_review['sla_for_resolution'] == "Breached SLA"),
    np.random.uniform(0.15, 0.4, size=n_rows),  # 15-40% churn risk per ticket
    np.random.uniform(0.01, 0.1, size=n_rows)   # 1-10% baseline per ticket
)
# --- Update SLA Table ---
sla = customer_review[[
    "ticket_id", "expected_sla_to_resolve", "expected_sla_to_first_response",
    "first_response_time", "sla_for_first_response", "resolution_time",
    "sla_for_resolution", "survey_results", "sla_penalty_cost", "clv_risk",
    "interaction_notes", "first_time_resolution", "csat_score", "resolution_attempt_number"
]].copy()

# --- Split into Tables ---
tickets = customer_review[[
    "ticket_id", "status", "priority", "source", "topic",
    "created_time", "close_time", "product_group",
    "support_level", "country", "latitude", "longitude", "operational_cost",
    "call_transcript", "compliance_greeting", "compliance_data_leak",
    "call_sentiment_score", "compliance_score", "csat_score", "first_time_resolution"
]].copy()

agents = customer_review[[
    "ticket_id", "agent_group", "agent_name", "agent_interactions", "ai_suggestion_accepted"
]].copy()
tickets["created_time"] = tickets["created_time"].astype(str) | |
tickets["close_time"] = tickets["close_time"].astype(str) | |
# --- Save to Parquet --- | |
customer_review.to_parquet("./datasets/customer_review_axr.parquet", engine="pyarrow") | |
tickets.to_parquet("./datasets/tickets_axr.parquet", engine="pyarrow") | |
sla.to_parquet("./datasets/sla_axr.parquet", engine="pyarrow") | |
agents.to_parquet("./datasets/agents_axr.parquet", engine="pyarrow") | |
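# To inspect a saved table later (assumes pyarrow is installed):
#   df = pd.read_parquet("./datasets/tickets_axr.parquet")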
# --- Verification ---
print("\nPerformance Metrics Summary:")
print("=" * 50)
# Split metrics by time period for comparison
pre_april_2025 = customer_review[customer_review['created_time'] < datetime(2025, 4, 1)]
post_april_2025 = customer_review[customer_review['created_time'] >= datetime(2025, 4, 1)]

print("\nAI Suggestion Acceptance Rate:")
if len(pre_april_2025[pre_april_2025['agent_group'] == 'AI-Assisted']) > 0:
    pre_acceptance = pre_april_2025[pre_april_2025['agent_group'] == 'AI-Assisted']['ai_suggestion_accepted'].mean()
    print(f"Before April 2025: {pre_acceptance:.2%}")
if len(post_april_2025[post_april_2025['agent_group'] == 'AI-Assisted']) > 0:
    post_acceptance = post_april_2025[post_april_2025['agent_group'] == 'AI-Assisted']['ai_suggestion_accepted'].mean()
    print(f"April 2025 onwards: {post_acceptance:.2%}")

print("\nFirst-Time Resolution Rate:")
print("Before April 2025:")
if len(pre_april_2025) > 0:
    print(pre_april_2025.groupby('agent_group')['first_time_resolution'].mean())
print("April 2025 onwards:")
if len(post_april_2025) > 0:
    print(post_april_2025.groupby('agent_group')['first_time_resolution'].mean())

print("\nAverage Resolution Time by Agent Group:")
print("Before April 2025:")
if len(pre_april_2025) > 0:
    print(pre_april_2025.groupby('agent_group')['resolution_time'].mean())
print("April 2025 onwards:")
if len(post_april_2025) > 0:
    print(post_april_2025.groupby('agent_group')['resolution_time'].mean())

print("\nCSAT Score for Billing Cases:")
pre_billing = pre_april_2025[pre_april_2025['topic'] == 'Billing Cases']
post_billing = post_april_2025[post_april_2025['topic'] == 'Billing Cases']
if len(pre_billing) > 0:
    print(f"Before April 2025: {pre_billing['csat_score'].mean():.1f}")
if len(post_billing) > 0:
    print(f"April 2025 onwards: {post_billing['csat_score'].mean():.1f}")

print("\nOverall CSAT Score by Agent Group:")
print("Before April 2025:")
if len(pre_april_2025) > 0:
    print(pre_april_2025.groupby('agent_group')['csat_score'].mean())
print("April 2025 onwards:")
if len(post_april_2025) > 0:
    print(post_april_2025.groupby('agent_group')['csat_score'].mean())

print("\nInteraction Quality by Agent Type and Support Level:")
print(agents.merge(tickets[["ticket_id", "support_level"]], on="ticket_id")
      .groupby(["agent_group", "support_level"])["agent_interactions"].mean())