Created
November 11, 2025 13:56
-
-
Save janduplessis883/1f7d3486b1402bafc7a5194b83e3fe2d to your computer and use it in GitHub Desktop.
SetFit Synthetic Dataset
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Healthcare Feedback Dataset for SetFit
# 90 examples (15 per class) - suitable for few-shot learning

# Optimized label names for ML.
# Keys are the integer class ids stored in each dataset row's "label" field;
# values are the snake_case category names those ids stand for.
LABEL_MAPPING = {
    0: "access_availability",
    1: "information_provision",
    2: "privacy_confidentiality",
    3: "continuity_care",
    4: "clinical_communication",
    5: "admin_communication",
}
# Training dataset
# Each row is a dict with two keys:
#   "text"  - one free-text patient feedback comment
#   "label" - the integer class id of that comment (a key of LABEL_MAPPING)
# Rows are grouped by class, 15 per class, in ascending label order.
healthcare_dataset = [
    # access_availability (0)
    {"text": "Tried to book an appointment but earliest slot was 3 weeks away", "label": 0},
    {"text": "Phoned at 9am and got a same-day appointment - brilliant service", "label": 0},
    {"text": "Parking is a nightmare at this clinic, always full and no disabled spaces", "label": 0},
    {"text": "The wheelchair ramp has been broken for weeks with no signage", "label": 0},
    {"text": "Walk-in hours are very convenient for working professionals", "label": 0},
    {"text": "Waited 45 minutes past my scheduled appointment time", "label": 0},
    {"text": "Online booking system is intuitive and available 24/7", "label": 0},
    {"text": "They close at 5pm which is impossible for those with full-time jobs", "label": 0},
    {"text": "Receptionist always finds me an urgent slot when my condition flares up", "label": 0},
    {"text": "Phone line is constantly engaged, took 20 attempts to get through", "label": 0},
    {"text": "Telemedicine option saved me a 2-hour round trip", "label": 0},
    {"text": "Building is impossible to find with poor signage from the main road", "label": 0},
    {"text": "The triage system works well for urgent cases but routine waits are 4+ weeks", "label": 0},
    {"text": "Weekend and evening appointments make managing care so much easier", "label": 0},
    {"text": "Waiting area is overcrowded and uncomfortable for long waits", "label": 0},
    # information_provision (1)
    {"text": "Doctor took time to explain my diagnosis using diagrams I could understand", "label": 1},
    {"text": "Never received my test results, had to chase the clinic three times", "label": 1},
    {"text": "The leaflet about diabetes management was comprehensive but I needed more personalization", "label": 1},
    {"text": "Nurse explained the vaccination process step-by-step before proceeding", "label": 1},
    {"text": "Left the appointment more confused than when I arrived", "label": 1},
    {"text": "Website has excellent resources about managing chronic conditions", "label": 1},
    {"text": "Doctor used excessive medical jargon without translating for me", "label": 1},
    {"text": "Received clear written instructions for my new medication dosage", "label": 1},
    {"text": "No information provided about potential side effects of treatment", "label": 1},
    {"text": "The dietitian gave me practical nutrition advice I could actually use", "label": 1},
    {"text": "Wish they'd explain what alternative treatment options exist", "label": 1},
    {"text": "Health promotion posters in waiting area are genuinely informative", "label": 1},
    {"text": "They sent detailed prep instructions 48 hours before my procedure", "label": 1},
    {"text": "While the brochure was thorough, I still had unanswered questions", "label": 1},
    {"text": "Consultant took time to discuss lifestyle factors affecting my condition", "label": 1},
    # privacy_confidentiality (2)
    {"text": "Overheard another patient's full diagnosis details at the reception desk", "label": 2},
    {"text": "Doctors always close the door and pull the curtain for complete privacy", "label": 2},
    {"text": "Concerned about who can access my medical records through their new patient portal", "label": 2},
    {"text": "Receptionist discussed my test results within earshot of other patients", "label": 2},
    {"text": "Private consultation rooms ensure conversations remain confidential", "label": 2},
    {"text": "Not confident my data is secure with their digital check-in system", "label": 2},
    {"text": "They have a discreet area for sensitive conversations about billing", "label": 2},
    {"text": "Heard staff discussing identifiable patient details in the corridor", "label": 2},
    {"text": "The privacy screens at reception prevent others seeing my personal details", "label": 2},
    {"text": "Worried about confidentiality when leaving voicemails with symptoms", "label": 2},
    {"text": "The new open-plan pharmacy counter feels less private than the old setup", "label": 2},
    {"text": "They explicitly asked for my consent before sharing info with a specialist", "label": 2},
    {"text": "Private room made me comfortable discussing sensitive mental health issues", "label": 2},
    {"text": "Not sure if my email communications with the clinic are encrypted", "label": 2},
    {"text": "Staff are extremely discreet when handling personal information", "label": 2},
    # continuity_care (3)
    {"text": "Love that I can see Dr. Smith every time, she knows my full medical history", "label": 3},
    {"text": "Saw three different doctors in one month, had to repeat my story each time", "label": 3},
    {"text": "My GP coordinated all my specialist referrals seamlessly", "label": 3},
    {"text": "No follow-up appointment scheduled after my hospital discharge", "label": 3},
    {"text": "The care team communicates well with each other about my case", "label": 3},
    {"text": "Had to explain my rare condition from scratch to each new locum doctor", "label": 3},
    {"text": "They have a robust system for tracking ongoing chronic issues", "label": 3},
    {"text": "Dr. Jones left and no one informed me who my new named GP would be", "label": 3},
    {"text": "Excellent continuity in managing my complex multi-system condition", "label": 3},
    {"text": "The referral to physiotherapy was processed within 48 hours", "label": 3},
    {"text": "Feel like I'm starting over with every single visit", "label": 3},
    {"text": "My named nurse is consistent and knows my case inside out", "label": 3},
    {"text": "They lost my referral letter and I had to wait 4 months for a new one", "label": 3},
    {"text": "Great handover communication between hospital team and my GP", "label": 3},
    {"text": "Appreciate having the same pediatrician for my child's ongoing care", "label": 3},
    # clinical_communication (4)
    {"text": "Doctor was dismissive and interrupted me whenever I tried to speak", "label": 4},
    {"text": "Nurse was incredibly empathetic and listened without rushing me", "label": 4},
    {"text": "Felt rushed during consultation despite having multiple concerns", "label": 4},
    {"text": "The surgeon explained the procedure with patience and visual aids", "label": 4},
    {"text": "Doctor maintained eye contact and treated me as an equal partner", "label": 4},
    {"text": "Midwife was abrupt and brushed off my concerns about pain management", "label": 4},
    {"text": "Excellent bedside manner, very reassuring before my operation", "label": 4},
    {"text": "Physiotherapist used encouraging language during my rehabilitation", "label": 4},
    {"text": "Doctor seemed distracted, kept checking their phone during our session", "label": 4},
    {"text": "The anesthetist calmed my nerves with clear, compassionate explanations", "label": 4},
    {"text": "Felt like a number on a conveyor belt, not a person", "label": 4},
    {"text": "Nurse practitioner had exceptional communication skills and emotional intelligence", "label": 4},
    {"text": "Doctor was condescending when I asked questions about treatment", "label": 4},
    {"text": "The therapeutic communication was outstanding", "label": 4},
    {"text": "Consultant was approachable and made complex information accessible", "label": 4},
    # admin_communication (5)
    {"text": "Receptionist was cheerful and went above and beyond to help me", "label": 5},
    {"text": "The lady at the desk was abrupt and made me feel like a nuisance", "label": 5},
    {"text": "Booking team worked miracles to accommodate my schedule", "label": 5},
    {"text": "Administrative staff ignore patients while they chat about personal matters", "label": 5},
    {"text": "Front desk team is efficient, professional, and genuinely welcoming", "label": 5},
    {"text": "Receptionist sounded annoyed and impatient on the phone", "label": 5},
    {"text": "The admin team explains billing and insurance matters clearly", "label": 5},
    {"text": "Waited at the desk while two staff members discussed weekend plans", "label": 5},
    {"text": "Called to reschedule and received wonderfully helpful service", "label": 5},
    {"text": "Receptionist couldn't answer basic questions about forms", "label": 5},
    {"text": "Check-in process is smooth and staff maintain friendly professionalism", "label": 5},
    {"text": "The clerk was impatient when I asked for clarification on paperwork", "label": 5},
    {"text": "Administrative team handles complex queries with patience", "label": 5},
    {"text": "Desk staff were gossiping about a patient - completely unprofessional", "label": 5},
    {"text": "Appointment reminder calls are polite, clear, and not robotic", "label": 5},
]
# Convert to Huggingface Dataset
from datasets import ClassLabel, Dataset

dataset = Dataset.from_list(healthcare_dataset)

# Re-type "label" as a ClassLabel feature: train_test_split can only stratify
# on a ClassLabel column. The stored integer ids are unchanged; the names are
# listed in ascending id order so that position i carries LABEL_MAPPING[i].
dataset = dataset.cast_column(
    "label",
    ClassLabel(names=[LABEL_MAPPING[i] for i in sorted(LABEL_MAPPING)]),
)

# Optional: Create train/test split
# Stratified on the label so the small held-out set keeps the same class
# balance as the training set instead of depending on the luck of the shuffle.
dataset = dataset.train_test_split(
    test_size=0.2,
    seed=42,
    stratify_by_column="label",
)
# SetFit training example
# Kept inside a string literal so that importing this file does not require
# setfit or start a training run; copy the snippet out to use it.
"""
from setfit import SetFitModel, Trainer, TrainingArguments

# Initialize model
model = SetFitModel.from_pretrained(
    "sentence-transformers/paraphrase-mpnet-base-v2",
    labels=list(LABEL_MAPPING.values()),
)

# Training hyperparameters
args = TrainingArguments(
    batch_size=16,
    num_iterations=20,  # Iterations of sentence-pair generation for contrastive fine-tuning
    num_epochs=1,  # Epochs of embedding fine-tuning (classifier epochs only apply to a differentiable head)
)

# Create trainer
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=dataset["train"],
    eval_dataset=dataset["test"],
    metric="accuracy",
)

# Train and evaluate
trainer.train()
metrics = trainer.evaluate()
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment