Skip to content

Instantly share code, notes, and snippets.

@rdemorais
Created November 15, 2023 15:52
Show Gist options
  • Save rdemorais/8ed9e17f450b36ffcbe6a30491450913 to your computer and use it in GitHub Desktop.
Save rdemorais/8ed9e17f450b36ffcbe6a30491450913 to your computer and use it in GitHub Desktop.
Remover nomes do texto
# Sample annotated record: the raw text plus entity annotations whose
# "start"/"end" values are [start, end) character offsets into "text".
data = [
    {
        "text": "paciente diagnosticado com dm , nega has . paciente acompanhado da mãe , dona Maria Fagundes",
        "clinical_entities": [
            {
                "entity": "dm",
                "entity_tokens": ["dm"],
                "label": "DISEASE",
                "start": 27,
                "end": 29,
            },
            {
                "entity": "has",
                "entity_tokens": ["has"],
                "label": "DISEASE",
                "start": 37,
                "end": 40,
            },
            {
                "entity": "Maria",
                "entity_tokens": ["Maria"],
                "label": "NAME",
                "start": 78,
                "end": 83,
            },
            {
                "entity": "Fagundes",
                "entity_tokens": ["Fagundes"],
                "label": "NAME",
                "start": 84,
                "end": 92,
            },
        ],
        "biomarkers": [],
        "lab_tests": [],
        "vital_signs": [],
        "entities_relations": [],
    }
]


def _merge_spans(spans):
    """Return the (start, end) pairs sorted by start, with overlaps merged.

    Empty or inverted spans (end <= start) are dropped because they do not
    describe any characters to remove.
    """
    merged = []
    for start, end in sorted(spans):
        if end <= start:
            continue
        if merged and start <= merged[-1][1]:
            # Overlapping or touching the previous span: extend it instead of
            # cutting the same characters twice.
            merged[-1][1] = max(merged[-1][1], end)
        else:
            merged.append([start, end])
    return merged


def remove_labeled_spans(text: str, entities, label: str = "NAME") -> str:
    """Return *text* with the spans of every entity tagged *label* removed.

    Args:
        text: The original string the entity offsets refer to.
        entities: Iterable of dicts with "label", "start" and "end" keys,
            where "start"/"end" are [start, end) offsets into *text*.
        label: Label of the entities whose text must be removed.

    Returns:
        A new string with the matching spans cut out. Only the labeled
        characters are removed; surrounding whitespace is kept as is.

    Raises:
        KeyError: If an entity lacks "label", or a matching entity lacks
            "start" or "end".
    """
    spans = _merge_spans(
        (entity["start"], entity["end"])
        for entity in entities
        if entity["label"] == label
    )

    # Every offset refers to the ORIGINAL text, so the result is assembled
    # from the untouched gaps between spans rather than by repeatedly slicing
    # an already-shortened string.
    kept = []
    cursor = 0
    for start, end in spans:
        kept.append(text[cursor:start])
        cursor = end
    kept.append(text[cursor:])
    return "".join(kept)


# Strip every NAME entity from each record's text, in place.
# NOTE: the dicts in 'clinical_entities' are left untouched, so their
# start/end offsets still refer to the original (pre-removal) text.
for item in data:
    item['text'] = remove_labeled_spans(item['text'], item['clinical_entities'])

print(data[0]['text'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment