rdemorais · November 15, 2023 15:52
diff --git a/remove_names.py b/remove_names.py
 data = [
    {
        "text": "paciente diagnosticado com dm , nega has . paciente acompanhado da mãe , dona Maria Fagundes",
        "clinical_entities": [
            {
                "entity": "dm",
                "entity_tokens": ["dm"],
                "label": "DISEASE",
                "start": 27,
                "end": 29
            },
            {
                "entity": "has",
                "entity_tokens": ["has"],
                "label": "DISEASE",
                "start": 37,
                "end": 40
            },
            {
                "entity": "Maria",
                "entity_tokens": ["Maria"],
                "label": "NAME",
                "start": 78,
                "end": 83
            },
            {
                "entity": "Fagundes",
                "entity_tokens": ["Fagundes"],
                "label": "NAME",
                "start": 84,
                "end": 92
            }
        ],
        "biomarkers": [],
        "lab_tests": [],
        "vital_signs": [],
        "entities_relations": []
    }
 ]


 def remove_names(record: dict) -> None:
    """Remove every "NAME" entity from ``record``, in place.

    The characters covered by each NAME span are cut out of
    ``record['text']``, the NAME entries are dropped from
    ``record['clinical_entities']``, and the ``start``/``end`` offsets of the
    remaining entities are shifted left so they still index the shortened
    text. Whitespace around a removed name is left untouched.

    NOTE(review): assumes NAME spans overlap neither each other nor any
    other entity -- confirm against the annotation source.
    """
    entities = record['clinical_entities']
    # Cut from the rightmost span first so the offsets of spans further to
    # the left are still valid when their turn comes.
    name_spans = sorted(
        ((entity['start'], entity['end'])
         for entity in entities if entity['label'] == "NAME"),
        key=lambda span: span[0],
        reverse=True,
    )

    text = record['text']
    for start, end in name_spans:
        text = text[:start] + text[end:]
    record['text'] = text

    kept = []
    for entity in entities:
        if entity['label'] == "NAME":
            # Keeping the entry would leave the raw name in the record,
            # paired with a span that no longer exists in the text.
            continue
        # Each name removed to the left of this entity shortens the text in
        # front of it by that name's length.
        shift = sum(end - start for start, end in name_spans
                    if end <= entity['start'])
        entity['start'] -= shift
        entity['end'] -= shift
        kept.append(entity)
    record['clinical_entities'] = kept


 # Strip the names from every record
 for item in data:
    remove_names(item)

 print(data[0]['text'])
	data = [
		{
			"text": "paciente diagnosticado com dm , nega has . paciente acompanhado da mãe , dona Maria Fagundes",
			"clinical_entities": [
				{
					"entity": "dm",
					"entity_tokens": ["dm"],
					"label": "DISEASE",
					"start": 27,
					"end": 29
				},
				{
					"entity": "has",
					"entity_tokens": ["has"],
					"label": "DISEASE",
					"start": 37,
					"end": 40
				},
				{
					"entity": "Maria",
					"entity_tokens": ["Maria"],
					"label": "NAME",
					"start": 78,
					"end": 83
				},
				{
					"entity": "Fagundes",
					"entity_tokens": ["Fagundes"],
					"label": "NAME",
					"start": 84,
					"end": 92
				}
			],
			"biomarkers": [],
			"lab_tests": [],
			"vital_signs": [],
			"entities_relations": []
		}
	]


	def remove_names(record: dict) -> None:
		"""Remove every "NAME" entity from ``record``, in place.

		The characters covered by each NAME span are cut out of
		``record['text']``, the NAME entries are dropped from
		``record['clinical_entities']``, and the ``start``/``end`` offsets of
		the remaining entities are shifted left so they still index the
		shortened text. Whitespace around a removed name is left untouched.

		NOTE(review): assumes NAME spans overlap neither each other nor any
		other entity -- confirm against the annotation source.
		"""
		entities = record['clinical_entities']
		# Cut from the rightmost span first so the offsets of spans further
		# to the left are still valid when their turn comes.
		name_spans = sorted(
			((entity['start'], entity['end'])
			 for entity in entities if entity['label'] == "NAME"),
			key=lambda span: span[0],
			reverse=True,
		)

		text = record['text']
		for start, end in name_spans:
			text = text[:start] + text[end:]
		record['text'] = text

		kept = []
		for entity in entities:
			if entity['label'] == "NAME":
				# Keeping the entry would leave the raw name in the record,
				# paired with a span that no longer exists in the text.
				continue
			# Each name removed to the left of this entity shortens the text
			# in front of it by that name's length.
			shift = sum(end - start for start, end in name_spans
						if end <= entity['start'])
			entity['start'] -= shift
			entity['end'] -= shift
			kept.append(entity)
		record['clinical_entities'] = kept


	# Strip the names from every record
	for item in data:
		remove_names(item)

	print(data[0]['text'])
No results found