Skip to content

Instantly share code, notes, and snippets.

@ucotta
Last active July 10, 2024 09:30
Show Gist options
  • Save ucotta/c5ebbb61a67f1bed4f11cb325ded7bd0 to your computer and use it in GitHub Desktop.
Save ucotta/c5ebbb61a67f1bed4f11cb325ded7bd0 to your computer and use it in GitHub Desktop.
Split a Spanish full name into separate fields (first name, first surname, second surname)
# Lower-cased Spanish surname particles, longest first so "de la" wins over "de".
_SURNAME_PARTICLES = (("de", "la"), ("de", "los"), ("de", "las"), ("del",), ("de",))


def _group_name_tokens(tokens):
    """Join each particle run with the word after it; return the grouped strings."""
    lowered = [token.lower() for token in tokens]
    total = len(tokens)
    groups = []
    index = 0
    while index < total:
        size = 1
        # A particle only binds when a word precedes it (it cannot open the
        # name) and at least one word follows it (it cannot close the name).
        if index > 0:
            for particle in _SURNAME_PARTICLES:
                end = index + len(particle)
                if end < total and tuple(lowered[index:end]) == particle:
                    size = len(particle) + 1
                    break
        groups.append(' '.join(tokens[index:index + size]))
        index += size
    return groups


def separate_name_surnames(full_name):
    """Split a Spanish full name into first name and up to two surnames.

    Particles ("de", "del", "de la", "de los", "de las") are matched
    case-insensitively and kept attached to the word that follows them, so
    "de la Cruz" is treated as one unit. The original casing of the input is
    preserved in the output; runs of whitespace are collapsed to one space.

    Heuristic (the split is inherently ambiguous):
      * one unit              -> first name only;
      * two units             -> first name + one surname;
      * three units, the last
        one carrying a particle -> compound first name + one surname;
      * anything else         -> the last two units are the surnames and
                                 everything before them is the first name.

    Args:
        full_name: The complete name as a single string.

    Returns:
        A dict with keys 'first_name', 'surname1' and 'surname2'.
        'surname2' is None when only one surname is detected; both surnames
        are None when the input has at most one word ('first_name' is then
        that word, or '' for blank input).
    """
    tokens = full_name.split()
    groups = _group_name_tokens(tokens)

    # Blank or single-word input: nothing can be identified as a surname.
    if len(groups) < 2:
        return {
            'first_name': groups[0] if groups else '',
            'surname1': None,
            'surname2': None,
        }

    # A group holds more than one word only when it starts with a particle.
    last_has_particle = ' ' in groups[-1]

    # Single surname: keep the whole last unit, particles included.
    if len(groups) == 2 or (len(groups) == 3 and last_has_particle):
        return {
            'first_name': ' '.join(groups[:-1]),
            'surname1': groups[-1],
            'surname2': None,
        }

    # Two surnames: the last two units, in order.
    return {
        'first_name': ' '.join(groups[:-2]),
        'surname1': groups[-2],
        'surname2': groups[-1],
    }
# Sample full names covering plain surnames, surnames with particles,
# compound first names and a non-Spanish surname.
examples = [
    "Juan Antonio Conde Pérez",
    "María de la Cruz Martínez",
    "Juan de los Ríos García",
    "Peter O'Connor",
    "Pedro Sánchez de la Vega Martínez",
    "María José Gómez Pérez",
    "José María de la Cruz Sánchez",
    "Antonia Rosa Isabel Rodriguez De Los Monteros",
]

# Print the parsed fields for every sample, one dict per line.
for name in examples:
    result = separate_name_surnames(name)
    print(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment