Skip to content

Instantly share code, notes, and snippets.

@restrepo
Created October 7, 2021 20:39
Show Gist options
  • Save restrepo/8c4f1c8721dfe06232bab713a53be4cd to your computer and use it in GitHub Desktop.
Save restrepo/8c4f1c8721dfe06232bab713a53be4cd to your computer and use it in GitHub Desktop.
import re
def split_names(s, exceptions=('Gil', 'Lew', 'Liz', 'Paz', 'Rey', 'Rio', 'Roa', 'Rua', 'Sus', 'Zea'),
                nacionality='Colombiana'):
    """
    Split the full name `s` into given names and (up to two) last names.

    Expected input order ([] → optional):

        NAMES [SMALL_CONNECTORS] FIRST_LAST_NAME [SMALL_CONNECTORS] [SECOND_LAST_NAME]

    The name is title-cased, then every word of at most three characters
    (e.g. "De", "La", "Del") is glued to the word that follows it, so that
    "De La Cuesta" is handled as a single token. Words listed in
    `exceptions` are short words that are last names on their own and are
    therefore NOT glued to the next word. After gluing, the last two tokens
    are taken as the last names and everything before them as given names.

    Special cases, decided on the number of whitespace-separated words:

    * 3 words: read as NAME FIRST_LAST_NAME SECOND_LAST_NAME. If
      `nacionality` is 'Extranjero - otra' or 'Brasilera' the user is asked
      interactively (via `input`) whether the name is from Ibero-America;
      on a negative answer it is read as NAME NAME FIRST_LAST_NAME.
    * 2 words: read as NAME FIRST_LAST_NAME.

    Args:
        s: Full name, in any letter case.
        exceptions: Short (<= 3 characters) last names, title-cased. Add
            more short last names here if necessary.
        nacionality: Nationality label; only used for the 3-word case.

    Returns:
        dict with the keys 'NOMBRE COMPLETO' (last names followed by given
        names), 'PRIMER APELLIDO', 'SEGUNDO APELLIDO', 'NOMBRES' and
        'INICIALES' (initials of the given names, e.g. 'D. A.').

    Raises:
        IndexError: if fewer than two tokens remain after gluing (e.g. an
            empty string or a single word).

    Works with:
    ----
        s='DANIEL ANDRES LA ROTTA FORERO'
        s='MARIA DEL CONSUELO MONTES RAMIREZ'
        s='RICARDO DE LA MERCED CALLEJAS POSADA'
        s='MARIA DEL CARMEN DE LA CUESTA BENJUMEA'
        s='NICOLAS CARLOS MARTI JARAMILLO OCAMPO'
        s='DIEGO ALEJANDRO RESTREPO QUINTERO'
        s='JAIRO HUMBERTO RESTREPO ZEA'
        s='MARLEN JIMENEZ DEL RIO'
    Fails with more than 2 last names:
    ----
        s='ANDRES MAURICIO RANGEL MARTINEZ VILLAL'
    """
    s = s.title()

    # Glue short words to the following word with a '::' marker.
    # NOTE: flags must be passed by keyword; the 4th positional argument of
    # re.sub is `count`, which would silently cap the number of replacements.
    # 1) short word in the middle of the name: " De Xxx" -> " De::Xxx"
    sl = re.sub(r'(\s\w{1,3})\s', r'\1::', s, flags=re.UNICODE)
    # 2) second short word of a pair, skipped by pass 1 because its leading
    #    space was already consumed: " De::La Xxx" -> " De::La::Xxx"
    sl = re.sub(r'(\s\w{1,3}::\w{1,3})\s', r'\1::', sl, flags=re.UNICODE)
    # 3) short word at the very beginning of the name
    sl = re.sub(r'^(\w{1,3})\s', r'\1::', sl, flags=re.UNICODE)

    # Undo the gluing for short words that are really last names.
    short_words = [part for part in re.split(r'(\w{1,3})::', sl)
                   if 1 <= len(part) <= 3]
    for short_last_name in exceptions:
        if short_last_name in short_words:
            sl = sl.replace(f'{short_last_name}::', f'{short_last_name} ')

    tokens = sl.split()
    words = s.split()
    if len(words) == 3:
        ibero = True
        if nacionality in ['Extranjero - otra', 'Brasilera']:
            answer = input(f'Is {s} from Ibero-America? (y/n)')
            if answer.lower() in ('n', 'not', 'no'):
                ibero = False
        # Pad with an empty slot so the last two entries are the last names:
        # Ibero-American -> one name + two last names,
        # otherwise      -> two names + one last name.
        tokens = [''] + words if ibero else words + ['']
    elif len(words) == 2:
        # One name + one last name.
        tokens = [''] + words + ['']

    given = [t for t in tokens[:-2] if t]
    last_names = [t for t in tokens[-2:] if t]
    nombres = ' '.join(given).replace('::', ' ')

    d = {
        # Last names first, then given names. Sliced from the end so it
        # agrees with the other fields for any number of tokens.
        'NOMBRE COMPLETO': ' '.join(last_names + given).replace('::', ' '),
        'PRIMER APELLIDO': tokens[-2].replace('::', ' '),
        'SEGUNDO APELLIDO': tokens[-1].replace('::', ' '),
        'NOMBRES': nombres,
        'INICIALES': ' '.join(word[0] + '.' for word in nombres.split()),
    }
    return d
# Import-time regression checks for split_names.
# Each entry is (full name, extra keyword arguments, result key, expected value);
# the entries are evaluated in order and the first mismatch raises AssertionError.
_SPLIT_NAMES_CHECKS = [
    ('DANIEL ANDRES LA ROTTA FORERO', {}, 'NOMBRES', 'Daniel Andres'),
    ('Jairo Humberto Zea Restrepo', {}, 'NOMBRES', 'Jairo Humberto'),
    ('RICARDO DE LA MERCED CALLEJAS POSADA', {}, 'NOMBRES', 'Ricardo De La Merced'),
    ('MARIA DEL CARMEN DE LA CUESTA BENJUMEA', {}, 'NOMBRES', 'Maria Del Carmen'),
    ('MARIA DEL CARMEN DE LA CUESTA BENJUMEA', {}, 'PRIMER APELLIDO', 'De La Cuesta'),
    ('NICOLAS CARLOS MARTI JARAMILLO OCAMPO', {}, 'NOMBRES', 'Nicolas Carlos Marti'),
    ('DIEGO ALEJANDRO RESTREPO QUINTERO', {}, 'NOMBRES', 'Diego Alejandro'),
    ('DIEGO RESTREPO QUINTERO', {'nacionality': 'Colombiana'}, 'NOMBRES', 'Diego'),
    ('DIEGO RESTREPO', {'nacionality': 'Colombiana'}, 'NOMBRES', 'Diego'),
    # Three-word name with a foreign nationality: split_names asks on stdin
    # whether the name is from Ibero-America, and this check only passes
    # when the answer is negative ('n', 'no' or 'not').
    ('Anand Jagadeesh Puppala', {'nacionality': 'Extranjero - otra'}, 'NOMBRES', 'Anand Jagadeesh'),
]
for _full_name, _kwargs, _key, _expected in _SPLIT_NAMES_CHECKS:
    assert split_names(_full_name, **_kwargs)[_key] == _expected
del _full_name, _kwargs, _key, _expected
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment