Skip to content

Instantly share code, notes, and snippets.

@mario21ic
Last active October 25, 2017 19:02
Show Gist options
  • Save mario21ic/1878f4934566751a9ee9b69562af8045 to your computer and use it in GitHub Desktop.
Save mario21ic/1878f4934566751a9ee9b69562af8045 to your computer and use it in GitHub Desktop.
Python script to filter names and documents
#!/usr/bin/env python
import csv
import re
def extract_document(names_document):
    """Return every whitespace-separated all-digit token as an int.

    Args:
        names_document: Free text that may mix words and numbers.

    Returns:
        A list of ints, in order of appearance; empty when the text holds
        no purely numeric token.
    """
    # str.isdecimal() is used instead of str.isdigit(): isdigit() also
    # accepts characters such as superscript digits, which int() rejects
    # with ValueError.
    return [int(token) for token in names_document.split() if token.isdecimal()]
def extract_letters(names_document):
    """Return the purely alphabetic words of the text, joined by single spaces.

    A word is kept only when it is made entirely of letters and is delimited
    by word boundaries, so tokens that mix letters with digits or underscores
    (e.g. "abc123") are skipped.

    Args:
        names_document: Free text that may mix words and numbers.

    Returns:
        The alphabetic words separated by one space, or "" when there are none.
    """
    # [^\W\d_] is "a word character that is neither a digit nor an
    # underscore", i.e. any Unicode letter. The previous ASCII-only [a-z]
    # class dropped whole words containing accented letters such as
    # "José" or "Muñoz".
    return " ".join(re.findall(r"\b[^\W\d_]+\b", names_document))
def invert_last_name(names):
    """Move the leading token(s) of a name to the end.

    With four or more tokens the first two are moved to the end; with two or
    three tokens only the first one is moved. Shorter input is returned with
    its tokens unchanged.

    Args:
        names: A name as a single whitespace-separated string.

    Returns:
        The reordered name as a string with tokens joined by single spaces.
    """
    # split() without an argument ignores repeated/leading/trailing spaces,
    # so no empty tokens end up in the result.
    parts = names.split()
    if len(parts) >= 4:
        # Keep every remaining token rather than only the third and fourth,
        # so names with five or more parts are not truncated.
        parts = parts[2:] + parts[:2]
    elif len(parts) >= 2:
        parts = parts[1:] + parts[:1]
    # Always return a string; zero or one token used to leak the split list.
    return " ".join(parts)
def main(input_path='Docker_Lima_Meetup_26_de_Octubre.xls', output_path='results.csv'):
    """Convert the attendee export into a two-column name/document CSV.

    The input is parsed as comma-separated text (regardless of its file
    extension) with '|' as the quote character. The first row is treated as a
    header and skipped. For every other row:

    * the name defaults to column 0, but is replaced by the alphabetic words
      found in column 9 when there are any;
    * the document numbers are taken from column 10 when it contains any
      all-digit token, otherwise from column 9.

    Args:
        input_path: Path of the comma-separated file to read.
        output_path: Path of the CSV file to write (overwritten if present).
    """
    results = []
    with open(input_path, newline='') as source:
        reader = csv.reader(source, delimiter=',', quotechar='|')
        # Skip the header row; the default avoids StopIteration on an empty file.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to extract.
                continue
            # Pad short rows so columns 9 and 10 can be read without IndexError.
            row = row + [''] * (11 - len(row))
            names = extract_letters(row[9]) or row[0]
            document = extract_document(row[10]) or extract_document(row[9])
            # NOTE: document is a list of ints, so csv.writer emits its
            # Python repr (e.g. "[12345678]") in the second column.
            results.append([invert_last_name(names), document])
    with open(output_path, 'w', newline='') as target:
        writer = csv.writer(target, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(results)
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment