Last active
October 25, 2017 19:02
-
-
Save mario21ic/1878f4934566751a9ee9b69562af8045 to your computer and use it in GitHub Desktop.
Python script to filter names and documents
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import csv | |
import re | |
def extract_document(names_document):
    """Return every purely numeric whitespace-separated token as an int.

    Args:
        names_document: Free-form text that may mix words and numbers.

    Returns:
        A list of ints, one per token consisting solely of decimal digits,
        in order of appearance. The list is empty when no such token exists.
    """
    # str.isdigit() also accepts characters such as "²" that int() rejects
    # with ValueError; str.isdecimal() only accepts what int() can parse.
    # NOTE(review): int() discards leading zeros ("0123" -> 123); confirm
    # that is acceptable for the document numbers being extracted.
    return [int(token) for token in names_document.split() if token.isdecimal()]
def extract_letters(names_document):
    """Return the alphabetic words of *names_document* joined by single spaces.

    A word is a run of letters delimited by word boundaries on both sides, so
    tokens that mix letters with digits or underscores (e.g. "abc123") are
    skipped entirely.

    Args:
        names_document: Free-form text that may mix words and numbers.

    Returns:
        A single string with the matching words separated by one space, or
        an empty string when nothing matches.
    """
    # The previous ASCII-only class [a-z] combined with Unicode-aware \b
    # dropped whole words containing accented letters ("José", "Muñoz").
    # [^\W\d_] is "word character that is neither a decimal digit nor an
    # underscore", which covers letters in any script.
    return " ".join(re.findall(r"\b[^\W\d_]+\b", names_document))
def invert_last_name(names):
    """Move the leading name token(s) of *names* to the end.

    The reordering depends on how many whitespace-separated tokens there are:

    * 4 or more: the first two tokens are moved to the end.
    * 3: the first token is moved to the end.
    * 2: the two tokens are swapped.
    * 0 or 1: the input is returned unchanged apart from whitespace cleanup.

    Args:
        names: A full name as a single string.

    Returns:
        The reordered name as a single space-separated string.
    """
    # split() without an argument ignores leading, trailing and repeated
    # whitespace, so no empty tokens end up being counted as names.
    parts = names.split()
    count = len(parts)

    if count >= 4:
        # Keep every token after the second one; previously tokens beyond
        # the fourth were silently discarded.
        reordered = parts[2:] + parts[:2]
    elif count == 3:
        reordered = parts[1:] + parts[:1]
    elif count == 2:
        reordered = [parts[1], parts[0]]
    else:
        # Previously this path returned the list itself rather than a string.
        reordered = parts

    return " ".join(reordered)
def main():
    """Read the meetup export and write reordered names with documents.

    Reads 'Docker_Lima_Meetup_26_de_Octubre.xls' as comma-separated text,
    skips the first row, and for every remaining row writes one line to
    'results.csv' containing the reordered name and the extracted numbers.

    For each row:

    * The name defaults to column 0, but is replaced by the letters found in
      column 9 when there are any.
    * The document numbers come from column 9, but are replaced by those
      found in column 10 when there are any.
    """
    # NOTE(review): presumably column 0 is the attendee name and columns 9/10
    # are free-text answers holding name and/or document number -- confirm
    # against the export's header row.
    min_columns = 11
    results = []

    with open('Docker_Lima_Meetup_26_de_Octubre.xls', newline='') as csvfile:
        reader = csv.reader(csvfile, delimiter=',', quotechar='|')
        next(reader, None)  # Skip the first (header) row, if present.
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; indexing it would
                # raise IndexError.
                continue
            # Pad short rows so missing trailing columns read as empty text
            # instead of raising IndexError.
            row = row + [""] * (min_columns - len(row))

            names = extract_letters(row[9]) or row[0]
            document = extract_document(row[10]) or extract_document(row[9])

            # NOTE(review): document is a list, so the CSV cell receives its
            # repr (e.g. "[12345678]"). Kept as-is to preserve the output
            # format; confirm whether a bare number is wanted instead.
            results.append([invert_last_name(names), document])

    with open('results.csv', 'w', newline='') as csvfile:
        writer = csv.writer(csvfile, delimiter=',',
                            quotechar='|', quoting=csv.QUOTE_MINIMAL)
        writer.writerows(results)
# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment