Last active
November 16, 2015 11:49
-
-
Save logc/8c5ba934e4f4b2f32fa5 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Convert: filename of Gmail contacts -> XLSX file

Converts the contents of a file of exported Gmail contacts into an XLSX
spreadsheet that contains only the relevant information of each contact.
""" | |
import argparse
import codecs
import os
import os.path
import tempfile

import pandas as pd
# Extension appended to the input base name to build the output file name.
OUTFILE_EXT = '.xlsx'

# Columns of the exported contacts that are written to the spreadsheet.
RELEVANT_FIELDS = [
    'First Name',
    'Last Name',
    'Job Title',
    'Company',
    'Categories',
    'Mobile Phone',
    'Home Address',
    'E-mail Address',
]
def parse_commandline():
    """
    Read the command-line arguments of the script.

    Builds an argparse parser with a single positional argument,
    ``filename``, and returns the namespace obtained from parsing the
    arguments of the current process.
    """
    cli = argparse.ArgumentParser()
    cli.add_argument('filename')
    namespace = cli.parse_args()
    return namespace
def ensure_codecs(args, tmpfilename):
    """
    Ensures that the file we are reading the DataFrame from is encoded as
    utf-8

    Reads the whole of ``args.filename`` decoding it as latin1 and writes
    the same text, encoded as utf-8, to ``tmpfilename``.

    args: object with a ``filename`` attribute, the path of the input file.
    tmpfilename: path of the utf-8 file to create or overwrite.
    """
    # Both handles are managed by ``with`` so the input file is closed too,
    # even when reading or writing raises.
    with codecs.open(args.filename, 'r', 'latin1') as infile:
        contents = infile.read()
    with codecs.open(tmpfilename, 'w', 'utf-8') as tmpfile:
        tmpfile.write(contents)
def filename_without_extension(filename):
    """
    Strips the extension from a filename.

    Only the last extension is removed and any directory part is kept, e.g.
    'input.csv' becomes 'input'.
    """
    stem, _extension = os.path.splitext(filename)
    return stem
def main():
    """
    Main processing

    Parses the command line, re-encodes the input file as utf-8 into a
    temporary file, loads it as a DataFrame and writes the columns listed in
    RELEVANT_FIELDS to an Excel file named after the input file with its
    extension replaced by OUTFILE_EXT.
    """
    args = parse_commandline()
    outfile = filename_without_extension(args.filename) + OUTFILE_EXT
    # A unique temporary file avoids overwriting (and later deleting) any
    # unrelated file that happens to exist in the working directory.
    handle, tmpfilename = tempfile.mkstemp(suffix='.csv')
    # Only the path is needed; ensure_codecs reopens the file by name.
    os.close(handle)
    try:
        ensure_codecs(args, tmpfilename)
        dataframe = pd.read_csv(tmpfilename, index_col=False, header=0)
        # No ``encoding`` argument: it only applied to the xlwt writer and
        # is not accepted by pandas >= 2.0.
        # pylint: disable=no-member
        dataframe.to_excel(outfile, columns=RELEVANT_FIELDS)
        # pylint: enable=no-member
    finally:
        # Remove the temporary file even when reading or writing fails.
        os.remove(tmpfilename)
# Run the conversion only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment