Created
September 26, 2017 08:56
-
-
Save larytet/6a9975be27f07819ba2150af04d60566 to your computer and use it in GitHub Desktop.
Example of using pandas for reading a CSV file
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""Process CSV data

Usage:
  process.py -h | --help
  process.py --csvfile=<FILE>

Options:
  -h --help            Show this screen.
  -c --csvfile=<FILE>  Input file
"""
from docopt import docopt | |
import sys | |
import logging | |
import pandas | |
import re | |
import collections | |
def pandas_csv_converter_dummy(arg):
    """Identity converter: give the parsed cell value back untouched.

    Registered in ``pandas_csv_read`` as the converter for the columns
    that need no processing.
    """
    unchanged = arg
    return unchanged
# Record type with four fields: pattern, name, count_hits and count_runs.
# Only ``collections`` is imported by this file, so the factory has to be
# reached through the module; a bare ``namedtuple`` raises NameError.
RePattern = collections.namedtuple(
    'RePattern', ['pattern', 'name', 'count_hits', 'count_runs'])

# NOTE(review): the original assignment had no right-hand side, which is a
# syntax error.  An empty list keeps the module importable; presumably this
# was meant to hold RePattern entries for file paths -- TODO confirm and fill.
RE_FILE_PATHS = []
def pandas_csv_converter_process(arg):
    """Converter for the 'process' column; currently a plain pass-through.

    The value is returned exactly as received, without any processing.
    """
    same_value = arg
    return same_value
def _bad_lines_kwargs():
    """Return the read_csv keyword that skips malformed rows on this pandas."""
    try:
        version = tuple(
            int(part) for part in pandas.__version__.split('.')[:2])
    except ValueError:
        # Unparsable version string: assume a current pandas release.
        version = (1, 3)
    if version >= (1, 3):
        # 'warn' skips the bad row and reports it, which is what
        # error_bad_lines=False did with the default warn_bad_lines=True.
        return {'on_bad_lines': 'warn'}
    return {'error_bad_lines': False}


def pandas_csv_read(filename, csv_delimiter='\t'):
    r'''Read a delimited text file with pandas and return its rows.

    Malformed rows are skipped instead of aborting the whole read.

    :param filename: path of the file to read.
    :param csv_delimiter: column separator handed to ``pandas.read_csv``;
        tab by default.  pandas also accepts the regular expression '\s+'.
    :return: the parsed table as ``DataFrame.values`` (a NumPy array with
        one entry per data row).
    '''
    # NOTE(review): 'timestmap' looks like a typo for 'timestamp'.  It is kept
    # as-is because it must match the CSV header -- TODO confirm the real name.
    passthrough_columns = (
        'command',
        'host_name',
        'timestmap',
        'host',
        'instance',
        'process_id',
        'file',
        'old_file',
        'remote_host',
        'username',
    )
    converters = dict(
        (column, pandas_csv_converter_dummy) for column in passthrough_columns)
    converters['process'] = pandas_csv_converter_process

    # error_bad_lines was deprecated in pandas 1.3 and removed in 2.0, so the
    # keyword is chosen according to the installed version.
    df = pandas.read_csv(filename,
                         sep=csv_delimiter,
                         engine="c",
                         memory_map=True,
                         converters=converters,
                         **_bad_lines_kwargs())
    return df.values
if __name__ == '__main__':
    # Script entry point: parse the command line, load the CSV file and show
    # how many rows were read together with the first rows as a sample.
    logging.basicConfig()
    logger = logging.getLogger('process')
    logger.setLevel("INFO")

    arguments = docopt(__doc__, version='process')
    filename = arguments["--csvfile"]
    data = pandas_csv_read(filename)
    logger.info("parsed %d lines from file %s", len(data), filename)

    # Slicing instead of indexing data[0] / data[1] avoids an IndexError on
    # files with fewer than two rows; print() with a single argument behaves
    # the same on Python 2 and Python 3.
    for row in data[:2]:
        print(row)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment