Skip to content

Instantly share code, notes, and snippets.

@larytet
Created September 26, 2017 08:56
Show Gist options
  • Save larytet/6a9975be27f07819ba2150af04d60566 to your computer and use it in GitHub Desktop.
Save larytet/6a9975be27f07819ba2150af04d60566 to your computer and use it in GitHub Desktop.
Example of using pandas for reading a CSV file
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Process CSV data
Usage:
process.py -h | --help
process.py --csvfile=<FILE>
Options:
-h --help Show this screen.
-c --csvfile=<FILE> Input file
"""
from docopt import docopt
import sys
import logging
import pandas
import re
import collections
def pandas_csv_converter_dummy(arg):
    """Identity converter: hand the cell value back untouched.

    pandas_csv_read() registers this callback in the ``converters``
    mapping it passes to ``pandas.read_csv``.

    Args:
        arg: the cell value supplied by the parser.

    Returns:
        ``arg`` itself, unmodified.
    """
    return arg
# Record describing one regular expression together with its usage counters.
# The file imports the ``collections`` module (not the bare ``namedtuple``
# name), so the factory has to be reached through the module.
RePattern = collections.namedtuple(
    'RePattern', ['pattern', 'name', 'count_hits', 'count_runs'])

# NOTE(review): the original assignment had no right-hand side, which is a
# syntax error. An empty list is used as a placeholder; presumably this is
# meant to hold RePattern entries for file paths -- TODO confirm and fill in.
RE_FILE_PATHS = []
def pandas_csv_converter_process(arg):
    """Converter hook for the 'process' column; currently a pass-through.

    pandas_csv_read() maps the 'process' column to this callback, so any
    future per-value processing of that column belongs here.

    Args:
        arg: the cell value supplied by the parser.

    Returns:
        ``arg`` itself, unmodified.
    """
    return arg
def pandas_csv_read(filename, csv_delimiter='\t'):
    r"""Load a delimited text file with pandas and return its rows.

    Malformed rows are skipped (pandas emits a warning for each) instead of
    aborting the whole read. Every column named in the converter table is
    routed through one of this module's converter callbacks.

    Args:
        filename: path of the file to read.
        csv_delimiter: column separator, a tab by default. pandas also
            accepts the whitespace pattern '\s+' here.

    Returns:
        The ``.values`` of the parsed DataFrame (one entry per data row).
    """
    converters = {
        'process': pandas_csv_converter_process,
        'command': pandas_csv_converter_dummy,
        'host_name': pandas_csv_converter_dummy,
        # NOTE(review): 'timestmap' looks like a typo for 'timestamp'. It is
        # left unchanged because the real column header is not visible
        # here -- confirm against the input file.
        'timestmap': pandas_csv_converter_dummy,
        'host': pandas_csv_converter_dummy,
        'instance': pandas_csv_converter_dummy,
        'process_id': pandas_csv_converter_dummy,
        'file': pandas_csv_converter_dummy,
        'old_file': pandas_csv_converter_dummy,
        'remote_host': pandas_csv_converter_dummy,
        'username': pandas_csv_converter_dummy,
    }
    read_options = dict(
        sep=csv_delimiter,
        engine="c",
        memory_map=True,
        converters=converters,
    )
    try:
        # Current pandas spelling. 'warn' matches the legacy
        # error_bad_lines=False with its default warn_bad_lines=True.
        frame = pandas.read_csv(filename, on_bad_lines='warn', **read_options)
    except TypeError as err:
        # Only fall back when the keyword itself was rejected (old pandas);
        # any other TypeError is a genuine error and must propagate.
        if 'on_bad_lines' not in str(err):
            raise
        frame = pandas.read_csv(filename, error_bad_lines=False,
                                **read_options)
    return frame.values
if __name__ == '__main__':
    # Command-line entry point: parse the options, load the file, log the
    # row count and show a small sample of the parsed data.
    logging.basicConfig()
    logger = logging.getLogger('process')
    logger.setLevel("INFO")

    arguments = docopt(__doc__, version='process')
    filename = arguments["--csvfile"]
    data = pandas_csv_read(filename)
    logger.info("parsed {0} lines from file {1}".format(len(data), filename))

    # print(x) with a single argument behaves the same on Python 2 and 3.
    # Slicing shows the first two rows without raising IndexError when the
    # file holds fewer than two.
    for row in data[:2]:
        print(row)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment