Skip to content

Instantly share code, notes, and snippets.

@moritzschaefer
Last active August 29, 2015 14:18
Show Gist options
  • Select an option

  • Save moritzschaefer/aacddb3ea16a8bea957b to your computer and use it in GitHub Desktop.

Select an option

Save moritzschaefer/aacddb3ea16a8bea957b to your computer and use it in GitHub Desktop.
Small script for importing CSV files into Elasticsearch.
#!/usr/bin/env python
import argparse
import sys, os, time
import pandas
import requests
from elasticsearch import Elasticsearch
# Module-level Elasticsearch client, created at import time with no
# arguments (i.e. whatever connection settings the library defaults to).
es = Elasticsearch()
def process_file(datafile, host, port, filetype, index, group, clear_index, sep):
    """Read a CSV file and index each of its rows as an Elasticsearch document.

    Args:
        datafile: Path (or file-like object) passed to ``pandas.read_csv``.
        host: Host name of the Elasticsearch node to connect to.
        port: Port of the Elasticsearch node; a string or an int.
        filetype: Extension of ``datafile`` (e.g. ``'.csv'``). Only values
            containing ``csv`` (case-insensitive) are accepted.
        index: Name of the index the documents are written to.
        group: Document type (``doc_type``) the rows are indexed under.
        clear_index: When true, delete ``index`` before importing.
        sep: Column separator used when parsing the file.

    Exits the process with status 1 when ``filetype`` is not a CSV type.
    """
    if 'csv' not in filetype.lower():
        print('Just csv supported for now')
        sys.exit(1)

    # Honour the requested host/port instead of always using a default client.
    client = Elasticsearch([{'host': host, 'port': int(port)}])

    if clear_index:
        # 404 responses are ignored -- presumably so that deleting an index
        # that does not exist yet is not treated as an error.
        resp = client.indices.delete(index=index, ignore=[404])
        print(resp)

    df = pandas.read_csv(datafile, sep=sep)
    # Missing values become 0 before indexing (presumably because NaN does
    # not serialise to valid JSON -- confirm if a different filler is wanted).
    df.fillna(0, inplace=True)

    for i, row in df.iterrows():
        client.index(index=index, doc_type=group, body=row.to_dict())
        # i is the row's DataFrame index label; report progress every 1000.
        if i % 1000 == 0:
            print(i)
if __name__ == '__main__':
    # Command-line entry point: parse the options, derive the file type from
    # the data file's extension and hand everything to process_file().
    cli = argparse.ArgumentParser()

    # Connection options (optional, with defaults).
    for flag, fallback in (('--host', 'localhost'), ('--port', '9200')):
        cli.add_argument(flag, default=fallback)

    # Target index and document group are mandatory.
    for flag in ('--index', '--group'):
        cli.add_argument(flag, required=True)

    cli.add_argument('--sep', default=',', help="Separator for csv files")

    # Paired on/off switches writing to the same destination; the index is
    # cleared unless --dont-clear-index is given.
    cli.add_argument('--clear-index', dest='clear_index', action='store_true')
    cli.add_argument('--dont-clear-index', dest='clear_index', action='store_false')
    cli.set_defaults(clear_index=True)

    cli.add_argument('datafile')

    options = vars(cli.parse_args())
    extension = os.path.splitext(options['datafile'])[1]
    process_file(filetype=extension, **options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment