Skip to content

Instantly share code, notes, and snippets.

@boxmein
Created June 7, 2015 11:59
Show Gist options
  • Select an option

  • Save boxmein/fd79e8df405b68cb4733 to your computer and use it in GitHub Desktop.

Select an option

Save boxmein/fd79e8df405b68cb4733 to your computer and use it in GitHub Desktop.
Indexes all CSV rows under the specified index in Elasticsearch. Uses a pre-defined array of column headers to parse the CSV into a dict.
#!/usr/bin/env python3
# encoding: utf8
# pip install elasticsearch for the elasticsearch shim :)
import csv
from elasticsearch import Elasticsearch, Urllib3HttpConnection
# --- Configuration ---------------------------------------------------------
# Path of the CSV file whose rows are imported.
CSVFILE = "tptsaves.csv"
# Credentials handed to the Elasticsearch client via its http_auth argument.
HTTP_AUTH_DETAILS = "username:password"
# Index and document type every row is stored under.
INDEX_NAME = "tptsaves"
DOC_NAME = "save"
# Column names in file order; they become the keys of each indexed document.
COLUMNS = ["ID", "Author", "Name", "Description", "Status", "Published",
           "Uploaded", "Updated", "Votes", "Views"]

es = Elasticsearch(http_auth=HTTP_AUTH_DETAILS)

print("creating index unless existing...")
# ignore=400 suppresses the HTTP 400 error raised by the create call
# (presumably the "index already exists" case, per the message above).
es.indices.create(index=INDEX_NAME, ignore=400)

# Import Saves
# newline="" lets the csv module handle line endings itself, so quoted fields
# that contain embedded newlines are parsed correctly.
with open(CSVFILE, 'r', encoding="utf8", newline="") as csvf:
    reader = csv.reader(csvf, delimiter=',')
    for row in reader:
        # csv.reader yields an empty list for blank lines; nothing to index.
        if not row:
            continue
        # header row
        if row[0] == COLUMNS[0]:
            continue
        # Pair each cell with its column name. (The original referenced an
        # undefined name KEYS here, which raised NameError on the first row.)
        stuff = dict(zip(COLUMNS, row))
        # The numeric save ID doubles as the Elasticsearch document id.
        stuffid = int(stuff['ID'])
        es.index(index=INDEX_NAME, doc_type=DOC_NAME, id=stuffid, body=stuff)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment