Skip to content

Instantly share code, notes, and snippets.

@scottrice10
Last active December 22, 2015 17:08
Show Gist options
  • Select an option

  • Save scottrice10/6503565 to your computer and use it in GitHub Desktop.

Select an option

Save scottrice10/6503565 to your computer and use it in GitHub Desktop.
Indexing health provider data listed in the NPI database using Elasticsearch.
#!/usr/bin/env python2.7
"""Bulk-index rows of the NPI CSV dump into a local Elasticsearch index.

Each CSV row becomes one document in index ``doctors_index`` (type
``doctors``) built from columns 4 through 47.  Document ids are a running
counter that only advances for rows that were queued successfully.
"""
import csv
import sys
from pyes import *

# Column index of the first CSV field that is copied into a document.
FIRST_FIELD_COLUMN = 4

# Document field names for CSV columns 4..47, listed in column order.
FIELD_NAMES = (
    "org_name",
    "last_name",
    "first_name",
    "middle_name",
    "name_prefix",
    "name_suffix",
    "credential",
    "other_org_name",
    # The original script reused "other_last_name_type_code" for both
    # column 12 and column 19, so the column-12 value was silently
    # overwritten.  Column 12 follows "other_org_name"; presumably it is the
    # other-organization-name type code -- confirm against the NPPES layout.
    "other_org_name_type_code",
    "other_last_name",
    "other_first_name",
    "other_middle_name",
    "other_name_prefix",
    "other_name_suffix",
    "other_credential",
    "other_last_name_type_code",
    "address_first_mailing",
    "address_second_mailing",
    "city_mailing",
    "state_mailing",
    "postal_code_mailing",
    "country_code_mailing",
    "phone_mailing",
    "fax_mailing",
    "address_first_practice",
    "address_second_practice",
    "city_practice",
    "state_practice",
    "postal_code_practice",
    "country_code_practice",
    "phone_practice",
    "fax_practice",
    "enumeration_date",
    "last_update_date",
    "npi_deactivation_reason_code",
    "npi_deactivation_date",
    "npi_reactivation_date",
    "gender",
    "last_name_official",
    "first_name_official",
    "middle_name_official",
    "title_official",
    "phone_official",
    "taxonomy_code",
)

conn = ES('localhost:9200', timeout=20.0)
counter = 0

# 'rb' is what the Python 2 csv module expects; the with-block guarantees the
# file handle is closed even if the load is interrupted.
with open('npidata_20050523-20130811.csv', 'rb') as csv_file:
    reader = csv.reader(csv_file)
    # NOTE(review): every row the reader yields is indexed, including the
    # first one.  If the dump starts with a header line it becomes document
    # 0 -- confirm whether that is intended before skipping it, since doing
    # so would shift all document ids.
    for row in reader:
        try:
            # Indexing row[...] raises IndexError for short rows, which
            # keeps the original "skip malformed rows" behaviour.
            data = dict(
                (name, row[column])
                for column, name in enumerate(FIELD_NAMES, FIRST_FIELD_COLUMN)
            )
            conn.index(data, 'doctors_index', "doctors", counter, bulk=True)
        except Exception as exc:
            # Stay best-effort (one bad row must not abort the whole load)
            # but report the failure instead of hiding it.  Unlike a bare
            # ``except:``, this lets KeyboardInterrupt/SystemExit through.
            sys.stderr.write(
                "skipping CSV line %d: %r\n" % (reader.line_num, exc)
            )
            continue
        counter += 1

# Documents are queued with bulk=True; force out whatever is still buffered
# so the final partial batch is not lost when the script exits.
conn.flush_bulk(forced=True)
#!/bin/sh
# Download the NPPES data dump, unpack it, run the bulk indexer on it and
# remove the working directory afterwards.

# Use one absolute path for both creation and cleanup.  The original created
# "archive" relative to the current directory but always removed
# /opt/elasticsearch/archive, which only matched when run from
# /opt/elasticsearch.
ARCHIVE_DIR=/opt/elasticsearch/archive

# -f makes curl fail on HTTP errors instead of saving the error page as
# file.zip and handing it to the extraction step.
mkdir "$ARCHIVE_DIR" && \
cd "$ARCHIVE_DIR" && \
curl -f http://nppes.viva-it.com/NPPES_Data_Dissemination_Aug_2013.zip > file.zip && \
jar xvf file.zip && \
/opt/elasticsearch/npiBulk.py
status=$?

# Cleanup runs whether or not the steps above succeeded, as before.
rm -R "$ARCHIVE_DIR"

# Report the outcome of the download/extract/index chain rather than the
# outcome of the cleanup, so callers can detect a failed load.
exit $status
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment