Created September 10, 2013 00:57
This simple script extracts JSON from npiapi.com’s JSON web service. Npiapi.com has a limit of 100 documents per request, and requests are divided into separate “offsets,” i.e. pages. So to retrieve 74,000 documents, it is necessary to make 740 requests across offsets 0-739. While this script is localized to a particular web service, the generic…
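For concreteness, the paging arithmetic the script relies on (a minimal sketch; the constants match the script below):

limit = 100    # documents per request, the API's cap
offset = 740   # pages needed: 74,000 documents / 100 per page
chunks = 74    # bulk POSTs; each covers offset / chunks = 10 pages
assert limit * offset == 74000
assert offset % chunks == 0   # the script refuses to run otherwise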
#!/usr/bin/env python
import requests
import json

limit = 100   # npiapi.com caps each request at 100 documents
offset = 740  # total pages (offsets 0-739) needed for 74,000 documents
chunks = 74   # number of bulk POSTs; each covers offset / chunks pages

def es_bulk():
    n = 0
    if offset % chunks != 0:
        print "Error: Chunks must divide evenly into offset with no remainder."
    else:
        while offset > n:
            data = []
            # fetch each page in this chunk exactly once
            for o in range(n, n + offset / chunks):
                url = "https://www.npiapi.com/api/providers?token=4a74e1440aaa5609f44d97d829c03c8e0b76fef9&format=json&state=MO&limit=%(l)s&offset=%(o)s" % {"l": limit, "o": o}
                r = requests.get(url)
                j = json.loads(r.content)
                # bulk format: one action line, then one source line, per document
                for source in j['providers']:
                    action = '%s\n' % '{ "index" : { "_index" : "doctors_index", "_type" : "doctors" } }'
                    doc = '%s\n' % json.dumps(source)
                    data.append(action + doc)
            n = n + offset / chunks
            bulk = ''.join(data)
            requests.post("http://localhost:9200/_bulk", data=bulk)

if __name__ == '__main__':
    es_bulk()
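Once the script finishes, a quick way to confirm the documents landed is Elasticsearch's _count API (a minimal sketch; it assumes Elasticsearch is running on localhost:9200 and the index name used in the script):

import requests

# count documents in the index the script writes to
r = requests.get("http://localhost:9200/doctors_index/_count")
print r.json()["count"]   # should report 74,000 once every chunk is posted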