wrenoud · March 25, 2016 21:56
diff --git a/Hsda.py b/Hsda.py
 # coding: utf-8
 import os
 import requests
 import re
 import json

 # hdsa / hsa
 # Scrape the contact listings published by hdsa.org and hda.org.uk into
 # hdsa.csv and hsa.csv.  Each page is downloaded once and cached beside the
 # script (hdsa.html / hsa.html); later runs re-parse the cached copy.
 # Written for Python 3.

 HDSA_URL = 'http://hdsa.org/about-hdsa/locate-resources/'
 HDSA_FIELDS = ['hdsatitle', 'hdsaname', 'address', 'suitefloor', 'citystate', 'zip', 'cliniccoordinator', 'affiliation', 'hdsatype', 'hdsaphone', 'hdsaemail', 'website', 'hdsaid']

 HSA_URL = 'http://hda.org.uk/international.js'
 HSA_FIELDS = ['region', 'info', 'email', 'phone', 'website']

 # Substrings that route a contact line to the phone or website column.
 PHONE_MARKERS = ('Tel', 'tel', 'Fax', 'Mobil', 'Hotline', 'Cellphone')
 WEB_MARKERS = ('Website', 'www', 'Facebook', 'Twitter')


 def fetch_cached(url, cache_path):
     """Return the body of ``url`` as text, downloading it on first use.

     The raw response bytes are stored at ``cache_path``; when that file
     already exists no request is made.  The cached file is decoded as UTF-8.

     Raises requests.HTTPError when the download returns an error status, so
     an error page is never written to the cache.
     """
     if not os.path.exists(cache_path):
         response = requests.get(url, timeout=30)
         response.raise_for_status()
         # response.content is bytes, so the cache must be opened in binary mode.
         with open(cache_path, 'wb') as cache:
             cache.write(response.content)
     with open(cache_path, 'r', encoding='utf-8') as cache:
         return cache.read()


 def write_quoted_csv(path, header, rows):
     """Write ``header`` and ``rows`` to ``path`` with every field in single quotes.

     The layout is ``'a','b'`` followed by a newline per row.  A quote inside
     a value is doubled by the csv module, so such values no longer corrupt
     the row.  Non-string values are converted with ``str`` by the writer.
     """
     import csv

     with open(path, 'w', encoding='utf-8', newline='') as out:
         writer = csv.writer(out, quotechar="'", quoting=csv.QUOTE_ALL, lineterminator='\n')
         writer.writerow(header)
         writer.writerows(rows)


 def parse_hdsa_markers(page):
     """Return the decoded ``allmarkers`` JSON array embedded in ``page``.

     Raises ValueError when the page has no ``allmarkers`` script element.
     """
     match = re.search(r'<script> var allmarkers = (.*?)</script>', page)
     if match is None:
         raise ValueError('allmarkers script not found in hdsa page')
     return json.loads(match.group(1))


 def hdsa_rows(markers):
     """Yield one list of HDSA_FIELDS values per marker, read from its 'options'."""
     for marker in markers:
         opts = marker['options']
         yield [opts[name] for name in HDSA_FIELDS]


 def parse_hsa_listing(script):
     """Return the ``IHA_Listing_Items`` array from the JavaScript in ``script``.

     The array is a JavaScript literal rather than JSON, so it is patched
     before decoding: single quotes become double quotes, trailing commas
     before a closing brace/bracket are dropped, and ``\\&`` becomes ``&``.
     NOTE: the blanket quote swap cannot cope with values that themselves
     contain an apostrophe or a double quote.

     Raises ValueError when the assignment is not found.
     """
     match = re.search(r'IHA_Listing_Items = ([\S\s]*?);\n', script)
     if match is None:
         raise ValueError('IHA_Listing_Items assignment not found in hsa script')
     literal = match.group(1).replace("'", '"')
     literal = re.sub(r',(\s*[}\]])', r'\1', literal)
     literal = literal.replace('\\&', '&')
     return json.loads(literal)


 def contact_row(region, contact):
     """Turn one contact paragraph into a [region, info, email, phone, website] row.

     Each line of ``contact`` is routed to exactly one column: lines with
     '@' are emails, lines with a PHONE_MARKERS substring are phone numbers,
     lines with a WEB_MARKERS substring are websites, and the rest is free
     text.  Email/phone/website lines are split on their separators (labels
     such as 'Tel' stay in the output) and every column is joined with ';'.
     """
     details = []
     emails = []
     numbers = []
     websites = []
     for line in contact.strip().split('\n'):
         if '@' in line:
             emails += [part.strip() for part in re.split(r':|;|/', line)]
         elif any(marker in line for marker in PHONE_MARKERS):
             numbers += [part.strip() for part in re.split(r':|;', line)]
         elif any(marker in line for marker in WEB_MARKERS):
             # Do not split on the colon of a URL scheme ("http://").
             websites += [part.strip() for part in re.split(r':(?!//)|;', line)]
         else:
             details.append(line)
     return [region, ';'.join(details), ';'.join(emails), ';'.join(numbers), ';'.join(websites)]


 def hsa_rows(listing):
     """Yield one row per blank-line-separated contact in each region record."""
     for record in listing:
         region = record['Region']
         for contact in record['DetailsPlain'].split('\n\n'):
             yield contact_row(region, contact)


 def main():
     """Build hdsa.csv and hsa.csv, then print 'done'."""
     # hdsa
     markers = parse_hdsa_markers(fetch_cached(HDSA_URL, 'hdsa.html'))
     write_quoted_csv('hdsa.csv', HDSA_FIELDS, hdsa_rows(markers))

     # hsa
     listing = parse_hsa_listing(fetch_cached(HSA_URL, 'hsa.html'))
     write_quoted_csv('hsa.csv', HSA_FIELDS, hsa_rows(listing))
     print('done')


 if __name__ == '__main__':
     main()
	# coding: utf-8
	import os
	import requests
	import re
	import json

	# hdsa / hsa
	# Scrape the contact listings published by hdsa.org and hda.org.uk into
	# hdsa.csv and hsa.csv.  Each page is downloaded once and cached beside the
	# script (hdsa.html / hsa.html); later runs re-parse the cached copy.
	# Written for Python 3.

	HDSA_URL = 'http://hdsa.org/about-hdsa/locate-resources/'
	HDSA_FIELDS = ['hdsatitle', 'hdsaname', 'address', 'suitefloor', 'citystate', 'zip', 'cliniccoordinator', 'affiliation', 'hdsatype', 'hdsaphone', 'hdsaemail', 'website', 'hdsaid']

	HSA_URL = 'http://hda.org.uk/international.js'
	HSA_FIELDS = ['region', 'info', 'email', 'phone', 'website']

	# Substrings that route a contact line to the phone or website column.
	PHONE_MARKERS = ('Tel', 'tel', 'Fax', 'Mobil', 'Hotline', 'Cellphone')
	WEB_MARKERS = ('Website', 'www', 'Facebook', 'Twitter')


	def fetch_cached(url, cache_path):
	    """Return the body of ``url`` as text, downloading it on first use.

	    The raw response bytes are stored at ``cache_path``; when that file
	    already exists no request is made.  The cached file is decoded as UTF-8.

	    Raises requests.HTTPError when the download returns an error status, so
	    an error page is never written to the cache.
	    """
	    if not os.path.exists(cache_path):
	        response = requests.get(url, timeout=30)
	        response.raise_for_status()
	        # response.content is bytes, so the cache must be opened in binary mode.
	        with open(cache_path, 'wb') as cache:
	            cache.write(response.content)
	    with open(cache_path, 'r', encoding='utf-8') as cache:
	        return cache.read()


	def write_quoted_csv(path, header, rows):
	    """Write ``header`` and ``rows`` to ``path`` with every field in single quotes.

	    The layout is ``'a','b'`` followed by a newline per row.  A quote inside
	    a value is doubled by the csv module, so such values no longer corrupt
	    the row.  Non-string values are converted with ``str`` by the writer.
	    """
	    import csv

	    with open(path, 'w', encoding='utf-8', newline='') as out:
	        writer = csv.writer(out, quotechar="'", quoting=csv.QUOTE_ALL, lineterminator='\n')
	        writer.writerow(header)
	        writer.writerows(rows)


	def parse_hdsa_markers(page):
	    """Return the decoded ``allmarkers`` JSON array embedded in ``page``.

	    Raises ValueError when the page has no ``allmarkers`` script element.
	    """
	    match = re.search(r'<script> var allmarkers = (.*?)</script>', page)
	    if match is None:
	        raise ValueError('allmarkers script not found in hdsa page')
	    return json.loads(match.group(1))


	def hdsa_rows(markers):
	    """Yield one list of HDSA_FIELDS values per marker, read from its 'options'."""
	    for marker in markers:
	        opts = marker['options']
	        yield [opts[name] for name in HDSA_FIELDS]


	def parse_hsa_listing(script):
	    """Return the ``IHA_Listing_Items`` array from the JavaScript in ``script``.

	    The array is a JavaScript literal rather than JSON, so it is patched
	    before decoding: single quotes become double quotes, trailing commas
	    before a closing brace/bracket are dropped (whatever whitespace sits
	    between them), and ``\\&`` becomes ``&``.
	    NOTE: the blanket quote swap cannot cope with values that themselves
	    contain an apostrophe or a double quote.

	    Raises ValueError when the assignment is not found.
	    """
	    match = re.search(r'IHA_Listing_Items = ([\S\s]*?);\n', script)
	    if match is None:
	        raise ValueError('IHA_Listing_Items assignment not found in hsa script')
	    literal = match.group(1).replace("'", '"')
	    literal = re.sub(r',(\s*[}\]])', r'\1', literal)
	    literal = literal.replace('\\&', '&')
	    return json.loads(literal)


	def contact_row(region, contact):
	    """Turn one contact paragraph into a [region, info, email, phone, website] row.

	    Each line of ``contact`` is routed to exactly one column: lines with
	    '@' are emails, lines with a PHONE_MARKERS substring are phone numbers,
	    lines with a WEB_MARKERS substring are websites, and the rest is free
	    text.  Email/phone/website lines are split on their separators (labels
	    such as 'Tel' stay in the output) and every column is joined with ';'.
	    """
	    details = []
	    emails = []
	    numbers = []
	    websites = []
	    for line in contact.strip().split('\n'):
	        if '@' in line:
	            # Unescaped alternation: split on ':', ';' or '/'.
	            emails += [part.strip() for part in re.split(r':|;|/', line)]
	        elif any(marker in line for marker in PHONE_MARKERS):
	            numbers += [part.strip() for part in re.split(r':|;', line)]
	        elif any(marker in line for marker in WEB_MARKERS):
	            # Do not split on the colon of a URL scheme ("http://").
	            websites += [part.strip() for part in re.split(r':(?!//)|;', line)]
	        else:
	            details.append(line)
	    return [region, ';'.join(details), ';'.join(emails), ';'.join(numbers), ';'.join(websites)]


	def hsa_rows(listing):
	    """Yield one row per blank-line-separated contact in each region record."""
	    for record in listing:
	        region = record['Region']
	        for contact in record['DetailsPlain'].split('\n\n'):
	            yield contact_row(region, contact)


	def main():
	    """Build hdsa.csv and hsa.csv, then print 'done'."""
	    # hdsa
	    markers = parse_hdsa_markers(fetch_cached(HDSA_URL, 'hdsa.html'))
	    write_quoted_csv('hdsa.csv', HDSA_FIELDS, hdsa_rows(markers))

	    # hsa
	    listing = parse_hsa_listing(fetch_cached(HSA_URL, 'hsa.html'))
	    write_quoted_csv('hsa.csv', HSA_FIELDS, hsa_rows(listing))
	    print('done')


	if __name__ == '__main__':
	    main()
No results found