nygeog · July 29, 2018 03:29
diff --git a/get-wework-office-locations.py b/get-wework-office-locations.py
 # Download the WeWork location list from the URL below, keep a copy of the
 # raw JSON on disk, and export a flat CSV with cleaned numeric price columns.
 try:
     from urllib.request import urlopen  # Python 3
 except ImportError:
     from urllib2 import urlopen  # Python 2 fallback
 import json
 import csv
 import pandas as pd
 import numpy as np
 import decimal
 D = decimal.Decimal  # kept so the name stays available; not used below

 url = "https://www.wework.com/locations/all"
 # Decode explicitly: on Python 3 read() returns bytes.
 data = json.loads(urlopen(url).read().decode('utf-8'))

 # Keep the untouched payload next to the CSV.
 ouJSON = 'data/wework.json'
 with open(ouJSON, 'w') as f:
     json.dump(data, f)

 # Fields copied from each location record, in CSV column order.
 varsList = [
     'uuid', 'zip', 'line1', 'location_path', 'city', 'latitude',
     'longitude', 'path', 'slug', 'open_for_touring', 'name', 'phone',
     'image_url', 'location_address', 'format_desk_pricing',
     'format_office_pricing',
 ]

 # One row per location; a missing field raises KeyError rather than being
 # silently dropped.
 dataList = [[location[field] for field in varsList] for location in data]

 df = pd.DataFrame(dataList, columns=varsList)
 df['url'] = 'https://www.wework.com' + df['path'].map(str)

 # Strip everything except sign, digits and the decimal point from the
 # formatted prices. regex=True is explicit because newer pandas treats the
 # pattern as a literal string by default. Values that still do not parse
 # (e.g. blank strings) become NaN instead of aborting the run.
 for priceCol in ('format_desk_pricing', 'format_office_pricing'):
     cleaned = df[priceCol].str.replace(r'[^-+\d.]', '', regex=True)
     df[priceCol + '_us'] = pd.to_numeric(cleaned, errors='coerce').astype(float)
 print(df.dtypes)

 # Lazy hack to remove foreign values: anything above the highest US price
 # is replaced with 0. NaN fails the comparison and so also becomes 0.
 usPriceCeilings = {
     'format_desk_pricing_us': 800,
     'format_office_pricing_us': 1400,
 }
 for usCol in ('format_desk_pricing_us', 'format_office_pricing_us'):
     df[usCol] = np.where(df[usCol] <= usPriceCeilings[usCol], df[usCol], 0)

 df.to_csv('data/weworklocations_20161019.csv', index=False, encoding='utf-8')
	# Download the WeWork location list from the URL below, keep a copy of the
	# raw JSON on disk, and export a flat CSV with cleaned numeric price columns.
	try:
	    from urllib.request import urlopen  # Python 3
	except ImportError:
	    from urllib2 import urlopen  # Python 2 fallback
	import json
	import csv
	import pandas as pd
	import numpy as np
	import decimal
	D = decimal.Decimal  # kept so the name stays available; not used below

	url = "https://www.wework.com/locations/all"
	# Decode explicitly: on Python 3 read() returns bytes.
	data = json.loads(urlopen(url).read().decode('utf-8'))

	# Keep the untouched payload next to the CSV.
	ouJSON = 'data/wework.json'
	with open(ouJSON, 'w') as f:
	    json.dump(data, f)

	# Fields copied from each location record, in CSV column order.
	varsList = [
	    'uuid', 'zip', 'line1', 'location_path', 'city', 'latitude',
	    'longitude', 'path', 'slug', 'open_for_touring', 'name', 'phone',
	    'image_url', 'location_address', 'format_desk_pricing',
	    'format_office_pricing',
	]

	# One row per location; a missing field raises KeyError rather than being
	# silently dropped.
	dataList = [[location[field] for field in varsList] for location in data]

	df = pd.DataFrame(dataList, columns=varsList)
	df['url'] = 'https://www.wework.com' + df['path'].map(str)

	# Strip everything except sign, digits and the decimal point from the
	# formatted prices. regex=True is explicit because newer pandas treats the
	# pattern as a literal string by default. Values that still do not parse
	# (e.g. blank strings) become NaN instead of aborting the run.
	for priceCol in ('format_desk_pricing', 'format_office_pricing'):
	    cleaned = df[priceCol].str.replace(r'[^-+\d.]', '', regex=True)
	    df[priceCol + '_us'] = pd.to_numeric(cleaned, errors='coerce').astype(float)
	print(df.dtypes)

	# Lazy hack to remove foreign values: anything above the highest US price
	# is replaced with 0. NaN fails the comparison and so also becomes 0.
	usPriceCeilings = {
	    'format_desk_pricing_us': 800,
	    'format_office_pricing_us': 1400,
	}
	for usCol in ('format_desk_pricing_us', 'format_office_pricing_us'):
	    df[usCol] = np.where(df[usCol] <= usPriceCeilings[usCol], df[usCol], 0)

	df.to_csv('data/weworklocations_20161019.csv', index=False, encoding='utf-8')