nickjevershed · November 7, 2019 03:31
diff --git a/process.py b/process.py
 #%%

 import pandas as pd
 import simplejson as json
 from datetime import datetime

 #%%

 # Load the bird lookup sheet. JSON text is UTF-8 by specification, so decode
 # it explicitly rather than relying on the platform's default encoding.
 with open('1R3zG-DJRqN7MFLGqjhZs26D1SJcKI7pkd1j1XDwAiIM.json', encoding='utf-8') as birdJson:
     birdsInfo = json.load(birdJson)['sheets']['Sheet1']

 # Index every sheet row under "bird<id>" with its display name and image URL.
 # The concatenation requires each row's 'id' to already be a string.
 newInfo = {
     'bird' + bird['id']: {
         "name": bird['name'],
         "image": "https://interactive.guim.co.uk/embed/aus/2019/bird-pics/" + bird['img'],
     }
     for bird in birdsInfo
 }

 df = pd.read_csv("results.csv")

 # 'last_modified_time' is parsed as epoch milliseconds in UTC, then shifted
 # to Sydney time.
 df['time'] = pd.to_datetime(df['last_modified_time'], unit='ms', utc=True).dt.tz_convert('Australia/Sydney')

 # Keep rows from 2019-10-28 onwards whose 'counted' value is 'valid'.
 # NOTE(review): the date string is compared against Sydney-local timestamps;
 # confirm that local midnight is the intended cut-off.
 df = df[df['time'] >= '2019-10-28']
 df = df[df['counted'] == 'valid']

 def addBird(row):
     """Return the bird name for the ``iid`` stored in the row's JSON ``data``.

     ``row['data']`` is decoded as JSON and its ``iid`` is turned into a
     "bird<iid>" key. Returns None when ``newInfo`` has no such key.
     """
     payload = json.loads(row['data'])
     key = "bird" + str(payload['iid'])
     if key not in newInfo:
         return None
     return newInfo[key]['name']

 def addImage(row):
     """Return the image URL for the ``iid`` stored in the row's JSON ``data``.

     ``row['data']`` is decoded as JSON and its ``iid`` is turned into a
     "bird<iid>" key. Returns None when ``newInfo`` has no such key.
     """
     payload = json.loads(row['data'])
     key = "bird" + str(payload['iid'])
     if key not in newInfo:
         return None
     return newInfo[key]['image']

 # Resolve each row's bird name and image URL. 'count' is a constant 1 per
 # row so that summing it below yields the number of rows per bucket.
 df['name'] = df.apply(addBird, axis=1)
 df['img'] = df.apply(addImage, axis=1)
 df['count'] = 1


 #%%

 # Debug filter kept for reference:
 #test = df[(df['name'] == "Short-tailed shearwater (muttonbird)") | (df['name'] == "Australian magpie")]

 # For each (name, img) pair, bucket the rows into hourly bins on 'time' and
 # sum them, then flatten the grouped index back into ordinary columns.
 votes = df[['name', 'img', 'time', 'count']]
 hourly = votes.groupby(['name', 'img']).resample('h', on='time')
 df_10 = hourly.sum().reset_index()

 # Single-bird slice for eyeballing the result.
 test = df_10.loc[df_10['name'] == 'Rainbow lorikeet']

 # df_10 = df[['name','time','count']].resample('h', on='time').sum().reset_index()

 #%%

 # Running total of the hourly 'count' within each bird name.
 # GroupBy.cumsum() returns a Series on df_10's own row index, so it aligns
 # on assignment. Routing the same cumsum through apply() makes pandas 2.x
 # prepend the group key to the index, and the assignment then fails.
 df_10['cumulative'] = df_10.groupby('name')['count'].cumsum()

 #%%

 # Single-bird slice for eyeballing the cumulative totals.
 test = df_10[df_10['name'] == "Short-tailed shearwater (muttonbird)"]

 #%%

 # Long-format export. Pre-formatting 'time' as text is left disabled:
 #df_10['time'] = df_10['time'].dt.strftime('%H:%M %d %b')
 df_10.to_csv('top-ten.csv')

 #%%

 # Wide format: one row per (name, img), one column per 'time' value, with
 # cells taken from 'cumulative'.
 wide = df_10.pivot_table(index=['name', 'img'], columns='time', values='cumulative')
 pvt = wide.reset_index()


 def mapper(x):
     """Format datetime column labels as '%H:%M %d %b'; return others as-is."""
     if isinstance(x, datetime):
         return x.strftime('%H:%M %d %b')
     return x


 pvt.columns = pvt.columns.map(mapper)

 pvt.to_csv('pivot.csv')
	#%%

	import pandas as pd
	import simplejson as json
	from datetime import datetime

	#%%

	# Load the bird lookup sheet. JSON text is UTF-8 by specification, so decode
	# it explicitly rather than relying on the platform's default encoding.
	with open('1R3zG-DJRqN7MFLGqjhZs26D1SJcKI7pkd1j1XDwAiIM.json', encoding='utf-8') as birdJson:
	    birdsInfo = json.load(birdJson)['sheets']['Sheet1']

	# Index every sheet row under "bird<id>" with its display name and image URL.
	# The concatenation requires each row's 'id' to already be a string.
	newInfo = {
	    'bird' + bird['id']: {
	        "name": bird['name'],
	        "image": "https://interactive.guim.co.uk/embed/aus/2019/bird-pics/" + bird['img'],
	    }
	    for bird in birdsInfo
	}

	df = pd.read_csv("results.csv")

	# 'last_modified_time' is parsed as epoch milliseconds in UTC, then shifted
	# to Sydney time.
	df['time'] = pd.to_datetime(df['last_modified_time'], unit='ms', utc=True).dt.tz_convert('Australia/Sydney')

	# Keep rows from 2019-10-28 onwards whose 'counted' value is 'valid'.
	# NOTE(review): the date string is compared against Sydney-local timestamps;
	# confirm that local midnight is the intended cut-off.
	df = df[df['time'] >= '2019-10-28']
	df = df[df['counted'] == 'valid']

	def addBird(row):
	    """Return the bird name for the ``iid`` stored in the row's JSON ``data``.

	    ``row['data']`` is decoded as JSON and its ``iid`` is turned into a
	    "bird<iid>" key. Returns None when ``newInfo`` has no such key.
	    """
	    data = json.loads(row['data'])
	    index = "bird" + str(data['iid'])
	    if index in newInfo:
	        return newInfo[index]['name']
	    return None

	def addImage(row):
	    """Return the image URL for the ``iid`` stored in the row's JSON ``data``.

	    ``row['data']`` is decoded as JSON and its ``iid`` is turned into a
	    "bird<iid>" key. Returns None when ``newInfo`` has no such key.
	    """
	    data = json.loads(row['data'])
	    index = "bird" + str(data['iid'])
	    if index in newInfo:
	        return newInfo[index]['image']
	    return None

	# Resolve each row's bird name and image URL. 'count' is a constant 1 per
	# row so that summing it below yields the number of rows per bucket.
	df['name'] = df.apply(addBird, axis=1)
	df['img'] = df.apply(addImage, axis=1)
	df['count'] = 1


	#%%

	# Debug filter kept for reference:
	#test = df[(df['name'] == "Short-tailed shearwater (muttonbird)") | (df['name'] == "Australian magpie")]

	# For each (name, img) pair, bucket the rows into hourly bins on 'time' and
	# sum them, then flatten the grouped index back into ordinary columns.
	votes = df[['name', 'img', 'time', 'count']]
	hourly = votes.groupby(['name', 'img']).resample('h', on='time')
	df_10 = hourly.sum().reset_index()

	# Single-bird slice for eyeballing the result.
	test = df_10.loc[df_10['name'] == 'Rainbow lorikeet']

	# df_10 = df[['name','time','count']].resample('h', on='time').sum().reset_index()

	#%%

	# Running total of the hourly 'count' within each bird name.
	# GroupBy.cumsum() returns a Series on df_10's own row index, so it aligns
	# on assignment. Routing the same cumsum through apply() makes pandas 2.x
	# prepend the group key to the index, and the assignment then fails.
	df_10['cumulative'] = df_10.groupby('name')['count'].cumsum()

	#%%

	# Single-bird slice for eyeballing the cumulative totals.
	test = df_10[df_10['name'] == "Short-tailed shearwater (muttonbird)"]

	#%%

	# Long-format export. Pre-formatting 'time' as text is left disabled:
	#df_10['time'] = df_10['time'].dt.strftime('%H:%M %d %b')
	df_10.to_csv('top-ten.csv')

	#%%

	# Wide format: one row per (name, img), one column per 'time' value, with
	# cells taken from 'cumulative'.
	wide = df_10.pivot_table(index=['name', 'img'], columns='time', values='cumulative')
	pvt = wide.reset_index()


	def mapper(x):
	    """Format datetime column labels as '%H:%M %d %b'; return others as-is."""
	    if isinstance(x, datetime):
	        return x.strftime('%H:%M %d %b')
	    return x


	pvt.columns = pvt.columns.map(mapper)

	pvt.to_csv('pivot.csv')