Last active
May 22, 2018 06:13
-
-
Save nibrahim/e32e99c904317b326b2460048a3ea16b to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import math | |
import sys | |
#import pprint | |
## You should probably close the file before returning d here. Use a | |
## with statement | |
def parse_file(filename): | |
f = open(filename, "rt") | |
d = json.load(f) | |
return d | |
## You don't necessarily need to do this upfront. You can just add | |
## events as you go through the original data structure | |
def set_events(data): | |
distinct_events = set() | |
for i in data: | |
events = i["events"] | |
for j in events: | |
distinct_events.add(j) | |
return list(distinct_events) | |
def calc_metrics(data, distinct_events): | |
#distinct_events = set_events(data) | |
## You shouldn't do this. The code should run based on the | |
## data. Adding something like this means that you have to change | |
## the code when the data changes. This is an anti-pattern. | |
## | |
## Also, you shouldn't name variables dict, list etc. since these | |
## are builtins. | |
dict = { | |
'weekend':{}, | |
'reading':{}, | |
'cycling':{}, | |
'lettuce':{}, | |
'dentist':{}, | |
'running':{}, | |
'television':{}, | |
'exercise':{}, | |
'brussel sprouts':{}, | |
'candy':{}, | |
'beer':{}, | |
'spaghetti':{}, | |
'brushed teeth':{}, | |
'work':{}, | |
'peanuts':{}, | |
'lasagna':{}, | |
'carrot':{}, | |
'bread':{}, | |
'touched tree':{}, | |
'computer':{}, | |
'pizza':{}, | |
'nachos':{}, | |
'cauliflower':{}, | |
'ice cream':{}, | |
'potatoes':{}, | |
'pudding':{} | |
} | |
## While this loop works, I think a more idiomatic (though perhaps | |
## not as efficient solution) is to just count and use numbers | |
## rather than the string which you've using. | |
## | |
## I also recommend building the functions with proper names | |
## "calc_metrics" could mean anything. | |
for i in data: | |
events = i["events"] | |
squirrel = int(i["squirrel"]) | |
for j in distinct_events: ## You've commented this out above. I'm assuming that was an error | |
if j in events: | |
x = "{}{}".format(1,squirrel) | |
else: | |
x = "{}{}".format(0, squirrel) | |
try: | |
dict[j][x] = dict[x]+1 ## Shouldn't the right side be dict[j][x] + 1 ? | |
except KeyError: | |
#print(j) | |
#print(x) | |
dict[j][x] = 1 | |
return dict | |
def calc_phi(dict): | |
## Same comment about repeating the data as above. | |
dict1 = { | |
'weekend': {}, | |
'reading': {}, | |
'cycling': {}, | |
'lettuce': {}, | |
'dentist': {}, | |
'running': {}, | |
'television': {}, | |
'exercise': {}, | |
'brussel sprouts': {}, | |
'candy': {}, | |
'beer': {}, | |
'spaghetti': {}, | |
'brushed teeth': {}, | |
'work': {}, | |
'peanuts': {}, | |
'lasagna': {}, | |
'carrot': {}, | |
'bread': {}, | |
'touched tree': {}, | |
'computer': {}, | |
'pizza': {}, | |
'nachos': {}, | |
'cauliflower': {}, | |
'ice cream': {}, | |
'potatoes': {}, | |
'pudding':{} | |
} | |
for i in dict1: | |
if not ("11" in dict[i].keys()): ## You don't need the .keys(). You can use the `in` operator directly on dictionaries | |
dict[i]["11"] = 0 | |
if not ("10" in dict[i].keys()): | |
dict[i]["10"] = 0 | |
if not ("01" in dict[i].keys()): | |
dict[i]["01"] = 0 | |
if not ("00" in dict[i].keys()): | |
dict[i]["00"] = 0 | |
## This whole calculation here becomes rather dense. I recommend you clean it up a little with temporary variables. | |
x = (dict[i]["11"]*dict[i]["00"] - dict[i]["10"]*dict[i]["01"]) | |
y = math.sqrt((dict[i]["11"]+dict[i]["10"])*(dict[i]["00"]+dict[i]["01"])*(dict[i]["11"]+dict[i]["01"])*(dict[i]["00"]+dict[i]["10"])) | |
dict1[i] = x/y | |
return dict1 | |
def main(filename): | |
d = parse_file(filename) | |
distinct_events = set_events(d) | |
dict = calc_metrics(d, distinct_events) | |
dict1 = calc_phi(dict) ## Your calc_phi mutates the dict parameter which is a bad idea. | |
print(json.dumps(dict1, indent=10)) | |
#pprint.pprint(dict1, indent=10) | |
if __name__ == "__main__": # Import guard | |
main(sys.argv[1]) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment