adammcmaster · January 14, 2020 16:13
diff --git a/p4-ridges-automl-transform.py b/p4-ridges-automl-transform.py
 import csv
 import json
 import os
 import re


 # Build a labelled image list from Planet Four: Ridges volunteer votes.
 #
 # Reads the subject and classification CSV exports from $HOME/Downloads,
 # tallies the yes/no answers to task T0 for every selected subject, and writes
 # one "<image location>,<label>" row per subject that received at least one
 # vote. Written to run unchanged on both Python 2 and Python 3.

 # Only subjects that belong to one of these subject sets are considered.
 subject_sets = {
     '7585',
     '7870',
     '7888',
     '8385',
     '9845',
     '13286',
 }
 # Only classifications made in this workflow are counted.
 workflow_id = '2627'
 # Substituted for the leading "https://" of each subject's image URL.
 GCS_PATH_PREFIX = 'gs://golden-toolbox-239212-vcm/'
 # Minimum fraction of "yes" votes for a subject to be labelled 'ridge'.
 RIDGE_THRESHOLD = 0.75
 # Directory holding the input exports; the output file is written here too.
 FILE_PATH = os.path.join(os.environ['HOME'], 'Downloads')

 # subject_id -> {'location': str, 'yes_votes': float, 'no_votes': float}
 subjects = {}

 print('Loading subjects')

 with open(os.path.join(FILE_PATH, 'planet-four-ridges-subjects.csv')) as subj_file:
     r = csv.DictReader(subj_file)
     for row in r:
         if row['subject_set_id'] not in subject_sets:
             continue
         # dict.values() is a non-indexable view on Python 3; list() makes the
         # lookup work on both Python 2 and 3 (and still raises IndexError
         # when the locations mapping is empty).
         first_url = list(json.loads(row['locations']).values())[0]
         location = re.sub(r'^https://', GCS_PATH_PREFIX, first_url)
         subjects[row['subject_id']] = {
             'location': location,
             # Floats so the proportion below is a true division on Python 2.
             'yes_votes': 0.0,
             'no_votes': 0.0,
         }

 print('Counting votes from classifications')

 with open(os.path.join(FILE_PATH, 'polygonal-ridge-workflow-classifications.csv')) as class_file:
     r = csv.DictReader(class_file)
     for row in r:
         # Skip other workflows and any subject that was not loaded above.
         if (
             row['workflow_id'] != workflow_id
             or row['subject_ids'] not in subjects
         ):
             continue

         votes = subjects[row['subject_ids']]
         for annotation in json.loads(row['annotations']):
             if annotation['task'] == 'T0' and annotation['value']:
                 answer = annotation['value'].lower()
                 if answer == 'yes':
                     votes['yes_votes'] += 1.0
                 elif answer == 'no':
                     votes['no_votes'] += 1.0
                 # Only the first answered T0 annotation is looked at.
                 break

 print('Outputting results')

 # NOTE: the Python 3 csv docs recommend open(..., newline=''); the mode is
 # left as plain 'w' so the script keeps running on Python 2 as well.
 with open(os.path.join(FILE_PATH, 'planet-four-ridges-automl.csv'), 'w') as out_file:
     w = csv.writer(out_file)
     # .values() exists on both Python 2 and 3 (iteritems() is Python 2 only),
     # and the subject id itself is not needed for the output.
     for subject in subjects.values():
         vote_total = subject['yes_votes'] + subject['no_votes']
         if vote_total == 0:
             # No usable votes: leave the subject out instead of guessing.
             continue
         yes_proportion = subject['yes_votes'] / vote_total
         if yes_proportion >= RIDGE_THRESHOLD:
             subject_class = 'ridge'
         else:
             subject_class = 'no_ridge'
         w.writerow((subject['location'], subject_class))
	import csv
	import json
	import os
	import re


	# Build a labelled image list from Planet Four: Ridges volunteer votes.
	#
	# Reads the subject and classification CSV exports from $HOME/Downloads,
	# tallies the yes/no answers to task T0 for every selected subject, and writes
	# one "<image location>,<label>" row per subject that received at least one
	# vote. Written to run unchanged on both Python 2 and Python 3.

	# Only subjects that belong to one of these subject sets are considered.
	subject_sets = {
	    '7585',
	    '7870',
	    '7888',
	    '8385',
	    '9845',
	    '13286',
	}
	# Only classifications made in this workflow are counted.
	workflow_id = '2627'
	# Substituted for the leading "https://" of each subject's image URL.
	GCS_PATH_PREFIX = 'gs://golden-toolbox-239212-vcm/'
	# Minimum fraction of "yes" votes for a subject to be labelled 'ridge'.
	RIDGE_THRESHOLD = 0.75
	# Directory holding the input exports; the output file is written here too.
	FILE_PATH = os.path.join(os.environ['HOME'], 'Downloads')

	# subject_id -> {'location': str, 'yes_votes': float, 'no_votes': float}
	subjects = {}

	print('Loading subjects')

	with open(os.path.join(FILE_PATH, 'planet-four-ridges-subjects.csv')) as subj_file:
	    r = csv.DictReader(subj_file)
	    for row in r:
	        if row['subject_set_id'] not in subject_sets:
	            continue
	        # dict.values() is a non-indexable view on Python 3; list() makes the
	        # lookup work on both Python 2 and 3 (and still raises IndexError
	        # when the locations mapping is empty).
	        first_url = list(json.loads(row['locations']).values())[0]
	        location = re.sub(r'^https://', GCS_PATH_PREFIX, first_url)
	        subjects[row['subject_id']] = {
	            'location': location,
	            # Floats so the proportion below is a true division on Python 2.
	            'yes_votes': 0.0,
	            'no_votes': 0.0,
	        }

	print('Counting votes from classifications')

	with open(os.path.join(FILE_PATH, 'polygonal-ridge-workflow-classifications.csv')) as class_file:
	    r = csv.DictReader(class_file)
	    for row in r:
	        # Skip other workflows and any subject that was not loaded above.
	        if (
	            row['workflow_id'] != workflow_id
	            or row['subject_ids'] not in subjects
	        ):
	            continue

	        votes = subjects[row['subject_ids']]
	        for annotation in json.loads(row['annotations']):
	            if annotation['task'] == 'T0' and annotation['value']:
	                answer = annotation['value'].lower()
	                if answer == 'yes':
	                    votes['yes_votes'] += 1.0
	                elif answer == 'no':
	                    votes['no_votes'] += 1.0
	                # Only the first answered T0 annotation is looked at.
	                break

	print('Outputting results')

	# NOTE: the Python 3 csv docs recommend open(..., newline=''); the mode is
	# left as plain 'w' so the script keeps running on Python 2 as well.
	with open(os.path.join(FILE_PATH, 'planet-four-ridges-automl.csv'), 'w') as out_file:
	    w = csv.writer(out_file)
	    # .values() exists on both Python 2 and 3 (iteritems() is Python 2 only),
	    # and the subject id itself is not needed for the output.
	    for subject in subjects.values():
	        vote_total = subject['yes_votes'] + subject['no_votes']
	        if vote_total == 0:
	            # No usable votes: leave the subject out instead of guessing.
	            continue
	        yes_proportion = subject['yes_votes'] / vote_total
	        if yes_proportion >= RIDGE_THRESHOLD:
	            subject_class = 'ridge'
	        else:
	            subject_class = 'no_ridge'
	        w.writerow((subject['location'], subject_class))
No results found