Last active
January 27, 2020 01:13
-
-
Save todpole3/6975c9e3cb405cc9b8625aa4c4a5ad9a to your computer and use it in GitHub Desktop.
Reproducing the botocore.exceptions.ClientError that occurs for HIT pages that contain emoji characters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""
Code adapted from
https://github.com/aws-samples/mturk-code-samples/blob/master/Python/CreateHitSample.py

Package version:
    boto3==1.10.45

Usage:
    python3 reproduce_boto3_character_encoding_bug.py --aws_credentials /tmp/accessKeys.csv
"""
import argparse | |
import csv | |
import boto3 | |
# Before connecting to MTurk, set up your AWS account and IAM settings as
# described here:
# https://blog.mturk.com/how-to-use-iam-to-control-api-access-to-your-mturk-account-76fe2c2e66e2
#
# Follow AWS best practices for setting up credentials here:
# http://boto3.readthedocs.io/en/latest/guide/configuration.html
#
# Use the Amazon Mechanical Turk Sandbox to publish test Human Intelligence
# Tasks (HITs) without paying any money. Sign up for a Sandbox account at
# https://requestersandbox.mturk.com/ with the same credentials as your main
# MTurk account.
# Command-line interface. The single option is the path to the CSV file that
# holds the AWS access key pair; it is left as None when not supplied.
parser = argparse.ArgumentParser(description="HIT submissions")
parser.add_argument(
    "--aws_credentials",
    default=None,
    help="Path to AWS user credentials.",
)
args = parser.parse_args()
# HITs are published to the free-to-use Sandbox unless this flag is set to
# True.
create_hits_in_live = False

# Settings for each MTurk environment: the API endpoint, the worker-facing
# preview URL, the requester-facing management URL, and the reward string
# used when the HIT is created.
environments = {
    "live": dict(
        endpoint="https://mturk-requester.us-east-1.amazonaws.com",
        preview="https://www.mturk.com/mturk/preview",
        manage="https://requester.mturk.com/mturk/manageHITs",
        reward="0.00",
    ),
    "sandbox": dict(
        endpoint="https://mturk-requester-sandbox.us-east-1.amazonaws.com",
        preview="https://workersandbox.mturk.com/mturk/preview",
        manage="https://requestersandbox.mturk.com/mturk/manageHITs",
        reward="0.11",
    ),
}

# Pick the settings that match the flag above.
mturk_environment = environments["live" if create_hits_in_live else "sandbox"]
# Build the boto3 session and the MTurk client.
#
# If a credentials CSV was given on the command line, the access key pair is
# read from it. If not, both values stay None and boto3 falls back to its
# default credential lookup instead of the script crashing on open(None).
aws_access_key_id = None
aws_secret_access_key = None

if args.aws_credentials is not None:
    # "utf-8-sig" drops a leading byte-order mark, which would otherwise be
    # glued onto the first header name and break the 'Access key ID' lookup.
    # newline="" is what the csv module requires for files it reads.
    with open(args.aws_credentials, newline="", encoding="utf-8-sig") as f:
        # If the file lists several key pairs, the last one wins (unchanged
        # from the previous behaviour).
        for row in csv.DictReader(f):
            aws_access_key_id = row['Access key ID']
            aws_secret_access_key = row['Secret access key']

    # A header-only or truncated file used to surface as a NameError further
    # down; fail here with a message that names the actual problem.
    if not aws_access_key_id or not aws_secret_access_key:
        raise SystemExit(
            "No credentials found in {}: expected a CSV file with "
            "'Access key ID' and 'Secret access key' columns and at least "
            "one data row.".format(args.aws_credentials)
        )

session = boto3.Session(
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

# The endpoint decides whether requests go to the live site or the Sandbox.
client = session.client(
    service_name='mturk',
    region_name='us-east-1',
    endpoint_url=mturk_environment['endpoint'],
)
# The HTML page shown to workers. The emoji after the text input is kept on
# purpose: this script exists to reproduce the error raised for HIT pages
# that contain such characters.
_question_html = """<!DOCTYPE html>
<html>
<head>
<meta http-equiv='Content-Type' content='text/html; charset=UTF-8'/>
<script type='text/javascript' src='https://s3.amazonaws.com/mturk-public/externalHIT_v1.js'></script>
</head>
<body>
<form name='mturk_form' method='post' id='mturk_form' action='https://www.mturk.com/mturk/externalSubmit'><input type='hidden' value='' name='assignmentId' id='assignmentId'/>
<h2>Is this Tweet happy, angry, excited, scared, annoyed or upset? Type in one word to describe the main emotion in the message. If it is unclear, type in "unclear".</h2>
<h3> Tweet: "I am really looking forward to the next Seahawks game!"</h3>
<div>
<input type='text' name='reported_emotion' placeholder='Type in your answer here'>
👉
</div>
<p><input type='submit' id='submitButton' value='Submit' /></p></form>
<script language='Javascript'>turkSetAssignmentID();</script>
</body></html>"""

# The question we ask the workers: the page above wrapped in the HTMLQuestion
# XML envelope (CDATA section plus frame height).
question_sample = (
    '\n<HTMLQuestion xmlns="http://mechanicalturk.amazonaws.com/AWSMechanicalTurkDataSchemas/2011-11-11/HTMLQuestion.xsd">\n'
    "<HTMLContent><![CDATA[\n"
    "<!-- YOUR HTML BEGINS -->\n"
    + _question_html
    + "\n<!-- YOUR HTML ENDS -->\n"
    "]]>\n"
    "</HTMLContent>\n"
    "<FrameHeight>600</FrameHeight>\n"
    "</HTMLQuestion>\n"
)
# Example of a qualification that restricts responses to Workers who have had
# at least 80% of their assignments approved. The available qualification
# type IDs are listed at:
# http://docs.aws.amazon.com/AWSMechTurk/latest/AWSMturkAPI/ApiReference_QualificationRequirementDataStructureArticle.html#ApiReference_QualificationType-IDs
approval_rate_requirement = {
    'QualificationTypeId': '000000000000000000L0',
    'Comparator': 'GreaterThanOrEqualTo',
    'IntegerValues': [80],
    'RequiredToPreview': True,
}

# Passed to create_hit as its list of qualification requirements.
worker_requirements = [approval_rate_requirement]
# Create the HIT. Any error raised by the service is deliberately left
# uncaught so that it is shown in full.
hit_settings = {
    'MaxAssignments': 3,
    'LifetimeInSeconds': 600,
    'AssignmentDurationInSeconds': 600,
    'Reward': mturk_environment['reward'],
    'Title': 'Answer a simple question',
    'Keywords': 'question, answer, research',
    'Description': 'Answer a simple question. Created from mturk-code-samples.',
    'Question': question_sample,
    'QualificationRequirements': worker_requirements,
}
response = client.create_hit(**hit_settings)

# Point the user at the preview page for the newly created HIT group.
hit_group_id = response['HIT']['HITGroupId']
print("\nYou can work on the HIT groups here:")
print("{}?groupId={}".format(mturk_environment['preview'], hit_group_id))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment