Augment via REST and python requests
def run_augment_test():
    """Augment a local CSV file through the Datamart REST ``/augment`` endpoint.

    Posts ``medallion-test-file.csv`` (looked up in ``INPUT_DIR``) together
    with a hard-coded search result as a multipart form.  When the server
    answers with status 200, the response body is treated as a zip archive
    and unpacked into ``OUTPUT_DIR/augment-results``.

    Returns:
        None.  Request failures, non-200 responses and unreadable archives
        are reported with ``print`` and end the function early.

    Raises:
        AssertionError: if the input CSV does not exist (or the embedded
            task JSON parses to an empty value).
        json.JSONDecodeError: if the embedded task JSON is malformed.
    """
    # Query result
    #
    task_data = """{"id": "datamart.socrata.data-cityofnewyork-us.k2tc-bipg", "score": 8.788908, "metadata": {"name": "Medallion Taxi Initial Inspection Schedule", "description": "This is a schedule of Medallion Taxicab initial inspections at the Taxi and Limousine Commission\\u2019...", "size": 686334, "nb_rows": 13537, "columns": [{"name": "Medallion_Number", "structural_type": "http://schema.org/Text", "semantic_types": []}, {"name": "Schedule_Date", "structural_type": "http://schema.org/Text", "semantic_types": ["http://schema.org/DateTime"], "mean": 1566600510.95516, "stddev": 3029873.8282078225, "coverage": [{"range": {"gte": 1561334400.0, "lte": 1571788800.0}}]}, {"name": "Schedule_Time", "structural_type": "http://schema.org/Text", "semantic_types": ["http://schema.org/DateTime"], "mean": 1561359725.6562016, "stddev": 4632.38259222503, "coverage": [{"range": {"gte": 1561352400.0, "lte": 1561374000.0}}]}, {"name": "Fleet_Agent_Code", "structural_type": "http://schema.org/Integer", "semantic_types": [], "mean": 148.76346310113024, "stddev": 140.04502936441713, "coverage": [{"range": {"gte": 0.0, "lte": 999.0}}]}, {"name": "Last_Updated_Date", "structural_type": "http://schema.org/Text", "semantic_types": ["https://schema.org/Enumeration", "http://schema.org/DateTime"], "mean": 1561248000.0, "stddev": 0.0, "coverage": [{"range": {"gte": 1561248000.0, "lte": 1561248000.0}}]}, {"name": "Last_Updated_Time", "structural_type": "http://schema.org/Text", "semantic_types": ["https://schema.org/Enumeration", "http://schema.org/DateTime"], "mean": 1561399296.0, "stddev": 0.0, "coverage": [{"range": {"gte": 1561399200.766, "lte": 1561399200.766}}]}], "materialize": {"socrata_id": "k2tc-bipg", "socrata_domain": "data.cityofnewyork.us", "socrata_updated": "2019-06-23T22:10:10.000Z", "direct_url": "https://data.cityofnewyork.us/api/views/k2tc-bipg/rows.csv?accessType=DOWNLOAD", "identifier": "datamart.socrata", "date": "2019-06-24T02:01:08.225169Z"}, "date": 
"2019-06-24T03:19:48.375479Z"}, "augmentation": {"type": "none", "left_columns": [], "right_columns": []}}"""

    # syntax check
    #
    assert json.loads(task_data), 'task_data not valid json'

    # Base file to augment. Has two columns: Medallion_Number, Earnings
    # 2 columns x 20 rows
    #
    input_fpath = join(INPUT_DIR, 'medallion-test-file.csv')
    assert isfile(input_fpath), f'File not found: {input_fpath} '

    # Augment url
    #
    augment_url = 'https://datamart.d3m.vida-nyu.org/augment'

    print('task_data', task_data)
    print('-' * 40)
    print('augment_url', augment_url)
    print('-' * 40)

    # Make request
    #
    # - No explicit Content-Type header: requests must generate it itself so
    #   that the header carries the multipart boundary.
    # - When `files` is given, `data` has to be a dict of form fields; passing
    #   the raw JSON string raises "ValueError: Data must not be a string."
    # - NOTE(review): the form field name 'task' is what the Datamart
    #   /augment endpoint is expected to read -- confirm against the API docs.
    # - NOTE(review): verify=False disables TLS certificate verification;
    #   kept from the original script, but drop it once the server
    #   certificate validates.
    #
    try:
        # The context manager closes the upload file even if the post fails.
        with open(input_fpath, 'rb') as input_file:
            response = requests.post(augment_url,
                                     files={'data': input_file},
                                     data={'task': task_data},
                                     verify=False,
                                     stream=True)
    except requests.exceptions.Timeout as err_obj:
        user_msg = ('Request timed out. responded with: %s' % err_obj)
        print(user_msg)
        return
    except ValueError as err_obj:
        user_msg = ('ValueError: %s' % err_obj)
        print(user_msg)
        return

    if response.status_code != 200:
        user_msg = (f'Augment failed. Status code:'
                    f' {response.status_code} . response: {response.text} ')
        print(user_msg)
        return

    print('augment success!')

    # Unpack the returned zip archive.  (A stray early `return` used to make
    # everything below unreachable.)
    #
    data_foldername = join(OUTPUT_DIR, 'augment-results')
    os.makedirs(data_foldername, exist_ok=True)

    try:
        with zipfile.ZipFile(BytesIO(response.content), 'r') as data_zip:
            data_zip.extractall(data_foldername)
    except (zipfile.BadZipFile, RuntimeError) as err_obj:
        # BadZipFile: the body is not a zip archive at all.
        # RuntimeError: zipfile raises it e.g. for encrypted members.
        user_msg = (f'Failed to extract zip to "{data_foldername} ".'
                    f' Error: {err_obj}')
        print(user_msg)
        # Do not claim success after a failed extraction.
        return

    msgt('files downloaded to %s' % data_foldername)
(nyu_datamart) dev_scripts $ python search_test3.py
task_data {"id": "datamart.socrata.data-cityofnewyork-us.k2tc-bipg", "score": 8.788908, "metadata": {"name": "Medallion Taxi Initial Inspection Schedule", "description": "This is a schedule of Medallion Taxicab initial inspections at the Taxi and Limousine Commission\u2019...", "size": 686334, "nb_rows": 13537, "columns": [{"name": "Medallion_Number", "structural_type": "http://schema.org/Text", "semantic_types": []}, {"name": "Schedule_Date", "structural_type": "http://schema.org/Text", "semantic_types": ["http://schema.org/DateTime"], "mean": 1566600510.95516, "stddev": 3029873.8282078225, "coverage": [{"range": {"gte": 1561334400.0, "lte": 1571788800.0}}]}, {"name": "Schedule_Time", "structural_type": "http://schema.org/Text", "semantic_types": ["http://schema.org/DateTime"], "mean": 1561359725.6562016, "stddev": 4632.38259222503, "coverage": [{"range": {"gte": 1561352400.0, "lte": 1561374000.0}}]}, {"name": "Fleet_Agent_Code", "structural_type": "http://schema.org/Integer", "semantic_types": [], "mean": 148.76346310113024, "stddev": 140.04502936441713, "coverage": [{"range": {"gte": 0.0, "lte": 999.0}}]}, {"name": "Last_Updated_Date", "structural_type": "http://schema.org/Text", "semantic_types": ["https://schema.org/Enumeration", "http://schema.org/DateTime"], "mean": 1561248000.0, "stddev": 0.0, "coverage": [{"range": {"gte": 1561248000.0, "lte": 1561248000.0}}]}, {"name": "Last_Updated_Time", "structural_type": "http://schema.org/Text", "semantic_types": ["https://schema.org/Enumeration", "http://schema.org/DateTime"], "mean": 1561399296.0, "stddev": 0.0, "coverage": [{"range": {"gte": 1561399200.766, "lte": 1561399200.766}}]}], "materialize": {"socrata_id": "k2tc-bipg", "socrata_domain": "data.cityofnewyork.us", "socrata_updated": "2019-06-23T22:10:10.000Z", "direct_url": "https://data.cityofnewyork.us/api/views/k2tc-bipg/rows.csv?accessType=DOWNLOAD", "identifier": "datamart.socrata", "date": "2019-06-24T02:01:08.225169Z"}, "date": 
"2019-06-24T03:19:48.375479Z"}, "augmentation": {"type": "none", "left_columns": [], "right_columns": []}}
----------------------------------------
augment_url https://datamart.d3m.vida-nyu.org/augment
----------------------------------------
Traceback (most recent call last):
File "search_test3.py", line 137, in <module>
run_augment_test()
File "search_test3.py", line 103, in run_augment_test
stream=True)
File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/api.py", line 112, in post
return request('post', url, data=data, json=json, **kwargs)
File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/api.py", line 58, in request
return session.request(method=method, url=url, **kwargs)
File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/sessions.py", line 498, in request
prep = self.prepare_request(req)
File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/sessions.py", line 441, in prepare_request
hooks=merge_hooks(request.hooks, self.hooks),
File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/models.py", line 312, in prepare
self.prepare_body(data, files, json)
File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/models.py", line 500, in prepare_body
(body, content_type) = self._encode_files(files, data)
File "/Users/ramanprasad/.virtualenvs/nyu_datamart/lib/python3.6/site-packages/requests/models.py", line 122, in _encode_files
raise ValueError("Data must not be a string.")
ValueError: Data must not be a string.