Created
September 28, 2018 16:29
-
-
Save amueller/6c4f16a4d7c9edebb111d819873c85d0 to your computer and use it in GitHub Desktop.
Parsing "in preparation" datasets on OpenML
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import openml" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"datasets = openml.datasets.list_datasets(status=\"in_preparation\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"17377" | |
] | |
}, | |
"execution_count": 3, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(datasets)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"not_qsar = {did: meta for did, meta in datasets.items() if \"QSAR\" not in meta['name']}" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"331" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(not_qsar)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stderr", | |
"output_type": "stream", | |
"text": [ | |
"/home/andy/checkout/openml-python/openml/_api_calls.py:101: UserWarning: Received uncompressed content from OpenML for https://www.openml.org/data/v1/download/1854941/accelerometry.csv.\n", | |
" warnings.warn('Received uncompressed content from OpenML for %s.' % url)\n", | |
"/home/andy/checkout/openml-python/openml/_api_calls.py:101: UserWarning: Received uncompressed content from OpenML for https://www.openml.org/data/v1/download/1854942/infrawatch.csv.\n", | |
" warnings.warn('Received uncompressed content from OpenML for %s.' % url)\n", | |
"/home/andy/checkout/openml-python/openml/_api_calls.py:101: UserWarning: Received uncompressed content from OpenML for https://www.openml.org/data/v1/download/1854943/running.csv.\n", | |
" warnings.warn('Received uncompressed content from OpenML for %s.' % url)\n", | |
"/home/andy/checkout/openml-python/openml/_api_calls.py:101: UserWarning: Received uncompressed content from OpenML for https://www.openml.org/data/v1/download/1854944/snowboard.csv.\n", | |
" warnings.warn('Received uncompressed content from OpenML for %s.' % url)\n", | |
"/home/andy/checkout/openml-python/openml/_api_calls.py:101: UserWarning: Received uncompressed content from OpenML for https://www.openml.org/data/v1/download/18661014/WeatherMael.arff.\n", | |
" warnings.warn('Received uncompressed content from OpenML for %s.' % url)\n" | |
] | |
} | |
], | |
"source": [ | |
"# Try to fetch every non-QSAR dataset and collect the ids that fail to load.\n", | |
"failures = []\n", | |
"failure_reasons = {}  # dataset id -> repr of the exception, kept for later triage\n", | |
"for did in not_qsar:\n", | |
"    try:\n", | |
"        openml.datasets.get_dataset(did)\n", | |
"    except Exception as exc:  # not a bare except, so Ctrl-C can still stop the loop\n", | |
"        failures.append(did)\n", | |
"        failure_reasons[did] = repr(exc)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 18, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"75" | |
] | |
}, | |
"execution_count": 18, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"len(failures)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 20, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"[1231, 1243, 1244, 1438, 1576, 1947, 4536, 4539, 4670, 4800, 6333, 6334, 6335, 6336, 23389, 23411, 23417, 23418, 23419, 23425, 23428, 23455, 23466, 23485, 23490, 23500, 23501, 23502, 23503, 23504, 23505, 23506, 23507, 23510, 23511, 35983, 40471, 40500, 40501, 40508, 40510, 40521, 40533, 40534, 40599, 40629, 40716, 40717, 40718, 40719, 40720, 40721, 40722, 40723, 40724, 40731, 40737, 40746, 40750, 40751, 40757, 40758, 40818, 40968, 41018, 41019, 41042, 41043, 41074, 41102, 41113, 41114, 41115, 41116, 41190]\n" | |
] | |
} | |
], | |
"source": [ | |
"print(failures)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python [conda env:py37]", | |
"language": "python", | |
"name": "conda-env-py37-py" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.0" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment