Skip to content

Instantly share code, notes, and snippets.

@amueller
Created September 28, 2018 16:29
Show Gist options
  • Save amueller/6c4f16a4d7c9edebb111d819873c85d0 to your computer and use it in GitHub Desktop.
Save amueller/6c4f16a4d7c9edebb111d819873c85d0 to your computer and use it in GitHub Desktop.
Parsing in-preparation datasets on OpenML
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import openml"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"datasets = openml.datasets.list_datasets(status=\"in_preparation\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"17377"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(datasets)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
"# Keep only the datasets whose name does not contain \"QSAR\".\n",
"not_qsar = {k: v for k, v in datasets.items() if \"QSAR\" not in v['name']}"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"331"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(not_qsar)"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/andy/checkout/openml-python/openml/_api_calls.py:101: UserWarning: Received uncompressed content from OpenML for https://www.openml.org/data/v1/download/1854941/accelerometry.csv.\n",
" warnings.warn('Received uncompressed content from OpenML for %s.' % url)\n",
"/home/andy/checkout/openml-python/openml/_api_calls.py:101: UserWarning: Received uncompressed content from OpenML for https://www.openml.org/data/v1/download/1854942/infrawatch.csv.\n",
" warnings.warn('Received uncompressed content from OpenML for %s.' % url)\n",
"/home/andy/checkout/openml-python/openml/_api_calls.py:101: UserWarning: Received uncompressed content from OpenML for https://www.openml.org/data/v1/download/1854943/running.csv.\n",
" warnings.warn('Received uncompressed content from OpenML for %s.' % url)\n",
"/home/andy/checkout/openml-python/openml/_api_calls.py:101: UserWarning: Received uncompressed content from OpenML for https://www.openml.org/data/v1/download/1854944/snowboard.csv.\n",
" warnings.warn('Received uncompressed content from OpenML for %s.' % url)\n",
"/home/andy/checkout/openml-python/openml/_api_calls.py:101: UserWarning: Received uncompressed content from OpenML for https://www.openml.org/data/v1/download/18661014/WeatherMael.arff.\n",
" warnings.warn('Received uncompressed content from OpenML for %s.' % url)\n"
]
}
],
"source": [
"# Call get_dataset for every non-QSAR dataset id and record the ids for which it raises.\n",
"failures = []\n",
"for did in not_qsar:\n",
"    try:\n",
"        openml.datasets.get_dataset(did)\n",
"    except Exception:\n",
"        # Catch Exception rather than using a bare except so that\n",
"        # KeyboardInterrupt/SystemExit can still stop this long loop.\n",
"        failures.append(did)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"75"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(failures)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[1231, 1243, 1244, 1438, 1576, 1947, 4536, 4539, 4670, 4800, 6333, 6334, 6335, 6336, 23389, 23411, 23417, 23418, 23419, 23425, 23428, 23455, 23466, 23485, 23490, 23500, 23501, 23502, 23503, 23504, 23505, 23506, 23507, 23510, 23511, 35983, 40471, 40500, 40501, 40508, 40510, 40521, 40533, 40534, 40599, 40629, 40716, 40717, 40718, 40719, 40720, 40721, 40722, 40723, 40724, 40731, 40737, 40746, 40750, 40751, 40757, 40758, 40818, 40968, 41018, 41019, 41042, 41043, 41074, 41102, 41113, 41114, 41115, 41116, 41190]\n"
]
}
],
"source": [
"print(failures)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python [conda env:py37]",
"language": "python",
"name": "conda-env-py37-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment