Skip to content

Instantly share code, notes, and snippets.

@jbusecke
Created February 22, 2022 18:08
Show Gist options
  • Save jbusecke/7ed565ae29deaddcc4f3453944d0bc87 to your computer and use it in GitHub Desktop.
Save jbusecke/7ed565ae29deaddcc4f3453944d0bc87 to your computer and use it in GitHub Desktop.
Investigate differing retraction info on different ESGF nodes
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "15439f8f-5a70-4dc8-880a-ae8c166000e1",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/jovyan/pangeo-cmip6-cloud/retractions.py:1: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n",
" from tqdm.autonotebook import tqdm\n"
]
}
],
"source": [
"from retractions import query_retraction"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "08012f3f-1935-456f-8b85-3f8b8257156f",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Downloading Retraction Data...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "0eef916803514ca581f922954bb47a00",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/37 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracting instance_ids...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "8fb1732f56394ea89fd2a81410306d06",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/37 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully downloaded all retraction info\n",
"1 replicas found\n",
"Downloading Retraction Data...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "2d648df3b050488eab332483a3659c9e",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/37 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Extracting instance_ids...\n"
]
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "f5476aa5b5b74474a5867cbb8bec5127",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
" 0%| | 0/37 [00:00<?, ?it/s]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Successfully downloaded all retraction info\n",
"0 replicas found\n"
]
}
],
"source": [
"# Search endpoints of the two ESGF nodes being compared (DKRZ and LLNL).\n",
"urls = [\n",
"    \"https://esgf-data.dkrz.de/esg-search/search\",\n",
"    \"https://esgf-node.llnl.gov/esg-search/search\",\n",
"]\n",
"\n",
"# Search constraints, passed unchanged to every node.\n",
"# NOTE(review): presumably selects retracted, non-replica CMIP6 datasets and\n",
"# returns only their `instance_id` -- confirm against the ESGF search API docs.\n",
"params = {\n",
"    \"type\": \"Dataset\",\n",
"    \"mip_era\": \"CMIP6\",\n",
"    \"replica\": \"false\",\n",
"    \"distrib\": \"true\",\n",
"    \"retracted\": \"true\",\n",
"    \"format\": \"application/solr+json\",\n",
"    \"fields\": \"instance_id\",\n",
"}\n",
"\n",
"# Run the same query against each node; results are keyed by the node's URL.\n",
"retracted_ids = {\n",
"    node_url: query_retraction(node_url, params)\n",
"    for node_url in urls\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "cdf1df29-9525-442c-a91b-f6e54f9e6d48",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# One single-column (`instance_id`) DataFrame per node, keyed by node URL,\n",
"# so the per-node results can be joined with `merge` below.\n",
"retracted_ids_df = {\n",
"    node_url: pd.Series(list(instance_ids), name=\"instance_id\").to_frame()\n",
"    for node_url, instance_ids in retracted_ids.items()\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "034eff43-c3ff-4e2d-9c62-bf75f40b1083",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>instance_id</th>\n",
" <th>_merge</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>112</th>\n",
" <td>CMIP6.RFMIP.NASA-GISS.GISS-E2-1-G.piClim-histg...</td>\n",
" <td>left_only</td>\n",
" </tr>\n",
" <tr>\n",
" <th>505</th>\n",
" <td>CMIP6.RFMIP.NASA-GISS.GISS-E2-1-G.piClim-histn...</td>\n",
" <td>left_only</td>\n",
" </tr>\n",
" <tr>\n",
" <th>807</th>\n",
" <td>CMIP6.RFMIP.NASA-GISS.GISS-E2-1-G.piClim-hista...</td>\n",
" <td>left_only</td>\n",
" </tr>\n",
" <tr>\n",
" <th>980</th>\n",
" <td>CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.piControl.r1i...</td>\n",
" <td>left_only</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1094</th>\n",
" <td>CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-aer.r2i...</td>\n",
" <td>left_only</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>184013</th>\n",
" <td>CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.esm-piControl.r...</td>\n",
" <td>right_only</td>\n",
" </tr>\n",
" <tr>\n",
" <th>184014</th>\n",
" <td>CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.historical.r1i1...</td>\n",
" <td>right_only</td>\n",
" </tr>\n",
" <tr>\n",
" <th>184015</th>\n",
" <td>CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.historical.r1i1...</td>\n",
" <td>right_only</td>\n",
" </tr>\n",
" <tr>\n",
" <th>184016</th>\n",
" <td>CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.esm-hist.r1i1p1...</td>\n",
" <td>right_only</td>\n",
" </tr>\n",
" <tr>\n",
" <th>184017</th>\n",
" <td>CMIP6.CMIP.NOAA-GFDL.GFDL-CM4.piControl.r1i1p1...</td>\n",
" <td>right_only</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1336 rows × 2 columns</p>\n",
"</div>"
],
"text/plain": [
" instance_id _merge\n",
"112 CMIP6.RFMIP.NASA-GISS.GISS-E2-1-G.piClim-histg... left_only\n",
"505 CMIP6.RFMIP.NASA-GISS.GISS-E2-1-G.piClim-histn... left_only\n",
"807 CMIP6.RFMIP.NASA-GISS.GISS-E2-1-G.piClim-hista... left_only\n",
"980 CMIP6.CMIP.NASA-GISS.GISS-E2-1-G.piControl.r1i... left_only\n",
"1094 CMIP6.DAMIP.NASA-GISS.GISS-E2-1-G.hist-aer.r2i... left_only\n",
"... ... ...\n",
"184013 CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.esm-piControl.r... right_only\n",
"184014 CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.historical.r1i1... right_only\n",
"184015 CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.historical.r1i1... right_only\n",
"184016 CMIP6.CMIP.NOAA-GFDL.GFDL-ESM4.esm-hist.r1i1p1... right_only\n",
"184017 CMIP6.CMIP.NOAA-GFDL.GFDL-CM4.piControl.r1i1p1... right_only\n",
"\n",
"[1336 rows x 2 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Compare the two nodes: outer-join their id tables and keep the mismatches.\n",
"# `indicator=True` adds a `_merge` column; `left_only` rows were reported only\n",
"# by the DKRZ node and `right_only` rows only by the LLNL node.\n",
"merged = pd.merge(\n",
"    retracted_ids_df[\"https://esgf-data.dkrz.de/esg-search/search\"],\n",
"    retracted_ids_df[\"https://esgf-node.llnl.gov/esg-search/search\"],\n",
"    how=\"outer\",\n",
"    indicator=True,\n",
")\n",
"\n",
"# Drop the ids both nodes agree on; what remains is the discrepancy.\n",
"different = merged.loc[merged[\"_merge\"] != \"both\"]\n",
"different"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "930575f1-9d11-4015-ab7d-e6742ec0e48c",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment