Created
February 7, 2021 01:28
-
-
Save glennklockwood/2c5aed8af7eda9de0f4893119baf0d63 to your computer and use it in GitHub Desktop.
Convert Darshan log to Parquet
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import os\n", | |
"import re\n", | |
"import json\n", | |
"\n", | |
"import pandas\n", | |
"import pyarrow\n", | |
"import pyarrow.parquet\n", | |
"\n", | |
"import tokio" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Darshan log to convert; the path is relative, so it resolves against the kernel's working directory\n", | |
"DARSHAN_LOG = 'run.16/glock_python_id4773289_2-3-57796-6844918380606652705_1612397099.darshan'\n", | |
"# darshan-parser executable that is assigned to the connector's subprocess_cmd further down\n", | |
"# NOTE(review): absolute, site-specific path - edit before running on another system\n", | |
"DARSHAN_PARSER = '/global/u2/g/glock/apps.cori-haswell/darshan-3.2.1+dxtstdio/bin/darshan-parser'" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def records(darshanlog):\n", | |
"    \"\"\"Generate one flat dict per (module, file, rank) entry of a Darshan log.\n", | |
"\n", | |
"    Walks the three nesting levels found under ``darshanlog['counters']``\n", | |
"    (treated as empty when that key is absent).  Every yielded dict holds the\n", | |
"    keys ``module``, ``file`` and ``rank`` followed by each innermost key,\n", | |
"    lower-cased, mapped to its value.\n", | |
"    \"\"\"\n", | |
"    counters_by_module = darshanlog.get('counters', {})\n", | |
"    for module, per_file in counters_by_module.items():\n", | |
"        for path, per_rank in per_file.items():\n", | |
"            for rank, counters in per_rank.items():\n", | |
"                flat = dict(module=module, file=path, rank=rank)\n", | |
"                for name, value in counters.items():\n", | |
"                    flat[name.lower()] = value\n", | |
"                yield flat" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Load the Darshan log\n", | |
"darshan = tokio.connectors.darshan.Darshan(DARSHAN_LOG)\n", | |
"# Replace the connector's subprocess command with the darshan-parser configured in DARSHAN_PARSER\n", | |
"darshan.subprocess_cmd = [DARSHAN_PARSER]\n", | |
"# The return value is bound to _ so that this cell renders no output\n", | |
"# NOTE(review): presumably this call is what fills darshan['counters'] read by records() - confirm against pytokio\n", | |
"_ = darshan.darshan_parser_base()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>access1_access</th>\n", | |
" <th>access1_count</th>\n", | |
" <th>access2_access</th>\n", | |
" <th>access2_count</th>\n", | |
" <th>access3_access</th>\n", | |
" <th>access3_count</th>\n", | |
" <th>access4_access</th>\n", | |
" <th>access4_count</th>\n", | |
" <th>bytes_read</th>\n", | |
" <th>bytes_written</th>\n", | |
" <th>...</th>\n", | |
" <th>stats</th>\n", | |
" <th>stride1_count</th>\n", | |
" <th>stride1_stride</th>\n", | |
" <th>stride2_count</th>\n", | |
" <th>stride2_stride</th>\n", | |
" <th>stride3_count</th>\n", | |
" <th>stride3_stride</th>\n", | |
" <th>stride4_count</th>\n", | |
" <th>stride4_stride</th>\n", | |
" <th>writes</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>module</th>\n", | |
" <th>file</th>\n", | |
" <th>rank</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">posix</th>\n", | |
" <th><STDIN></th>\n", | |
" <th>0</th>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th><STDERR></th>\n", | |
" <th>0</th>\n", | |
" <td>593.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" <td>593</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th><STDOUT></th>\n", | |
" <th>0</th>\n", | |
" <td>164.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2510.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>650.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>281.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0</td>\n", | |
" <td>3769</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>/tmp/gn0ph1tz</th>\n", | |
" <th>0</th>\n", | |
" <td>4.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>...</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>/proc/22524/status</th>\n", | |
" <th>0</th>\n", | |
" <td>1220.0</td>\n", | |
" <td>28.0</td>\n", | |
" <td>1218.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>36596</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>30.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 88 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" access1_access access1_count access2_access \\\n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0.0 0.0 \n", | |
" <STDERR> 0 593.0 1.0 0.0 \n", | |
" <STDOUT> 0 164.0 2.0 2510.0 \n", | |
" /tmp/gn0ph1tz 0 4.0 1.0 0.0 \n", | |
" /proc/22524/status 0 1220.0 28.0 1218.0 \n", | |
"\n", | |
" access2_count access3_access access3_count \\\n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0.0 0.0 \n", | |
" <STDERR> 0 0.0 0.0 0.0 \n", | |
" <STDOUT> 0 1.0 650.0 1.0 \n", | |
" /tmp/gn0ph1tz 0 0.0 0.0 0.0 \n", | |
" /proc/22524/status 0 2.0 0.0 0.0 \n", | |
"\n", | |
" access4_access access4_count bytes_read \\\n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0.0 0 \n", | |
" <STDERR> 0 0.0 0.0 0 \n", | |
" <STDOUT> 0 281.0 1.0 0 \n", | |
" /tmp/gn0ph1tz 0 0.0 0.0 0 \n", | |
" /proc/22524/status 0 0.0 0.0 36596 \n", | |
"\n", | |
" bytes_written ... stats stride1_count \\\n", | |
"module file rank ... \n", | |
"posix <STDIN> 0 0 ... 0.0 0.0 \n", | |
" <STDERR> 0 593 ... 0.0 0.0 \n", | |
" <STDOUT> 0 3769 ... 0.0 0.0 \n", | |
" /tmp/gn0ph1tz 0 4 ... 1.0 0.0 \n", | |
" /proc/22524/status 0 0 ... 30.0 0.0 \n", | |
"\n", | |
" stride1_stride stride2_count stride2_stride \\\n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0.0 0.0 \n", | |
" <STDERR> 0 0.0 0.0 0.0 \n", | |
" <STDOUT> 0 0.0 0.0 0.0 \n", | |
" /tmp/gn0ph1tz 0 0.0 0.0 0.0 \n", | |
" /proc/22524/status 0 0.0 0.0 0.0 \n", | |
"\n", | |
" stride3_count stride3_stride stride4_count \\\n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0.0 0.0 \n", | |
" <STDERR> 0 0.0 0.0 0.0 \n", | |
" <STDOUT> 0 0.0 0.0 0.0 \n", | |
" /tmp/gn0ph1tz 0 0.0 0.0 0.0 \n", | |
" /proc/22524/status 0 0.0 0.0 0.0 \n", | |
"\n", | |
" stride4_stride writes \n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0 \n", | |
" <STDERR> 0 0.0 1 \n", | |
" <STDOUT> 0 0.0 5 \n", | |
" /tmp/gn0ph1tz 0 0.0 1 \n", | |
" /proc/22524/status 0 0.0 0 \n", | |
"\n", | |
"[5 rows x 88 columns]" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Materialise every flat record into a DataFrame, then index the rows by (module, file, rank)\n", | |
"df = (\n", | |
"    pandas.DataFrame\n", | |
"    .from_records(list(records(darshan)))\n", | |
"    .set_index(['module', 'file', 'rank'])\n", | |
")\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Build a pyarrow table from the DataFrame; the multi-index is carried along\n", | |
"table = pyarrow.Table.from_pandas(df)\n", | |
"\n", | |
"# Add the Darshan header and mount table, serialised as JSON, to the schema\n", | |
"# metadata under the encoded 'darshan' key (keys and values are stored as bytes here)\n", | |
"metadata = table.schema.metadata  # this is a shallow copy\n", | |
"metadata['darshan'.encode()] = json.dumps({\n", | |
"    \"header\": darshan.get('header', {}),\n", | |
"    \"mounts\": darshan.get('mounts', {})\n", | |
"}).encode()\n", | |
"\n", | |
"# Rebind table to the result of replace_schema_metadata so it carries the merged metadata\n", | |
"table = table.replace_schema_metadata(metadata)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Wrote output to /global/cfs/cdirs/m888/glock/workloads/mock-dustin/run.16/glock_python_id4773289_2-3-57796-6844918380606652705_1612397099.parquet\n" | |
] | |
} | |
], | |
"source": [ | |
"# Save the pyarrow table as a Parquet file named after the log, with the\n", | |
"# '.darshan' suffix swapped for '.parquet'.  The pattern is a raw string\n", | |
"# because a backslash-dot is not a valid escape in a normal str literal.\n", | |
"output_file = re.sub(r'\\.darshan$', '.parquet', DARSHAN_LOG)\n", | |
"if output_file == DARSHAN_LOG:\n", | |
"    # No '.darshan' suffix to swap: append instead so the input log is never overwritten\n", | |
"    output_file = DARSHAN_LOG + '.parquet'\n", | |
"pyarrow.parquet.write_table(table, output_file)\n", | |
"print(\"Wrote output to {}\".format(output_file))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th>access1_access</th>\n", | |
" <th>access1_count</th>\n", | |
" <th>access2_access</th>\n", | |
" <th>access2_count</th>\n", | |
" <th>access3_access</th>\n", | |
" <th>access3_count</th>\n", | |
" <th>access4_access</th>\n", | |
" <th>access4_count</th>\n", | |
" <th>bytes_read</th>\n", | |
" <th>bytes_written</th>\n", | |
" <th>...</th>\n", | |
" <th>stats</th>\n", | |
" <th>stride1_count</th>\n", | |
" <th>stride1_stride</th>\n", | |
" <th>stride2_count</th>\n", | |
" <th>stride2_stride</th>\n", | |
" <th>stride3_count</th>\n", | |
" <th>stride3_stride</th>\n", | |
" <th>stride4_count</th>\n", | |
" <th>stride4_stride</th>\n", | |
" <th>writes</th>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>module</th>\n", | |
" <th>file</th>\n", | |
" <th>rank</th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" <th></th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th rowspan=\"5\" valign=\"top\">posix</th>\n", | |
" <th><STDIN></th>\n", | |
" <th>0</th>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th><STDERR></th>\n", | |
" <th>0</th>\n", | |
" <td>593.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" <td>593</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th><STDOUT></th>\n", | |
" <th>0</th>\n", | |
" <td>164.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>2510.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>650.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>281.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0</td>\n", | |
" <td>3769</td>\n", | |
" <td>...</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>5</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>/tmp/gn0ph1tz</th>\n", | |
" <th>0</th>\n", | |
" <td>4.0</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>...</td>\n", | |
" <td>1.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>1</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>/proc/22524/status</th>\n", | |
" <th>0</th>\n", | |
" <td>1220.0</td>\n", | |
" <td>28.0</td>\n", | |
" <td>1218.0</td>\n", | |
" <td>2.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>36596</td>\n", | |
" <td>0</td>\n", | |
" <td>...</td>\n", | |
" <td>30.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0.0</td>\n", | |
" <td>0</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 88 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" access1_access access1_count access2_access \\\n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0.0 0.0 \n", | |
" <STDERR> 0 593.0 1.0 0.0 \n", | |
" <STDOUT> 0 164.0 2.0 2510.0 \n", | |
" /tmp/gn0ph1tz 0 4.0 1.0 0.0 \n", | |
" /proc/22524/status 0 1220.0 28.0 1218.0 \n", | |
"\n", | |
" access2_count access3_access access3_count \\\n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0.0 0.0 \n", | |
" <STDERR> 0 0.0 0.0 0.0 \n", | |
" <STDOUT> 0 1.0 650.0 1.0 \n", | |
" /tmp/gn0ph1tz 0 0.0 0.0 0.0 \n", | |
" /proc/22524/status 0 2.0 0.0 0.0 \n", | |
"\n", | |
" access4_access access4_count bytes_read \\\n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0.0 0 \n", | |
" <STDERR> 0 0.0 0.0 0 \n", | |
" <STDOUT> 0 281.0 1.0 0 \n", | |
" /tmp/gn0ph1tz 0 0.0 0.0 0 \n", | |
" /proc/22524/status 0 0.0 0.0 36596 \n", | |
"\n", | |
" bytes_written ... stats stride1_count \\\n", | |
"module file rank ... \n", | |
"posix <STDIN> 0 0 ... 0.0 0.0 \n", | |
" <STDERR> 0 593 ... 0.0 0.0 \n", | |
" <STDOUT> 0 3769 ... 0.0 0.0 \n", | |
" /tmp/gn0ph1tz 0 4 ... 1.0 0.0 \n", | |
" /proc/22524/status 0 0 ... 30.0 0.0 \n", | |
"\n", | |
" stride1_stride stride2_count stride2_stride \\\n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0.0 0.0 \n", | |
" <STDERR> 0 0.0 0.0 0.0 \n", | |
" <STDOUT> 0 0.0 0.0 0.0 \n", | |
" /tmp/gn0ph1tz 0 0.0 0.0 0.0 \n", | |
" /proc/22524/status 0 0.0 0.0 0.0 \n", | |
"\n", | |
" stride3_count stride3_stride stride4_count \\\n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0.0 0.0 \n", | |
" <STDERR> 0 0.0 0.0 0.0 \n", | |
" <STDOUT> 0 0.0 0.0 0.0 \n", | |
" /tmp/gn0ph1tz 0 0.0 0.0 0.0 \n", | |
" /proc/22524/status 0 0.0 0.0 0.0 \n", | |
"\n", | |
" stride4_stride writes \n", | |
"module file rank \n", | |
"posix <STDIN> 0 0.0 0 \n", | |
" <STDERR> 0 0.0 1 \n", | |
" <STDOUT> 0 0.0 5 \n", | |
" /tmp/gn0ph1tz 0 0.0 1 \n", | |
" /proc/22524/status 0 0.0 0 \n", | |
"\n", | |
"[5 rows x 88 columns]" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Read the parquet file back in as a pyarrow table\n", | |
"table_verify = pyarrow.parquet.read_table(output_file)\n", | |
"\n", | |
"# Convert it to a DataFrame - use pyarrow instead of Pandas to preserve multiindex\n", | |
"df_verify = table_verify.to_pandas()\n", | |
"\n", | |
"# Verify multiindex is preserved: assert on the index names so a full re-run\n", | |
"# fails loudly if the round trip lost them, then show a preview for the reader\n", | |
"assert list(df_verify.index.names) == ['module', 'file', 'rank'], df_verify.index.names\n", | |
"df_verify.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"{'header': {'version': '3.21', 'compression': 'ZLIB', 'exe': ['python', '-O', '/src/legacypipe/py/legacypipe/forced_photom.py', '--survey-dir', '/global/cfs/cdirs/cosmo/work/legacysurvey/dr9m', '--catalog-dir-north', '/global/cfs/cdirs/cosmo/work/legacysurvey/dr9m/north', '--catalog-dir-south', '/global/cfs/cdirs/cosmo/work/legacysurvey/dr9m/south', '--catalog-resolve-dec-ngc', '32.375', '--skip-calibs', '--apphot', '--derivs', '--outlier-mask', '--camera', 'decam', '--expnum', '587427', '--out-dir', '/global/cfs/cdirs/m888/glock/workloads/mock-dustin/mock-dustin-out', '--ccdname', 'N4'], 'uid': 69615, 'jobid': '4773289', 'start_time': 1612396996, 'start_time_string': 'Wed Feb 3 16:03:16 2021', 'end_time': 1612397098, 'end_time_string': 'Wed Feb 3 16:04:58 2021', 'nprocs': 1, 'walltime': 103, 'metadata': ['lib_ver = 3.2.1', 'h = romio_no_indep_rw=true;cb_nodes=4']}, 'mounts': {'/var/lib/hugetlbfs/global/pagesize-1073741824': 'hugetlbfs', '/var/lib/hugetlbfs/global/pagesize-2147483648': 'hugetlbfs', '/var/lib/hugetlbfs/global/pagesize-134217728': 'hugetlbfs', '/var/lib/hugetlbfs/global/pagesize-268435456': 'hugetlbfs', '/var/lib/hugetlbfs/global/pagesize-536870912': 'hugetlbfs', '/var/lib/hugetlbfs/global/pagesize-16777216': 'hugetlbfs', '/var/lib/hugetlbfs/global/pagesize-33554432': 'hugetlbfs', '/var/lib/hugetlbfs/global/pagesize-67108864': 'hugetlbfs', '/var/lib/hugetlbfs/global/pagesize-2097152': 'hugetlbfs', '/var/lib/hugetlbfs/global/pagesize-4194304': 'hugetlbfs', '/var/lib/hugetlbfs/global/pagesize-8388608': 'hugetlbfs', '/opt/cray/pe/modulefiles/craype/.version': 'overlay', '/global/project/projectdirs': 'dvs', '/var/opt/cray/alps/spool': 'devtmpfs', '/etc/opt/cray/wlm_detect': 'overlay', '/var/opt/cray/alps': 'overlay', '/var/lib/hugetlbfs': 'overlay', '/global/gscratch1': 'lustre', '/var/opt/cray/dws': 'overlay', '/global/projecta': 'overlay', '/global/projectb': 'dvs', '/global/common': 'dvs', '/global/syscom': 'dvs', '/dev/hugepages': 'hugetlbfs', 
'/opt/cray/pe': 'squashfs', '/var/backups': 'squashfs', '/global/dna': 'dvs', '/global/cfs': 'dvs', '/dev/mqueue': 'mqueue', '/global/u1': 'dvs', '/global/u2': 'dvs', '/var/cache': 'squashfs', '/var/local': 'squashfs', '/opt/conda': 'squashfs', '/opt/intel': 'squashfs', '/opt/cray': 'overlay', '/opt/mods': 'overlay', '/var/mail': 'squashfs', '/dev/shm': 'tmpfs', '/dev/pts': 'devpts', '/var/tmp': 'tmpfs', '/homedir': 'squashfs', '/var/log': 'squashfs', '/lib64': 'squashfs', '/media': 'squashfs', '/proc': 'proc', '/boot': 'squashfs', '/home': 'squashfs', '/root': 'squashfs', '/sbin': 'squashfs', '/sys': 'sysfs', '/dev': 'devtmpfs', '/tmp': 'tmpfs', '/bin': 'squashfs', '/lib': 'squashfs', '/mnt': 'squashfs', '/run': 'squashfs', '/src': 'squashfs', '/srv': 'squashfs', '/usr': 'squashfs', '/': 'ramfs'}}\n" | |
] | |
} | |
], | |
"source": [ | |
"# Verify Darshan metadata was preserved: decode the JSON stored under the encoded\n", | |
"# 'darshan' key in the schema metadata of the table read back from the Parquet file\n", | |
"# NOTE: this rebinds `metadata`, which an earlier cell used for the raw schema-metadata dict\n", | |
"metadata = json.loads(table_verify.schema.metadata['darshan'.encode()])\n", | |
"print(metadata)" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "pytokio-prod", | |
"language": "python", | |
"name": "pytokio-prod" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 4 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment