Skip to content

Instantly share code, notes, and snippets.

@rwcitek
Last active June 15, 2025 02:16
Show Gist options
  • Save rwcitek/9dd47da6c4e7a6fb94f0375a64c5543f to your computer and use it in GitHub Desktop.
Save rwcitek/9dd47da6c4e7a6fb94f0375a64c5543f to your computer and use it in GitHub Desktop.
large-df-parquet-csv.ipynb
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/rwcitek/9dd47da6c4e7a6fb94f0375a64c5543f/large-df-parquet-csv.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "lIYdn1woOS1n"
},
"outputs": [],
"source": [
"import pandas as pd\n",
"import numpy as np\n"
]
},
{
"cell_type": "code",
"source": [
"n=20_000\n",
"n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "jqvkuL0p7Dlg",
"outputId": "846db0c5-4dcc-4377-8198-8ecc5b0c723a"
},
"execution_count": 2,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"20000"
]
},
"metadata": {},
"execution_count": 2
}
]
},
{
"cell_type": "code",
"source": [
"df1 = pd.DataFrame()\n",
"df1\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 89
},
"id": "xRks_9N36N_N",
"outputId": "83771391-8e2f-4e67-8ed0-cfc7426e4b1b"
},
"execution_count": 3,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"Empty DataFrame\n",
"Columns: []\n",
"Index: []"
],
"text/html": [
"\n",
" <div id=\"df-2d5d6deb-41dd-4dd4-b951-a5a0df87565e\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-2d5d6deb-41dd-4dd4-b951-a5a0df87565e')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-2d5d6deb-41dd-4dd4-b951-a5a0df87565e button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-2d5d6deb-41dd-4dd4-b951-a5a0df87565e');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
" <div id=\"id_6a905560-fa81-44ca-9e1c-27641a04635e\">\n",
" <style>\n",
" .colab-df-generate {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-generate:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
" <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df1')\"\n",
" title=\"Generate code using this dataframe.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
" </svg>\n",
" </button>\n",
" <script>\n",
" (() => {\n",
" const buttonEl =\n",
" document.querySelector('#id_6a905560-fa81-44ca-9e1c-27641a04635e button.colab-df-generate');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" buttonEl.onclick = () => {\n",
" google.colab.notebook.generateWithVariable('df1');\n",
" }\n",
" })();\n",
" </script>\n",
" </div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df1",
"summary": "{\n \"name\": \"df1\",\n \"rows\": 0,\n \"fields\": []\n}"
}
},
"metadata": {},
"execution_count": 3
}
]
},
{
"cell_type": "code",
"source": [
"df1[\"A\"] = np.arange(1,n+1).astype(\"uint16\")\n",
"df1\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 423
},
"id": "WrcwKt1W6W1k",
"outputId": "933ae66b-b5e5-4874-abb8-4c1b3c64ed91"
},
"execution_count": 4,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" A\n",
"0 1\n",
"1 2\n",
"2 3\n",
"3 4\n",
"4 5\n",
"... ...\n",
"19995 19996\n",
"19996 19997\n",
"19997 19998\n",
"19998 19999\n",
"19999 20000\n",
"\n",
"[20000 rows x 1 columns]"
],
"text/html": [
"\n",
" <div id=\"df-7f6b8821-b4c4-463a-b01c-f1baba8abb6d\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>A</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19995</th>\n",
" <td>19996</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19996</th>\n",
" <td>19997</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19997</th>\n",
" <td>19998</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19998</th>\n",
" <td>19999</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19999</th>\n",
" <td>20000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>20000 rows × 1 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7f6b8821-b4c4-463a-b01c-f1baba8abb6d')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-7f6b8821-b4c4-463a-b01c-f1baba8abb6d button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-7f6b8821-b4c4-463a-b01c-f1baba8abb6d');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
" <div id=\"df-e8fe8f95-8822-4b58-b07f-d76e1982ab77\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-e8fe8f95-8822-4b58-b07f-d76e1982ab77')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-e8fe8f95-8822-4b58-b07f-d76e1982ab77 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
" </div>\n",
"\n",
" <div id=\"id_79e2697f-ab3a-41d9-9eea-bc7b3fba9b2e\">\n",
" <style>\n",
" .colab-df-generate {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-generate:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
" <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df1')\"\n",
" title=\"Generate code using this dataframe.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
" </svg>\n",
" </button>\n",
" <script>\n",
" (() => {\n",
" const buttonEl =\n",
" document.querySelector('#id_79e2697f-ab3a-41d9-9eea-bc7b3fba9b2e button.colab-df-generate');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" buttonEl.onclick = () => {\n",
" google.colab.notebook.generateWithVariable('df1');\n",
" }\n",
" })();\n",
" </script>\n",
" </div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df1",
"summary": "{\n \"name\": \"df1\",\n \"rows\": 20000,\n \"fields\": [\n {\n \"column\": \"A\",\n \"properties\": {\n \"dtype\": \"uint16\",\n \"num_unique_values\": 20000,\n \"samples\": [\n 10651,\n 2042,\n 8669\n ],\n \"semantic_type\": \"\",\n \"description\": \"\"\n }\n }\n ]\n}"
}
},
"metadata": {},
"execution_count": 4
}
]
},
{
"cell_type": "code",
"source": [
"df2 = df1.transpose()\n",
"df2\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 110
},
"id": "6DwzUc-q6dl9",
"outputId": "81c54277-8ed4-4ef5-d9f7-6d0a51a7374e"
},
"execution_count": 5,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 ... \\\n",
"A 1 2 3 4 5 6 7 8 9 10 ... \n",
"\n",
" 19990 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"A 19991 19992 19993 19994 19995 19996 19997 19998 19999 20000 \n",
"\n",
"[1 rows x 20000 columns]"
],
"text/html": [
"\n",
" <div id=\"df-c51f9680-facc-425e-9b75-94db8dea2926\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>19990</th>\n",
" <th>19991</th>\n",
" <th>19992</th>\n",
" <th>19993</th>\n",
" <th>19994</th>\n",
" <th>19995</th>\n",
" <th>19996</th>\n",
" <th>19997</th>\n",
" <th>19998</th>\n",
" <th>19999</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>A</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1 rows × 20000 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-c51f9680-facc-425e-9b75-94db8dea2926')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-c51f9680-facc-425e-9b75-94db8dea2926 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-c51f9680-facc-425e-9b75-94db8dea2926');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
" <div id=\"id_b7ef078e-8b4a-4346-a82f-cbd46cc8b8f7\">\n",
" <style>\n",
" .colab-df-generate {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-generate:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
" <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df2')\"\n",
" title=\"Generate code using this dataframe.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
" </svg>\n",
" </button>\n",
" <script>\n",
" (() => {\n",
" const buttonEl =\n",
" document.querySelector('#id_b7ef078e-8b4a-4346-a82f-cbd46cc8b8f7 button.colab-df-generate');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" buttonEl.onclick = () => {\n",
" google.colab.notebook.generateWithVariable('df2');\n",
" }\n",
" })();\n",
" </script>\n",
" </div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df2"
}
},
"metadata": {},
"execution_count": 5
}
]
},
{
"cell_type": "code",
"source": [
"df3 = df1.merge( df2, how=\"cross\").set_index(\"A\")\n",
"df3\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 454
},
"id": "Er7cumoI6kaq",
"outputId": "c056015d-1719-46ea-9411-2439b452e620"
},
"execution_count": 6,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
" 0 1 2 3 4 5 6 7 8 9 \\\n",
"A \n",
"1 1 2 3 4 5 6 7 8 9 10 \n",
"2 1 2 3 4 5 6 7 8 9 10 \n",
"3 1 2 3 4 5 6 7 8 9 10 \n",
"4 1 2 3 4 5 6 7 8 9 10 \n",
"5 1 2 3 4 5 6 7 8 9 10 \n",
"... ... ... ... ... ... ... ... ... ... ... \n",
"19996 1 2 3 4 5 6 7 8 9 10 \n",
"19997 1 2 3 4 5 6 7 8 9 10 \n",
"19998 1 2 3 4 5 6 7 8 9 10 \n",
"19999 1 2 3 4 5 6 7 8 9 10 \n",
"20000 1 2 3 4 5 6 7 8 9 10 \n",
"\n",
" ... 19990 19991 19992 19993 19994 19995 19996 19997 19998 \\\n",
"A ... \n",
"1 ... 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"2 ... 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"3 ... 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"4 ... 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"5 ... 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"... ... ... ... ... ... ... ... ... ... ... \n",
"19996 ... 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"19997 ... 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"19998 ... 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"19999 ... 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"20000 ... 19991 19992 19993 19994 19995 19996 19997 19998 19999 \n",
"\n",
" 19999 \n",
"A \n",
"1 20000 \n",
"2 20000 \n",
"3 20000 \n",
"4 20000 \n",
"5 20000 \n",
"... ... \n",
"19996 20000 \n",
"19997 20000 \n",
"19998 20000 \n",
"19999 20000 \n",
"20000 20000 \n",
"\n",
"[20000 rows x 20000 columns]"
],
"text/html": [
"\n",
" <div id=\"df-03edbbe5-3b0a-4e34-9ce4-5b1ff26d6c86\" class=\"colab-df-container\">\n",
" <div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>0</th>\n",
" <th>1</th>\n",
" <th>2</th>\n",
" <th>3</th>\n",
" <th>4</th>\n",
" <th>5</th>\n",
" <th>6</th>\n",
" <th>7</th>\n",
" <th>8</th>\n",
" <th>9</th>\n",
" <th>...</th>\n",
" <th>19990</th>\n",
" <th>19991</th>\n",
" <th>19992</th>\n",
" <th>19993</th>\n",
" <th>19994</th>\n",
" <th>19995</th>\n",
" <th>19996</th>\n",
" <th>19997</th>\n",
" <th>19998</th>\n",
" <th>19999</th>\n",
" </tr>\n",
" <tr>\n",
" <th>A</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19996</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19997</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19998</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19999</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>20000</th>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>3</td>\n",
" <td>4</td>\n",
" <td>5</td>\n",
" <td>6</td>\n",
" <td>7</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>19991</td>\n",
" <td>19992</td>\n",
" <td>19993</td>\n",
" <td>19994</td>\n",
" <td>19995</td>\n",
" <td>19996</td>\n",
" <td>19997</td>\n",
" <td>19998</td>\n",
" <td>19999</td>\n",
" <td>20000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>20000 rows × 20000 columns</p>\n",
"</div>\n",
" <div class=\"colab-df-buttons\">\n",
"\n",
" <div class=\"colab-df-container\">\n",
" <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-03edbbe5-3b0a-4e34-9ce4-5b1ff26d6c86')\"\n",
" title=\"Convert this dataframe to an interactive table.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\" viewBox=\"0 -960 960 960\">\n",
" <path d=\"M120-120v-720h720v720H120Zm60-500h600v-160H180v160Zm220 220h160v-160H400v160Zm0 220h160v-160H400v160ZM180-400h160v-160H180v160Zm440 0h160v-160H620v160ZM180-180h160v-160H180v160Zm440 0h160v-160H620v160Z\"/>\n",
" </svg>\n",
" </button>\n",
"\n",
" <style>\n",
" .colab-df-container {\n",
" display:flex;\n",
" gap: 12px;\n",
" }\n",
"\n",
" .colab-df-convert {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-convert:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" .colab-df-buttons div {\n",
" margin-bottom: 4px;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-convert:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
" const buttonEl =\n",
" document.querySelector('#df-03edbbe5-3b0a-4e34-9ce4-5b1ff26d6c86 button.colab-df-convert');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" async function convertToInteractive(key) {\n",
" const element = document.querySelector('#df-03edbbe5-3b0a-4e34-9ce4-5b1ff26d6c86');\n",
" const dataTable =\n",
" await google.colab.kernel.invokeFunction('convertToInteractive',\n",
" [key], {});\n",
" if (!dataTable) return;\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
" + ' to learn more about interactive tables.';\n",
" element.innerHTML = '';\n",
" dataTable['output_type'] = 'display_data';\n",
" await google.colab.output.renderOutput(dataTable, element);\n",
" const docLink = document.createElement('div');\n",
" docLink.innerHTML = docLinkHtml;\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
" <div id=\"df-29364feb-3e4b-4078-ae44-5f21c61da8e6\">\n",
" <button class=\"colab-df-quickchart\" onclick=\"quickchart('df-29364feb-3e4b-4078-ae44-5f21c61da8e6')\"\n",
" title=\"Suggest charts\"\n",
" style=\"display:none;\">\n",
"\n",
"<svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" .colab-df-quickchart {\n",
" --bg-color: #E8F0FE;\n",
" --fill-color: #1967D2;\n",
" --hover-bg-color: #E2EBFA;\n",
" --hover-fill-color: #174EA6;\n",
" --disabled-fill-color: #AAA;\n",
" --disabled-bg-color: #DDD;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-quickchart {\n",
" --bg-color: #3B4455;\n",
" --fill-color: #D2E3FC;\n",
" --hover-bg-color: #434B5C;\n",
" --hover-fill-color: #FFFFFF;\n",
" --disabled-bg-color: #3B4455;\n",
" --disabled-fill-color: #666;\n",
" }\n",
"\n",
" .colab-df-quickchart {\n",
" background-color: var(--bg-color);\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: var(--fill-color);\n",
" height: 32px;\n",
" padding: 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-quickchart:hover {\n",
" background-color: var(--hover-bg-color);\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: var(--button-hover-fill-color);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" .colab-df-quickchart-complete:disabled:hover {\n",
" background-color: var(--disabled-bg-color);\n",
" fill: var(--disabled-fill-color);\n",
" box-shadow: none;\n",
" }\n",
"\n",
" .colab-df-spinner {\n",
" border: 2px solid var(--fill-color);\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" animation:\n",
" spin 1s steps(1) infinite;\n",
" }\n",
"\n",
" @keyframes spin {\n",
" 0% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" border-left-color: var(--fill-color);\n",
" }\n",
" 20% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 30% {\n",
" border-color: transparent;\n",
" border-left-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 40% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-top-color: var(--fill-color);\n",
" }\n",
" 60% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" }\n",
" 80% {\n",
" border-color: transparent;\n",
" border-right-color: var(--fill-color);\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" 90% {\n",
" border-color: transparent;\n",
" border-bottom-color: var(--fill-color);\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" async function quickchart(key) {\n",
" const quickchartButtonEl =\n",
" document.querySelector('#' + key + ' button');\n",
" quickchartButtonEl.disabled = true; // To prevent multiple clicks.\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" try {\n",
" const charts = await google.colab.kernel.invokeFunction(\n",
" 'suggestCharts', [key], {});\n",
" } catch (error) {\n",
" console.error('Error during call to suggestCharts:', error);\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" (() => {\n",
" let quickchartButtonEl =\n",
" document.querySelector('#df-29364feb-3e4b-4078-ae44-5f21c61da8e6 button');\n",
" quickchartButtonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
" })();\n",
" </script>\n",
" </div>\n",
"\n",
" <div id=\"id_43238d98-131c-4f9a-816c-59e22d88ffb3\">\n",
" <style>\n",
" .colab-df-generate {\n",
" background-color: #E8F0FE;\n",
" border: none;\n",
" border-radius: 50%;\n",
" cursor: pointer;\n",
" display: none;\n",
" fill: #1967D2;\n",
" height: 32px;\n",
" padding: 0 0 0 0;\n",
" width: 32px;\n",
" }\n",
"\n",
" .colab-df-generate:hover {\n",
" background-color: #E2EBFA;\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" fill: #174EA6;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate {\n",
" background-color: #3B4455;\n",
" fill: #D2E3FC;\n",
" }\n",
"\n",
" [theme=dark] .colab-df-generate:hover {\n",
" background-color: #434B5C;\n",
" box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
" filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
" fill: #FFFFFF;\n",
" }\n",
" </style>\n",
" <button class=\"colab-df-generate\" onclick=\"generateWithVariable('df3')\"\n",
" title=\"Generate code using this dataframe.\"\n",
" style=\"display:none;\">\n",
"\n",
" <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
" width=\"24px\">\n",
" <path d=\"M7,19H8.4L18.45,9,17,7.55,7,17.6ZM5,21V16.75L18.45,3.32a2,2,0,0,1,2.83,0l1.4,1.43a1.91,1.91,0,0,1,.58,1.4,1.91,1.91,0,0,1-.58,1.4L9.25,21ZM18.45,9,17,7.55Zm-12,3A5.31,5.31,0,0,0,4.9,8.1,5.31,5.31,0,0,0,1,6.5,5.31,5.31,0,0,0,4.9,4.9,5.31,5.31,0,0,0,6.5,1,5.31,5.31,0,0,0,8.1,4.9,5.31,5.31,0,0,0,12,6.5,5.46,5.46,0,0,0,6.5,12Z\"/>\n",
" </svg>\n",
" </button>\n",
" <script>\n",
" (() => {\n",
" const buttonEl =\n",
" document.querySelector('#id_43238d98-131c-4f9a-816c-59e22d88ffb3 button.colab-df-generate');\n",
" buttonEl.style.display =\n",
" google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
"\n",
" buttonEl.onclick = () => {\n",
" google.colab.notebook.generateWithVariable('df3');\n",
" }\n",
" })();\n",
" </script>\n",
" </div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "df3"
}
},
"metadata": {},
"execution_count": 6
}
]
},
{
"cell_type": "code",
"source": [
"f\"{df3.size:,}\"\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 37
},
"id": "NE4p4bnb6pqK",
"outputId": "1e502106-4d30-48c2-c6f9-2741986e0fc0"
},
"execution_count": 7,
"outputs": [
{
"output_type": "execute_result",
"data": {
"text/plain": [
"'400,000,000'"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "string"
}
},
"metadata": {},
"execution_count": 7
}
]
},
{
"cell_type": "code",
"source": [
"df3.info()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "l9w_o_5B76jP",
"outputId": "55dbe460-ba95-4795-c8fa-7195b4597afe"
},
"execution_count": 8,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 20000 entries, 1 to 20000\n",
"Columns: 20000 entries, 0 to 19999\n",
"dtypes: uint16(20000)\n",
"memory usage: 763.0 MB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"df3.to_parquet(\"400MM.uint16.parquet\")\n"
],
"metadata": {
"id": "nicJzCX18nm3"
},
"execution_count": 9,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!ls -la --si\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "FvL14EhS9m5t",
"outputId": "83b6b772-a615-4ee1-b42c-487e10b171a2"
},
"execution_count": 10,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"total 9.8M\n",
"drwxr-xr-x 1 root root 4.1k Jun 14 12:25 .\n",
"drwxr-xr-x 1 root root 4.1k Jun 14 12:22 ..\n",
"-rw-r--r-- 1 root root 9.8M Jun 14 12:26 400MM.uint16.parquet\n",
"drwxr-xr-x 4 root root 4.1k Jun 12 13:35 .config\n",
"drwxr-xr-x 1 root root 4.1k Jun 12 13:36 sample_data\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"df4 = pd.read_parquet(\"400MM.uint16.parquet\")\n",
"df4.info()\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "vniPE0GRZVif",
"outputId": "529e65bc-b9bf-4978-b361-d35004b29912"
},
"execution_count": 11,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 20000 entries, 1 to 20000\n",
"Columns: 20000 entries, 0 to 19999\n",
"dtypes: uint16(20000)\n",
"memory usage: 763.0 MB\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"df3.to_csv(\"400MM.uint16.csv\")\n"
],
"metadata": {
"id": "Q27O541S9pGU"
},
"execution_count": 12,
"outputs": []
},
{
"cell_type": "code",
"source": [
"!ls -la --si\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gcpil9qB9xBX",
"outputId": "c1e63d0f-edbd-4f5d-e5f7-9dbccd7b0b35"
},
"execution_count": 13,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"total 2.2G\n",
"drwxr-xr-x 1 root root 4.1k Jun 14 12:26 .\n",
"drwxr-xr-x 1 root root 4.1k Jun 14 12:22 ..\n",
"-rw-r--r-- 1 root root 2.2G Jun 14 12:59 400MM.uint16.csv\n",
"-rw-r--r-- 1 root root 9.8M Jun 14 12:26 400MM.uint16.parquet\n",
"drwxr-xr-x 4 root root 4.1k Jun 12 13:35 .config\n",
"drwxr-xr-x 1 root root 4.1k Jun 12 13:36 sample_data\n"
]
}
]
},
{
"cell_type": "code",
"source": [
"df5 = pd.read_csv(\"400MM.uint16.csv\")\n",
"df5.info()\n"
],
"metadata": {
"id": "AJ1WvSzQ90Us",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "70d60b8c-6a89-4803-cc41-031cc79d3cf6"
},
"execution_count": 14,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 20000 entries, 0 to 19999\n",
"Columns: 20001 entries, A to 19999\n",
"dtypes: int64(20001)\n",
"memory usage: 3.0 GB\n"
]
}
]
},
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "Fkwr4OCbmCa2"
},
"execution_count": null,
"outputs": []
}
],
"metadata": {
"colab": {
"name": "large-df-parquet-csv.ipynb",
"provenance": [],
"include_colab_link": true
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
}
},
"nbformat": 4,
"nbformat_minor": 0
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment