A template notebook to be used in the Google Colab environment
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/habedi/dcc121e2cf959de204df1eea3a6fc429/effective-prediction-baseline-model-v2-for-training.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "CZQeICYiZ6Mv"
},
"source": [
"# Importing Libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "fArpJOpMZ6Mw"
},
"outputs": [],
"source": [
"import os\n",
"import gc\n",
"import numpy as np\n",
"\n",
"import pandas as pd\n",
"pd.set_option('display.max_rows', 500)\n",
"pd.set_option('display.max_columns', 500)\n",
"pd.set_option('display.width', 1000)\n",
"\n",
"from pathlib import Path\n",
"\n",
"from transformers import TFAutoModel\n",
"from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold"
]
},
{
"cell_type": "code",
"source": [
"# Set Keras3 backend to TensorFlow, JAX, or PyTorch\n",
"os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n", | |
"#os.environ[\"KERAS_BACKEND\"] = \"jax\"\n", | |
"#os.environ[\"KERAS_BACKEND\"] = \"pytorch\"" | |
],
"metadata": {
"id": "52Nkir1s5w5P"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aIcSNIzZZ6Mx",
"outputId": "bc460945-bf06-499d-f5fd-330fe90e1581"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"TensorFlow version: 2.15.0\n",
"Keras version: 2.15.0\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"#import tf_keras as keras\n",
"import keras\n",
"\n",
"print(f\"TensorFlow version: {tf.__version__}\")\n",
"print(f\"Keras version: {keras.__version__}\")"
]
},
{
"cell_type": "code",
"source": [
"policy = keras.mixed_precision.Policy('mixed_float16')\n",
"#keras.mixed_precision.set_global_policy(policy)\n", | |
"\n", | |
"print('Compute dtype: %s' % policy.compute_dtype)\n", | |
"print('Variable dtype: %s' % policy.variable_dtype)" | |
], | |
"metadata": { | |
"colab": { | |
"base_uri": "https://localhost:8080/" | |
}, | |
"id": "gAk5Lt1O5_ov", | |
"outputId": "30f3de58-85d7-4265-d627-a8ab0fccb155" | |
}, | |
"execution_count": 4, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"name": "stdout", | |
"text": [ | |
"Compute dtype: float16\n", | |
"Variable dtype: float32\n" | |
] | |
} | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "lvwgzEMYZ6My" | |
}, | |
"source": [ | |
"# Checking the Available Resources" | |
] | |
}, | |
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yza17viNZ6Mz",
"outputId": "79f173d3-075a-4f39-9d7d-fbeb73006866"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mon Apr 1 13:46:22 2024 \n",
"+---------------------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n",
"|-----------------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|=========================================+======================+======================|\n",
"| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
"| N/A 38C P8 9W / 70W | 3MiB / 15360MiB | 0% Default |\n",
"| | | N/A |\n",
"+-----------------------------------------+----------------------+----------------------+\n",
" \n",
"+---------------------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=======================================================================================|\n",
"| No running processes found |\n",
"+---------------------------------------------------------------------------------------+\n"
]
}
],
"source": [
"gpu_info = !nvidia-smi\n",
"gpu_info = '\\n'.join(gpu_info)\n",
"\n",
"if gpu_info.find('failed') >= 0:\n",
" print('Not connected to a GPU')\n",
"else:\n",
" print(gpu_info)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JoKadXL0Z6Mz",
"outputId": "e1f87695-0c3f-480a-90ae-e0e53cc5c85c"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Your runtime has 54.8 gigabytes of available RAM\n",
"\n",
"You are using a high-RAM runtime!\n"
]
}
],
"source": [
"from psutil import virtual_memory\n",
"\n",
"ram_gb = virtual_memory().total / 1e9\n",
"print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n",
"\n",
"if ram_gb < 20:\n",
" print('Not using a high-RAM runtime')\n",
"else:\n",
" print('You are using a high-RAM runtime!')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NwBQwt8EZ6M0",
"outputId": "74ef93a5-b665-4107-98de-62dd6d5f3b5a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n",
"Hit:2 http://security.ubuntu.com/ubuntu jammy-security InRelease\n",
"Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease\n",
"Hit:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease\n",
"Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease\n",
"Hit:6 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease\n",
"Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease\n",
"Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease\n",
"Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease\n",
"Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease\n",
"Reading package lists...\n",
"Building dependency tree...\n",
"Reading state information...\n",
"49 packages can be upgraded. Run 'apt list --upgradable' to see them.\n",
"Reading package lists...\n",
"Building dependency tree...\n",
"Reading state information...\n",
"htop is already the newest version (3.0.5-7build2).\n",
"neofetch is already the newest version (7.1.0-3).\n",
"0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.\n"
]
}
],
"source": [
"!sudo apt update -yq && sudo apt install neofetch htop -yq"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fckr0sNHZ6M1",
"outputId": "5084fd6f-7053-45b5-cefb-19e0574e2cd1"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[?25l\u001b[?7l\u001b[0m\u001b[31m\u001b[1m            .-/+oossssoo+/-.\n",
"        `:+ssssssssssssssssss+:`\n",
"      -+ssssssssssssssssssyyssss+-\n",
"    .ossssssssssssssssss\u001b[37m\u001b[0m\u001b[1mdMMMNy\u001b[0m\u001b[31m\u001b[1msssso.\n",
"   /sssssssssss\u001b[37m\u001b[0m\u001b[1mhdmmNNmmyNMMMMh\u001b[0m\u001b[31m\u001b[1mssssss/\n",
"  +sssssssss\u001b[37m\u001b[0m\u001b[1mhm\u001b[0m\u001b[31m\u001b[1myd\u001b[37m\u001b[0m\u001b[1mMMMMMMMNddddy\u001b[0m\u001b[31m\u001b[1mssssssss+\n",
" /ssssssss\u001b[37m\u001b[0m\u001b[1mhNMMM\u001b[0m\u001b[31m\u001b[1myh\u001b[37m\u001b[0m\u001b[1mhyyyyhmNMMMNh\u001b[0m\u001b[31m\u001b[1mssssssss/\n",
".ssssssss\u001b[37m\u001b[0m\u001b[1mdMMMNh\u001b[0m\u001b[31m\u001b[1mssssssssss\u001b[37m\u001b[0m\u001b[1mhNMMMd\u001b[0m\u001b[31m\u001b[1mssssssss.\n",
"+ssss\u001b[37m\u001b[0m\u001b[1mhhhyNMMNy\u001b[0m\u001b[31m\u001b[1mssssssssssss\u001b[37m\u001b[0m\u001b[1myNMMMy\u001b[0m\u001b[31m\u001b[1msssssss+\n",
"oss\u001b[37m\u001b[0m\u001b[1myNMMMNyMMh\u001b[0m\u001b[31m\u001b[1mssssssssssssss\u001b[37m\u001b[0m\u001b[1mhmmmh\u001b[0m\u001b[31m\u001b[1mssssssso\n",
"oss\u001b[37m\u001b[0m\u001b[1myNMMMNyMMh\u001b[0m\u001b[31m\u001b[1msssssssssssssshmmmh\u001b[0m\u001b[31m\u001b[1mssssssso\n",
"+ssss\u001b[37m\u001b[0m\u001b[1mhhhyNMMNy\u001b[0m\u001b[31m\u001b[1mssssssssssss\u001b[37m\u001b[0m\u001b[1myNMMMy\u001b[0m\u001b[31m\u001b[1msssssss+\n",
".ssssssss\u001b[37m\u001b[0m\u001b[1mdMMMNh\u001b[0m\u001b[31m\u001b[1mssssssssss\u001b[37m\u001b[0m\u001b[1mhNMMMd\u001b[0m\u001b[31m\u001b[1mssssssss.\n",
" /ssssssss\u001b[37m\u001b[0m\u001b[1mhNMMM\u001b[0m\u001b[31m\u001b[1myh\u001b[37m\u001b[0m\u001b[1mhyyyyhdNMMMNh\u001b[0m\u001b[31m\u001b[1mssssssss/\n",
"  +sssssssss\u001b[37m\u001b[0m\u001b[1mdm\u001b[0m\u001b[31m\u001b[1myd\u001b[37m\u001b[0m\u001b[1mMMMMMMMMddddy\u001b[0m\u001b[31m\u001b[1mssssssss+\n",
"   /sssssssssss\u001b[37m\u001b[0m\u001b[1mhdmNNNNmyNMMMMh\u001b[0m\u001b[31m\u001b[1mssssss/\n",
"    .ossssssssssssssssss\u001b[37m\u001b[0m\u001b[1mdMMMNy\u001b[0m\u001b[31m\u001b[1msssso.\n",
"      -+sssssssssssssssss\u001b[37m\u001b[0m\u001b[1myyy\u001b[0m\u001b[31m\u001b[1mssss+-\n",
"        `:+ssssssssssssssssss+:`\n",
"            .-/+oossssoo+/-.\u001b[0m\n",
"\u001b[20A\u001b[9999999D\u001b[43C\u001b[0m\u001b[1m\u001b[31m\u001b[1mroot\u001b[0m@\u001b[31m\u001b[1m8b115a06ccc6\u001b[0m \n",
"\u001b[43C\u001b[0m-----------------\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mOS\u001b[0m\u001b[0m:\u001b[0m Ubuntu 22.04.3 LTS x86_64\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mHost\u001b[0m\u001b[0m:\u001b[0m Google Compute Engine\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mKernel\u001b[0m\u001b[0m:\u001b[0m 6.1.58+\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mUptime\u001b[0m\u001b[0m:\u001b[0m 7 mins\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mPackages\u001b[0m\u001b[0m:\u001b[0m 1302 (dpkg)\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mShell\u001b[0m\u001b[0m:\u001b[0m bash 5.1.16\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mTerminal\u001b[0m\u001b[0m:\u001b[0m jupyter-noteboo\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mCPU\u001b[0m\u001b[0m:\u001b[0m Intel Xeon (8) @ 2.299GHz\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mGPU\u001b[0m\u001b[0m:\u001b[0m NVIDIA Tesla T4\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mMemory\u001b[0m\u001b[0m:\u001b[0m 1392MiB / 52217MiB\u001b[0m \n",
"\n",
"\u001b[43C\u001b[30m\u001b[40m   \u001b[31m\u001b[41m   \u001b[32m\u001b[42m   \u001b[33m\u001b[43m   \u001b[34m\u001b[44m   \u001b[35m\u001b[45m   \u001b[36m\u001b[46m   \u001b[37m\u001b[47m   \u001b[m\n",
"\u001b[43C\u001b[38;5;8m\u001b[48;5;8m   \u001b[38;5;9m\u001b[48;5;9m   \u001b[38;5;10m\u001b[48;5;10m   \u001b[38;5;11m\u001b[48;5;11m   \u001b[38;5;12m\u001b[48;5;12m   \u001b[38;5;13m\u001b[48;5;13m   \u001b[38;5;14m\u001b[48;5;14m   \u001b[38;5;15m\u001b[48;5;15m   \u001b[m\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\u001b[?25h\u001b[?7h"
]
}
],
"source": [
"!neofetch"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8GDV3B3bZ6M3"
},
"source": [
"# Mounting Google Drive (for Colab)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "K6LfD8OMZ6M3",
"outputId": "e95373b6-ef4f-415e-81a8-d83d43d25c51"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
],
"source": [
"from google.colab import drive\n",
"\n",
"drive.mount('/content/drive')"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "XsmLrvApZ6M4"
},
"source": [
"# Global Variables and Settings"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "j_7rcBKZZ6M6"
},
"outputs": [],
"source": [
"class CFG:\n",
" # Directories\n",
" base_data_dir = Path(\"/content/drive/MyDrive/ML_Workspace/ProjectX\")\n",
" input_dir = base_data_dir / \"input\"\n",
" output_dir = base_data_dir / \"output\"\n",
"\n",
" # Model parameters\n",
" base_model = \"microsoft/deberta-v3-xsmall\"\n",
" max_len = 512\n",
" gradient_checkpoint = False\n",
"\n",
" # Training parameters\n",
" num_workers = -1\n",
" batch_size = 16\n",
" verbose = 1\n",
" n_fold = 2\n",
" dropout = 0.2\n",
" lr = 5e-5\n",
" epochs = 3\n",
"\n",
" # Misc\n",
" random_seed = 42"
]
},
{
"cell_type": "code",
"source": [
"# Set the random seeds for reproducability\n", | |
"np.random.seed(CFG.random_seed)" | |
], | |
"metadata": { | |
"id": "VYhEDq0D4obd" | |
}, | |
"execution_count": 11, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "markdown", | |
"source": [ | |
"# The Rest" | |
], | |
"metadata": { | |
"id": "v8tdpF0g2n9c" | |
} | |
}, | |
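{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal usage sketch (not part of the original template) showing how the imported `StratifiedKFold` and the `CFG` settings might be combined to assign cross-validation folds. The file name `train.csv` and the `label` column are assumptions; adjust them to the actual dataset."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Hypothetical example: assign a stratified fold index to each row of a labelled dataframe\n",
"train_path = CFG.input_dir / \"train.csv\"  # assumed file name\n",
"\n",
"if train_path.exists():\n",
"    train = pd.read_csv(train_path)\n",
"    skf = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.random_seed)\n",
"    train[\"fold\"] = -1\n",
"    for fold, (_, valid_idx) in enumerate(skf.split(train, train[\"label\"])):\n",
"        train.loc[valid_idx, \"fold\"] = fold\n",
"    print(train[\"fold\"].value_counts())\n",
"else:\n",
"    print(f\"{train_path} not found; skipping the fold-assignment example\")"
]
},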
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "S-NumdtR2qsj"
},
"execution_count": 11,
"outputs": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": [],
"name": "my_template_colab_notebook_v1.ipynb",
"include_colab_link": true
},
"kaggle": {
"accelerator": "gpu",
"dataSources": [
{
"datasetId": 4691239,
"sourceId": 7972390,
"sourceType": "datasetVersion"
}
],
"isGpuEnabled": true,
"isInternetEnabled": true,
"language": "python",
"sourceType": "notebook"
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 0
}