@habedi
Last active April 11, 2024 07:44
A template notebook to be used in the Google Colab environment
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"id": "view-in-github",
"colab_type": "text"
},
"source": [
"<a href=\"https://colab.research.google.com/gist/habedi/dcc121e2cf959de204df1eea3a6fc429/effective-prediction-baseline-model-v2-for-training.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "CZQeICYiZ6Mv"
},
"source": [
"# Importing Libraries"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"id": "fArpJOpMZ6Mw"
},
"outputs": [],
"source": [
"import os\n",
"import gc\n",
"import numpy as np\n",
"\n",
"import pandas as pd\n",
"pd.set_option('display.max_rows', 500)\n",
"pd.set_option('display.max_columns', 500)\n",
"pd.set_option('display.width', 1000)\n",
"\n",
"from pathlib import Path\n",
"\n",
"from transformers import TFAutoModel\n",
"from sklearn.model_selection import StratifiedKFold, StratifiedGroupKFold"
]
},
{
"cell_type": "code",
"source": [
"# Set Keras3 backend to TensorFlow, JAX, or PyTorch\n",
"os.environ[\"KERAS_BACKEND\"] = \"tensorflow\"\n",
"#os.environ[\"KERAS_BACKEND\"] = \"jax\"\n",
"#os.environ[\"KERAS_BACKEND\"] = \"pytorch\""
],
"metadata": {
"id": "52Nkir1s5w5P"
},
"execution_count": 2,
"outputs": []
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "aIcSNIzZZ6Mx",
"outputId": "bc460945-bf06-499d-f5fd-330fe90e1581"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"TensorFlow version: 2.15.0\n",
"Keras version: 2.15.0\n"
]
}
],
"source": [
"import tensorflow as tf\n",
"#import tf_keras as keras\n",
"import keras\n",
"\n",
"print(f\"TensorFlow version: {tf.__version__}\")\n",
"print(f\"Keras version: {keras.__version__}\")"
]
},
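{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick sanity check (a sketch, not part of the original template): confirm which backend Keras actually picked up. `keras.backend.backend()` is available in both tf.keras 2.x and Keras 3."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Report the backend Keras is actually using\n",
"print(f\"Active Keras backend: {keras.backend.backend()}\")"
]
},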
{
"cell_type": "code",
"source": [
"policy = keras.mixed_precision.Policy('mixed_float16')\n",
"#keras.mixed_precision.set_global_policy(policy)\n",
"\n",
"print('Compute dtype: %s' % policy.compute_dtype)\n",
"print('Variable dtype: %s' % policy.variable_dtype)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "gAk5Lt1O5_ov",
"outputId": "30f3de58-85d7-4265-d627-a8ab0fccb155"
},
"execution_count": 4,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Compute dtype: float16\n",
"Variable dtype: float32\n"
]
}
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "lvwgzEMYZ6My"
},
"source": [
"# Checking the Available Resources"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "yza17viNZ6Mz",
"outputId": "79f173d3-075a-4f39-9d7d-fbeb73006866"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Mon Apr 1 13:46:22 2024 \n",
"+---------------------------------------------------------------------------------------+\n",
"| NVIDIA-SMI 535.104.05 Driver Version: 535.104.05 CUDA Version: 12.2 |\n",
"|-----------------------------------------+----------------------+----------------------+\n",
"| GPU Name Persistence-M | Bus-Id Disp.A | Volatile Uncorr. ECC |\n",
"| Fan Temp Perf Pwr:Usage/Cap | Memory-Usage | GPU-Util Compute M. |\n",
"| | | MIG M. |\n",
"|=========================================+======================+======================|\n",
"| 0 Tesla T4 Off | 00000000:00:04.0 Off | 0 |\n",
"| N/A 38C P8 9W / 70W | 3MiB / 15360MiB | 0% Default |\n",
"| | | N/A |\n",
"+-----------------------------------------+----------------------+----------------------+\n",
" \n",
"+---------------------------------------------------------------------------------------+\n",
"| Processes: |\n",
"| GPU GI CI PID Type Process name GPU Memory |\n",
"| ID ID Usage |\n",
"|=======================================================================================|\n",
"| No running processes found |\n",
"+---------------------------------------------------------------------------------------+\n"
]
}
],
"source": [
"gpu_info = !nvidia-smi\n",
"gpu_info = '\\n'.join(gpu_info)\n",
"\n",
"if gpu_info.find('failed') >= 0:\n",
" print('Not connected to a GPU')\n",
"else:\n",
" print(gpu_info)"
]
},
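{
"cell_type": "markdown",
"metadata": {},
"source": [
"A complementary check (a sketch, not part of the original template): `nvidia-smi` only shows that the driver sees the GPU; `tf.config.list_physical_devices` confirms TensorFlow itself can use it."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# List the GPUs visible to TensorFlow (an empty list means CPU-only)\n",
"print(tf.config.list_physical_devices('GPU'))"
]
},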
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "JoKadXL0Z6Mz",
"outputId": "e1f87695-0c3f-480a-90ae-e0e53cc5c85c"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Your runtime has 54.8 gigabytes of available RAM\n",
"\n",
"You are using a high-RAM runtime!\n"
]
}
],
"source": [
"from psutil import virtual_memory\n",
"\n",
"ram_gb = virtual_memory().total / 1e9\n",
"print('Your runtime has {:.1f} gigabytes of available RAM\\n'.format(ram_gb))\n",
"\n",
"if ram_gb < 20:\n",
" print('Not using a high-RAM runtime')\n",
"else:\n",
" print('You are using a high-RAM runtime!')"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "NwBQwt8EZ6M0",
"outputId": "74ef93a5-b665-4107-98de-62dd6d5f3b5a"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Hit:1 https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64 InRelease\n",
"Hit:2 http://security.ubuntu.com/ubuntu jammy-security InRelease\n",
"Hit:3 http://archive.ubuntu.com/ubuntu jammy InRelease\n",
"Hit:4 https://cloud.r-project.org/bin/linux/ubuntu jammy-cran40/ InRelease\n",
"Hit:5 http://archive.ubuntu.com/ubuntu jammy-updates InRelease\n",
"Hit:6 https://ppa.launchpadcontent.net/c2d4u.team/c2d4u4.0+/ubuntu jammy InRelease\n",
"Hit:7 http://archive.ubuntu.com/ubuntu jammy-backports InRelease\n",
"Hit:8 https://ppa.launchpadcontent.net/deadsnakes/ppa/ubuntu jammy InRelease\n",
"Hit:9 https://ppa.launchpadcontent.net/graphics-drivers/ppa/ubuntu jammy InRelease\n",
"Hit:10 https://ppa.launchpadcontent.net/ubuntugis/ppa/ubuntu jammy InRelease\n",
"Reading package lists...\n",
"Building dependency tree...\n",
"Reading state information...\n",
"49 packages can be upgraded. Run 'apt list --upgradable' to see them.\n",
"Reading package lists...\n",
"Building dependency tree...\n",
"Reading state information...\n",
"htop is already the newest version (3.0.5-7build2).\n",
"neofetch is already the newest version (7.1.0-3).\n",
"0 upgraded, 0 newly installed, 0 to remove and 49 not upgraded.\n"
]
}
],
"source": [
"!sudo apt update -yq && sudo apt install neofetch htop -yq"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "fckr0sNHZ6M1",
"outputId": "5084fd6f-7053-45b5-cefb-19e0574e2cd1"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"\u001b[?25l\u001b[?7l\u001b[0m\u001b[31m\u001b[1m .-/+oossssoo+/-.\n",
" `:+ssssssssssssssssss+:`\n",
" -+ssssssssssssssssssyyssss+-\n",
" .ossssssssssssssssss\u001b[37m\u001b[0m\u001b[1mdMMMNy\u001b[0m\u001b[31m\u001b[1msssso.\n",
" /sssssssssss\u001b[37m\u001b[0m\u001b[1mhdmmNNmmyNMMMMh\u001b[0m\u001b[31m\u001b[1mssssss/\n",
" +sssssssss\u001b[37m\u001b[0m\u001b[1mhm\u001b[0m\u001b[31m\u001b[1myd\u001b[37m\u001b[0m\u001b[1mMMMMMMMNddddy\u001b[0m\u001b[31m\u001b[1mssssssss+\n",
" /ssssssss\u001b[37m\u001b[0m\u001b[1mhNMMM\u001b[0m\u001b[31m\u001b[1myh\u001b[37m\u001b[0m\u001b[1mhyyyyhmNMMMNh\u001b[0m\u001b[31m\u001b[1mssssssss/\n",
".ssssssss\u001b[37m\u001b[0m\u001b[1mdMMMNh\u001b[0m\u001b[31m\u001b[1mssssssssss\u001b[37m\u001b[0m\u001b[1mhNMMMd\u001b[0m\u001b[31m\u001b[1mssssssss.\n",
"+ssss\u001b[37m\u001b[0m\u001b[1mhhhyNMMNy\u001b[0m\u001b[31m\u001b[1mssssssssssss\u001b[37m\u001b[0m\u001b[1myNMMMy\u001b[0m\u001b[31m\u001b[1msssssss+\n",
"oss\u001b[37m\u001b[0m\u001b[1myNMMMNyMMh\u001b[0m\u001b[31m\u001b[1mssssssssssssss\u001b[37m\u001b[0m\u001b[1mhmmmh\u001b[0m\u001b[31m\u001b[1mssssssso\n",
"oss\u001b[37m\u001b[0m\u001b[1myNMMMNyMMh\u001b[0m\u001b[31m\u001b[1msssssssssssssshmmmh\u001b[0m\u001b[31m\u001b[1mssssssso\n",
"+ssss\u001b[37m\u001b[0m\u001b[1mhhhyNMMNy\u001b[0m\u001b[31m\u001b[1mssssssssssss\u001b[37m\u001b[0m\u001b[1myNMMMy\u001b[0m\u001b[31m\u001b[1msssssss+\n",
".ssssssss\u001b[37m\u001b[0m\u001b[1mdMMMNh\u001b[0m\u001b[31m\u001b[1mssssssssss\u001b[37m\u001b[0m\u001b[1mhNMMMd\u001b[0m\u001b[31m\u001b[1mssssssss.\n",
" /ssssssss\u001b[37m\u001b[0m\u001b[1mhNMMM\u001b[0m\u001b[31m\u001b[1myh\u001b[37m\u001b[0m\u001b[1mhyyyyhdNMMMNh\u001b[0m\u001b[31m\u001b[1mssssssss/\n",
" +sssssssss\u001b[37m\u001b[0m\u001b[1mdm\u001b[0m\u001b[31m\u001b[1myd\u001b[37m\u001b[0m\u001b[1mMMMMMMMMddddy\u001b[0m\u001b[31m\u001b[1mssssssss+\n",
" /sssssssssss\u001b[37m\u001b[0m\u001b[1mhdmNNNNmyNMMMMh\u001b[0m\u001b[31m\u001b[1mssssss/\n",
" .ossssssssssssssssss\u001b[37m\u001b[0m\u001b[1mdMMMNy\u001b[0m\u001b[31m\u001b[1msssso.\n",
" -+sssssssssssssssss\u001b[37m\u001b[0m\u001b[1myyy\u001b[0m\u001b[31m\u001b[1mssss+-\n",
" `:+ssssssssssssssssss+:`\n",
" .-/+oossssoo+/-.\u001b[0m\n",
"\u001b[20A\u001b[9999999D\u001b[43C\u001b[0m\u001b[1m\u001b[31m\u001b[1mroot\u001b[0m@\u001b[31m\u001b[1m8b115a06ccc6\u001b[0m \n",
"\u001b[43C\u001b[0m-----------------\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mOS\u001b[0m\u001b[0m:\u001b[0m Ubuntu 22.04.3 LTS x86_64\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mHost\u001b[0m\u001b[0m:\u001b[0m Google Compute Engine\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mKernel\u001b[0m\u001b[0m:\u001b[0m 6.1.58+\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mUptime\u001b[0m\u001b[0m:\u001b[0m 7 mins\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mPackages\u001b[0m\u001b[0m:\u001b[0m 1302 (dpkg)\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mShell\u001b[0m\u001b[0m:\u001b[0m bash 5.1.16\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mTerminal\u001b[0m\u001b[0m:\u001b[0m jupyter-noteboo\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mCPU\u001b[0m\u001b[0m:\u001b[0m Intel Xeon (8) @ 2.299GHz\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mGPU\u001b[0m\u001b[0m:\u001b[0m NVIDIA Tesla T4\u001b[0m \n",
"\u001b[43C\u001b[0m\u001b[31m\u001b[1mMemory\u001b[0m\u001b[0m:\u001b[0m 1392MiB / 52217MiB\u001b[0m \n",
"\n",
"\u001b[43C\u001b[30m\u001b[40m \u001b[31m\u001b[41m \u001b[32m\u001b[42m \u001b[33m\u001b[43m \u001b[34m\u001b[44m \u001b[35m\u001b[45m \u001b[36m\u001b[46m \u001b[37m\u001b[47m \u001b[m\n",
"\u001b[43C\u001b[38;5;8m\u001b[48;5;8m \u001b[38;5;9m\u001b[48;5;9m \u001b[38;5;10m\u001b[48;5;10m \u001b[38;5;11m\u001b[48;5;11m \u001b[38;5;12m\u001b[48;5;12m \u001b[38;5;13m\u001b[48;5;13m \u001b[38;5;14m\u001b[48;5;14m \u001b[38;5;15m\u001b[48;5;15m \u001b[m\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"\u001b[?25h\u001b[?7h"
]
}
],
"source": [
"!neofetch"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "8GDV3B3bZ6M3"
},
"source": [
"# Mounting Google Drive (for Colab)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "K6LfD8OMZ6M3",
"outputId": "e95373b6-ef4f-415e-81a8-d83d43d25c51"
},
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
"Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount(\"/content/drive\", force_remount=True).\n"
]
}
],
"source": [
"from google.colab import drive\n",
"\n",
"drive.mount('/content/drive')"
]
},
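{
"cell_type": "markdown",
"metadata": {},
"source": [
"A follow-up check (a sketch, not part of the original template): list a few entries under the mount point to confirm Drive is actually readable."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Show a few entries from the mounted Drive (fails if the mount did not work)\n",
"print(os.listdir('/content/drive/MyDrive')[:5])"
]
},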
{
"cell_type": "markdown",
"metadata": {
"id": "XsmLrvApZ6M4"
},
"source": [
"# Global Variables and Settings"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"id": "j_7rcBKZZ6M6"
},
"outputs": [],
"source": [
"class CFG:\n",
" # Directories\n",
" base_data_dir = Path(\"/content/drive/MyDrive/ML_Workspace/ProjectX\")\n",
" input_dir = base_data_dir / \"input\"\n",
" output_dir = base_data_dir / \"output\"\n",
"\n",
" # Model parameters\n",
" base_model = \"microsoft/deberta-v3-xsmall\"\n",
" max_len = 512\n",
" gradient_checkpoint = False\n",
"\n",
" # Training parameters\n",
" num_workers = -1\n",
" batch_size = 16\n",
" verbose = 1\n",
" n_fold = 2\n",
" dropout = 0.2\n",
" lr = 5e-5\n",
" epochs = 3\n",
"\n",
" # Misc\n",
" random_seed = 42"
]
},
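{
"cell_type": "markdown",
"metadata": {},
"source": [
"A hypothetical usage sketch (not part of the original template): create the output directory, then assign a cross-validation fold to each row using the settings in `CFG`. The toy DataFrame and the `label` column are placeholders for the real training data."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Make sure the output directory exists on the mounted Drive\n",
"CFG.output_dir.mkdir(parents=True, exist_ok=True)\n",
"\n",
"# Toy data; replace with the real training DataFrame\n",
"df = pd.DataFrame({\"text\": list(\"abcd\"), \"label\": [0, 1, 0, 1]})\n",
"\n",
"# Assign a fold id per row, stratified on the label column\n",
"skf = StratifiedKFold(n_splits=CFG.n_fold, shuffle=True, random_state=CFG.random_seed)\n",
"df[\"fold\"] = -1\n",
"for fold, (_, val_idx) in enumerate(skf.split(df, df[\"label\"])):\n",
"    df.loc[val_idx, \"fold\"] = fold\n",
"df"
]
},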
{
"cell_type": "code",
"source": [
"# Set the random seeds for reproducability\n",
"np.random.seed(CFG.random_seed)"
],
"metadata": {
"id": "VYhEDq0D4obd"
},
"execution_count": 11,
"outputs": []
},
{
"cell_type": "markdown",
"source": [
"# The Rest"
],
"metadata": {
"id": "v8tdpF0g2n9c"
}
},
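{
"cell_type": "markdown",
"metadata": {},
"source": [
"A minimal model-building sketch (an assumption, not the original author's method): load the backbone named in `CFG` with `TFAutoModel` and attach a single-output head. The [CLS] pooling, the sigmoid head, and the binary cross-entropy loss are placeholder choices for whatever the actual task needs."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def build_model():\n",
"    # Load the pretrained backbone; pass from_pt=True if the checkpoint\n",
"    # only ships PyTorch weights\n",
"    backbone = TFAutoModel.from_pretrained(CFG.base_model)\n",
"\n",
"    input_ids = keras.Input(shape=(CFG.max_len,), dtype=\"int32\", name=\"input_ids\")\n",
"    attention_mask = keras.Input(shape=(CFG.max_len,), dtype=\"int32\", name=\"attention_mask\")\n",
"\n",
"    # Use the first ([CLS]) token as a pooled representation (placeholder choice)\n",
"    hidden = backbone(input_ids=input_ids, attention_mask=attention_mask).last_hidden_state\n",
"    pooled = keras.layers.Dropout(CFG.dropout)(hidden[:, 0, :])\n",
"    output = keras.layers.Dense(1, activation=\"sigmoid\")(pooled)\n",
"\n",
"    model = keras.Model(inputs=[input_ids, attention_mask], outputs=output)\n",
"    model.compile(optimizer=keras.optimizers.Adam(learning_rate=CFG.lr),\n",
"                  loss=\"binary_crossentropy\")\n",
"    return model"
]
},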
{
"cell_type": "code",
"source": [],
"metadata": {
"id": "S-NumdtR2qsj"
},
"execution_count": 11,
"outputs": []
}
],
"metadata": {
"accelerator": "GPU",
"colab": {
"gpuType": "T4",
"provenance": [],
"name": "my_template_colab_notebook_v1.ipynb",
"include_colab_link": true
},
"kaggle": {
"accelerator": "gpu",
"dataSources": [
{
"datasetId": 4691239,
"sourceId": 7972390,
"sourceType": "datasetVersion"
}
],
"isGpuEnabled": true,
"isInternetEnabled": true,
"language": "python",
"sourceType": "notebook"
},
"kernelspec": {
"display_name": "Python 3",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 0
}