SametSahin10 · May 6, 2023 16:59
diff --git a/composer_instrument_classification.ipynb b/composer_instrument_classification.ipynb
 {
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "provenance": [],
      "authorship_tag": "ABX9TyNnNakicIlUs3+Y+NH466+W",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/gist/SametSahin10/50332f5677e78cb50781bd1ce8947cb0/untitled0.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Attach Google Drive so files under /content/drive/ become readable.\n",
        "from google.colab import drive\n",
        "\n",
        "DRIVE_MOUNT_POINT = '/content/drive/'\n",
        "drive.mount(DRIVE_MOUNT_POINT)"
      ],
      "metadata": {
        "id": "t6OBJv6rfD6U"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# %pip (rather than !pip) installs into the environment of the running kernel.\n",
        "%pip install -q pretty_midi"
      ],
      "metadata": {
        "id": "AFSSTwuJgJ2M"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "dNWUTrTme1lG"
      },
      "outputs": [],
      "source": [
        "import pretty_midi\n",
        "import numpy as np\n",
        "import os\n",
        "import tensorflow as tf\n",
        "import pandas as pd\n",
        "\n",
        "def extractNotesAndVelocities(path):\n",
        "    \"\"\"Read a MIDI file and return the pitch and velocity of every note.\n",
        "\n",
        "    Notes are gathered instrument by instrument, in the order in which\n",
        "    ``midi_data.instruments`` and each ``instrument.notes`` list them.\n",
        "\n",
        "    Args:\n",
        "        path: Location of the MIDI file, passed to ``pretty_midi.PrettyMIDI``.\n",
        "\n",
        "    Returns:\n",
        "        A ``(notes, velocities)`` tuple of NumPy arrays with one entry per\n",
        "        note: ``notes`` holds every ``note.pitch`` and ``velocities`` the\n",
        "        matching ``note.velocity``.\n",
        "    \"\"\"\n",
        "    midi_data = pretty_midi.PrettyMIDI(path)\n",
        "\n",
        "    # Flatten the per-instrument note lists into a single sequence.\n",
        "    all_notes = [\n",
        "        note\n",
        "        for instrument in midi_data.instruments\n",
        "        for note in instrument.notes\n",
        "    ]\n",
        "\n",
        "    notes = np.array([note.pitch for note in all_notes])\n",
        "    velocities = np.array([note.velocity for note in all_notes])\n",
        "    return notes, velocities\n",
        "\n",
        "def createComposersDictionary(root_path):\n",
        "    \"\"\"Load every MIDI file under ``root_path`` and group the data by folder.\n",
        "\n",
        "    Each sub-folder of ``root_path`` is treated as one composer; the folder\n",
        "    name becomes the dictionary key.\n",
        "\n",
        "    Args:\n",
        "        root_path: Directory whose sub-folders hold the MIDI files.\n",
        "\n",
        "    Returns:\n",
        "        A dict mapping each folder name to a list of NumPy arrays, one per\n",
        "        successfully parsed file, where row 0 holds the note pitches and\n",
        "        row 1 the velocities. Folders without any parsable file get no key.\n",
        "    \"\"\"\n",
        "    composers = {}\n",
        "\n",
        "    for folder in os.listdir(root_path):\n",
        "        folder_path = os.path.join(root_path, folder)\n",
        "        # A stray file next to the composer folders (e.g. .DS_Store) would\n",
        "        # make os.listdir() raise, so only descend into directories.\n",
        "        if not os.path.isdir(folder_path):\n",
        "            continue\n",
        "\n",
        "        for file in os.listdir(folder_path):\n",
        "            path = f'{root_path}/{folder}/{file}'\n",
        "            try:\n",
        "                notes, velocities = extractNotesAndVelocities(path)\n",
        "            except Exception as error:\n",
        "                # Best effort: skip files that cannot be parsed, but say why.\n",
        "                # Catching Exception instead of using a bare except lets a\n",
        "                # KeyboardInterrupt still stop the loop.\n",
        "                print(f\"Got an error while processing: {path} ({error!r})\")\n",
        "                continue\n",
        "\n",
        "            piece = np.array([notes, velocities])\n",
        "            composers.setdefault(folder, []).append(piece)\n",
        "\n",
        "    return composers\n",
        "\n",
        "# Dataset root on the mounted Drive; each sub-folder is read as one composer.\n",
        "MIDI_ROOT = '/content/drive/MyDrive/collab/musicnet_midis'\n",
        "composers = createComposersDictionary(MIDI_ROOT)"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Dense layers act only on the last axis, so the (2, 400) input is flattened\n",
        "# first. Without Flatten the network would emit one prediction per row, i.e.\n",
        "# shape (batch, 2, 10), which does not match one integer label per sample.\n",
        "model = tf.keras.Sequential([\n",
        "    tf.keras.layers.Input(shape=(2, 400)),\n",
        "    tf.keras.layers.Flatten(),\n",
        "    tf.keras.layers.Dense(128, activation='relu'),\n",
        "    tf.keras.layers.Dense(128, activation='relu'),\n",
        "    tf.keras.layers.Dense(128, activation='relu'),\n",
        "    # 10 softmax outputs: one probability per class id 0..9.\n",
        "    tf.keras.layers.Dense(10, activation=\"softmax\")\n",
        "])\n",
        "\n",
        "# The sparse variant of categorical cross-entropy takes integer class ids\n",
        "# as targets instead of one-hot vectors.\n",
        "model.compile(\n",
        "    loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n",
        "    optimizer=tf.keras.optimizers.Adam(),\n",
        "    metrics=[\"accuracy\"]\n",
        ")"
      ],
      "metadata": {
        "id": "pxHkGhK6s76u"
      },
      "execution_count": 61,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Give every entry of the dataset folder an integer id, in listing order.\n",
        "composer_folders = os.listdir('/content/drive/MyDrive/collab/musicnet_midis')\n",
        "label_to_int = dict(zip(composer_folders, range(len(composer_folders))))"
      ],
      "metadata": {
        "id": "5x24_F1CjJwi"
      },
      "execution_count": 62,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "label_to_int"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "VMeNPV8Sjt3g",
        "outputId": "f7b6104e-8fe2-46ec-b06f-fe92e6eceaf5"
      },
      "execution_count": 63,
      "outputs": [
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "{'Mozart': 0,\n",
              " 'Brahms': 1,\n",
              " 'Dvorak': 2,\n",
              " 'Haydn': 3,\n",
              " 'Schubert': 4,\n",
              " 'Bach': 5,\n",
              " 'Beethoven': 6,\n",
              " 'Faure': 7,\n",
              " 'Ravel': 8,\n",
              " 'Cambini': 9}"
            ]
          },
          "metadata": {},
          "execution_count": 63
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Cut every piece into consecutive, non-overlapping windows of 400 notes.\n",
        "# Each window becomes one (2, 400) sample (row 0 pitches, row 1 velocities)\n",
        "# labelled with its composer's integer id. Notes left over after the last\n",
        "# full window are dropped, as are pieces shorter than one window.\n",
        "labels = []\n",
        "features = []\n",
        "for composer_name, pieces in composers.items():\n",
        "  for piece in pieces:\n",
        "    window_count = piece[0].shape[0] // 400\n",
        "    for start in range(0, window_count * 400, 400):\n",
        "      stop = start + 400\n",
        "      labels.append(label_to_int[composer_name])\n",
        "      features.append(np.array([piece[0][start:stop], piece[1][start:stop]]))"
      ],
      "metadata": {
        "id": "I6Ec_RHHh15W"
      },
      "execution_count": 64,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "len(features), len(labels)"
      ],
      "metadata": {
        "id": "PGcqNUJDjBeP"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Turn the Python lists of samples and labels into NumPy arrays.\n",
        "features, labels = np.asarray(features), np.asarray(labels)"
      ],
      "metadata": {
        "id": "xHSo0wVsmLWO"
      },
      "execution_count": 66,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "# Hold out 20% of the samples. A fixed random_state makes the shuffle, and\n",
        "# therefore the train/test membership, repeatable across runs.\n",
        "x_train, x_test, y_train, y_test = train_test_split(\n",
        "    features, labels, test_size=0.2, random_state=42\n",
        ")"
      ],
      "metadata": {
        "id": "SgomezGyk9J6"
      },
      "execution_count": 67,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "x_train.shape, x_test.shape, y_train.shape, y_test.shape"
      ],
      "metadata": {
        "id": "uHEsr-IctFg1"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "x_train[0]"
      ],
      "metadata": {
        "id": "puBn-Oz_wCGB"
      },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Train for 10 epochs, using the held-out split as validation data.\n",
        "history = model.fit(\n",
        "    x_train,\n",
        "    y_train,\n",
        "    epochs=10,\n",
        "    validation_data=(x_test, y_test),\n",
        ")"
      ],
      "metadata": {
        "id": "A-P_oIp_l1d-"
      },
      "execution_count": null,
      "outputs": []
    }
  ]
 }
	{
	"nbformat": 4,
	"nbformat_minor": 0,
	"metadata": {
	"colab": {
	"provenance": [],
	"authorship_tag": "ABX9TyNnNakicIlUs3+Y+NH466+W",
	"include_colab_link": true
	},
	"kernelspec": {
	"name": "python3",
	"display_name": "Python 3"
	},
	"language_info": {
	"name": "python"
	}
	},
	"cells": [
	{
	"cell_type": "markdown",
	"metadata": {
	"id": "view-in-github",
	"colab_type": "text"
	},
	"source": [
	"<a href=\"https://colab.research.google.com/gist/SametSahin10/50332f5677e78cb50781bd1ce8947cb0/untitled0.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
	]
	},
	{
	"cell_type": "code",
	"source": [
	"# Attach Google Drive so files under /content/drive/ become readable.\n",
	"from google.colab import drive\n",
	"\n",
	"DRIVE_MOUNT_POINT = '/content/drive/'\n",
	"drive.mount(DRIVE_MOUNT_POINT)"
	],
	"metadata": {
	"id": "t6OBJv6rfD6U"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# %pip (rather than !pip) installs into the environment of the running kernel.\n",
	"%pip install -q pretty_midi"
	],
	"metadata": {
	"id": "AFSSTwuJgJ2M"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"execution_count": null,
	"metadata": {
	"id": "dNWUTrTme1lG"
	},
	"outputs": [],
	"source": [
	"import pretty_midi\n",
	"import numpy as np\n",
	"import os\n",
	"import tensorflow as tf\n",
	"import pandas as pd\n",
	"\n",
	"def extractNotesAndVelocities(path):\n",
	"    \"\"\"Read a MIDI file and return the pitch and velocity of every note.\n",
	"\n",
	"    Notes are gathered instrument by instrument, in the order in which\n",
	"    ``midi_data.instruments`` and each ``instrument.notes`` list them.\n",
	"\n",
	"    Args:\n",
	"        path: Location of the MIDI file, passed to ``pretty_midi.PrettyMIDI``.\n",
	"\n",
	"    Returns:\n",
	"        A ``(notes, velocities)`` tuple of NumPy arrays with one entry per\n",
	"        note: ``notes`` holds every ``note.pitch`` and ``velocities`` the\n",
	"        matching ``note.velocity``.\n",
	"    \"\"\"\n",
	"    midi_data = pretty_midi.PrettyMIDI(path)\n",
	"\n",
	"    # Flatten the per-instrument note lists into a single sequence.\n",
	"    all_notes = [\n",
	"        note\n",
	"        for instrument in midi_data.instruments\n",
	"        for note in instrument.notes\n",
	"    ]\n",
	"\n",
	"    notes = np.array([note.pitch for note in all_notes])\n",
	"    velocities = np.array([note.velocity for note in all_notes])\n",
	"    return notes, velocities\n",
	"\n",
	"def createComposersDictionary(root_path):\n",
	"    \"\"\"Load every MIDI file under ``root_path`` and group the data by folder.\n",
	"\n",
	"    Each sub-folder of ``root_path`` is treated as one composer; the folder\n",
	"    name becomes the dictionary key.\n",
	"\n",
	"    Args:\n",
	"        root_path: Directory whose sub-folders hold the MIDI files.\n",
	"\n",
	"    Returns:\n",
	"        A dict mapping each folder name to a list of NumPy arrays, one per\n",
	"        successfully parsed file, where row 0 holds the note pitches and\n",
	"        row 1 the velocities. Folders without any parsable file get no key.\n",
	"    \"\"\"\n",
	"    composers = {}\n",
	"\n",
	"    for folder in os.listdir(root_path):\n",
	"        folder_path = os.path.join(root_path, folder)\n",
	"        # A stray file next to the composer folders (e.g. .DS_Store) would\n",
	"        # make os.listdir() raise, so only descend into directories.\n",
	"        if not os.path.isdir(folder_path):\n",
	"            continue\n",
	"\n",
	"        for file in os.listdir(folder_path):\n",
	"            path = f'{root_path}/{folder}/{file}'\n",
	"            try:\n",
	"                notes, velocities = extractNotesAndVelocities(path)\n",
	"            except Exception as error:\n",
	"                # Best effort: skip files that cannot be parsed, but say why.\n",
	"                # Catching Exception instead of using a bare except lets a\n",
	"                # KeyboardInterrupt still stop the loop.\n",
	"                print(f\"Got an error while processing: {path} ({error!r})\")\n",
	"                continue\n",
	"\n",
	"            piece = np.array([notes, velocities])\n",
	"            composers.setdefault(folder, []).append(piece)\n",
	"\n",
	"    return composers\n",
	"\n",
	"# Dataset root on the mounted Drive; each sub-folder is read as one composer.\n",
	"MIDI_ROOT = '/content/drive/MyDrive/collab/musicnet_midis'\n",
	"composers = createComposersDictionary(MIDI_ROOT)"
	]
	},
	{
	"cell_type": "code",
	"source": [
	"# Dense layers act only on the last axis, so the (2, 400) input is flattened\n",
	"# first. Without Flatten the network would emit one prediction per row, i.e.\n",
	"# shape (batch, 2, 10), which does not match one integer label per sample.\n",
	"model = tf.keras.Sequential([\n",
	"    tf.keras.layers.Input(shape=(2, 400)),\n",
	"    tf.keras.layers.Flatten(),\n",
	"    tf.keras.layers.Dense(128, activation='relu'),\n",
	"    tf.keras.layers.Dense(128, activation='relu'),\n",
	"    tf.keras.layers.Dense(128, activation='relu'),\n",
	"    # 10 softmax outputs: one probability per class id 0..9.\n",
	"    tf.keras.layers.Dense(10, activation=\"softmax\")\n",
	"])\n",
	"\n",
	"# The sparse variant of categorical cross-entropy takes integer class ids\n",
	"# as targets instead of one-hot vectors.\n",
	"model.compile(\n",
	"    loss=tf.keras.losses.SparseCategoricalCrossentropy(),\n",
	"    optimizer=tf.keras.optimizers.Adam(),\n",
	"    metrics=[\"accuracy\"]\n",
	")"
	],
	"metadata": {
	"id": "pxHkGhK6s76u"
	},
	"execution_count": 61,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Give every entry of the dataset folder an integer id, in listing order.\n",
	"composer_folders = os.listdir('/content/drive/MyDrive/collab/musicnet_midis')\n",
	"label_to_int = dict(zip(composer_folders, range(len(composer_folders))))"
	],
	"metadata": {
	"id": "5x24_F1CjJwi"
	},
	"execution_count": 62,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"label_to_int"
	],
	"metadata": {
	"colab": {
	"base_uri": "https://localhost:8080/"
	},
	"id": "VMeNPV8Sjt3g",
	"outputId": "f7b6104e-8fe2-46ec-b06f-fe92e6eceaf5"
	},
	"execution_count": 63,
	"outputs": [
	{
	"output_type": "execute_result",
	"data": {
	"text/plain": [
	"{'Mozart': 0,\n",
	" 'Brahms': 1,\n",
	" 'Dvorak': 2,\n",
	" 'Haydn': 3,\n",
	" 'Schubert': 4,\n",
	" 'Bach': 5,\n",
	" 'Beethoven': 6,\n",
	" 'Faure': 7,\n",
	" 'Ravel': 8,\n",
	" 'Cambini': 9}"
	]
	},
	"metadata": {},
	"execution_count": 63
	}
	]
	},
	{
	"cell_type": "code",
	"source": [
	"# Cut every piece into consecutive, non-overlapping windows of 400 notes.\n",
	"# Each window becomes one (2, 400) sample (row 0 pitches, row 1 velocities)\n",
	"# labelled with its composer's integer id. Notes left over after the last\n",
	"# full window are dropped, as are pieces shorter than one window.\n",
	"labels = []\n",
	"features = []\n",
	"for composer_name, pieces in composers.items():\n",
	"  for piece in pieces:\n",
	"    window_count = piece[0].shape[0] // 400\n",
	"    for start in range(0, window_count * 400, 400):\n",
	"      stop = start + 400\n",
	"      labels.append(label_to_int[composer_name])\n",
	"      features.append(np.array([piece[0][start:stop], piece[1][start:stop]]))"
	],
	"metadata": {
	"id": "I6Ec_RHHh15W"
	},
	"execution_count": 64,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"len(features), len(labels)"
	],
	"metadata": {
	"id": "PGcqNUJDjBeP"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Turn the Python lists of samples and labels into NumPy arrays.\n",
	"features, labels = np.asarray(features), np.asarray(labels)"
	],
	"metadata": {
	"id": "xHSo0wVsmLWO"
	},
	"execution_count": 66,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"from sklearn.model_selection import train_test_split\n",
	"\n",
	"# Hold out 20% of the samples. A fixed random_state makes the shuffle, and\n",
	"# therefore the train/test membership, repeatable across runs.\n",
	"x_train, x_test, y_train, y_test = train_test_split(\n",
	"    features, labels, test_size=0.2, random_state=42\n",
	")"
	],
	"metadata": {
	"id": "SgomezGyk9J6"
	},
	"execution_count": 67,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"x_train.shape, x_test.shape, y_train.shape, y_test.shape"
	],
	"metadata": {
	"id": "uHEsr-IctFg1"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"x_train[0]"
	],
	"metadata": {
	"id": "puBn-Oz_wCGB"
	},
	"execution_count": null,
	"outputs": []
	},
	{
	"cell_type": "code",
	"source": [
	"# Train for 10 epochs, using the held-out split as validation data.\n",
	"history = model.fit(\n",
	"    x_train,\n",
	"    y_train,\n",
	"    epochs=10,\n",
	"    validation_data=(x_test, y_test),\n",
	")"
	],
	"metadata": {
	"id": "A-P_oIp_l1d-"
	},
	"execution_count": null,
	"outputs": []
	}
	]
	}