{ "cells": [ { "cell_type": "markdown", "id": "5ede69a1", "metadata": {}, "source": [ "# Classification Challenge using CatBoost\n", "\n", "## INF2179 Fall 2021\n", "### Hamid Yuksel\n", "\n", "This submission uses [CatBoost](https://catboost.ai/).\n", "CatBoost was chosen for its listed benefits, mainly in requiring less hyperparameter tuning and preprocessing of categorical and text features. It is also fast and fairly easy to set up.\n", "\n", "\"Markdown\n" ] }, { "cell_type": "code", "execution_count": 1, "id": "ee82451e", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: catboost in /Users/yuksel/.local/lib/python3.8/site-packages (1.0.3)\n", "Requirement already satisfied: numpy>=1.16.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from catboost) (1.20.1)\n", "Requirement already satisfied: scipy in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from catboost) (1.8.0)\n", "Requirement already satisfied: matplotlib in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from catboost) (3.4.3)\n", "Requirement already satisfied: plotly in /Users/yuksel/.local/lib/python3.8/site-packages (from catboost) (5.3.1)\n", "Requirement already satisfied: pandas>=0.24.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from catboost) (1.2.4)\n", "Requirement already satisfied: graphviz in /Users/yuksel/.local/lib/python3.8/site-packages (from catboost) (0.18)\n", "Requirement already satisfied: six in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from catboost) (1.15.0)\n", "Requirement already satisfied: python-dateutil>=2.7.3 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from pandas>=0.24.0->catboost) (2.8.1)\n", "Requirement already satisfied: pytz>=2017.3 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from pandas>=0.24.0->catboost) (2021.1)\n", "Requirement already satisfied: pyparsing>=2.2.1 in 
/Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from matplotlib->catboost) (2.4.7)\n", "Requirement already satisfied: cycler>=0.10 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from matplotlib->catboost) (0.10.0)\n", "Requirement already satisfied: pillow>=6.2.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from matplotlib->catboost) (8.2.0)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from matplotlib->catboost) (1.3.1)\n", "Requirement already satisfied: tenacity>=6.2.0 in /Users/yuksel/.local/lib/python3.8/site-packages (from plotly->catboost) (8.0.1)\n", "\u001b[33mWARNING: You are using pip version 21.3.1; however, version 22.2.1 is available.\n", "You should consider upgrading via the '/Users/yuksel/opt/anaconda3/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n", "Requirement already satisfied: ipywidgets in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (7.6.3)\n", "Requirement already satisfied: traitlets>=4.3.1 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipywidgets) (5.0.5)\n", "Requirement already satisfied: jupyterlab-widgets>=1.0.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipywidgets) (1.0.0)\n", "Requirement already satisfied: ipykernel>=4.5.1 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipywidgets) (5.3.4)\n", "Requirement already satisfied: ipython>=4.0.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipywidgets) (7.22.0)\n", "Requirement already satisfied: nbformat>=4.2.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipywidgets) (5.1.3)\n", "Requirement already satisfied: widgetsnbextension~=3.5.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipywidgets) (3.5.1)\n", "Requirement already satisfied: tornado>=4.2 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets) 
(6.1)\n", "Requirement already satisfied: jupyter-client in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets) (6.1.12)\n", "Requirement already satisfied: appnope in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipykernel>=4.5.1->ipywidgets) (0.1.2)\n", "Requirement already satisfied: pygments in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets) (2.8.1)\n", "Requirement already satisfied: prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets) (3.0.17)\n", "Requirement already satisfied: setuptools>=18.5 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets) (52.0.0.post20210125)\n", "Requirement already satisfied: backcall in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets) (0.2.0)\n", "Requirement already satisfied: jedi>=0.16 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets) (0.17.2)\n", "Requirement already satisfied: pexpect>4.3 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets) (4.8.0)\n", "Requirement already satisfied: pickleshare in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets) (0.7.5)\n", "Requirement already satisfied: decorator in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from ipython>=4.0.0->ipywidgets) (5.0.6)\n", "Requirement already satisfied: ipython-genutils in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets) (0.2.0)\n", "Requirement already satisfied: jupyter-core in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbformat>=4.2.0->ipywidgets) (4.7.1)\n", "Requirement already satisfied: jsonschema!=2.5.0,>=2.4 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from 
nbformat>=4.2.0->ipywidgets) (3.2.0)\n", "Requirement already satisfied: notebook>=4.4.1 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from widgetsnbextension~=3.5.0->ipywidgets) (6.3.0)\n", "Requirement already satisfied: parso<0.8.0,>=0.7.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from jedi>=0.16->ipython>=4.0.0->ipywidgets) (0.7.0)\n", "Requirement already satisfied: six>=1.11.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (1.15.0)\n", "Requirement already satisfied: attrs>=17.4.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (20.3.0)\n", "Requirement already satisfied: pyrsistent>=0.14.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from jsonschema!=2.5.0,>=2.4->nbformat>=4.2.0->ipywidgets) (0.17.3)\n", "Requirement already satisfied: nbconvert in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (6.0.7)\n", "Requirement already satisfied: Send2Trash>=1.5.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.5.0)\n", "Requirement already satisfied: pyzmq>=17 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (20.0.0)\n", "Requirement already satisfied: terminado>=0.8.3 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.9.4)\n", "Requirement already satisfied: argon2-cffi in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (20.1.0)\n", "Requirement already satisfied: prometheus-client in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.10.1)\n", 
"Requirement already satisfied: jinja2 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.11.3)\n", "Requirement already satisfied: python-dateutil>=2.1 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from jupyter-client->ipykernel>=4.5.1->ipywidgets) (2.8.1)\n", "Requirement already satisfied: ptyprocess>=0.5 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from pexpect>4.3->ipython>=4.0.0->ipywidgets) (0.7.0)\n", "Requirement already satisfied: wcwidth in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from prompt-toolkit!=3.0.0,!=3.0.1,<3.1.0,>=2.0.0->ipython>=4.0.0->ipywidgets) (0.2.5)\n", "Requirement already satisfied: cffi>=1.0.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.14.5)\n", "Requirement already satisfied: MarkupSafe>=0.23 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from jinja2->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.0.1)\n", "Requirement already satisfied: entrypoints>=0.2.2 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.3)\n", "Requirement already satisfied: jupyterlab-pygments in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.1.2)\n", "Requirement already satisfied: testpath in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.4.4)\n", "Requirement already satisfied: defusedxml in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.7.1)\n", "Requirement already satisfied: nbclient<0.6.0,>=0.5.0 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from 
nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.3)\n", "Requirement already satisfied: mistune<2,>=0.8.1 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.8.4)\n", "Requirement already satisfied: pandocfilters>=1.4.1 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.4.3)\n", "Requirement already satisfied: bleach in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (3.3.0)\n", "Requirement already satisfied: pycparser in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from cffi>=1.0.0->argon2-cffi->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.20)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: async-generator in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.10)\n", "Requirement already satisfied: nest-asyncio in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from nbclient<0.6.0,>=0.5.0->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (1.5.1)\n", "Requirement already satisfied: webencodings in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (0.5.1)\n", "Requirement already satisfied: packaging in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (21.3)\n", "Requirement already satisfied: pyparsing!=3.0.5,>=2.0.2 in /Users/yuksel/opt/anaconda3/lib/python3.8/site-packages (from packaging->bleach->nbconvert->notebook>=4.4.1->widgetsnbextension~=3.5.0->ipywidgets) (2.4.7)\n", "\u001b[33mWARNING: You are using pip version 
21.3.1; however, version 22.2.1 is available.\n", "You should consider upgrading via the '/Users/yuksel/opt/anaconda3/bin/python3 -m pip install --upgrade pip' command.\u001b[0m\n", "Enabling notebook extension jupyter-js-widgets/extension...\n", " - Validating: \u001b[32mOK\u001b[0m\n" ] } ], "source": [ "#Installing and Importing required libraries\n", "! pip3 install --user catboost\n", "! pip3 install --user ipywidgets\n", "! jupyter nbextension enable --py widgetsnbextension\n", "\n", "import pandas as pd \n", "import numpy as np\n", "from sklearn.neural_network import MLPClassifier\n", "from sklearn.metrics import accuracy_score \n", "from catboost import Pool, CatBoostClassifier" ] }, { "cell_type": "code", "execution_count": 2, "id": "1853f85d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Rock 24486\n", "Pop 16251\n", "Hip Hop 9263\n", "unknown 5000\n", "Name: Genre, dtype: int64" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Reading data\n", "df = pd.read_csv('data.csv')\n", "\n", "# Splitting\n", "training = df.head(50000)\n", "holdout_set = training.sample(5000, random_state=1) # pick 5000 observations randomly\n", "training = training.drop(holdout_set.index) # Remove holdout from training data\n", "testing = df.tail(5000)\n", "\n", "#Looking at counts per genre\n", "df['Genre'].value_counts()" ] }, { "cell_type": "code", "execution_count": 3, "id": "f0a20f7a", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Lyric
50000Feels so good,. Feels so good,. Feels so good ...
50001Shadow of a doubt. I heard your heart,. you he...
50002Slaves. Hebrews born to serve to the pharaoh. ...
50003You've been picked and it's over. What's the c...
50004Magic happens. But only if you are open to the...
......
54995I can't believe what you did to me. Down on my...
54996Have all the songs been written?. Have all the...
54997Everything you do you do so right. The clothes...
54998(trecho). (Rule Number Two. Understanding what...
54999As fall rides off in the Sunset. I sweep the S...
\n", "

5000 rows × 1 columns

\n", "
" ], "text/plain": [ " Lyric\n", "50000 Feels so good,. Feels so good,. Feels so good ...\n", "50001 Shadow of a doubt. I heard your heart,. you he...\n", "50002 Slaves. Hebrews born to serve to the pharaoh. ...\n", "50003 You've been picked and it's over. What's the c...\n", "50004 Magic happens. But only if you are open to the...\n", "... ...\n", "54995 I can't believe what you did to me. Down on my...\n", "54996 Have all the songs been written?. Have all the...\n", "54997 Everything you do you do so right. The clothes...\n", "54998 (trecho). (Rule Number Two. Understanding what...\n", "54999 As fall rides off in the Sunset. I sweep the S...\n", "\n", "[5000 rows x 1 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Splitting training/testing set to feature (X) and labels (y)\n", "train_y = training.Genre\n", "train_X = training.drop('Genre', axis=1)\n", "\n", "test_X = testing.drop('Genre', axis=1)\n", "\n", "test_X" ] }, { "cell_type": "code", "execution_count": 5, "id": "1c8eb420", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0:\tlearn: 0.8093706\ttotal: 204ms\tremaining: 20.2s\n", "1:\tlearn: 0.7672922\ttotal: 383ms\tremaining: 18.8s\n", "2:\tlearn: 0.7547202\ttotal: 538ms\tremaining: 17.4s\n", "3:\tlearn: 0.7451655\ttotal: 725ms\tremaining: 17.4s\n", "4:\tlearn: 0.7425807\ttotal: 874ms\tremaining: 16.6s\n", "5:\tlearn: 0.7348963\ttotal: 1.03s\tremaining: 16.1s\n", "6:\tlearn: 0.7305562\ttotal: 1.18s\tremaining: 15.7s\n", "7:\tlearn: 0.7265356\ttotal: 1.35s\tremaining: 15.5s\n", "8:\tlearn: 0.7236361\ttotal: 1.51s\tremaining: 15.2s\n", "9:\tlearn: 0.7214462\ttotal: 1.66s\tremaining: 14.9s\n", "10:\tlearn: 0.7199267\ttotal: 1.8s\tremaining: 14.6s\n", "11:\tlearn: 0.7176381\ttotal: 1.95s\tremaining: 14.3s\n", "12:\tlearn: 0.7126308\ttotal: 2.13s\tremaining: 14.3s\n", "13:\tlearn: 0.7106341\ttotal: 2.27s\tremaining: 14s\n", "14:\tlearn: 0.7080899\ttotal: 
2.42s\tremaining: 13.7s\n", "15:\tlearn: 0.7062654\ttotal: 2.57s\tremaining: 13.5s\n", "16:\tlearn: 0.7047084\ttotal: 2.71s\tremaining: 13.2s\n", "17:\tlearn: 0.7034535\ttotal: 2.85s\tremaining: 13s\n", "18:\tlearn: 0.6994373\ttotal: 3.01s\tremaining: 12.8s\n", "19:\tlearn: 0.6973022\ttotal: 3.17s\tremaining: 12.7s\n", "20:\tlearn: 0.6955342\ttotal: 3.32s\tremaining: 12.5s\n", "21:\tlearn: 0.6942205\ttotal: 3.46s\tremaining: 12.3s\n", "22:\tlearn: 0.6924866\ttotal: 3.62s\tremaining: 12.1s\n", "23:\tlearn: 0.6911944\ttotal: 3.76s\tremaining: 11.9s\n", "24:\tlearn: 0.6894409\ttotal: 3.91s\tremaining: 11.7s\n", "25:\tlearn: 0.6876020\ttotal: 4.06s\tremaining: 11.6s\n", "26:\tlearn: 0.6865187\ttotal: 4.2s\tremaining: 11.4s\n", "27:\tlearn: 0.6854479\ttotal: 4.34s\tremaining: 11.2s\n", "28:\tlearn: 0.6842690\ttotal: 4.52s\tremaining: 11.1s\n", "29:\tlearn: 0.6829425\ttotal: 4.66s\tremaining: 10.9s\n", "30:\tlearn: 0.6816033\ttotal: 4.81s\tremaining: 10.7s\n", "31:\tlearn: 0.6801754\ttotal: 4.94s\tremaining: 10.5s\n", "32:\tlearn: 0.6782935\ttotal: 5.11s\tremaining: 10.4s\n", "33:\tlearn: 0.6776460\ttotal: 5.24s\tremaining: 10.2s\n", "34:\tlearn: 0.6764593\ttotal: 5.39s\tremaining: 10s\n", "35:\tlearn: 0.6753200\ttotal: 5.54s\tremaining: 9.85s\n", "36:\tlearn: 0.6743207\ttotal: 5.69s\tremaining: 9.7s\n", "37:\tlearn: 0.6735959\ttotal: 5.83s\tremaining: 9.51s\n", "38:\tlearn: 0.6725859\ttotal: 5.97s\tremaining: 9.34s\n", "39:\tlearn: 0.6713788\ttotal: 6.11s\tremaining: 9.17s\n", "40:\tlearn: 0.6704214\ttotal: 6.25s\tremaining: 9s\n", "41:\tlearn: 0.6695152\ttotal: 6.41s\tremaining: 8.85s\n", "42:\tlearn: 0.6685145\ttotal: 6.57s\tremaining: 8.71s\n", "43:\tlearn: 0.6672083\ttotal: 6.72s\tremaining: 8.56s\n", "44:\tlearn: 0.6661077\ttotal: 6.89s\tremaining: 8.42s\n", "45:\tlearn: 0.6652636\ttotal: 7.03s\tremaining: 8.25s\n", "46:\tlearn: 0.6641843\ttotal: 7.2s\tremaining: 8.11s\n", "47:\tlearn: 0.6631666\ttotal: 7.36s\tremaining: 7.97s\n", "48:\tlearn: 0.6617494\ttotal: 
7.52s\tremaining: 7.83s\n", "49:\tlearn: 0.6607631\ttotal: 7.68s\tremaining: 7.68s\n", "50:\tlearn: 0.6591692\ttotal: 7.86s\tremaining: 7.55s\n", "51:\tlearn: 0.6581611\ttotal: 8s\tremaining: 7.38s\n", "52:\tlearn: 0.6572886\ttotal: 8.16s\tremaining: 7.24s\n", "53:\tlearn: 0.6561826\ttotal: 8.33s\tremaining: 7.1s\n", "54:\tlearn: 0.6553215\ttotal: 8.47s\tremaining: 6.93s\n", "55:\tlearn: 0.6548207\ttotal: 8.61s\tremaining: 6.76s\n", "56:\tlearn: 0.6537602\ttotal: 8.75s\tremaining: 6.6s\n", "57:\tlearn: 0.6527694\ttotal: 8.89s\tremaining: 6.44s\n", "58:\tlearn: 0.6519938\ttotal: 9.05s\tremaining: 6.29s\n", "59:\tlearn: 0.6512436\ttotal: 9.18s\tremaining: 6.12s\n", "60:\tlearn: 0.6506879\ttotal: 9.33s\tremaining: 5.97s\n", "61:\tlearn: 0.6498068\ttotal: 9.47s\tremaining: 5.8s\n", "62:\tlearn: 0.6491370\ttotal: 9.61s\tremaining: 5.64s\n", "63:\tlearn: 0.6482225\ttotal: 9.75s\tremaining: 5.48s\n", "64:\tlearn: 0.6475584\ttotal: 9.89s\tremaining: 5.33s\n", "65:\tlearn: 0.6464247\ttotal: 10s\tremaining: 5.17s\n", "66:\tlearn: 0.6458629\ttotal: 10.2s\tremaining: 5.01s\n", "67:\tlearn: 0.6450315\ttotal: 10.3s\tremaining: 4.85s\n", "68:\tlearn: 0.6442996\ttotal: 10.4s\tremaining: 4.69s\n", "69:\tlearn: 0.6437053\ttotal: 10.6s\tremaining: 4.54s\n", "70:\tlearn: 0.6430089\ttotal: 10.7s\tremaining: 4.38s\n", "71:\tlearn: 0.6424537\ttotal: 10.9s\tremaining: 4.23s\n", "72:\tlearn: 0.6415341\ttotal: 11s\tremaining: 4.07s\n", "73:\tlearn: 0.6406980\ttotal: 11.2s\tremaining: 3.92s\n", "74:\tlearn: 0.6393859\ttotal: 11.3s\tremaining: 3.77s\n", "75:\tlearn: 0.6385405\ttotal: 11.4s\tremaining: 3.62s\n", "76:\tlearn: 0.6375530\ttotal: 11.6s\tremaining: 3.47s\n", "77:\tlearn: 0.6368774\ttotal: 11.8s\tremaining: 3.32s\n", "78:\tlearn: 0.6358629\ttotal: 11.9s\tremaining: 3.17s\n", "79:\tlearn: 0.6349938\ttotal: 12s\tremaining: 3.01s\n", "80:\tlearn: 0.6342170\ttotal: 12.2s\tremaining: 2.86s\n", "81:\tlearn: 0.6334373\ttotal: 12.3s\tremaining: 2.7s\n", "82:\tlearn: 0.6327819\ttotal: 
12.5s\tremaining: 2.55s\n", "83:\tlearn: 0.6321171\ttotal: 12.6s\tremaining: 2.4s\n", "84:\tlearn: 0.6315886\ttotal: 12.7s\tremaining: 2.25s\n", "85:\tlearn: 0.6308170\ttotal: 12.9s\tremaining: 2.1s\n", "86:\tlearn: 0.6299083\ttotal: 13s\tremaining: 1.95s\n", "87:\tlearn: 0.6294237\ttotal: 13.2s\tremaining: 1.8s\n", "88:\tlearn: 0.6286204\ttotal: 13.3s\tremaining: 1.65s\n", "89:\tlearn: 0.6277611\ttotal: 13.5s\tremaining: 1.5s\n", "90:\tlearn: 0.6269985\ttotal: 13.6s\tremaining: 1.35s\n", "91:\tlearn: 0.6257764\ttotal: 13.8s\tremaining: 1.2s\n", "92:\tlearn: 0.6250042\ttotal: 13.9s\tremaining: 1.05s\n", "93:\tlearn: 0.6238683\ttotal: 14.1s\tremaining: 899ms\n", "94:\tlearn: 0.6232904\ttotal: 14.2s\tremaining: 749ms\n", "95:\tlearn: 0.6221232\ttotal: 14.4s\tremaining: 598ms\n", "96:\tlearn: 0.6214122\ttotal: 14.5s\tremaining: 449ms\n", "97:\tlearn: 0.6205985\ttotal: 14.6s\tremaining: 299ms\n", "98:\tlearn: 0.6196649\ttotal: 14.8s\tremaining: 149ms\n", "99:\tlearn: 0.6192072\ttotal: 14.9s\tremaining: 0us\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build a classifier\n", "text_features = ['Lyric']\n", "\n", "\n", "train_dataset = Pool(data=train_X,\n", " label=train_y,\n", " text_features=text_features)\n", "\n", "model = CatBoostClassifier(iterations=100,\n", " learning_rate=1,\n", " depth=5,\n", " loss_function='MultiClass')\n", "\n", "model.fit(train_dataset)" ] }, { "cell_type": "code", "execution_count": 6, "id": "f770a8f2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0.6796\n" ] } ], "source": [ "# Estimate accuracy\n", "pred = model.predict(holdout_set.drop('Genre',axis=1))\n", "estimated_accuracy = accuracy_score(holdout_set['Genre'], pred)\n", "print(estimated_accuracy)\n", "pd.Series(estimated_accuracy).to_csv('ea.csv', index=False, header=False)" ] }, { "cell_type": "code", "execution_count": 7, "id": "035f58c6", "metadata": {}, 
"outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['Pop' 'Rock' 'Rock' ... 'Rock' 'Pop' 'Rock']\n" ] } ], "source": [ "# Predict testing set\n", "pred = model.predict(test_X)\n", "print(pred.flatten())\n", "# to_csv() returns None when given a path, so its result must not be assigned back to pred\n", "pd.Series(pred.flatten()).to_csv('pred.csv', index=False, header=False)" ] }, { "cell_type": "code", "execution_count": 8, "id": "2137c958", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(array(['Hip Hop', 'Pop', 'Rock'], dtype=object), array([ 821, 1385, 2794]))" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# to check number of instances of each genre in pred\n", "np.unique(model.predict(test_X), return_counts=True)" ] }, { "cell_type": "code", "execution_count": null, "id": "83aa22c4", "metadata": {}, "outputs": [], "source": [ "import pickle\n", "\n", "# Persist the trained model; the with-block closes the file even if dump() raises\n", "with open('model.pickle', 'wb') as model_file:\n", "    pickle.dump(model, model_file)" ] }, { "cell_type": "code", "execution_count": null, "id": "8c3b136d", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" } }, "nbformat": 4, "nbformat_minor": 5 }