Skip to content

Instantly share code, notes, and snippets.

@tcvieira
Created August 6, 2019 15:01
Show Gist options
  • Save tcvieira/89743b108ca7391a17b5370de6142d39 to your computer and use it in GitHub Desktop.
Save tcvieira/89743b108ca7391a17b5370de6142d39 to your computer and use it in GitHub Desktop.
Ludwig Example
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Imports and Functions"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T16:56:21.314599Z",
"start_time": "2019-07-19T16:56:20.249496Z"
}
},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"import matplotlib.pyplot as plt\n",
"import logging\n",
"\n",
"import seaborn as sns\n",
"# Custom categorical palette shared by the plots in this notebook.\n",
"flatui = [\"#9b59b6\",\"#3498db\",\"#95a5a6\",\"#e74c3c\",\"#34495e\",\"#2ecc71\"]\n",
"# Configure style, palette and font scale in ONE call: sns.set() re-applies its\n",
"# defaults for every argument that is left out, so a trailing\n",
"# sns.set(font_scale=1.5) would silently undo the whitegrid style and the palette.\n",
"sns.set(style=\"whitegrid\", palette=flatui, font_scale=1.5)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T16:56:21.320352Z",
"start_time": "2019-07-19T16:56:21.316633Z"
}
},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T17:13:31.457779Z",
"start_time": "2019-07-19T17:13:31.451376Z"
}
},
"outputs": [],
"source": [
"from sklearn.metrics import confusion_matrix\n",
"import matplotlib.pyplot as plt\n",
"\n",
"import seaborn as sns\n",
"# Custom categorical palette shared by the plots in this notebook.\n",
"flatui = [\"#9b59b6\",\"#3498db\",\"#95a5a6\",\"#e74c3c\",\"#34495e\",\"#2ecc71\"]\n",
"# Configure style, palette and font scale in ONE call: sns.set() re-applies its\n",
"# defaults for every argument that is left out, so a trailing\n",
"# sns.set(font_scale=1.5) would silently undo the whitegrid style and the palette.\n",
"sns.set(style=\"whitegrid\", palette=flatui, font_scale=1.5)\n",
"\n",
"def conf_matrix(y_test, preds, classes, normalize_conf_mat=False, figsize=(10,10)):\n",
"    \"\"\"Plot a confusion-matrix heatmap with actual classes on rows and predictions on columns.\n",
"\n",
"    Args:\n",
"        y_test: true labels.\n",
"        preds: predicted labels, aligned positionally with ``y_test``.\n",
"        classes: label values to include, in display order.\n",
"        normalize_conf_mat: if True, divide every row by its total so each actual\n",
"            class sums to 1; otherwise show raw counts.\n",
"        figsize: matplotlib figure size in inches.\n",
"    \"\"\"\n",
"    # scikit-learn's signature is confusion_matrix(y_true, y_pred): rows are the actual\n",
"    # classes and columns the predicted ones, which matches the axis labels set below.\n",
"    conf_mat = confusion_matrix(y_test, preds, labels=classes)\n",
"    fig, ax = plt.subplots(figsize=figsize)\n",
"\n",
"    if normalize_conf_mat:\n",
"        # Row-normalise without relying on a numpy import in the notebook.\n",
"        row_totals = conf_mat.sum(axis=1, keepdims=True)\n",
"        # A class with no true samples has a zero row: keep it at 0 instead of NaN.\n",
"        row_totals[row_totals == 0] = 1\n",
"        conf_mat = conf_mat.astype('float') / row_totals\n",
"        fmt = '.2g'  # seaborn's default annotation format for floats\n",
"    else:\n",
"        fmt = 'd'  # raw integer counts\n",
"\n",
"    sns.heatmap(conf_mat, annot=True, fmt=fmt, cmap=\"BuPu\",\n",
"                xticklabels=classes, yticklabels=classes, ax=ax)\n",
"    ax.set_ylabel('Actual')\n",
"    ax.set_xlabel('Predicted')\n",
"    # Lay out after the labels exist so they are taken into account.\n",
"    fig.tight_layout()\n",
"    plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T17:14:07.460644Z",
"start_time": "2019-07-19T17:14:07.089755Z"
}
},
"outputs": [],
"source": [
"from sklearn.pipeline import Pipeline\n",
"from sklearn.feature_extraction.text import TfidfVectorizer\n",
"from sklearn.metrics import classification_report\n",
"import scikitplot as skplt\n",
" \n",
"def show_roc(y_test, y_probas, name, figsize=(10,10)):\n",
"    \"\"\"Draw per-class ROC curves with scikit-plot and title the figure ``name``.\n",
"\n",
"    Micro- and macro-averaged curves are switched off, so only the individual\n",
"    class curves are drawn.\n",
"    \"\"\"\n",
"    roc_ax = skplt.metrics.plot_roc(\n",
"        y_test,\n",
"        y_probas,\n",
"        plot_micro=False,\n",
"        plot_macro=False,\n",
"        figsize=figsize,\n",
"    )\n",
"    roc_ax.figure.tight_layout()\n",
"    roc_ax.set_title(name)\n",
"    plt.show()\n",
" \n",
"def show_report(y_test, y_pred):\n",
"    \"\"\"Print scikit-learn's classification report for the given true and predicted labels.\"\"\"\n",
"    report = classification_report(y_test, y_pred)\n",
"    print(report)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Data"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T16:56:21.934611Z",
"start_time": "2019-07-19T16:56:21.331839Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"43749\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>TIPO_FINALIZACAO</th>\n",
" <th>label</th>\n",
" <th>descricao_clean</th>\n",
" <th>target</th>\n",
" <th>label_atrib</th>\n",
" <th>yake_keywords</th>\n",
" <th>NM_OCUPACAO_MANIFESTANTE</th>\n",
" <th>ocupacao_mp</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>49951</td>\n",
" <td>Falta de atribuição</td>\n",
" <td>SAC</td>\n",
" <td>ola preciso informacao quanto procedimento dev...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>htm devo cartorio novembro preciso informacao</td>\n",
" <td>Outros</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>49970</td>\n",
" <td>Falta de atribuição</td>\n",
" <td>SAC</td>\n",
" <td>prezado ministerio publico peco desculpas caso...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>rebitte seeduc grau diario educacao diploma of...</td>\n",
" <td>Outros</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>49971</td>\n",
" <td>Atribuição da Ouvidoria</td>\n",
" <td>OUVIDORIA</td>\n",
" <td>bom duvida quanto ordem chamadas concurso mpu ...</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>ampla concorrencia atencao pne proximo duvida ...</td>\n",
" <td>Outros</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>50005</td>\n",
" <td>Falta de atribuição</td>\n",
" <td>SAC</td>\n",
" <td>ola bom sou estudante curso direito cursando s...</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>cadastro nacional presos liberdade criacao tem...</td>\n",
" <td>Outros</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>50012</td>\n",
" <td>Arquivamento sumário</td>\n",
" <td>OUVIDORIA</td>\n",
" <td>ola bom sou estudante curso direito cursando s...</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>cadastro nacional presos tema informacoes acer...</td>\n",
" <td>Outros</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID TIPO_FINALIZACAO label \\\n",
"0 49951 Falta de atribuição SAC \n",
"1 49970 Falta de atribuição SAC \n",
"2 49971 Atribuição da Ouvidoria OUVIDORIA \n",
"3 50005 Falta de atribuição SAC \n",
"4 50012 Arquivamento sumário OUVIDORIA \n",
"\n",
" descricao_clean target label_atrib \\\n",
"0 ola preciso informacao quanto procedimento dev... 1 0 \n",
"1 prezado ministerio publico peco desculpas caso... 1 0 \n",
"2 bom duvida quanto ordem chamadas concurso mpu ... 0 1 \n",
"3 ola bom sou estudante curso direito cursando s... 1 0 \n",
"4 ola bom sou estudante curso direito cursando s... 0 0 \n",
"\n",
" yake_keywords NM_OCUPACAO_MANIFESTANTE \\\n",
"0 htm devo cartorio novembro preciso informacao Outros \n",
"1 rebitte seeduc grau diario educacao diploma of... Outros \n",
"2 ampla concorrencia atencao pne proximo duvida ... Outros \n",
"3 cadastro nacional presos liberdade criacao tem... Outros \n",
"4 cadastro nacional presos tema informacoes acer... Outros \n",
"\n",
" ocupacao_mp \n",
"0 0 \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Load the feature table from CSV, print its row count and preview the first rows.\n",
"df_feats = pd.read_csv('../data/ouvidoria_feats_final.csv', low_memory=False)\n",
"print(df_feats.shape[0])\n",
"display(df_feats.head(5))"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T16:56:22.920847Z",
"start_time": "2019-07-19T16:56:22.781170Z"
}
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 432x288 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Class balance: number of rows with a non-null 'target' for each label.\n",
"df_feats.groupby('label')['target'].agg('count').plot(kind='bar', ylim=0)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T16:56:25.781136Z",
"start_time": "2019-07-19T16:56:25.778690Z"
}
},
"outputs": [],
"source": [
"# df_feats['label'] = df_feats['label'].astype('category')\n",
"# df_feats['label_atrib'] = df_feats['label_atrib'].astype('category')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Split"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T16:56:26.255352Z",
"start_time": "2019-07-19T16:56:26.208461Z"
}
},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"# Stratified 80/20 hold-out split. The feature frame keeps the 'label' column next to\n",
"# the text because the same frame is later handed to Ludwig for training.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    df_feats[['descricao_clean', 'label']],\n",
"    df_feats['label'],\n",
"    stratify=df_feats['label'],\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Model"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T16:56:30.064890Z",
"start_time": "2019-07-19T16:56:30.059972Z"
}
},
"outputs": [],
"source": [
"# Ludwig model definition: one text input, one categorical output, a single epoch.\n",
"# Optional: add a 'preprocessing' -> 'category' -> 'fill_value' entry (e.g. 'SAC')\n",
"# to override the fill value used for missing labels.\n",
"model_definition = dict(\n",
"    input_features=[dict(name='descricao_clean', type='text')],\n",
"    output_features=[dict(name='label', type='category')],\n",
"    training=dict(epochs=1),\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T16:56:33.825101Z",
"start_time": "2019-07-19T16:56:31.959701Z"
}
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/thiagovieira/anaconda3/lib/python3.6/site-packages/h5py/__init__.py:36: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.\n",
" from ._conv import register_converters as _register_converters\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"WARNING: The TensorFlow contrib module will not be included in TensorFlow 2.0.\n",
"For more information, please see:\n",
" * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md\n",
" * https://github.com/tensorflow/addons\n",
"If you depend on functionality not listed there, please file an issue.\n",
"\n"
]
}
],
"source": [
"from ludwig.api import LudwigModel"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T16:56:34.782858Z",
"start_time": "2019-07-19T16:56:34.780156Z"
}
},
"outputs": [],
"source": [
"# build the Ludwig model from the definition (training happens in a later cell)\n",
"model = LudwigModel(model_definition)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T16:56:35.159209Z",
"start_time": "2019-07-19T16:56:35.148619Z"
}
},
"outputs": [
{
"data": {
"text/plain": [
"{'input_features': [{'name': 'descricao_clean',\n",
" 'type': 'text',\n",
" 'tied_weights': None,\n",
" 'encoder': 'parallel_cnn',\n",
" 'level': 'word'}],\n",
" 'output_features': [{'name': 'label',\n",
" 'type': 'category',\n",
" 'loss': {'type': 'softmax_cross_entropy',\n",
" 'sampler': None,\n",
" 'negative_samples': 0,\n",
" 'distortion': 1,\n",
" 'unique': False,\n",
" 'labels_smoothing': 0,\n",
" 'class_weights': 1,\n",
" 'robust_lambda': 0,\n",
" 'confidence_penalty': 0,\n",
" 'class_similarities_temperature': 0,\n",
" 'weight': 1},\n",
" 'top_k': 3,\n",
" 'dependencies': [],\n",
" 'reduce_input': 'sum',\n",
" 'reduce_dependencies': 'sum'}],\n",
" 'training': {'epochs': 1,\n",
" 'optimizer': {'type': 'adam',\n",
" 'beta1': 0.9,\n",
" 'beta2': 0.999,\n",
" 'epsilon': 1e-08},\n",
" 'regularizer': 'l2',\n",
" 'regularization_lambda': 0,\n",
" 'learning_rate': 0.001,\n",
" 'batch_size': 128,\n",
" 'eval_batch_size': 0,\n",
" 'dropout_rate': 0.0,\n",
" 'early_stop': 5,\n",
" 'reduce_learning_rate_on_plateau': 0,\n",
" 'reduce_learning_rate_on_plateau_patience': 5,\n",
" 'reduce_learning_rate_on_plateau_rate': 0.5,\n",
" 'increase_batch_size_on_plateau': 0,\n",
" 'increase_batch_size_on_plateau_patience': 5,\n",
" 'increase_batch_size_on_plateau_rate': 2,\n",
" 'increase_batch_size_on_plateau_max': 512,\n",
" 'decay': False,\n",
" 'decay_steps': 10000,\n",
" 'decay_rate': 0.96,\n",
" 'staircase': False,\n",
" 'gradient_clipping': None,\n",
" 'validation_field': 'combined',\n",
" 'validation_measure': 'loss',\n",
" 'bucketing_field': None,\n",
" 'learning_rate_warmup_epochs': 5},\n",
" 'preprocessing': {'force_split': False,\n",
" 'split_probabilities': (0.7, 0.1, 0.2),\n",
" 'stratify': None,\n",
" 'text': {'char_format': 'characters',\n",
" 'char_sequence_length_limit': 1024,\n",
" 'char_most_common': 70,\n",
" 'word_format': 'space_punct',\n",
" 'word_sequence_length_limit': 256,\n",
" 'word_most_common': 20000,\n",
" 'padding_symbol': '<PAD>',\n",
" 'unknown_symbol': '<UNK>',\n",
" 'padding': 'right',\n",
" 'lowercase': True,\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'fill_value': ''},\n",
" 'category': {'most_common': 10000,\n",
" 'lowercase': False,\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'fill_value': '<UNK>'},\n",
" 'set': {'format': 'space',\n",
" 'most_common': 10000,\n",
" 'lowercase': False,\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'fill_value': ''},\n",
" 'bag': {'format': 'space',\n",
" 'most_common': 10000,\n",
" 'lowercase': False,\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'fill_value': ''},\n",
" 'binary': {'missing_value_strategy': 'fill_with_const', 'fill_value': 0},\n",
" 'numerical': {'missing_value_strategy': 'fill_with_const', 'fill_value': 0},\n",
" 'sequence': {'sequence_length_limit': 256,\n",
" 'most_common': 20000,\n",
" 'padding_symbol': '<PAD>',\n",
" 'unknown_symbol': '<UNK>',\n",
" 'padding': 'right',\n",
" 'format': 'space',\n",
" 'lowercase': False,\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'fill_value': ''},\n",
" 'timeseries': {'timeseries_length_limit': 256,\n",
" 'padding_value': 0,\n",
" 'padding': 'right',\n",
" 'format': 'space',\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'fill_value': ''},\n",
" 'image': {'missing_value_strategy': 'backfill',\n",
" 'in_memory': True,\n",
" 'resize_method': 'crop_or_pad'}},\n",
" 'combiner': {'type': 'concat'}}"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.model_definition"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T17:10:29.523490Z",
"start_time": "2019-07-19T16:56:59.310679Z"
},
"scrolled": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:Experiment name: api_experiment\n",
"INFO:root:Model name: run\n",
"INFO:root:Output path: results/api_experiment_run_1\n",
"INFO:root:\n",
"\n",
"INFO:root:ludwig_version: '0.1.2'\n",
"INFO:root:command: ('/Users/thiagovieira/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py '\n",
" '-f '\n",
" '/Users/thiagovieira/Library/Jupyter/runtime/kernel-24d1c02f-716e-4743-ba1d-3327119bfb84.json')\n",
"INFO:root:commit_hash: '0cd418d44f08'\n",
"INFO:root:random_seed: 42\n",
"INFO:root:model_definition: { 'combiner': {'type': 'concat'},\n",
" 'input_features': [ { 'encoder': 'parallel_cnn',\n",
" 'level': 'word',\n",
" 'name': 'descricao_clean',\n",
" 'tied_weights': None,\n",
" 'type': 'text'}],\n",
" 'output_features': [ { 'dependencies': [],\n",
" 'loss': { 'class_similarities_temperature': 0,\n",
" 'class_weights': 1,\n",
" 'confidence_penalty': 0,\n",
" 'distortion': 1,\n",
" 'labels_smoothing': 0,\n",
" 'negative_samples': 0,\n",
" 'robust_lambda': 0,\n",
" 'sampler': None,\n",
" 'type': 'softmax_cross_entropy',\n",
" 'unique': False,\n",
" 'weight': 1},\n",
" 'name': 'label',\n",
" 'reduce_dependencies': 'sum',\n",
" 'reduce_input': 'sum',\n",
" 'top_k': 3,\n",
" 'type': 'category'}],\n",
" 'preprocessing': { 'bag': { 'fill_value': '',\n",
" 'format': 'space',\n",
" 'lowercase': False,\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'most_common': 10000},\n",
" 'binary': { 'fill_value': 0,\n",
" 'missing_value_strategy': 'fill_with_const'},\n",
" 'category': { 'fill_value': '<UNK>',\n",
" 'lowercase': False,\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'most_common': 10000},\n",
" 'force_split': False,\n",
" 'image': { 'in_memory': True,\n",
" 'missing_value_strategy': 'backfill',\n",
" 'resize_method': 'crop_or_pad'},\n",
" 'numerical': { 'fill_value': 0,\n",
" 'missing_value_strategy': 'fill_with_const'},\n",
" 'sequence': { 'fill_value': '',\n",
" 'format': 'space',\n",
" 'lowercase': False,\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'most_common': 20000,\n",
" 'padding': 'right',\n",
" 'padding_symbol': '<PAD>',\n",
" 'sequence_length_limit': 256,\n",
" 'unknown_symbol': '<UNK>'},\n",
" 'set': { 'fill_value': '',\n",
" 'format': 'space',\n",
" 'lowercase': False,\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'most_common': 10000},\n",
" 'split_probabilities': (0.7, 0.1, 0.2),\n",
" 'stratify': None,\n",
" 'text': { 'char_format': 'characters',\n",
" 'char_most_common': 70,\n",
" 'char_sequence_length_limit': 1024,\n",
" 'fill_value': '',\n",
" 'lowercase': True,\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'padding': 'right',\n",
" 'padding_symbol': '<PAD>',\n",
" 'unknown_symbol': '<UNK>',\n",
" 'word_format': 'space_punct',\n",
" 'word_most_common': 20000,\n",
" 'word_sequence_length_limit': 256},\n",
" 'timeseries': { 'fill_value': '',\n",
" 'format': 'space',\n",
" 'missing_value_strategy': 'fill_with_const',\n",
" 'padding': 'right',\n",
" 'padding_value': 0,\n",
" 'timeseries_length_limit': 256}},\n",
" 'training': { 'batch_size': 128,\n",
" 'bucketing_field': None,\n",
" 'decay': False,\n",
" 'decay_rate': 0.96,\n",
" 'decay_steps': 10000,\n",
" 'dropout_rate': 0.0,\n",
" 'early_stop': 5,\n",
" 'epochs': 1,\n",
" 'eval_batch_size': 0,\n",
" 'gradient_clipping': None,\n",
" 'increase_batch_size_on_plateau': 0,\n",
" 'increase_batch_size_on_plateau_max': 512,\n",
" 'increase_batch_size_on_plateau_patience': 5,\n",
" 'increase_batch_size_on_plateau_rate': 2,\n",
" 'learning_rate': 0.001,\n",
" 'learning_rate_warmup_epochs': 5,\n",
" 'optimizer': { 'beta1': 0.9,\n",
" 'beta2': 0.999,\n",
" 'epsilon': 1e-08,\n",
" 'type': 'adam'},\n",
" 'reduce_learning_rate_on_plateau': 0,\n",
" 'reduce_learning_rate_on_plateau_patience': 5,\n",
" 'reduce_learning_rate_on_plateau_rate': 0.5,\n",
" 'regularization_lambda': 0,\n",
" 'regularizer': 'l2',\n",
" 'staircase': False,\n",
" 'validation_field': 'combined',\n",
" 'validation_measure': 'loss'}}\n",
"INFO:root:\n",
"\n",
"INFO:root:Using full dataframe\n",
"INFO:root:Building dataset (it may take a while)\n",
"INFO:root:Training set: 24602\n",
"INFO:root:Validation set: 3434\n",
"INFO:root:Test set: 6963\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/thiagovieira/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Colocations handled automatically by placer.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/thiagovieira/anaconda3/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py:263: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Colocations handled automatically by placer.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/thiagovieira/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/losses/losses_impl.py:209: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use tf.cast instead.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/thiagovieira/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/losses/losses_impl.py:209: to_float (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use tf.cast instead.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/thiagovieira/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use tf.cast instead.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/thiagovieira/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_ops.py:3066: to_int32 (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use tf.cast instead.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/thiagovieira/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:102: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Deprecated in favor of operator or tf.math.divide.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/thiagovieira/anaconda3/lib/python3.6/site-packages/tensorflow/python/ops/math_grad.py:102: div (from tensorflow.python.ops.math_ops) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Deprecated in favor of operator or tf.math.divide.\n",
"INFO:root:\n",
"INFO:root:╒══════════╕\n",
"INFO:root:│ TRAINING │\n",
"INFO:root:╘══════════╛\n",
"INFO:root:\n",
"INFO:root:\n",
"Epoch 1\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Training: 100%|██████████| 193/193 [09:32<00:00, 2.24s/it]\n",
"Evaluation train: 100%|██████████| 193/193 [02:36<00:00, 1.59it/s]\n",
"Evaluation vali : 100%|██████████| 27/27 [00:21<00:00, 1.29it/s]\n",
"Evaluation test : 100%|██████████| 55/55 [00:44<00:00, 1.47it/s]"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:Took 13m 15.4092s\n",
"INFO:root:╒═════════╤════════╤════════════╤═════════════╕\n",
"│ label │ loss │ accuracy │ hits_at_k │\n",
"╞═════════╪════════╪════════════╪═════════════╡\n",
"│ train │ 0.1708 │ 0.9341 │ 1.0000 │\n",
"├─────────┼────────┼────────────┼─────────────┤\n",
"│ vali │ 0.3286 │ 0.8623 │ 1.0000 │\n",
"├─────────┼────────┼────────────┼─────────────┤\n",
"│ test │ 0.3322 │ 0.8581 │ 1.0000 │\n",
"╘═════════╧════════╧════════════╧═════════════╛\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:root:Validation loss on combined improved, model saved\n",
"INFO:root:\n",
"INFO:root:Best validation model epoch:\n",
"INFO:root:Best validation model loss on validation set combined: 0.32862120779989484\n",
"INFO:root:Best validation model loss on test set combined: 0.33219181626722694\n",
"INFO:root:\n",
"Finished: api_experiment_run\n",
"INFO:root:Saved to: results/api_experiment_run_1\n"
]
}
],
"source": [
"train_stats = model.train(X_train, logging_level=logging.INFO)\n",
"# save and reload the trained model:\n",
"# model.save('ludwig_model')\n",
"# model = LudwigModel.load('ludwig_model')"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T17:12:09.225892Z",
"start_time": "2019-07-19T17:11:09.470510Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/thiagovieira/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use standard file APIs to check for files with this prefix.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"WARNING:tensorflow:From /Users/thiagovieira/anaconda3/lib/python3.6/site-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.\n",
"Instructions for updating:\n",
"Use standard file APIs to check for files with this prefix.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"INFO:tensorflow:Restoring parameters from results/api_experiment_run_1/model/model_weights_progress\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"INFO:tensorflow:Restoring parameters from results/api_experiment_run_1/model/model_weights_progress\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Evaluation: 100%|██████████| 69/69 [00:56<00:00, 1.46it/s]\n"
]
}
],
"source": [
"# obtain predictions\n",
"predictions = model.predict(X_test, logging_level=logging.INFO)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T17:14:28.020596Z",
"start_time": "2019-07-19T17:14:28.010445Z"
},
"scrolled": false
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>label_predictions</th>\n",
" <th>label_probabilities_&lt;UNK&gt;</th>\n",
" <th>label_probabilities_SAC</th>\n",
" <th>label_probabilities_OUVIDORIA</th>\n",
" <th>label_probability</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>SAC</td>\n",
" <td>3.138596e-29</td>\n",
" <td>0.937511</td>\n",
" <td>0.062489</td>\n",
" <td>0.937511</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>SAC</td>\n",
" <td>6.551658e-16</td>\n",
" <td>0.947959</td>\n",
" <td>0.052041</td>\n",
" <td>0.947959</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>SAC</td>\n",
" <td>8.961570e-28</td>\n",
" <td>0.913988</td>\n",
" <td>0.086012</td>\n",
" <td>0.913988</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>SAC</td>\n",
" <td>1.002333e-33</td>\n",
" <td>0.973154</td>\n",
" <td>0.026846</td>\n",
" <td>0.973154</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>SAC</td>\n",
" <td>5.913845e-23</td>\n",
" <td>0.873283</td>\n",
" <td>0.126717</td>\n",
" <td>0.873283</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" label_predictions label_probabilities_<UNK> label_probabilities_SAC \\\n",
"0 SAC 3.138596e-29 0.937511 \n",
"1 SAC 6.551658e-16 0.947959 \n",
"2 SAC 8.961570e-28 0.913988 \n",
"3 SAC 1.002333e-33 0.973154 \n",
"4 SAC 5.913845e-23 0.873283 \n",
"\n",
" label_probabilities_OUVIDORIA label_probability \n",
"0 0.062489 0.937511 \n",
"1 0.052041 0.947959 \n",
"2 0.086012 0.913988 \n",
"3 0.026846 0.973154 \n",
"4 0.126717 0.873283 "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"predictions.head()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T17:19:27.193454Z",
"start_time": "2019-07-19T17:19:27.189762Z"
}
},
"outputs": [],
"source": [
"# Hard label predictions, plus the per-class probabilities passed to the ROC plot.\n",
"y_pred = predictions.loc[:, 'label_predictions']\n",
"y_probas = predictions.loc[:, ['label_probabilities_OUVIDORIA', 'label_probabilities_SAC']]"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T17:19:31.633584Z",
"start_time": "2019-07-19T17:19:31.420504Z"
},
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" precision recall f1-score support\n",
"\n",
" OUVIDORIA 0.76 0.35 0.48 1617\n",
" SAC 0.87 0.97 0.92 7133\n",
"\n",
" accuracy 0.86 8750\n",
" macro avg 0.82 0.66 0.70 8750\n",
"weighted avg 0.85 0.86 0.84 8750\n",
"\n"
]
}
],
"source": [
"show_report(y_test, y_pred)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T17:19:36.138421Z",
"start_time": "2019-07-19T17:19:35.770442Z"
}
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x720 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"show_roc(y_test, y_probas, 'Ludwig Parallel CNN')"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T17:19:59.352660Z",
"start_time": "2019-07-19T17:19:59.049235Z"
}
},
"outputs": [
{
"data": {
"image/png": "\n",
"text/plain": [
"<Figure size 720x720 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"conf_matrix(y_test, y_pred, ['OUVIDORIA', 'SAC'])"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"ExecuteTime": {
"end_time": "2019-07-19T17:20:05.398528Z",
"start_time": "2019-07-19T17:20:05.380709Z"
}
},
"outputs": [],
"source": [
"model.close()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment