pplonski · April 2, 2020 13:13
diff --git a/xgboost_memory_consumption.ipynb b/xgboost_memory_consumption.ipynb
 {
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import xgboost as xgb\n",
    "import numpy as np\n",
    "import sys"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sys.version_info"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'1.0.2'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "xgb.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "def mem():\n",
    "    ''' Print the resident memory (VmRSS) of the current process in MB.\n",
    "\n",
    "    Reads /proc/self/status, so a procfs-style /proc (e.g. Linux) is required.\n",
    "    Raises RuntimeError when no VmRSS field is present.\n",
    "    '''\n",
    "    rss_kb = None\n",
    "    with open('/proc/self/status') as f:\n",
    "        for line in f:\n",
    "            if line.startswith('VmRSS:'):\n",
    "                # Line format is 'VmRSS: <value> kB'; take the numeric field\n",
    "                # instead of slicing off a fixed-width unit suffix.\n",
    "                rss_kb = float(line.split()[1])\n",
    "                break\n",
    "    if rss_kb is None:\n",
    "        raise RuntimeError('VmRSS not found in /proc/self/status')\n",
    "    print(\"Memory:\", np.round(rss_kb/1024.0), \"MB\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory: 109.0 MB\n"
     ]
    }
   ],
   "source": [
    "mem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "rows = 1000000\n",
    "cols = 1000\n",
    "X = np.random.rand(rows, cols)\n",
    "y = np.random.randint(low=0, high=10, size=rows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory: 7747.0 MB\n"
     ]
    }
   ],
   "source": [
    "mem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "dtrain = xgb.DMatrix(X, label=y)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory: 19206.0 MB\n"
     ]
    }
   ],
   "source": [
    "mem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "learner_params = {\"tree_method\": \"hist\"}\n",
    "boosting_rounds = 1\n",
    "model = xgb.train(learner_params, dtrain, boosting_rounds)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory: 27635.0 MB\n"
     ]
    }
   ],
   "source": [
    "mem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "pred1 = model.predict(dtrain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [],
   "source": [
    "model.save_model(\"model1.xgboost\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory: 23820.0 MB\n"
     ]
    }
   ],
   "source": [
    "mem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "del model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory: 15698.0 MB\n"
     ]
    }
   ],
   "source": [
    "mem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "model = xgb.Booster()\n",
    "model.load_model(\"model1.xgboost\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Memory: 15698.0 MB\n"
     ]
    }
   ],
   "source": [
    "mem()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [],
   "source": [
    "pred2 = model.predict(dtrain)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([1.7026957, 1.698778 , 1.6793869, 1.6793869, 1.0273438, 1.7026957,\n",
       "        1.700121 , 1.7026957, 1.7026957, 1.7026957], dtype=float32),\n",
       " array([1.7026957, 1.698778 , 1.6793869, 1.6793869, 1.0273438, 1.7026957,\n",
       "        1.700121 , 1.7026957, 1.7026957, 1.7026957], dtype=float32))"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "pred1[:10], pred2[:10]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv",
   "language": "python",
   "name": "venv"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
 }
	{
	"cells": [
	{
	"cell_type": "code",
	"execution_count": 1,
	"metadata": {},
	"outputs": [],
	"source": [
	"import xgboost as xgb\n",
	"import numpy as np\n",
	"import sys"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 2,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"sys.version_info(major=3, minor=6, micro=7, releaselevel='final', serial=0)"
	]
	},
	"execution_count": 2,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"sys.version_info"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 3,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"'1.0.2'"
	]
	},
	"execution_count": 3,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"xgb.__version__"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 4,
	"metadata": {},
	"outputs": [],
	"source": [
	"def mem():\n",
	"    ''' Print the resident memory (VmRSS) of the current process in MB.\n",
	"\n",
	"    Reads /proc/self/status, so a procfs-style /proc (e.g. Linux) is required.\n",
	"    Raises RuntimeError when no VmRSS field is present.\n",
	"    '''\n",
	"    rss_kb = None\n",
	"    with open('/proc/self/status') as f:\n",
	"        for line in f:\n",
	"            if line.startswith('VmRSS:'):\n",
	"                # Line format is 'VmRSS: <value> kB'; take the numeric field\n",
	"                # instead of slicing off a fixed-width unit suffix.\n",
	"                rss_kb = float(line.split()[1])\n",
	"                break\n",
	"    if rss_kb is None:\n",
	"        raise RuntimeError('VmRSS not found in /proc/self/status')\n",
	"    print(\"Memory:\", np.round(rss_kb/1024.0), \"MB\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 5,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Memory: 109.0 MB\n"
	]
	}
	],
	"source": [
	"mem()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 6,
	"metadata": {},
	"outputs": [],
	"source": [
	"rows = 1000000\n",
	"cols = 1000\n",
	"X = np.random.rand(rows, cols)\n",
	"y = np.random.randint(low=0, high=10, size=rows)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 7,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Memory: 7747.0 MB\n"
	]
	}
	],
	"source": [
	"mem()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 8,
	"metadata": {},
	"outputs": [],
	"source": [
	"dtrain = xgb.DMatrix(X, label=y)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 9,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Memory: 19206.0 MB\n"
	]
	}
	],
	"source": [
	"mem()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 10,
	"metadata": {},
	"outputs": [],
	"source": [
	"learner_params = {\"tree_method\": \"hist\"}\n",
	"boosting_rounds = 1\n",
	"model = xgb.train(learner_params, dtrain, boosting_rounds)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 11,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Memory: 27635.0 MB\n"
	]
	}
	],
	"source": [
	"mem()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 12,
	"metadata": {},
	"outputs": [],
	"source": [
	"pred1 = model.predict(dtrain)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 13,
	"metadata": {},
	"outputs": [],
	"source": [
	"model.save_model(\"model1.xgboost\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 14,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Memory: 23820.0 MB\n"
	]
	}
	],
	"source": [
	"mem()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 15,
	"metadata": {},
	"outputs": [],
	"source": [
	"del model"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 16,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Memory: 15698.0 MB\n"
	]
	}
	],
	"source": [
	"mem()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 17,
	"metadata": {},
	"outputs": [],
	"source": [
	"model = xgb.Booster()\n",
	"model.load_model(\"model1.xgboost\")"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 18,
	"metadata": {},
	"outputs": [
	{
	"name": "stdout",
	"output_type": "stream",
	"text": [
	"Memory: 15698.0 MB\n"
	]
	}
	],
	"source": [
	"mem()"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 21,
	"metadata": {},
	"outputs": [],
	"source": [
	"pred2 = model.predict(dtrain)"
	]
	},
	{
	"cell_type": "code",
	"execution_count": 22,
	"metadata": {},
	"outputs": [
	{
	"data": {
	"text/plain": [
	"(array([1.7026957, 1.698778 , 1.6793869, 1.6793869, 1.0273438, 1.7026957,\n",
	" 1.700121 , 1.7026957, 1.7026957, 1.7026957], dtype=float32),\n",
	" array([1.7026957, 1.698778 , 1.6793869, 1.6793869, 1.0273438, 1.7026957,\n",
	" 1.700121 , 1.7026957, 1.7026957, 1.7026957], dtype=float32))"
	]
	},
	"execution_count": 22,
	"metadata": {},
	"output_type": "execute_result"
	}
	],
	"source": [
	"pred1[:10], pred2[:10]"
	]
	}
	],
	"metadata": {
	"kernelspec": {
	"display_name": "venv",
	"language": "python",
	"name": "venv"
	},
	"language_info": {
	"codemirror_mode": {
	"name": "ipython",
	"version": 3
	},
	"file_extension": ".py",
	"mimetype": "text/x-python",
	"name": "python",
	"nbconvert_exporter": "python",
	"pygments_lexer": "ipython3",
	"version": "3.6.7"
	}
	},
	"nbformat": 4,
	"nbformat_minor": 2
	}