Created
July 21, 2021 01:22
-
-
Save audhiaprilliant/152e14d4d62448f7d286570e5e247e26 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "markdown", | |
| "id": "impossible-crystal", | |
| "metadata": {}, | |
| "source": [ | |
| "# Python Code Profiling" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "tough-attention", | |
| "metadata": {}, | |
| "source": [ | |
| "---" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "cosmetic-mississippi", | |
| "metadata": {}, | |
| "source": [ | |
| "## Import modules" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "id": "golden-north", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Module for timing\n", | |
| "import time\n", | |
| "# Module for deterministic profiling of Python programs\n", | |
| "import cProfile\n", | |
| "import pstats\n", | |
| "# Module for dealing with various types of I/O\n", | |
| "import io\n", | |
| "# Module for visualizing Python profiling results\n", | |
| "import snakeviz" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "automated-template", | |
| "metadata": {}, | |
| "source": [ | |
| "## Profiling - Jupyter Notebook" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "incident-repair", | |
| "metadata": {}, | |
| "source": [ | |
| "### 1 One function" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "id": "hungry-terminology", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def foo():\n", | |
| " time.sleep(1)\n", | |
| " print('foo')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "id": "stuck-milan", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "foo\n", | |
| " 40 function calls in 1.001 seconds\n", | |
| "\n", | |
| " Ordered by: standard name\n", | |
| "\n", | |
| " ncalls tottime percall cumtime percall filename:lineno(function)\n", | |
| " 1 0.000 0.000 1.001 1.001 <ipython-input-2-1e733772caf3>:1(foo)\n", | |
| " 1 0.000 0.000 1.001 1.001 <string>:1(<module>)\n", | |
| " 3 0.000 0.000 0.001 0.000 iostream.py:197(schedule)\n", | |
| " 2 0.000 0.000 0.000 0.000 iostream.py:310(_is_master_process)\n", | |
| " 2 0.000 0.000 0.000 0.000 iostream.py:323(_schedule_flush)\n", | |
| " 2 0.000 0.000 0.001 0.000 iostream.py:386(write)\n", | |
| " 3 0.000 0.000 0.000 0.000 iostream.py:93(_event_pipe)\n", | |
| " 3 0.001 0.000 0.001 0.000 socket.py:432(send)\n", | |
| " 3 0.000 0.000 0.000 0.000 threading.py:1017(_wait_for_tstate_lock)\n", | |
| " 3 0.000 0.000 0.000 0.000 threading.py:1071(is_alive)\n", | |
| " 3 0.000 0.000 0.000 0.000 threading.py:513(is_set)\n", | |
| " 1 0.000 0.000 1.001 1.001 {built-in method builtins.exec}\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method builtins.isinstance}\n", | |
| " 1 0.000 0.000 0.001 0.001 {built-in method builtins.print}\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method nt.getpid}\n", | |
| " 1 1.000 1.000 1.000 1.000 {built-in method time.sleep}\n", | |
| " 3 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.lock' objects}\n", | |
| " 3 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n", | |
| " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", | |
| "\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "cProfile.run('foo()')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "alone-amazon", | |
| "metadata": {}, | |
| "source": [ | |
| "### 2 More than one function" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "id": "educational-method", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Function for looping\n", | |
| "def loopingSomething():\n", | |
| " index = []\n", | |
| " for i in range(100):\n", | |
| " index.append(i)\n", | |
| "\n", | |
| "# Function for printing\n", | |
| "def printSomething():\n", | |
| " print('Print something here!')\n", | |
| "\n", | |
| "# Bundle above functions\n", | |
| "def main():\n", | |
| " # First function\n", | |
| " loopingSomething()\n", | |
| " # Second function\n", | |
| " printSomething()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "id": "unlike-testament", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Print something here!\n", | |
| " 141 function calls in 0.000 seconds\n", | |
| "\n", | |
| " Ordered by: standard name\n", | |
| "\n", | |
| " ncalls tottime percall cumtime percall filename:lineno(function)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-4-e76a9716b63a>:12(main)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-4-e76a9716b63a>:2(loopingSomething)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-4-e76a9716b63a>:8(printSomething)\n", | |
| " 1 0.000 0.000 0.000 0.000 <string>:1(<module>)\n", | |
| " 3 0.000 0.000 0.000 0.000 iostream.py:197(schedule)\n", | |
| " 2 0.000 0.000 0.000 0.000 iostream.py:310(_is_master_process)\n", | |
| " 2 0.000 0.000 0.000 0.000 iostream.py:323(_schedule_flush)\n", | |
| " 2 0.000 0.000 0.000 0.000 iostream.py:386(write)\n", | |
| " 3 0.000 0.000 0.000 0.000 iostream.py:93(_event_pipe)\n", | |
| " 3 0.000 0.000 0.000 0.000 socket.py:432(send)\n", | |
| " 3 0.000 0.000 0.000 0.000 threading.py:1017(_wait_for_tstate_lock)\n", | |
| " 3 0.000 0.000 0.000 0.000 threading.py:1071(is_alive)\n", | |
| " 3 0.000 0.000 0.000 0.000 threading.py:513(is_set)\n", | |
| " 1 0.000 0.000 0.000 0.000 {built-in method builtins.exec}\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method builtins.isinstance}\n", | |
| " 1 0.000 0.000 0.000 0.000 {built-in method builtins.print}\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method nt.getpid}\n", | |
| " 3 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.lock' objects}\n", | |
| " 3 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n", | |
| " 100 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}\n", | |
| " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", | |
| "\n", | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "cProfile.run('main()')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "alleged-colorado", | |
| "metadata": {}, | |
| "source": [ | |
| "### 3 Sort the outputs" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "id": "civilian-exchange", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Print something here!\n", | |
| " 183 function calls in 0.001 seconds\n", | |
| "\n", | |
| " Ordered by: call count\n", | |
| "\n", | |
| " ncalls tottime percall cumtime percall filename:lineno(function)\n", | |
| " 100 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}\n", | |
| " 4 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\compilerop.py:166(extra_flags)\n", | |
| " 4 0.000 0.000 0.000 0.000 {built-in method builtins.getattr}\n", | |
| " 4 0.000 0.000 0.000 0.000 {built-in method builtins.next}\n", | |
| " 3 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\threading.py:513(is_set)\n", | |
| " 3 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\threading.py:1017(_wait_for_tstate_lock)\n", | |
| " 3 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\threading.py:1071(is_alive)\n", | |
| " 3 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\zmq\\sugar\\socket.py:432(send)\n", | |
| " 3 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel\\iostream.py:93(_event_pipe)\n", | |
| " 3 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel\\iostream.py:197(schedule)\n", | |
| " 3 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n", | |
| " 3 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.lock' objects}\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:82(__init__)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:108(__enter__)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:117(__exit__)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\contextlib.py:238(helper)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\traitlets\\traitlets.py:533(get)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\traitlets\\traitlets.py:564(__get__)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\utils\\ipstruct.py:125(__getattr__)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\codeop.py:135(__call__)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:1278(user_global_ns)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3334(compare)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\interactiveshell.py:3396(run_code)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\hooks.py:103(__call__)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPython\\core\\hooks.py:168(pre_run_code_hook)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel\\iostream.py:310(_is_master_process)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel\\iostream.py:323(_schedule_flush)\n", | |
| " 2 0.000 0.000 0.000 0.000 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\ipykernel\\iostream.py:386(write)\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method nt.getpid}\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method builtins.compile}\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method builtins.exec}\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method builtins.isinstance}\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-4-e76a9716b63a>:8(printSomething)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-6-38d727f15956>:4(<module>)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-4-e76a9716b63a>:2(loopingSomething)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-4-e76a9716b63a>:12(main)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-6-38d727f15956>:5(<module>)\n", | |
| " 1 0.000 0.000 0.000 0.000 {built-in method builtins.print}\n", | |
| " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", | |
| "\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<pstats.Stats at 0x28cbe4186d0>" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Create a Profile object from the cProfile module\n", | |
| "profiler = cProfile.Profile()\n", | |
| "profiler.enable()\n", | |
| "main()\n", | |
| "profiler.disable()\n", | |
| "# Sort the profiling based on 'ncalls'\n", | |
| "stats = pstats.Stats(profiler).sort_stats('ncalls')\n", | |
| "# Print the profiling\n", | |
| "stats.print_stats()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "disturbed-timing", | |
| "metadata": {}, | |
| "source": [ | |
| "### 4 Remove the directory names" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "id": "liked-brother", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " 183 function calls in 0.001 seconds\n", | |
| "\n", | |
| " Ordered by: call count\n", | |
| "\n", | |
| " ncalls tottime percall cumtime percall filename:lineno(function)\n", | |
| " 100 0.000 0.000 0.000 0.000 {method 'append' of 'list' objects}\n", | |
| " 4 0.000 0.000 0.000 0.000 compilerop.py:166(extra_flags)\n", | |
| " 4 0.000 0.000 0.000 0.000 {built-in method builtins.getattr}\n", | |
| " 4 0.000 0.000 0.000 0.000 {built-in method builtins.next}\n", | |
| " 3 0.000 0.000 0.000 0.000 threading.py:513(is_set)\n", | |
| " 3 0.000 0.000 0.000 0.000 threading.py:1017(_wait_for_tstate_lock)\n", | |
| " 3 0.000 0.000 0.000 0.000 threading.py:1071(is_alive)\n", | |
| " 3 0.000 0.000 0.000 0.000 socket.py:432(send)\n", | |
| " 3 0.000 0.000 0.000 0.000 iostream.py:93(_event_pipe)\n", | |
| " 3 0.000 0.000 0.000 0.000 iostream.py:197(schedule)\n", | |
| " 3 0.000 0.000 0.000 0.000 {method 'append' of 'collections.deque' objects}\n", | |
| " 3 0.000 0.000 0.000 0.000 {method 'acquire' of '_thread.lock' objects}\n", | |
| " 2 0.000 0.000 0.000 0.000 contextlib.py:82(__init__)\n", | |
| " 2 0.000 0.000 0.000 0.000 contextlib.py:108(__enter__)\n", | |
| " 2 0.000 0.000 0.000 0.000 contextlib.py:117(__exit__)\n", | |
| " 2 0.000 0.000 0.000 0.000 contextlib.py:238(helper)\n", | |
| " 2 0.000 0.000 0.000 0.000 traitlets.py:533(get)\n", | |
| " 2 0.000 0.000 0.000 0.000 traitlets.py:564(__get__)\n", | |
| " 2 0.000 0.000 0.000 0.000 ipstruct.py:125(__getattr__)\n", | |
| " 2 0.000 0.000 0.000 0.000 codeop.py:135(__call__)\n", | |
| " 2 0.000 0.000 0.000 0.000 interactiveshell.py:1278(user_global_ns)\n", | |
| " 2 0.000 0.000 0.000 0.000 interactiveshell.py:3334(compare)\n", | |
| " 2 0.000 0.000 0.000 0.000 interactiveshell.py:3396(run_code)\n", | |
| " 2 0.000 0.000 0.000 0.000 hooks.py:103(__call__)\n", | |
| " 2 0.000 0.000 0.000 0.000 hooks.py:168(pre_run_code_hook)\n", | |
| " 2 0.000 0.000 0.000 0.000 iostream.py:310(_is_master_process)\n", | |
| " 2 0.000 0.000 0.000 0.000 iostream.py:323(_schedule_flush)\n", | |
| " 2 0.000 0.000 0.000 0.000 iostream.py:386(write)\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method nt.getpid}\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method builtins.compile}\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method builtins.exec}\n", | |
| " 2 0.000 0.000 0.000 0.000 {built-in method builtins.isinstance}\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-4-e76a9716b63a>:8(printSomething)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-6-38d727f15956>:4(<module>)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-4-e76a9716b63a>:2(loopingSomething)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-4-e76a9716b63a>:12(main)\n", | |
| " 1 0.000 0.000 0.000 0.000 <ipython-input-6-38d727f15956>:5(<module>)\n", | |
| " 1 0.000 0.000 0.000 0.000 {built-in method builtins.print}\n", | |
| " 1 0.000 0.000 0.000 0.000 {method 'disable' of '_lsprof.Profiler' objects}\n", | |
| "\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<pstats.Stats at 0x28cbe447e80>" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Remove directory names\n", | |
| "stats = pstats.Stats(profiler).strip_dirs().sort_stats('ncalls')\n", | |
| "stats.print_stats()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "legislative-dimension", | |
| "metadata": {}, | |
| "source": [ | |
| "### 5 Export the `cProfile` data" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "id": "instant-championship", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Export the profiler output into file\n", | |
| "stats = pstats.Stats(profiler)\n", | |
| "stats.dump_stats('../data/cProfileExport')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "rolled-capability", | |
| "metadata": {}, | |
| "source": [ | |
| "### 6 Export the `cProfile` data into `txt`" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "id": "broke-geology", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "result = io.StringIO()\n", | |
| "stats = pstats.Stats(profiler, stream = result).sort_stats('ncalls')\n", | |
| "stats.print_stats()\n", | |
| "\n", | |
| "# Save it into disk\n", | |
| "with open('../data/cProfileExport.txt', 'w+') as f:\n", | |
| " f.write(result.getvalue())" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "promising-mixture", | |
| "metadata": {}, | |
| "source": [ | |
| "### 7 Export the `cProfile` data into `csv`" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "id": "freelance-seafood", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "result = io.StringIO()\n", | |
| "stats = pstats.Stats(profiler, stream = result).sort_stats('ncalls')\n", | |
| "stats.print_stats()\n", | |
| "result = result.getvalue()\n", | |
| "\n", | |
| "# Chop the string into a csv-like buffer\n", | |
| "result = 'ncalls' + result.split('ncalls')[-1]\n", | |
| "result = '\\n'.join([','.join(line.rstrip().split(None, 6)) for line in result.split('\\n')])\n", | |
| "\n", | |
| "# Save it into disk\n", | |
| "with open('../data/cProfileExport.csv', 'w+') as f:\n", | |
| " f.write(result)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "id": "collected-expression", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Import module for data manipulation\n", | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "id": "elegant-cookie", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Load the data\n", | |
| "df_profiling = pd.read_csv('../data/cProfileExport.csv', sep = ',')" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "id": "mineral-niagara", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Dimension data: 39 rows and 6 columns\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>ncalls</th>\n", | |
| " <th>tottime</th>\n", | |
| " <th>percall</th>\n", | |
| " <th>cumtime</th>\n", | |
| " <th>percall.1</th>\n", | |
| " <th>filename:lineno(function)</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>100</th>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>{method</td>\n", | |
| " <td>'append' of 'list' objects}</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPy...</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>{built-in</td>\n", | |
| " <td>method builtins.getattr}</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>{built-in</td>\n", | |
| " <td>method builtins.next}</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>C:\\ProgramData\\Anaconda3\\lib\\threading.py:513(...</td>\n", | |
| " <td>NaN</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " ncalls tottime percall cumtime \\\n", | |
| "100 0.0 0.0 0.0 0.0 \n", | |
| "4 0.0 0.0 0.0 0.0 \n", | |
| "4 0.0 0.0 0.0 0.0 \n", | |
| "4 0.0 0.0 0.0 0.0 \n", | |
| "3 0.0 0.0 0.0 0.0 \n", | |
| "\n", | |
| " percall.1 \\\n", | |
| "100 {method \n", | |
| "4 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPy... \n", | |
| "4 {built-in \n", | |
| "4 {built-in \n", | |
| "3 C:\\ProgramData\\Anaconda3\\lib\\threading.py:513(... \n", | |
| "\n", | |
| " filename:lineno(function) \n", | |
| "100 'append' of 'list' objects} \n", | |
| "4 NaN \n", | |
| "4 method builtins.getattr} \n", | |
| "4 method builtins.next} \n", | |
| "3 NaN " | |
| ] | |
| }, | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Print the dimension\n", | |
| "print('Dimension data: {} rows and {} columns'.format(len(df_profiling), len(df_profiling.columns)))\n", | |
| "df_profiling.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "id": "bibliographic-gravity", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Fill missing values with empty strings so the string concatenation below works\n", | |
| "df_profiling.fillna(value = '', inplace = True)\n", | |
| "# Re-join the function description, which the CSV export split across the last two columns\n", | |
| "df_profiling['filename:lineno(function)'] = df_profiling['percall.1'] + ' ' + df_profiling['filename:lineno(function)']\n", | |
| "# Rename columns\n", | |
| "del df_profiling['percall.1']\n", | |
| "cols = ['tottime', 'percall', 'cumtime', 'percall_2', 'filename:lineno(function)']\n", | |
| "df_profiling.columns = cols\n", | |
| "df_profiling['ncalls'] = df_profiling.index\n", | |
| "df_profiling.reset_index(drop = True, inplace = True)\n", | |
| "# Reorder columns\n", | |
| "cols = ['ncalls', 'tottime', 'percall', 'cumtime', 'percall_2', 'filename:lineno(function)']\n", | |
| "df_profiling = df_profiling[cols]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "id": "catholic-terry", | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Dimension data: 39 rows and 6 columns\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>ncalls</th>\n", | |
| " <th>tottime</th>\n", | |
| " <th>percall</th>\n", | |
| " <th>cumtime</th>\n", | |
| " <th>percall_2</th>\n", | |
| " <th>filename:lineno(function)</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>100</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>{method 'append' of 'list' objects}</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>4</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPy...</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>4</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>{built-in method builtins.getattr}</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>4</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>{built-in method builtins.next}</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>3</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>0.0</td>\n", | |
| " <td>C:\\ProgramData\\Anaconda3\\lib\\threading.py:513(...</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " ncalls tottime percall cumtime percall_2 \\\n", | |
| "0 100 0.0 0.0 0.0 0.0 \n", | |
| "1 4 0.0 0.0 0.0 0.0 \n", | |
| "2 4 0.0 0.0 0.0 0.0 \n", | |
| "3 4 0.0 0.0 0.0 0.0 \n", | |
| "4 3 0.0 0.0 0.0 0.0 \n", | |
| "\n", | |
| " filename:lineno(function) \n", | |
| "0 {method 'append' of 'list' objects} \n", | |
| "1 C:\\ProgramData\\Anaconda3\\lib\\site-packages\\IPy... \n", | |
| "2 {built-in method builtins.getattr} \n", | |
| "3 {built-in method builtins.next} \n", | |
| "4 C:\\ProgramData\\Anaconda3\\lib\\threading.py:513(... " | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Print the dimension\n", | |
| "print('Dimension data: {} rows and {} columns'.format(len(df_profiling), len(df_profiling.columns)))\n", | |
| "df_profiling.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "listed-approval", | |
| "metadata": {}, | |
| "source": [ | |
| "### 8 The `cProfile` in production code with `scikit-learn`" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "id": "statutory-player", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def irisDataClassification(): \n", | |
| " # Import modules\n", | |
| " from sklearn import datasets\n", | |
| " from sklearn.model_selection import train_test_split\n", | |
| " from sklearn.linear_model import LogisticRegression\n", | |
| " from sklearn.metrics import accuracy_score\n", | |
| "\n", | |
| " # Import some data to play with\n", | |
| " iris = datasets.load_iris()\n", | |
| " X, y = iris.data, iris.target\n", | |
| " \n", | |
| " # Data splitting\n", | |
| " X_train, X_test, y_train, y_test = train_test_split(\n", | |
| " X,\n", | |
| " y,\n", | |
| " test_size = 0.2,\n", | |
| " random_state = 1,\n", | |
| " stratify = y)\n", | |
| " \n", | |
| " # Create logistic regression object\n", | |
| " model = LogisticRegression()\n", | |
| " \n", | |
| " # Data modelling with logistic regression\n", | |
| " model.fit(X_train, y_train)\n", | |
| " \n", | |
| " # Create prediction using testing data\n", | |
| " y_pred = model.predict(X_test)\n", | |
| " \n", | |
| " # Print out the accuracy\n", | |
| " accuracy = accuracy_score(y_test, y_pred)\n", | |
| " print(accuracy)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "communist-radar", | |
| "metadata": {}, | |
| "source": [ | |
| "### 9 The `cProfile` visualization" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "id": "noted-luxembourg", | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "%reload_ext snakeviz" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "id": "endless-feelings", | |
| "metadata": { | |
| "scrolled": false | |
| }, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "The snakeviz extension is already loaded. To reload it, use:\n", | |
| " %reload_ext snakeviz\n", | |
| "0.9666666666666667\n", | |
| " \n", | |
| "*** Profile stats marshalled to file 'C:\\\\Users\\\\AUDHIA~1\\\\AppData\\\\Local\\\\Temp\\\\tmp5otecf1l'. \n", | |
| "Embedding SnakeViz in this document...\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "\n", | |
| "<iframe id='snakeviz-867d3ea5-d807-11eb-b5a5-5065f309ee96' frameborder=0 seamless width='100%' height='1000'></iframe>\n", | |
| "<script>document.getElementById(\"snakeviz-867d3ea5-d807-11eb-b5a5-5065f309ee96\").setAttribute(\"src\", \"http://\" + document.location.hostname + \":8080/snakeviz/C%3A%5CUsers%5CAUDHIA~1%5CAppData%5CLocal%5CTemp%5Ctmp5otecf1l\")</script>\n" | |
| ], | |
| "text/plain": [ | |
| "<IPython.core.display.HTML object>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "%load_ext snakeviz\n", | |
| "%snakeviz irisDataClassification()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "attempted-combat", | |
| "metadata": {}, | |
| "source": [ | |
| "## Profiling - Terminal" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "id": "radical-operator", | |
| "metadata": {}, | |
| "source": [ | |
| "- Create a Python file (`.py`) that contains the entire program, for instance `iris_classification.py`:\n", | |
| "```\n", | |
| "def irisDataClassification():\n", | |
| " # Import modules\n", | |
| " from sklearn import datasets\n", | |
| " from sklearn.model_selection import train_test_split\n", | |
| " from sklearn.linear_model import LogisticRegression\n", | |
| " from sklearn.metrics import accuracy_score\n", | |
| "\n", | |
| " # Import some data to play with\n", | |
| " iris = datasets.load_iris()\n", | |
| " X, y = iris.data, iris.target\n", | |
| " \n", | |
| " # Data splitting\n", | |
| " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 1, stratify = y)\n", | |
| " \n", | |
| " # Create logistic regression object\n", | |
| " model = LogisticRegression()\n", | |
| " \n", | |
| " # Data modelling with logistic regression\n", | |
| " model.fit(X_train, y_train)\n", | |
| " \n", | |
| " # Create prediction using testing data\n", | |
| " y_pred = model.predict(X_test)\n", | |
| " \n", | |
| " # Print out the accuracy\n", | |
| " accuracy = accuracy_score(y_test, y_pred)\n", | |
| " print(accuracy)\n", | |
| "```\n", | |
| "```\n", | |
| "if __name__ == '__main__':\n", | |
| " irisDataClassification()\n", | |
| "```\n", | |
| "\n", | |
| "- Open the terminal\n", | |
| "- Run the command `python3 -m cProfile -o iris_classification.prof iris_classification.py` to write the profile to `iris_classification.prof`\n", | |
| "- Run `snakeviz iris_classification.prof` to open the profile with SnakeViz in the browser" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.8.3" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 5 | |
| } | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment