Created
April 25, 2020 08:14
-
-
Save myui/8724902af53491201a34dba01071c88e to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"nbformat": 4, | |
"nbformat_minor": 0, | |
"metadata": { | |
"colab": { | |
"name": "fancs_sparse_matrix.ipynb", | |
"version": "0.3.2", | |
"provenance": [], | |
"include_colab_link": true | |
}, | |
"kernelspec": { | |
"name": "python3", | |
"display_name": "Python 3" | |
} | |
}, | |
"cells": [ | |
{ | |
"cell_type": "markdown", | |
"metadata": { | |
"id": "view-in-github", | |
"colab_type": "text" | |
}, | |
"source": [ | |
"<a href=\"https://colab.research.google.com/gist/myui/7dc30ac8ad4b192f34d41b97f032c82f/fancs_sparse_matrix.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "Dj4twnBTjyEL", | |
"colab_type": "code", | |
"outputId": "5d48133a-7562-411e-c92f-64c67973e1b3", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 170 | |
} | |
}, | |
"source": [ | |
"!pip install td-client\n", | |
"!pip install memory_profiler\n", | |
"%load_ext memory_profiler" | |
], | |
"execution_count": 16, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Requirement already satisfied: td-client in /usr/local/lib/python3.6/dist-packages (0.13.0)\n", | |
"Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from td-client) (1.12.0)\n", | |
"Requirement already satisfied: python-dateutil in /usr/local/lib/python3.6/dist-packages (from td-client) (2.5.3)\n", | |
"Requirement already satisfied: urllib3 in /usr/local/lib/python3.6/dist-packages (from td-client) (1.24.3)\n", | |
"Requirement already satisfied: msgpack in /usr/local/lib/python3.6/dist-packages (from td-client) (0.5.6)\n", | |
"Requirement already satisfied: memory_profiler in /usr/local/lib/python3.6/dist-packages (0.55.0)\n", | |
"Requirement already satisfied: psutil in /usr/local/lib/python3.6/dist-packages (from memory_profiler) (5.4.8)\n", | |
"The memory_profiler extension is already loaded. To reload it, use:\n", | |
" %reload_ext memory_profiler\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "rQsECqr5vw15", | |
"colab_type": "code", | |
"outputId": "9d03ac45-fe7c-44bc-a9fc-1d77fa4fdc1f", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 51 | |
} | |
}, | |
"source": [ | |
"from getpass import getpass\n", | |
"\n", | |
"# Read the key interactively so it is never stored in the notebook source.\n", | |
"td_api_key = getpass('Enter TD API KEY here')\n", | |
"# Fail fast on blank input so the confirmation below is not printed for an unusable key.\n", | |
"if not td_api_key.strip():\n", | |
"    raise ValueError(\"TD API key must not be empty\")\n", | |
"print(\"Succeeded.\")" | |
], | |
"execution_count": 17, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"Enter TD API KEY here··········\n", | |
"Succeeded.\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "SjKboSoEsCoM", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"from scipy.sparse import csr_matrix\n", | |
"from memory_profiler import profile\n", | |
"\n", | |
"def convert_to_csr_matrix(job):\n", | |
"    \"\"\"Build a CSR matrix from the rows returned by ``job.result()``.\n", | |
"\n", | |
"    The first column of each row is read as a sequence of feature strings,\n", | |
"    either ``name:value`` or a bare ``name`` (which gets the value 1.0).\n", | |
"    Feature names are mapped to column indices in first-seen order.\n", | |
"\n", | |
"    Returns:\n", | |
"        tuple: ``(vocabulary, matrix)`` where ``vocabulary`` maps feature name\n", | |
"        to column index and ``matrix`` is a float ``csr_matrix`` with one row\n", | |
"        per result row.\n", | |
"\n", | |
"    Raises:\n", | |
"        ValueError: if a feature value cannot be parsed as a float.\n", | |
"    \"\"\"\n", | |
"    indptr = [0]\n", | |
"    indices = []\n", | |
"    data = []\n", | |
"    vocabulary = {}\n", | |
"\n", | |
"    for row in job.result():\n", | |
"        # A NULL feature list produces an empty (all-zero) row instead of a TypeError.\n", | |
"        for col in row[0] or ():\n", | |
"            fv = col.split(':')\n", | |
"            # Parse here so `data` holds only floats rather than a mix of strings and floats.\n", | |
"            v = float(fv[1]) if len(fv) > 1 else 1.0\n", | |
"            indices.append(vocabulary.setdefault(fv[0], len(vocabulary)))\n", | |
"            data.append(v)\n", | |
"        # Close the current row: its entries end at the current length of `indices`.\n", | |
"        indptr.append(len(indices))\n", | |
"\n", | |
"    # Pass the shape explicitly: scipy cannot infer dimensions when there are no entries at all.\n", | |
"    shape = (len(indptr) - 1, len(vocabulary))\n", | |
"    return vocabulary, csr_matrix((data, indices, indptr), shape=shape, dtype=float)" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"colab_type": "code", | |
"outputId": "6ad1b879-3063-41ec-9d3e-59a84da7eab2", | |
"id": "T5q0TVKYn_Go", | |
"colab": { | |
"base_uri": "https://localhost:8080/", | |
"height": 751 | |
} | |
}, | |
"source": [ | |
"import tdclient\n", | |
"\n", | |
"# Run a small Presto query and convert its feature column into a sparse matrix.\n", | |
"with tdclient.Client(td_api_key) as td:\n", | |
"    job = td.query(\"work1\", \"SELECT features FROM kddcup_test LIMIT 10\", type=\"presto\")\n", | |
"    job.wait()\n", | |
"    # Stop here if the job did not succeed, rather than reading a result that may not exist.\n", | |
"    if not job.success():\n", | |
"        raise RuntimeError(\"TD job did not succeed: {}\".format(job.status()))\n", | |
"    \n", | |
"    # %memit reports the peak memory of the conversion; the assigned names are used below.\n", | |
"    %memit vocabulary, csr = convert_to_csr_matrix(job)\n", | |
"    print(vocabulary)\n", | |
"    # Densifying is only reasonable because the query is limited to 10 rows.\n", | |
"    print(csr.toarray())" | |
], | |
"execution_count": 23, | |
"outputs": [ | |
{ | |
"output_type": "stream", | |
"text": [ | |
"peak memory: 121.40 MiB, increment: 0.00 MiB\n", | |
"{'1981531': 0, '16250865': 1, '10331277': 2, '6113737': 3, '3961389': 4, '13638576': 5, '16737420': 6, '813223': 7, '197254': 8, '5091135': 9, '0': 10, '486624': 11, '8546950': 12, '897060': 13, '32967': 14, '14111284': 15, '1328807': 16, '6023668': 17, '13170561': 18, '8186092': 19, '6340403': 20, '10582526': 21, '15942785': 22, '442046': 23, '10635989': 24, '13067590': 25, '5030921': 26, '391998': 27, '16686133': 28, '8186170': 29, '14373278': 30, '11668384': 31, '14411221': 32, '15182339': 33, '13959010': 34, '12687772': 35, '2486891': 36, '7780400': 37, '10868501': 38, '13695281': 39, '5312010': 40, '1551595': 41, '13660719': 42, '6068107': 43, '659238': 44, '10502959': 45, '10512490': 46, '9362879': 47, '2528618': 48, '15334256': 49, '5431954': 50, '9204208': 51, '13210102': 52, '608773': 53, '14646961': 54, '5722703': 55, '1593653': 56, '6985745': 57, '11120830': 58, '12214482': 59, '15933125': 60, '15338484': 61, '3910131': 62, '15899168': 63, '12316049': 64}\n", | |
"[[1. 1. 1. 1. 1. 1. 1. 1. 0.5 0.5 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. 0. 0. 0. 1. 1. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. 0. 0. 0. 1. 0.5 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. 0. 0. 0. 1. 0.5 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. 0. 0. 0. 1. 1. 0.5 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1.\n", | |
" 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. 0. 0. 0. 1. 0.5 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 1. 0. 1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 1. 1. 1. 1. 0. 0. 0. 0. 0. 0. 0. ]\n", | |
" [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.\n", | |
" 0. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. ]]\n" | |
], | |
"name": "stdout" | |
} | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"metadata": { | |
"id": "fh4ae4U9r2Fy", | |
"colab_type": "code", | |
"colab": {} | |
}, | |
"source": [ | |
"" | |
], | |
"execution_count": 0, | |
"outputs": [] | |
} | |
] | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment