Created
October 17, 2019 03:08
-
-
Save doloopwhile/7e9663cf85d0dcad32a4bbf2c91981d1 to your computer and use it in GitHub Desktop.
自分が投稿したQiita記事のLike数・View数を収集する
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"QIITA_AUTH_TOKEN = input()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from urllib.parse import urlparse\n", | |
"\n", | |
"\n", | |
"ITEMS_URL = urlparse('https://qiita.com/api/v2/authenticated_user/items')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# 100件を越えるとページングが必要だが、そんなに投稿していないので不要\n", | |
"import urllib.request\n", | |
"\n", | |
"url = ITEMS_URL._replace(query='per_page=100').geturl()\n", | |
"req = urllib.request.Request(url, headers={ 'Authorization': f'Bearer {QIITA_AUTH_TOKEN}' })\n", | |
"\n", | |
"with urllib.request.urlopen(req) as res:\n", | |
" body = res.read()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"import json\n", | |
"\n", | |
"def get_item(id):\n", | |
" url = f'https://qiita.com/api/v2/items/{id}'\n", | |
" req = urllib.request.Request(url, headers={ 'Authorization': f'Bearer {QIITA_AUTH_TOKEN}' })\n", | |
" with urllib.request.urlopen(req) as res:\n", | |
" body = res.read()\n", | |
" return json.loads(body)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"['rendered_body',\n", | |
" 'body',\n", | |
" 'coediting',\n", | |
" 'comments_count',\n", | |
" 'created_at',\n", | |
" 'group',\n", | |
" 'id',\n", | |
" 'likes_count',\n", | |
" 'private',\n", | |
" 'reactions_count',\n", | |
" 'tags',\n", | |
" 'title',\n", | |
" 'updated_at',\n", | |
" 'url',\n", | |
" 'user',\n", | |
" 'page_views_count']" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"import json\n", | |
"items = json.loads(body)\n", | |
"display(list(items[0].keys()))\n", | |
"\n", | |
"filled_items = [get_item(x['id']) for x in items]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"Looking in indexes: https://packages.m3internal.com/repository/pypi-all/simple\n", | |
"Collecting pandas\n", | |
"\u001b[?25l Downloading https://packages.m3internal.com/repository/pypi-all/packages/07/cf/1b6917426a9a16fd79d56385d0d907f344188558337d6b81196792f857e9/pandas-0.25.1.tar.gz (12.6MB)\n", | |
"\u001b[K |████████████████████████████████| 12.6MB 4.6MB/s eta 0:00:01 |██████████████ | 5.5MB 4.6MB/s eta 0:00:02\n", | |
"\u001b[?25hRequirement already satisfied: python-dateutil>=2.6.1 in /Users/k-omoto/.pyenv/versions/3.8.0/lib/python3.8/site-packages (from pandas) (2.8.0)\n", | |
"Collecting pytz>=2017.2 (from pandas)\n", | |
"\u001b[?25l Downloading https://packages.m3internal.com/repository/pypi-all/packages/e7/f9/f0b53f88060247251bf481fa6ea62cd0d25bf1b11a87888e53ce5b7c8ad2/pytz-2019.3-py2.py3-none-any.whl (509kB)\n", | |
"\u001b[K |████████████████████████████████| 512kB 4.4MB/s eta 0:00:01\n", | |
"\u001b[?25hCollecting numpy>=1.13.3 (from pandas)\n", | |
"\u001b[?25l Downloading https://packages.m3internal.com/repository/pypi-all/packages/ac/36/325b27ef698684c38b1fe2e546e2e7ef9cecd7037bcdb35c87efec4356af/numpy-1.17.2.zip (6.5MB)\n", | |
"\u001b[K |████████████████████████████████| 6.5MB 4.1MB/s eta 0:00:01\n", | |
"\u001b[?25hRequirement already satisfied: six>=1.5 in /Users/k-omoto/.pyenv/versions/3.8.0/lib/python3.8/site-packages (from python-dateutil>=2.6.1->pandas) (1.12.0)\n", | |
"Installing collected packages: pytz, numpy, pandas\n", | |
" Running setup.py install for numpy ... \u001b[?25ldone\n", | |
"\u001b[?25h Running setup.py install for pandas ... \u001b[?25ldone\n", | |
"\u001b[?25hSuccessfully installed numpy-1.17.2 pandas-0.25.1 pytz-2019.3\n", | |
"\u001b[33mWARNING: You are using pip version 19.2.3, however version 19.3 is available.\n", | |
"You should consider upgrading via the 'pip install --upgrade pip' command.\u001b[0m\n" | |
] | |
} | |
], | |
"source": [ | |
"!pip install pandas" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 24, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"~/Downloads/qiita-20191017.csv\n" | |
] | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"from pathlib import Path\n", | |
"\n", | |
"path = Path(input()).expanduser()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 22, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>URL</th>\n", | |
" <th>タイトル</th>\n", | |
" <th>いいね数</th>\n", | |
" <th>閲覧数</th>\n", | |
" <th>いいね・閲覧比</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <td>0</td>\n", | |
" <td>https://qiita.com/tonluqclml/items/2a2ab902e29...</td>\n", | |
" <td>「美しい」にモヤったら「認知負荷が小さい」と言いかえよう</td>\n", | |
" <td>82</td>\n", | |
" <td>1154</td>\n", | |
" <td>7.11%</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>1</td>\n", | |
" <td>https://qiita.com/tonluqclml/items/65035e0afcf...</td>\n", | |
" <td>よくあるPythonプロジェクトのREADME(開発手順の部分のみ)</td>\n", | |
" <td>3</td>\n", | |
" <td>305</td>\n", | |
" <td>0.98%</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>2</td>\n", | |
" <td>https://qiita.com/tonluqclml/items/db797d9ad03...</td>\n", | |
" <td>Python 3.7でdictが順序を保存するようになって良かったとしみじみ思う</td>\n", | |
" <td>9</td>\n", | |
" <td>1457</td>\n", | |
" <td>0.62%</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>3</td>\n", | |
" <td>https://qiita.com/tonluqclml/items/ece97deb7f0...</td>\n", | |
" <td>`return \"エラーメッセージ\"` はやめてくれ</td>\n", | |
" <td>1</td>\n", | |
" <td>189</td>\n", | |
" <td>0.53%</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>4</td>\n", | |
" <td>https://qiita.com/tonluqclml/items/bce1d657b44...</td>\n", | |
" <td>私は如何にして JMockit の returns(Object) を安全に置換したか?</td>\n", | |
" <td>1</td>\n", | |
" <td>251</td>\n", | |
" <td>0.4%</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" <td>...</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>66</td>\n", | |
" <td>https://qiita.com/tonluqclml/items/f55b4e9c88a...</td>\n", | |
" <td>Gitブランチに対応するRedmineチケットを開く</td>\n", | |
" <td>5</td>\n", | |
" <td>1610</td>\n", | |
" <td>0.31%</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>67</td>\n", | |
" <td>https://qiita.com/tonluqclml/private/bb0ab4117...</td>\n", | |
" <td>ゼロから作る Deep Lerning 6章 学習に関するテクニック</td>\n", | |
" <td>0</td>\n", | |
" <td>13</td>\n", | |
" <td>0.0%</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>68</td>\n", | |
" <td>https://qiita.com/tonluqclml/items/c7bdbc3db81...</td>\n", | |
" <td>PythonでGUI : PyQt5のご紹介</td>\n", | |
" <td>13</td>\n", | |
" <td>8664</td>\n", | |
" <td>0.15%</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>69</td>\n", | |
" <td>https://qiita.com/tonluqclml/private/9adf485ec...</td>\n", | |
" <td>PythonでGUI : PyQt5のご紹介(社内用)</td>\n", | |
" <td>0</td>\n", | |
" <td>9</td>\n", | |
" <td>0.0%</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <td>70</td>\n", | |
" <td>https://qiita.com/tonluqclml/items/ea3a91e69c2...</td>\n", | |
" <td>Marx - 最小限のコードで小奇麗にできるCSSライブラリ</td>\n", | |
" <td>21</td>\n", | |
" <td>1437</td>\n", | |
" <td>1.46%</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>71 rows × 5 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" URL \\\n", | |
"0 https://qiita.com/tonluqclml/items/2a2ab902e29... \n", | |
"1 https://qiita.com/tonluqclml/items/65035e0afcf... \n", | |
"2 https://qiita.com/tonluqclml/items/db797d9ad03... \n", | |
"3 https://qiita.com/tonluqclml/items/ece97deb7f0... \n", | |
"4 https://qiita.com/tonluqclml/items/bce1d657b44... \n", | |
".. ... \n", | |
"66 https://qiita.com/tonluqclml/items/f55b4e9c88a... \n", | |
"67 https://qiita.com/tonluqclml/private/bb0ab4117... \n", | |
"68 https://qiita.com/tonluqclml/items/c7bdbc3db81... \n", | |
"69 https://qiita.com/tonluqclml/private/9adf485ec... \n", | |
"70 https://qiita.com/tonluqclml/items/ea3a91e69c2... \n", | |
"\n", | |
" タイトル いいね数 閲覧数 いいね・閲覧比 \n", | |
"0 「美しい」にモヤったら「認知負荷が小さい」と言いかえよう 82 1154 7.11% \n", | |
"1 よくあるPythonプロジェクトのREADME(開発手順の部分のみ) 3 305 0.98% \n", | |
"2 Python 3.7でdictが順序を保存するようになって良かったとしみじみ思う 9 1457 0.62% \n", | |
"3 `return \"エラーメッセージ\"` はやめてくれ 1 189 0.53% \n", | |
"4 私は如何にして JMockit の returns(Object) を安全に置換したか? 1 251 0.4% \n", | |
".. ... ... ... ... \n", | |
"66 Gitブランチに対応するRedmineチケットを開く 5 1610 0.31% \n", | |
"67 ゼロから作る Deep Lerning 6章 学習に関するテクニック 0 13 0.0% \n", | |
"68 PythonでGUI : PyQt5のご紹介 13 8664 0.15% \n", | |
"69 PythonでGUI : PyQt5のご紹介(社内用) 0 9 0.0% \n", | |
"70 Marx - 最小限のコードで小奇麗にできるCSSライブラリ 21 1437 1.46% \n", | |
"\n", | |
"[71 rows x 5 columns]" | |
] | |
}, | |
"execution_count": 22, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import math\n", | |
"\n", | |
"cols = ['url', 'title', 'likes_count', 'page_views_count']\n", | |
"df = pd.DataFrame({c: [x[c] for x in filled_items] for c in cols })\n", | |
"\n", | |
"def percentage(x):\n", | |
" return f\"{round(x * 100, 2)}%\"\n", | |
"\n", | |
"df['ratio'] = (df['likes_count'] / df['page_views_count']).apply(percentage)\n", | |
"\n", | |
"df = df.rename(columns={\n", | |
" 'url': 'URL',\n", | |
" 'title':'タイトル',\n", | |
" 'likes_count': 'いいね数',\n", | |
" 'page_views_count': '閲覧数',\n", | |
" 'ratio': 'いいね・閲覧比'\n", | |
"})\n", | |
"\n", | |
"df" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 25, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"df.to_csv(path)" | |
] | |
} | |
], | |
"metadata": { | |
"file_extension": ".py", | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.8.0" | |
}, | |
"mimetype": "text/x-python", | |
"name": "python", | |
"npconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": 3 | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment