Created
March 14, 2019 04:49
-
-
Save incidunt/8bb86be60e67a3e27b503aaf29613ec9 to your computer and use it in GitHub Desktop.
用Python分析WordPress官网所有插件的开发者信息
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# 首先引入所有需要用的库\n", | |
"\n", | |
"#读取jsonl文件的库\n", | |
"import jsonlines\n", | |
"\n", | |
"# 数据分析的库\n", | |
"import pandas as pd\n", | |
"import numpy as np\n", | |
"\n", | |
"# 数据可视化的库\n", | |
"import matplotlib.pyplot as plt\n", | |
"import seaborn as sns" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(54358, 63)" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"authors_names = []\n", | |
"authors_nonames = []\n", | |
"\n", | |
"with jsonlines.open('../output.jsonl') as reader:\n", | |
" for obj in reader:\n", | |
" author = obj['author']\n", | |
" if author.strip()=='': # 对于没有作者名字的插件,有author这个key,但值为空\n", | |
" authors_nonames.append(author)\n", | |
" else:\n", | |
" authors_names.append(author)\n", | |
" \n", | |
"len(authors_names), len(authors_nonames)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAY4AAADuCAYAAAAuh+CSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFoZJREFUeJzt3Xm0HGWdxvHv7yZsigRERkDEUgRRZEAIOSoTRUWPQ40g44KIIKg4gCzBtcRhnAGRAhwRUWBEmMgiKpssJYojAgEEAgmLLMIBiiMCUVbZAiH9zh/1hjSXu3Tde7vfWp7POX3SVFdXP5fbt5/3requNuccIiIivRoKHUBEROpFxSEiIqWoOEREpBQVh4iIlKLiEBGRUlQcIiJSiopDRERKUXGIiEgpKg4RESlFxSEiIqWoOEREpBQVh4iIlKLiEBGRUlQcIiJSiopDRERKUXGIiEgpKg4RESlFxSEiIqWoOEREpBQVh4iIlKLiEBGRUlQcIiJSiopDRERKUXGIiEgpKg4RESlFxSEiIqWoOEREpBQVh4iIlKLiEBGRUlQcIiJSyvTQAUT6KUqy1YB1gLWBtYAVgWkUz/1pwFLgeWCJvzwG3A/cn6fx4yEyi1SdOedCZxCZsCjJVgXeBmwBbEBREsuKYh3gZZPY/NPAA/5yv//3XmAhsCBP479PYtsitaXikNqIkmwGRUFs6f/dAtiQMLtcHXAncL2/XEdRJk8EyCIyUCoOqawoyaYDs4EPAdsBGwEWNNTYHPAn4LfA+cBleRovCRtJZOqpOKRSoiRbA/hnirL4ILB62EST8hjwa4oS+ZWOmUhTqDgkuCjJXg3sAuwAbE1x0LpplgDzgPOAn+dpvChwHpEJU3FIEFGSDVHMKD5HMbto0zv8lgAXACcCF+dp3AmcR6QUFYcMVJRkrwQ+D+wNrB84ThXcCxwPnJin8SOhw4j0QsUhAxEl2SbAHIpdUqsEjlNFTwOnAsfkaXxb6DAiY1FxSF9FSbYBcCjwCar9jqiqcMAvgG/kaXxX6DAiI1FxSF9ESbY2cDCwJ7BC4Dh1tITiGMghOpAuVaPikCnlP6T3VeAA4OWB4zTBU8DRwJH6cKFUhYpDpkSUZCsB+wFfB14ZOE4TPQQcBhyXp/FzocNIu6k4ZNKiJJsFzAXeHDhKG/wJ2D1P46tDB5H2UnHIhPlZxiHAl2jmh/aqainwXeDgPI2fDR1G2kfFIRMSJdnbgZPRLCOk2yhmH9eGDiLtouKQUqIkW5ni7bVfRF8EVgVLgaOAb+rYhwyKikN6FiXZFsBPgTeFziIvcQuwa57GC0MHkebTiFF6EiXZJ4ArUGlU1SbAVVGS7Rw6iDSfZhwyJn8ywsOAJHQW6dnhFJ881x+39IWKQ0blv6/7p0AcOouUdj7wKX1oUPpBxSEjipJsQ4rvjtC7purrFmD7PI3vDh1EmkXHOOQloiT7AHAtKo262wS4Nkqy94QOIs2i4pAX8QdXM+r9la2y3JrAxVGSfTJ0EGkOFYe8IEqyPYDTaNe38bXBdOBU//sVmTQVhwAQJdk+wEnoOdFUQ8BJUZL9W+ggUn96kZBlpfFD9EVLTWfACVGS7R06iNSb3lXVclGSfQ74ESqNNnHAZ/M0/t/QQaSeVBwtFiXZrhSnQ9fMs306FJ/zOCN0EKkfFUdLRUm2LXAROhDeZs8DcZ7GF4cOIvWi4mgh/+G+a4A1QmeR4B4DZuVpfGfoIFIf2kXRMv47wc9HpSGF1YHz/ellRHqi4miRKMmmAT8DNg6dRSplY+AMf0JLkXHpidIuRwIfDB1CKmk7IA0dQupBxzhawn9q+OTQOaTyds3T+LTQIaTaVBwtECXZLGAesGLoLFJ5i4F35Wk8P3QQqS4VR8NFSbYKcAOwUegsUht3ApvlafxM6CBSTTrG0XyHodKQcjYEvh06hFSXZhwNFiXZPwGXoQGClNcB3p2n8R
Whg0j1qDgaKkqylwE3ARuEziK1pV1WMiKNRJvrCFQaMjnaZSUj0oyjgaIk2wa4BJ3xViZPu6zkJVQcDePfRXUL8PrQWaQxtMtKXkS7qppnDioNmVobAgeGDiHVoRlHg0RJ9krgbmBG6CzSOI8Db8jT+JHQQSQ8zTia5RuoNKQ/ZgBfDx1CqkEzjoaIkmx94A5gpdBZpLEWAxvlafzn0EEkLM04muNQVBrSXysD/xk6hISnGUcDREm2KcX5qDQQkH5bCmyap/FtoYNIOHqhaYbD0e9SBmMaxfnPpMU046i5KMm2BK4LnUNaZ2aexteHDiFhaJRaf3p/vYQwJ3QACUczjhqLkmwd4F5ghdBZpHWeA16Xp/GDoYPI4GnGUW9fQKUhYawI7B06hIShGUdNRUm2AnAf8A+hs0hrLQJem6fxktBBZLA046ivHVFpSFivBrYPHUIGT8VRX58PHUAE2DN0ABk87aqqoSjJNqA41bW+b0NC61Cc/PDe0EFkcDTjqKedUWlINQwBu4UOIYOl4qinHUIHEOmi52PLaFdVzURJ9hrgz2jGIdXhgPXyNL4/dBAZDM046udDqDSkWozieSktoeKoH+0WkCpScbSIdlXVSJRkqwIPoe/dkOpZDKyZp/HToYNI/2nGUS8fRKUh1bQy8IHQIWQwVBz1ok/pSpVpd1VLqDjq5f2hA4iMYbvQAWQwVBw1ESXZesDaoXOIjGFt/zyVhlNx1MeWoQOI9EDP0xZQcdTHzNABRHqg4mgBFUd9qDikDlQcLaDiqA/9QUod6HnaAiqOGoiSbH1grdA5RHrw6ijJ1g0dQvpLxVEP2k0ldaJZR8OpOOph89ABREpQcTSciqMe1g8dQKSE14cOIP2l4qiH14QOIFLCOqEDSH+pOOpBBxulTvR8bTgVRz3oD1HqRDOOhtP3cVRclGSrAPqOA6mblfM0fjZ0COkPzTiqT7MNqSOdkLPBVBzVp+KQOtLztsFUHNWnP0CpIx3naLBJF4eZ/crMVveXfbqWb2NmF052+yM8XmRmn+z6793N7AdT/TgVsnroACIT8KrQAaR/Jl0czrntnHOPUbzA7TPe+lMgAj453kq9MrNpU7WtPlkhdACRCZiS5+0gBqZ+W++cim357R3UdT0ysz+WvP/uZlbpPQ1jFoeZfcXM9vfXjzazS/z195rZ6f56bmavAlJgAzO7wcyO8ptY1czOMrPbzex0M7MRHmNPM5tvZjea2dlm9jK/fK6ZfbRrvSf91RSY7R/nQL9sXTP7tZndaWZHdt1nZzO72cz+aGZHdG/LzP7bzG4E3jEsz6VmdoSZXWtmd5jZbL88MrN5ZrbAX97pl29jZpeZ2XlmdreZpWa2i7//zWa2gV9vLf/zzfeXrcf8zSyn4pA6mpLn7YAGptsAU1YcwEHjrzKm3an4LurxZhzzgNn++kyKIljBL7t82LoJcJdzbnPn3Ff8srcBc4C3AG8ARnqxPMc5t5VzbjPgNuCz42RKgHn+cY72yzYHdgI2BXYys9f6xj4CeK+/fSsz+7Bf/+XANc65zZxzV4zwGNOdc7N89m/6ZX8F3u+c28I/1ve71t8M2At4M7ArsJG//4+B/fw6xwBHO+e2Aj7ib+uFikPqaPp4K/RrYGpm7zOzhX7gdrKZrTRsW5jZTD9IjCj+dg/0257dFREzm2Vmf/Dbu8rM3uSXv2gXuZld6AeRKbCK39bp/uZpZnaimd1iZheb2Sr+Ppub2dVmdpOZnWtma/jB8kzgdL+NVYblqcTAdrxf7vXAlma2GvAssMD/ULOB/ce5L8C1zrn7fLAbKHYzDX+hfquZfYtiRLEq8Jsetjvc75xzj/vHuRV4HbAmcKlz7m9++enAu4BfAkuBs8fY3jn+3+t9ZihewH9gZpv7+2/Utf5859wD/nHuAi72y28G3uOvbwu8xZZPulYzs1Wdc08ytqrvSqs5xxCuM0Sn0/2vFRc3RMcVy5fd1nHT/O1D5li2zN/upt
HpGPj7ddy0F7bR6Uyj4wznhsy5aS/cvnw9K7ZD923T7IXrGK4zDccQne518I/vhuiYXx//uH49Bwz281qPulc8DfF4q80DvkQxCJsJrDTOwPStzrnNoXhBpBiYbgLcD1wJbG1m1wFzgfc55+4ws1OAvYHvjRTAOZeb2QnAk86574ywyu3AbOfc82a2LfBtioHfiJxziZnt25UzAjYEdnbO7Wlmv/D3Pw04BdjPOXeZmR0CfNM5N8fM9gW+7Jy7bpSHme6cm2Vm21EMbLdl+cB2sZltCJzB8rNqb0YxqH0EuBv4sb//ARQD2zksH9heYWbrU7wOv3m0n3PM4nDOLTGzeyimTlcBN1G8EL6RYnYwnu4PAC0d5fHmAh92zt1oZrtTTBsBnsfPiMxsCFhxko/TbbFzbmkP2+ve1oHAIopfwhCweJTH73T9d6fr/kPA251z3ffrxVg5ZdKMDjbUYaj88b6JvhYH+cztmAXZGaLDVBbk0t7+f/ZjYPoEcI9z7g6/zk+ALzBKcfRgBvAT/2LsmNgegHucczf469cDkZnNAFZ3zl3WlfPMHrcXfGA77nSSYlTwZeAz/oG+C1zvXvqR8yeAV/SwveFeATzgRxq7AH/xy3OK0zP/Atie5b+wXh/nWuD7fmr6KLAzcOwE8i0zA7jPOdcxs09TfiZwMUW7HwXFNLXryTQWFYdMgYEX5LgvsAMamHZ7YTAKrNzD9gEOBX7vnNvRzx4uHWFb421veM5VRluxR8EHtr08ieZRvCf7D865RT7QvOErOeceBq604kD0UcNvH8PBwDUUU83bu5afCLzblh/AfsovvwlYasXB9AMZhW/YBPg9cCNF2Z1XItdwxwGf9nk27srTq/2BmX5/5q0U+1V7oeKQOnqux/WWDUwv99f3AhZOYmD6J4oR/Rv9f+8KLBvV5yz/rpDu3U1jbXsGywezu3ctz4HNzWzIzF4LzOq6bYkfCI/K71p/tOuYSnfOiQzCZwAPOOc6flsTHdgCxcB2rJXHnXE4535H1+jBObfRsNujruvD3yZ7addt+46y/eOB40dYvgh4e9eir/nlSygOeHeb23W/f+m6fgbFvr7h2151pCz+tm26rj+Enwo65+4E/nGEPJfy4p+z+/4v3Oa3tdNojzuGXv8ARaqk1/NUzQO+QTEwfcrMRh2YmtmVVry19SIgG2ljfh//HsCZZjYdmA+c4G/+L+AkMzuUrr9Z4ALgLDPbgeKYQ/fjH0mxq+rfhz3mlcA9wK0Us6MFXbf9CLjJzBb4n200nwZOsOKdpHcDe/jlc/3yZ4B3OOeeGWMbyxwHnG1muwG/ZmID2x+a2U0UvXA5YwxudZLDiouS7GMUu+tE6uRTeRqfPv5qUkc65Uj1PRg6gMgE6HnbYCqO6tMfoNTRfaEDSP+oOKrvgdABRCbgL+OvInWl4qi4PI2fpPyBLpGQ/u6ft9JQKo560O4qqRPNNhpOxVEP2l0ldaLiaDgVRz1oxiF1ouJoOBVHPdwZOoBICSqOhlNx1MPC0AFESujlPFNSYyqOelgw/ioilTE/dADpLxVHDeRpfBfwWOgcIj14HLhj3LWk1lQc9dHLKdhFQluQp7FOgNdwKo760O4qqQPtpmoBFUd9qDikDlQcLaDiqA+9s0rqQMXRAiqO+rgdHSCXavtbnsb3hg4h/afiqIk8jTvAb0LnEBnDlaEDyGCoOOrlgtABRMZwfugAMhgqjnq5CFgaOoTICJaigU1rqDhqJE/jR4CrQucQGcFVeRo/FDqEDIaKo34uDB1AZAS/DB1ABkfFUT/aHSBVdF7oADI4Ko6aydP4NuCu0DlEuvzRn09NWkLFUU8a3UmV6PnYMiqOepobOoBIl7NCB5DBUnHUUJ7GN6NTO0g1XJensc7c3DIqjvr6cegAIsD/hA4gg6fiqK8zgCdDh5BW+zvF81BaRsVRU3kaPwGcEjqHtNqpeRo/FTqEDJ6Ko96OBfRtaxKCA74fOoSEoeKosT
yNbwd+GzqHtFKWp7G+W7ylVBz1993QAaSVjg4dQMIx57Sno+6iJLscmB06h7TG/DyNZ4UOIeFoxtEMXw8dQFrla6EDSFgqjgbI0/hKIAudQ1rhojyNfx86hISl4miOg9A7rKS/Omi2Iag4GiNP45vQh7Gkv071p7uRllNxNMvBwJLQIaSRFlM8v0RUHE2Sp/Hd6BxW0h/H5mn859AhpBpUHM1zMPDX0CGkUR4BDg8dQqpDxdEweRo/DHwhdA5plAPyNH40dAipDn0AsKGiJDsT+GjoHFJ75+Zp/K+hQ0i1aMbRXPsAD4UOIbX2ELBX6BBSPSqOhsrT+G/AfqFzSK3tlaexjpfJS6g4GixP458B54bOIbV0Rp7GZ4cOIdWk4mi+vSneFSPSqweAfUOHkOpScTRcnsaLgN3R6Uikd3vmaazBhoxKxdECeRpfAPxH6BxSC4flaawTZsqYVBwtkafxt4AzQ+eQSjsHnVZEeqDiaJc9gBtDh5BKugHYLU9j7dKUcekDgC0TJVkEzAdeFTiKVMeDwCydi0p6pRlHy+RpnAMfA54PHEWq4VlgR5WGlKHiaKE8jS8F9g+dQyrhc3kaXx06hNSLiqOl8jQ+Hr3Tqu0OydP4tNAhpH50jKPloiQ7HEhC55CBOzJPY30NrEyIikOIkuwYtOuqTY7O0/iLoUNIfWlXlZCn8QHA90LnkIE4RqUhk6XiEADyND4QOCp0Dumrw/M0nhM6hNSfikNekKfxV4Fvhc4hffHveRofFDqENIOOcchLREn2WeA4YMXQWWTSngfm5Gn8w9BBpDlUHDKiKMlmA2cDa4XOIhP2MPDxPI0vCR1EmkXFIaPypyc5H9g0cBQp72ZghzyN7wkdRJpHxzhkVP70JO+kKA+pj7OAd6g0pF9UHDKmPI2fBHYE0tBZZFyO4rToH8/T+KnQYaS5tKtKehYl2UeAE9CZdavocYrTomt2KH2nGYf0LE/js4FNgHNDZ5EXyYBNVBoyKJpxyIRESbYLcCywRugsLfYocECexqeGDiLtouKQCYuSbF3gRGC70Fla6JfA3nkaPxg6iLSPikMmLUqyzwBHA6uFztICDwH75mn889BBpL10jEMmLU/jk4GNgOPRNwv2SweYC7xFpSGhacYhUypKsg2BbwMfDZ2lQc4BDs7T+NbQQURAxSF9EiXZLOBI4N2hs9TY/wEH5Wk8P3QQkW4qDumrKMm2o/jwoE5b0rtrKApD55iSSlJxSN9FSTYEfAg4EM1AxnIF8J08jc8LHURkLCoOGagoyd4GzAF2AlYKHKcKFgNnAMfmabwwdBiRXqg4JIgoydYEdgM+D2wcOE4ItwAnAafkafxw6DAiZag4JLgoyd4FfBzYAVgvcJx++itwHnBynsZXhw4jMlEqDqmUKMm2oCiQHYDNAseZCjcCF/rLtXkadwLnEZk0FYdUlv8iqe0pSmRr6nFM5BngEnxZ5Gl8X+A8IlNOxSG1ECXZChRv6Z3pL1sBbwWmB4z1HMWxioX+sgBYmKfxMwEzifSdikNqK0qylSl2Z82kKJX1gNcA61J8V7pN0UM9CjwIPADcxvKSuCVP4+em6DFEakPFIY0UJdmKwDoUJbKsTFYFVgBW9P8OAUu7Ls8CiyhKYtllkcpB5MVUHCIiUorOjisiIqWoOEREpBQVh4iIlKLiEBGRUlQcIiJSiopDRERKUXGIiEgpKg4RESlFxSEiIqWoOEREpBQVh4iIlKLiEBGRUlQcIiJSiopDRERKUXGIiEgpKg4RESlFxSEiIqWoOEREpBQVh4iIlKLiEBGRUlQcIiJSiopDRERKUXGIiEgpKg4RESlFxSEiIqWoOEREpBQVh4iIlKLiEBGRUlQcIiJSiopDRERKUXGIiEgp/w9KgtYfjks3iQAAAABJRU5ErkJggg==\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": {}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# 用于图表的数据\n", | |
"labels = 'with author name','without authot name'\n", | |
"sizes = [len(authors_names), len(authors_nonames)]\n", | |
" \n", | |
"# 绘制图表\n", | |
"plt.pie(sizes, labels=labels )\n", | |
" \n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"author_profiles=[]\n", | |
"\n", | |
"with jsonlines.open('../output.jsonl') as reader:\n", | |
" for obj in reader:\n", | |
" author_profiles.append(str(obj['author_profile']).replace('https://profiles.wordpress.org/',''))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"(['brooksx',\n", | |
" 'miunosoft',\n", | |
" 'ouhinit',\n", | |
" 'mdalby',\n", | |
" 'ouhinit',\n", | |
" 'ouhinit',\n", | |
" 'ouhinit',\n", | |
" 'primestrategy',\n", | |
" 'scheeeli',\n", | |
" 'ouhinit'],\n", | |
" 54421)" | |
] | |
}, | |
"execution_count": 5, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"author_profiles[:10], len(author_profiles)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"54421" | |
] | |
}, | |
"execution_count": 6, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#convert output to new array, check length\n", | |
"authors_array = np.asarray(author_profiles)\n", | |
"len(authors_array)" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>author_name</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>brooksx</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>miunosoft</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>ouhinit</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>mdalby</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>ouhinit</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" author_name\n", | |
"0 brooksx\n", | |
"1 miunosoft\n", | |
"2 ouhinit\n", | |
"3 mdalby\n", | |
"4 ouhinit" | |
] | |
}, | |
"execution_count": 7, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"#convert new array to dataframe\n", | |
"df = pd.DataFrame(data=authors_array, columns=['author_name'])\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>author_name</th>\n", | |
" <th>counts</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>shawfactor</td>\n", | |
" <td>93</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>coffee2code</td>\n", | |
" <td>74</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>algoritmika</td>\n", | |
" <td>69</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>yithemes</td>\n", | |
" <td>61</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>marcqueralt</td>\n", | |
" <td>54</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" author_name counts\n", | |
"0 shawfactor 93\n", | |
"1 coffee2code 74\n", | |
"2 algoritmika 69\n", | |
"3 yithemes 61\n", | |
"4 marcqueralt 54" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"author_count = df['author_name'].value_counts().rename_axis('author_name').reset_index(name='counts')\n", | |
"author_count.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>counts</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>count</th>\n", | |
" <td>27289.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>mean</th>\n", | |
" <td>1.994247</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>std</th>\n", | |
" <td>2.867161</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>min</th>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>25%</th>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>50%</th>\n", | |
" <td>1.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>75%</th>\n", | |
" <td>2.000000</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>max</th>\n", | |
" <td>93.000000</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" counts\n", | |
"count 27289.000000\n", | |
"mean 1.994247\n", | |
"std 2.867161\n", | |
"min 1.000000\n", | |
"25% 1.000000\n", | |
"50% 1.000000\n", | |
"75% 2.000000\n", | |
"max 93.000000" | |
] | |
}, | |
"execution_count": 9, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"author_count.describe()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"needs_background": "light" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"sns.stripplot(y=author_count['counts'])\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 432x288 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"needs_background": "light" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"# seaborn histogram\n", | |
"g=sns.distplot(author_count['counts'],rug='True')\n", | |
"\n", | |
"# Set the `xscale`\n", | |
"g.set(xscale=\"log\")\n", | |
"\n", | |
"# Add labels\n", | |
"plt.title('distribution of plugins counts from same deveploper')\n", | |
"plt.xlabel('counts')\n", | |
"plt.ylabel('rate')\n", | |
"\n", | |
"# Show plot\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": { | |
"scrolled": true | |
}, | |
"outputs": [ | |
{ | |
"data": { | |
"image/png": "\n", | |
"text/plain": [ | |
"<Figure size 2160x360 with 1 Axes>" | |
] | |
}, | |
"metadata": { | |
"needs_background": "light" | |
}, | |
"output_type": "display_data" | |
} | |
], | |
"source": [ | |
"\n", | |
"top10 = author_count[:10]\n", | |
"plt.figure(figsize=(30,5))\n", | |
"\n", | |
"sns.barplot(top10.author_name, top10.counts)\n", | |
"\n", | |
"plt.title('Top 10 Plugin Developers in wordPress.org',fontsize=20)\n", | |
"plt.ylabel('Number of Plugins', fontsize=16)\n", | |
"plt.xlabel('author', fontsize=16)\n", | |
"\n", | |
"plt.show()" | |
] | |
}, | |
{ | |
"cell_type": "markdown", | |
"metadata": {}, | |
"source": [ | |
"下面就是 WordPress.org 上发布的插件数量前10名的作者的链接\n", | |
"\n", | |
"| name |plugins| profile | website |\n", | |
"|----------------|------:|--------------------------------------------------------------------------------------------------|--------------------------------------------------------------------------------------------------------------------------------------|\n", | |
"|Peter Shaw | 93| [https://profiles.wordpress.org/shawfactor](https://profiles.wordpress.org/shawfactor) | [https://shawfactor.com/](https://shawfactor.com/) |\n", | |
"|Scott Reilly | 74| [https://profiles.wordpress.org/coffee2code](https://profiles.wordpress.org/coffee2code) | [http://coffee2code.com/](http://coffee2code.com/) |\n", | |
"|Algoritmika Ltd | 69| [https://profiles.wordpress.org/algoritmika](https://profiles.wordpress.org/algoritmika) | [https://wpfactory.com](https://wpfactory.com) |\n", | |
"|YITH | 61| [https://profiles.wordpress.org/yithemes](https://profiles.wordpress.org/yithemes) | [https://yithemes.com/](https://yithemes.com/) |\n", | |
"|DeMomentSomTres | 54| [https://profiles.wordpress.org/marcqueralt](https://profiles.wordpress.org/marcqueralt) | [http://DeMomentSomTres.com](http://DeMomentSomTres.com) |\n", | |
"|Gopi Ramasamy | 54| [https://profiles.wordpress.org/gopiplus](https://profiles.wordpress.org/gopiplus) | [http://www.gopiplus.com/work/2010/07/18/youtube-with-fancy-zoom/](http://www.gopiplus.com/work/2010/07/18/youtube-with-fancy-zoom/) |\n", | |
"|Access Keys | 53| [https://profiles.wordpress.org/access-keys](https://profiles.wordpress.org/access-keys) | [https://access-keys.com](https://access-keys.com) |\n", | |
"|WP OnlineSupport| 52| [https://profiles.wordpress.org/wponlinesupport](https://profiles.wordpress.org/wponlinesupport) | [https://www.wponlinesupport.com](https://www.wponlinesupport.com) |\n", | |
"|GamiPress | 50| [https://profiles.wordpress.org/rubengc](https://profiles.wordpress.org/rubengc) | [https://gamipress.com/](https://gamipress.com/) |\n", | |
"|BestWebSoft | 48| [https://profiles.wordpress.org/bestwebsoft](https://profiles.wordpress.org/bestwebsoft) | [https://bestwebsoft.com/](https://bestwebsoft.com/) |" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.7.2" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment