Last active
July 13, 2020 01:57
-
-
Save ysmintor/2ba0709eccbb02cba02de5a8577a5f08 to your computer and use it in GitHub Desktop.
entropy_summer_school
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "%matplotlib inline\n", | |
| "import matplotlib.pyplot as plt\n", | |
| "plt.rcParams['font.sans-serif']=['SimHei'] #用来正常显示中文标签\n", | |
| "plt.rcParams['axes.unicode_minus']=False #用来正常显示负号" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "from scipy.stats import entropy" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import pandas as pd" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df = pd.read_excel('学生信息1.xlsx', header=1) # 读取表格数据" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>姓名</th>\n", | |
| " <th>性别</th>\n", | |
| " <th>院系</th>\n", | |
| " <th>在学年级</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>周娜</td>\n", | |
| " <td>女</td>\n", | |
| " <td>心理与认知科学学院</td>\n", | |
| " <td>2017</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>周行健</td>\n", | |
| " <td>男</td>\n", | |
| " <td>数学科学学院</td>\n", | |
| " <td>2017</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>孙怡宁</td>\n", | |
| " <td>女</td>\n", | |
| " <td>哲学系</td>\n", | |
| " <td>2017</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>岑仕鹏</td>\n", | |
| " <td>男</td>\n", | |
| " <td>信息科学技术学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>朱元虎</td>\n", | |
| " <td>男</td>\n", | |
| " <td>中国语言文学系</td>\n", | |
| " <td>2017</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 姓名 性别 院系 在学年级\n", | |
| "0 周娜 女 心理与认知科学学院 2017\n", | |
| "1 周行健 男 数学科学学院 2017\n", | |
| "2 孙怡宁 女 哲学系 2017\n", | |
| "3 岑仕鹏 男 信息科学技术学院 2018\n", | |
| "4 朱元虎 男 中国语言文学系 2017" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df.head()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## 将其它学校归为一类,判断条件是“院系”列中包含“大学”二字" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "df['院系'] = df['院系'].apply(lambda x: '其它大学' if '大学' in x else x) " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th>姓名</th>\n", | |
| " <th>性别</th>\n", | |
| " <th>院系</th>\n", | |
| " <th>在学年级</th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>0</th>\n", | |
| " <td>周娜</td>\n", | |
| " <td>女</td>\n", | |
| " <td>心理与认知科学学院</td>\n", | |
| " <td>2017</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>1</th>\n", | |
| " <td>周行健</td>\n", | |
| " <td>男</td>\n", | |
| " <td>数学科学学院</td>\n", | |
| " <td>2017</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2</th>\n", | |
| " <td>孙怡宁</td>\n", | |
| " <td>女</td>\n", | |
| " <td>哲学系</td>\n", | |
| " <td>2017</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>3</th>\n", | |
| " <td>岑仕鹏</td>\n", | |
| " <td>男</td>\n", | |
| " <td>信息科学技术学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>4</th>\n", | |
| " <td>朱元虎</td>\n", | |
| " <td>男</td>\n", | |
| " <td>中国语言文学系</td>\n", | |
| " <td>2017</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>5</th>\n", | |
| " <td>严牧心</td>\n", | |
| " <td>女</td>\n", | |
| " <td>外国语学院</td>\n", | |
| " <td>2017</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>6</th>\n", | |
| " <td>匡然</td>\n", | |
| " <td>女</td>\n", | |
| " <td>外国语学院</td>\n", | |
| " <td>2017</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>7</th>\n", | |
| " <td>李一笑</td>\n", | |
| " <td>男</td>\n", | |
| " <td>数学科学学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>8</th>\n", | |
| " <td>薛睿</td>\n", | |
| " <td>男</td>\n", | |
| " <td>工学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>9</th>\n", | |
| " <td>马泽宇</td>\n", | |
| " <td>男</td>\n", | |
| " <td>物理学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>10</th>\n", | |
| " <td>杨秀金</td>\n", | |
| " <td>男</td>\n", | |
| " <td>生命科学学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>11</th>\n", | |
| " <td>周妍</td>\n", | |
| " <td>女</td>\n", | |
| " <td>历史学系</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>12</th>\n", | |
| " <td>王伊姗</td>\n", | |
| " <td>女</td>\n", | |
| " <td>哲学系</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>13</th>\n", | |
| " <td>张馨语</td>\n", | |
| " <td>女</td>\n", | |
| " <td>光华管理学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>14</th>\n", | |
| " <td>谭畅</td>\n", | |
| " <td>男</td>\n", | |
| " <td>工学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>15</th>\n", | |
| " <td>丁月迪</td>\n", | |
| " <td>女</td>\n", | |
| " <td>政府管理学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>16</th>\n", | |
| " <td>王金晶</td>\n", | |
| " <td>女</td>\n", | |
| " <td>外国语学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>17</th>\n", | |
| " <td>王韵涵</td>\n", | |
| " <td>女</td>\n", | |
| " <td>外国语学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>18</th>\n", | |
| " <td>李武陶文</td>\n", | |
| " <td>男</td>\n", | |
| " <td>外国语学院</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>19</th>\n", | |
| " <td>薄瑞</td>\n", | |
| " <td>男</td>\n", | |
| " <td>工学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>20</th>\n", | |
| " <td>张润祺</td>\n", | |
| " <td>女</td>\n", | |
| " <td>工学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>21</th>\n", | |
| " <td>马小凯</td>\n", | |
| " <td>男</td>\n", | |
| " <td>生命科学学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>22</th>\n", | |
| " <td>汪家震</td>\n", | |
| " <td>男</td>\n", | |
| " <td>生命科学学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>23</th>\n", | |
| " <td>吴军</td>\n", | |
| " <td>男</td>\n", | |
| " <td>生命科学学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>24</th>\n", | |
| " <td>刘家铭</td>\n", | |
| " <td>男</td>\n", | |
| " <td>生命科学学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>25</th>\n", | |
| " <td>谢奥林</td>\n", | |
| " <td>男</td>\n", | |
| " <td>生命科学学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>26</th>\n", | |
| " <td>李泊宁</td>\n", | |
| " <td>男</td>\n", | |
| " <td>生命科学学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>27</th>\n", | |
| " <td>侯东林</td>\n", | |
| " <td>男</td>\n", | |
| " <td>信息科学技术学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>28</th>\n", | |
| " <td>赵海波</td>\n", | |
| " <td>男</td>\n", | |
| " <td>信息科学技术学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>29</th>\n", | |
| " <td>陈福康</td>\n", | |
| " <td>男</td>\n", | |
| " <td>信息科学技术学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>30</th>\n", | |
| " <td>贺伟桓</td>\n", | |
| " <td>男</td>\n", | |
| " <td>城市与环境学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>31</th>\n", | |
| " <td>刘泽熙</td>\n", | |
| " <td>男</td>\n", | |
| " <td>心理与认知科学学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>32</th>\n", | |
| " <td>邱春水</td>\n", | |
| " <td>男</td>\n", | |
| " <td>历史学系</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>33</th>\n", | |
| " <td>杜心怡</td>\n", | |
| " <td>女</td>\n", | |
| " <td>考古文博学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>34</th>\n", | |
| " <td>吴添</td>\n", | |
| " <td>男</td>\n", | |
| " <td>经济学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>35</th>\n", | |
| " <td>钟艳琦</td>\n", | |
| " <td>女</td>\n", | |
| " <td>光华管理学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>36</th>\n", | |
| " <td>于灏轩</td>\n", | |
| " <td>男</td>\n", | |
| " <td>社会学系</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>37</th>\n", | |
| " <td>宋汶航</td>\n", | |
| " <td>男</td>\n", | |
| " <td>元培学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>38</th>\n", | |
| " <td>乐江立</td>\n", | |
| " <td>男</td>\n", | |
| " <td>元培学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>39</th>\n", | |
| " <td>孙世元</td>\n", | |
| " <td>男</td>\n", | |
| " <td>元培学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>40</th>\n", | |
| " <td>叶风灿</td>\n", | |
| " <td>男</td>\n", | |
| " <td>元培学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>41</th>\n", | |
| " <td>鄭曉琳</td>\n", | |
| " <td>女</td>\n", | |
| " <td>外国语学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>42</th>\n", | |
| " <td>周心宁</td>\n", | |
| " <td>女</td>\n", | |
| " <td>外国语学院</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>43</th>\n", | |
| " <td>刘俊智</td>\n", | |
| " <td>男</td>\n", | |
| " <td>医学部教学办</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>44</th>\n", | |
| " <td>邢一泓</td>\n", | |
| " <td>男</td>\n", | |
| " <td>医学部教学办</td>\n", | |
| " <td>2019</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>45</th>\n", | |
| " <td>罗京</td>\n", | |
| " <td>男</td>\n", | |
| " <td>其它大学</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>46</th>\n", | |
| " <td>郭瑞元</td>\n", | |
| " <td>女</td>\n", | |
| " <td>其它大学</td>\n", | |
| " <td>2018</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 姓名 性别 院系 在学年级\n", | |
| "0 周娜 女 心理与认知科学学院 2017\n", | |
| "1 周行健 男 数学科学学院 2017\n", | |
| "2 孙怡宁 女 哲学系 2017\n", | |
| "3 岑仕鹏 男 信息科学技术学院 2018\n", | |
| "4 朱元虎 男 中国语言文学系 2017\n", | |
| "5 严牧心 女 外国语学院 2017\n", | |
| "6 匡然 女 外国语学院 2017\n", | |
| "7 李一笑 男 数学科学学院 2018\n", | |
| "8 薛睿 男 工学院 2018\n", | |
| "9 马泽宇 男 物理学院 2018\n", | |
| "10 杨秀金 男 生命科学学院 2018\n", | |
| "11 周妍 女 历史学系 2018\n", | |
| "12 王伊姗 女 哲学系 2018\n", | |
| "13 张馨语 女 光华管理学院 2018\n", | |
| "14 谭畅 男 工学院 2018\n", | |
| "15 丁月迪 女 政府管理学院 2018\n", | |
| "16 王金晶 女 外国语学院 2019\n", | |
| "17 王韵涵 女 外国语学院 2018\n", | |
| "18 李武陶文 男 外国语学院 2018\n", | |
| "19 薄瑞 男 工学院 2019\n", | |
| "20 张润祺 女 工学院 2019\n", | |
| "21 马小凯 男 生命科学学院 2019\n", | |
| "22 汪家震 男 生命科学学院 2019\n", | |
| "23 吴军 男 生命科学学院 2019\n", | |
| "24 刘家铭 男 生命科学学院 2019\n", | |
| "25 谢奥林 男 生命科学学院 2019\n", | |
| "26 李泊宁 男 生命科学学院 2019\n", | |
| "27 侯东林 男 信息科学技术学院 2019\n", | |
| "28 赵海波 男 信息科学技术学院 2019\n", | |
| "29 陈福康 男 信息科学技术学院 2019\n", | |
| "30 贺伟桓 男 城市与环境学院 2019\n", | |
| "31 刘泽熙 男 心理与认知科学学院 2019\n", | |
| "32 邱春水 男 历史学系 2019\n", | |
| "33 杜心怡 女 考古文博学院 2019\n", | |
| "34 吴添 男 经济学院 2019\n", | |
| "35 钟艳琦 女 光华管理学院 2019\n", | |
| "36 于灏轩 男 社会学系 2019\n", | |
| "37 宋汶航 男 元培学院 2019\n", | |
| "38 乐江立 男 元培学院 2019\n", | |
| "39 孙世元 男 元培学院 2019\n", | |
| "40 叶风灿 男 元培学院 2019\n", | |
| "41 鄭曉琳 女 外国语学院 2019\n", | |
| "42 周心宁 女 外国语学院 2019\n", | |
| "43 刘俊智 男 医学部教学办 2019\n", | |
| "44 邢一泓 男 医学部教学办 2019\n", | |
| "45 罗京 男 其它大学 2018\n", | |
| "46 郭瑞元 女 其它大学 2018" | |
| ] | |
| }, | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "df" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "\n", | |
| "依据公式 $S=-\\sum_i F_i \\ln F_i$,其中取 $k_B=1$\n", | |
| "\n", | |
| "因此计算熵可采用 entropy 函数,其定义如下\n", | |
| "\n", | |
| "scipy.stats.entropy(pk, qk=None, base=None, axis=0),base 默认值是 e\n", | |
| "\n", | |
| "If only probabilities pk are given, the entropy is calculated as S = -sum(pk * log(pk), axis=axis).\n" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# 按学院(校)分" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "dept_col = df.loc[:, '院系'].value_counts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "外国语学院 7\n", | |
| "生命科学学院 7\n", | |
| "元培学院 4\n", | |
| "工学院 4\n", | |
| "信息科学技术学院 4\n", | |
| "历史学系 2\n", | |
| "心理与认知科学学院 2\n", | |
| "其它大学 2\n", | |
| "光华管理学院 2\n", | |
| "医学部教学办 2\n", | |
| "数学科学学院 2\n", | |
| "哲学系 2\n", | |
| "物理学院 1\n", | |
| "考古文博学院 1\n", | |
| "政府管理学院 1\n", | |
| "经济学院 1\n", | |
| "中国语言文学系 1\n", | |
| "城市与环境学院 1\n", | |
| "社会学系 1\n", | |
| "Name: 院系, dtype: int64" | |
| ] | |
| }, | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dept_col" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<matplotlib.axes._subplots.AxesSubplot at 0x7f54d29fd850>" | |
| ] | |
| }, | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "dept_col.plot.barh()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "dept_col.p = dept_col.values / dept_col.values.sum()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "院系(校):外国语学院, 人数:7,占比:0.14893617021276595\n", | |
| "院系(校):生命科学学院, 人数:7,占比:0.14893617021276595\n", | |
| "院系(校):元培学院, 人数:4,占比:0.0851063829787234\n", | |
| "院系(校):工学院, 人数:4,占比:0.0851063829787234\n", | |
| "院系(校):信息科学技术学院, 人数:4,占比:0.0851063829787234\n", | |
| "院系(校):历史学系, 人数:2,占比:0.0425531914893617\n", | |
| "院系(校):心理与认知科学学院, 人数:2,占比:0.0425531914893617\n", | |
| "院系(校):其它大学, 人数:2,占比:0.0425531914893617\n", | |
| "院系(校):光华管理学院, 人数:2,占比:0.0425531914893617\n", | |
| "院系(校):医学部教学办, 人数:2,占比:0.0425531914893617\n", | |
| "院系(校):数学科学学院, 人数:2,占比:0.0425531914893617\n", | |
| "院系(校):哲学系, 人数:2,占比:0.0425531914893617\n", | |
| "院系(校):物理学院, 人数:1,占比:0.02127659574468085\n", | |
| "院系(校):考古文博学院, 人数:1,占比:0.02127659574468085\n", | |
| "院系(校):政府管理学院, 人数:1,占比:0.02127659574468085\n", | |
| "院系(校):经济学院, 人数:1,占比:0.02127659574468085\n", | |
| "院系(校):中国语言文学系, 人数:1,占比:0.02127659574468085\n", | |
| "院系(校):城市与环境学院, 人数:1,占比:0.02127659574468085\n", | |
| "院系(校):社会学系, 人数:1,占比:0.02127659574468085\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for i in range(len(dept_col)):\n", | |
| " print('院系(校):{}, 人数:{},占比:{}'.format(dept_col.index[i], dept_col.values[i], dept_col.p[i]))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "按院系(校)分熵:2.7100979219642647\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print('按院系(校)分熵:{}'.format(entropy(dept_col)))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# 按性别分" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "sex_col = df.loc[:,'性别'].value_counts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "<matplotlib.axes._subplots.AxesSubplot at 0x7f54d04ff950>" | |
| ] | |
| }, | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| }, | |
| { | |
| "data": { | |
| "image/png": "iVBORw0KGgoAAAANSUhEUgAAAW4AAAD2CAYAAAD24G0VAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjMsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+AADFEAAAJEElEQVR4nO3dXYjmZ3nH8d+VrAaNuCbsIhqDi+CRxpQQbdVS1nfMeqAiWFBbqSUInlmwaW17UC0s2KaIreiCbwgRFHzDIEaLmiCKTgSNQuNBiZW0FrRhVVrbZnt5sE90Mo7ZZ5P9zzzX5POBgXlekvu+uZJv/vt/MrvV3QFgjov2ewMAnB/hBhhGuAGGEW6AYYQbYJhDSy9w5MiRPnbs2NLLABwot99++4+6++hury0e7mPHjmVra2vpZQAOlKr6/m96za0SgGGEG2AY4QYYRrgBhhFugGGEG2AY4QYYRrgBhhFugGGEG2AY4QYYRrgBhhFugGGEG2AY4QYYRrgBhhFugGGEG2AY4QYYRrgBhhFugGGEG2AY4QYYRrgBhhFugGGEG2CYQ0svcMfdp3PshpuXXgbWdtfJE/u9BXhIXHEDDCPcAMMIN8Awwg0wjHADDCPcAMMIN8Awwg0wjHADDCPcAMMIN8Awwg0wjHADDCPcAMMIN8Awwg0wjHADDCPcAMMIN8Awwg0wjHADDHNe4a6qi6rqeVVVS20IgAd2vlfcT0/yV93dS2wGgHM733C/LMn7l9gIAOs5dK43VNU1SW5Mcm+Sq5PcWVV/mLPRvyjJ/yf5y+7+ypIbBeCsc4a7u7+Z5HhVXZnkvd19XZJU1VuSnO7u9y68RwC2OZ9bJW9M8g/bHl+X5JO7vbGqrq+qraraOvNfpx/K/gDY4ZxX3Nvck+SGqnpakn9O8i/d/R+7vbG7TyU5lSSXPOGpPsgEuIDWvuLu7r9N8vzVX/OJJJdU1RVLbQyA3a11xV1VlyS5JskrkvzW6uvKJB+vqq8k+fPu/vliuwTgl9a9VfK2nL3S/lR3v2X13Heq6pYkrxZtgL2zVri3xXrn82eS3HRBdwTAA/J7lQAMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMs9af8v5QXHXF4WydPLH0MgAPG664AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYY5tPQCd9x9OsduuHnpZQA2yl0nTyz293bFDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwxxa501V9Z4kz0xyT5InJLkkyV1JLkvyje5+41IbBOD+1gp3kp8n+ZPu/lJVvT7Jk7r77VV1PMmrltocAL9u3XAnyd9V1S+vuFfRvizJV5fYGAC7Wzfch5K8L8k3k5xIcjTJB5Nck+TpO99cVdcnuT5JLn7s0QuxTwBW1v1w8h+TfC/JY5L8dpJbV99/L8m7dr65u09197Xdfe3Fjz58ofYKQNa44q6qw0lOJTmzeuraJJcm+b/V4/9O8tJFdgfArzlnuLv7dFU9r7vvrarLk9zc3c+97/Wq+u6iOwTgfta6VbKK9pVJbkpy433PV9UVSf5tob0BsIt1bpVcmeQjSX6c5B3d/U+r
548neWeSv19ygwDc3zq3Sn5QVS/o7v/Z8dJtSZ7Z3f+7zNYA2M1a/zvgLtFOd5/Jrz6wBGCP+L1KAIYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGEa4AYYRboBhhBtgGOEGGGatPyz4objqisPZOnli6WUAHjZccQMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAwwg3wDDCDTCMcAMMI9wAw1R3L7tA1U+T3LnoInvrSJIf7fcmLhBn2VwH6TwH6SzJ3p3nyd19dLcXDu3B4nd297V7sM6eqKqtg3IeZ9lcB+k8B+ksyWacx60SgGGEG2CYvQj3qT1YYy8dpPM4y+Y6SOc5SGdJNuA8i384CcCF5VYJwDDCDfAAquryqnpRVR3Z773cZ9FwV9X7quqrVfUXS66ztKo6VFX/WlVfWn1dtd97erCq6vFVddu2x2NntP0sk2dUVYer6rNVdUtVfaKqHjl8LrudZ+psLkvymSTPSvLFqjq6CbNZLNxV9cokF3f3s5M8paqeutRae+AZST7S3cdXX3fs94YejNU/hB9Kcunq8dgZ7TxLZs/oNUlu7O4XJ/lhkt/P0Lms7DzPDZk7m2ckeXN3/02SzyV5fjZgNktecR9P8tHV97ck+d0F11ra7yR5WVV9ffVf2734waUlnEny6iQ/WT0+nrkz2nmWsTPq7nd39+dXD48meW3mzmW389ybubP5cnd/rap+L2evul+SDZjNkuG+NMndq+//M8njF1xrad9I8sLuflaSRyS5bp/386B090+6+/S2p8bOaJezjJ9RVT07yWVJfpChc9lu23k+n8GzqarK2YuEe5J0NmA2S4b7Z0ketfr+MQuvtbRvd/e/r77fSjLtl66/iRltiKq6PMm7kvxRDsBcdpxn9Gz6rDcl+XaS52QDZrPkorfnV7+MuDrJXQuutbQPV9XVVXVxkpcn+dZ+b+gCMaMNUFWPTPKxJH/W3d/P8Lnscp7Js/nTqvqD1cPHJTmZDZjNkveaPpnktqp6YpKX5uw9yKn+OslNSSrJp7v7C/u8nwvFjDbDG5Jck+StVfXWJB9I8rrBc9l5ni8m+XBmzuZUko9W1R8n+U7O/jtz637PZtGfnFx98v+iJLd29w8XW4gHzYw2k7lsrk2YjR95Bxhm3IceAA93wg0wjHADDCPcAMMIN8AwvwBFlBR5I2o/kQAAAABJRU5ErkJggg==\n", | |
| "text/plain": [ | |
| "<Figure size 432x288 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": { | |
| "needs_background": "light" | |
| }, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "sex_col.plot.barh()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "sex_col.p = sex_col.values/sex_col.values.sum()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "性别:男, 人数:31,占比:0.6595744680851063\n", | |
| "性别:女, 人数:16,占比:0.3404255319148936\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "for i in range(len(sex_col)):\n", | |
| " print('性别:{}, 人数:{},占比:{}'.format(sex_col.index[i], sex_col.values[i], sex_col.p[i]))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "性别分的熵:0.641317327350994\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print('性别分的熵:{}'.format(entropy(sex_col)))" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# 按学院(校),性别,年级三个参数分" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "mix = df.groupby(['院系','性别','在学年级']).count()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div>\n", | |
| "<style scoped>\n", | |
| " .dataframe tbody tr th:only-of-type {\n", | |
| " vertical-align: middle;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe tbody tr th {\n", | |
| " vertical-align: top;\n", | |
| " }\n", | |
| "\n", | |
| " .dataframe thead th {\n", | |
| " text-align: right;\n", | |
| " }\n", | |
| "</style>\n", | |
| "<table border=\"1\" class=\"dataframe\">\n", | |
| " <thead>\n", | |
| " <tr style=\"text-align: right;\">\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th></th>\n", | |
| " <th>姓名</th>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>院系</th>\n", | |
| " <th>性别</th>\n", | |
| " <th>在学年级</th>\n", | |
| " <th></th>\n", | |
| " </tr>\n", | |
| " </thead>\n", | |
| " <tbody>\n", | |
| " <tr>\n", | |
| " <th>中国语言文学系</th>\n", | |
| " <th>男</th>\n", | |
| " <th>2017</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">信息科学技术学院</th>\n", | |
| " <th rowspan=\"2\" valign=\"top\">男</th>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2019</th>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>元培学院</th>\n", | |
| " <th>男</th>\n", | |
| " <th>2019</th>\n", | |
| " <td>4</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">光华管理学院</th>\n", | |
| " <th rowspan=\"2\" valign=\"top\">女</th>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2019</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">其它大学</th>\n", | |
| " <th>女</th>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>男</th>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>医学部教学办</th>\n", | |
| " <th>男</th>\n", | |
| " <th>2019</th>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">历史学系</th>\n", | |
| " <th>女</th>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>男</th>\n", | |
| " <th>2019</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">哲学系</th>\n", | |
| " <th rowspan=\"2\" valign=\"top\">女</th>\n", | |
| " <th>2017</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>城市与环境学院</th>\n", | |
| " <th>男</th>\n", | |
| " <th>2019</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"4\" valign=\"top\">外国语学院</th>\n", | |
| " <th rowspan=\"3\" valign=\"top\">女</th>\n", | |
| " <th>2017</th>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2019</th>\n", | |
| " <td>3</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>男</th>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"3\" valign=\"top\">工学院</th>\n", | |
| " <th>女</th>\n", | |
| " <th>2019</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">男</th>\n", | |
| " <th>2018</th>\n", | |
| " <td>2</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2019</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">心理与认知科学学院</th>\n", | |
| " <th>女</th>\n", | |
| " <th>2017</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>男</th>\n", | |
| " <th>2019</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>政府管理学院</th>\n", | |
| " <th>女</th>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">数学科学学院</th>\n", | |
| " <th rowspan=\"2\" valign=\"top\">男</th>\n", | |
| " <th>2017</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>物理学院</th>\n", | |
| " <th>男</th>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th rowspan=\"2\" valign=\"top\">生命科学学院</th>\n", | |
| " <th rowspan=\"2\" valign=\"top\">男</th>\n", | |
| " <th>2018</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>2019</th>\n", | |
| " <td>6</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>社会学系</th>\n", | |
| " <th>男</th>\n", | |
| " <th>2019</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>经济学院</th>\n", | |
| " <th>男</th>\n", | |
| " <th>2019</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " <tr>\n", | |
| " <th>考古文博学院</th>\n", | |
| " <th>女</th>\n", | |
| " <th>2019</th>\n", | |
| " <td>1</td>\n", | |
| " </tr>\n", | |
| " </tbody>\n", | |
| "</table>\n", | |
| "</div>" | |
| ], | |
| "text/plain": [ | |
| " 姓名\n", | |
| "院系 性别 在学年级 \n", | |
| "中国语言文学系 男 2017 1\n", | |
| "信息科学技术学院 男 2018 1\n", | |
| " 2019 3\n", | |
| "元培学院 男 2019 4\n", | |
| "光华管理学院 女 2018 1\n", | |
| " 2019 1\n", | |
| "其它大学 女 2018 1\n", | |
| " 男 2018 1\n", | |
| "医学部教学办 男 2019 2\n", | |
| "历史学系 女 2018 1\n", | |
| " 男 2019 1\n", | |
| "哲学系 女 2017 1\n", | |
| " 2018 1\n", | |
| "城市与环境学院 男 2019 1\n", | |
| "外国语学院 女 2017 2\n", | |
| " 2018 1\n", | |
| " 2019 3\n", | |
| " 男 2018 1\n", | |
| "工学院 女 2019 1\n", | |
| " 男 2018 2\n", | |
| " 2019 1\n", | |
| "心理与认知科学学院 女 2017 1\n", | |
| " 男 2019 1\n", | |
| "政府管理学院 女 2018 1\n", | |
| "数学科学学院 男 2017 1\n", | |
| " 2018 1\n", | |
| "物理学院 男 2018 1\n", | |
| "生命科学学院 男 2018 1\n", | |
| " 2019 6\n", | |
| "社会学系 男 2019 1\n", | |
| "经济学院 男 2019 1\n", | |
| "考古文博学院 女 2019 1" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "mix" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 21, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "按学院(校),性别,年级三个参数分的熵:[3.2746946]\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "print('按学院(校),性别,年级三个参数分的熵:{}'.format(entropy(mix)))" | |
| ] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": "Python 3", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.7.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 4 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment