Skip to content

Instantly share code, notes, and snippets.

@masatomix
Last active December 15, 2018 12:20
Show Gist options
  • Select an option

  • Save masatomix/a52da25eac533f8718a591be7d420024 to your computer and use it in GitHub Desktop.

Select an option

Save masatomix/a52da25eac533f8718a591be7d420024 to your computer and use it in GitHub Desktop.
Qiitaで数式がつかえるみたいなので三角関数とか内積とか相関係数について書いて、Pythonで計算してみた ref: https://qiita.com/masatomix/items/278a233c39b4e82d0bad
\left(
\begin{array}{c}
\cos\theta\\
\sin\theta
\end{array}
\right)
\boldsymbol{a}\cdot\boldsymbol{b} := \|\boldsymbol{a}\|\|\boldsymbol{b}\|\ \cos\theta
\begin{align}
\cos\theta &= \frac{\boldsymbol{a}\cdot\boldsymbol{b}}{\|\boldsymbol{a}\|\|\boldsymbol{b}\|}\\
&= \frac{\sum_{i=1}^{n} a_ib_i}{\sqrt{\sum_{i=1}^{n} a_i^2} \cdot \sqrt{\sum_{i=1}^{n} b_i^2}}
\end{align}
\boldsymbol{x} =
\left(
\begin{array}{c}
x_1 -m_x\\
x_2 -m_x\\
\vdots\\
x_n -m_x\\
\end{array}
\right)
,
\boldsymbol{y} =
\left(
\begin{array}{c}
y_1 -m_y\\
y_2 -m_y\\
\vdots\\
y_n -m_y\\
\end{array}
\right)
\begin{align}
r_{xy} :&= \frac{\boldsymbol{x}\cdot\boldsymbol{y}}{\|\boldsymbol{x}\|\|\boldsymbol{y}\|}\\
&=\frac{\sum_{i=1}^{n} (x_i -m_x)(y_i -m_y)}{\sqrt{\sum_{i=1}^{n} (x_i -m_x)^2} \cdot \sqrt{\sum_{i=1}^{n} (y_i -m_y)^2}}\\
&= (\boldsymbol{x}と\boldsymbol{y}のなす角\theta の \cos\theta )
\end{align}
m_x:= \frac{1}{n} \sum_{i=1}^{n}x_i \\
m_y:= \frac{1}{n} \sum_{i=1}^{n}y_i
\begin{align}
r_{xy}
&=\frac{\sum_{i=1}^{n} (x_i -m_x)(y_i -m_y)}{\sqrt{\sum_{i=1}^{n} (x_i -m_x)^2} \cdot \sqrt{\sum_{i=1}^{n} (y_i -m_y)^2}}\\
&=\frac{\frac{1}{n}\sum_{i=1}^{n} (x_i -m_x)(y_i -m_y)}{\sqrt{\frac{1}{n}\sum_{i=1}^{n} (x_i -m_x)^2} \cdot \sqrt{\frac{1}{n}\sum_{i=1}^{n} (y_i -m_y)^2}}\\
&=\frac{S_{xy}}{S_xS_y}
\end{align}
\begin{align}
S_{xy}&:=\frac{1}{n}\sum_{i=1}^{n} (x_i -m_x)(y_i -m_y)\\
S_x&:=\sqrt{\frac{1}{n}\sum_{i=1}^{n} (x_i -m_x)^2}\\
S_y&:=\sqrt{\frac{1}{n}\sum_{i=1}^{n} (y_i -m_y)^2}
\end{align}
$ head weight.csv
key,体重,体脂肪率,BMI
2017-12-10,58.5750,14.2425,23.4000
2017-12-11,58.6333,14.1255,23.4333
2017-12-12,58.3250,14.4700,23.3500
2017-12-13,58.4000,14.8275,23.3500
2017-12-14,57.2000,14.1450,22.9000
2017-12-15,57.9500,13.6900,23.2000
2017-12-16,58.0000,14.3400,23.2000
2017-12-17,58.6167,14.0995,23.4333
2017-12-18,58.7500,14.0150,23.4500
.....
$ sw_vers
ProductName: Mac OS X
ProductVersion: 10.14.1 (Mojaveです)
BuildVersion: 18B75
$ python3 -m venv ./venv
$ source ./venv/bin/activate
(venv) $ which python3
/xxx/venv/bin/python3
(venv) $ python3 --version
Python 3.7.1
(venv) $
(venv) $ cat requirement.txt
matplotlib==3.0.2
numpy==1.15.4
pandas==0.23.4
PyQt5==5.11.3
(venv) $ pip install -r requirement.txt
... 割愛
(venv) $ cat weight.py
\begin{align}
\boldsymbol{a}\cdot\boldsymbol{b} &= \|\boldsymbol{a}\|\|\boldsymbol{b}\|\ \cos\theta\\
&=\|\boldsymbol{a}\|\times( \|\boldsymbol{b}\|\ \cos\theta)\\
\end{align}
# !/usr/bin/env python
# -*- coding: utf-8 -*-
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sys
def main(args):
# データ読み込み
df = pd.read_csv('./weight.csv', index_col='key')
print(df.head(10))
print(df.shape)
# データ列取り出し
data1 = df['体重']
data2 = df['体脂肪率']
data3 = df['BMI']
# 平均
mean1 = np.mean(data1) # mx
mean2 = np.mean(data2) # my
# 分散と、共分散
v1 = np.var(data1, ddof=0) # 1/n * Σ (X-mx)^2
v2 = np.var(data2, ddof=0) # 1/n * Σ (Y-my)^2
cov = np.cov(data1, data2, ddof=0)[1, 0] # 1/n * Σ (X-mx)*(Y-my)
# 共分散は共分散行列で返ってくるので、[1,0],[0,1] をとればいい
# 標準偏差
std_dev1 = np.sqrt(v1)
std_dev2 = np.sqrt(v2)
# 相関係数
cor = np.corrcoef(data1, data2)[1, 0]
cor2 = cov / (std_dev1 * std_dev2) # もしくは計算で。
print('--- 体重と体脂肪率 ----')
print(f'  平均: {mean1:.3f}, {mean2:.3f}')
print(f'標準偏差: {std_dev1:.3f}, {std_dev2:.3f}')
print(f'  分散: {v1:.3f}, {v2:.3f}')
print(f' 共分散: {cov:.10f}')
print(f'相関係数: {cor:.10f}')
print(f'相関係数: {cor2:.10f}')
print('-------')
# ちなみに体重とBMIの相関係数
cor3 = np.corrcoef(data1, data3)[1, 0]
print(f'体重とBMIの相関係数: {cor3:.10f}')
plot(df)
def plot(df):
xorg = df['体重']
yorg = df['BMI']
mx = np.mean(xorg)
my = np.mean(yorg)
fig = plt.figure()
ax1 = fig.add_subplot(2, 1, 1)
ax2 = fig.add_subplot(2, 1, 2)
ax1.scatter(xorg, yorg, label='体重とBMI', s=5)
ax1.scatter(mx, my, label='平均')
ax1.set_xlabel('体重')
ax1.set_ylabel('BMI')
ax1.grid(True) # グリッド線
plot_poly_1d(ax1, xorg, yorg)
ax1.legend(loc='upper left')
yorg2 = df['体脂肪率']
my2 = np.mean(yorg2)
ax2.scatter(xorg, yorg2, label='体重と体脂肪率', s=5)
ax2.scatter(mx, my2, label='平均')
ax2.set_xlabel('体重')
ax2.set_ylabel('体脂肪率')
ax2.grid(True) # グリッド線
plot_poly_1d(ax2, xorg, yorg2)
ax2.legend(loc='lower left')
plt.tight_layout() # タイトルの被りを防ぐ
# グラフに情報を表示
plt.show()
# 線形回帰直線
def plot_poly_1d(ax, xorg, yorg):
# y = ax + b のa ,b を配列で返す
poly_fit = np.polyfit(xorg, yorg, 1)
# 関数を作成
poly_1d = np.poly1d(poly_fit)
xs = np.linspace(xorg.min(), xorg.max())
ys = poly_1d(xs)
ax.plot(xs, ys, label=f'{poly_fit[1]:.2f}+{poly_fit[0]:.2f}x')
if __name__ == "__main__":
main(sys.argv)
(venv) $ python3 weight.py
体重 体脂肪率 BMI
key
2017-12-10 58.5750 14.2425 23.4000
2017-12-11 58.6333 14.1255 23.4333
2017-12-12 58.3250 14.4700 23.3500
2017-12-13 58.4000 14.8275 23.3500
2017-12-14 57.2000 14.1450 22.9000
2017-12-15 57.9500 13.6900 23.2000
2017-12-16 58.0000 14.3400 23.2000
2017-12-17 58.6167 14.0995 23.4333
2017-12-18 58.7500 14.0150 23.4500
2017-12-19 58.5875 14.2555 23.4250
(356, 3)
--- 体重と体脂肪率 ----
  平均: 59.935, 14.514
標準偏差: 0.835, 0.478
  分散: 0.698, 0.229
 共分散: -0.0193105736
相関係数: -0.0483200035
相関係数: -0.0483200035
-------
体重とBMIの相関係数: 0.9980663360
\boldsymbol{a}\cdot(\boldsymbol{b}+\boldsymbol{c}) =
\boldsymbol{a}\cdot\boldsymbol{b}+\boldsymbol{a}\cdot\boldsymbol{c}
\begin{align}
\boldsymbol{a}\cdot(\boldsymbol{b}+\boldsymbol{c}) &=\boldsymbol{a}\cdot\boldsymbol{d}\\
&=\|\boldsymbol{a}\|\times( \boldsymbol{d}を\boldsymbol{a}へ射影したときの長さ)\\
&=\|\boldsymbol{a}\|\times( \boldsymbol{b}を\boldsymbol{a}へ射影したときの長さ +\boldsymbol{c}を\boldsymbol{a}へ射影したときの長さ )\\
&=\|\boldsymbol{a}\|\times( \boldsymbol{b}を\boldsymbol{a}へ射影したときの長さ) +\|\boldsymbol{a}\|\times(\boldsymbol{c}を\boldsymbol{a}へ射影したときの長さ )\\
&=\boldsymbol{a}\cdot\boldsymbol{b}+\boldsymbol{a}\cdot\boldsymbol{c}\\
(Q.E.D.)
\end{align}
\boldsymbol{a} =
\left(
\begin{array}{c}
a_1\\
a_2
\end{array}
\right)
,
\boldsymbol{b} =
\left(
\begin{array}{c}
b_1\\
b_2
\end{array}
\right)
\boldsymbol{e_1} =
\left(
\begin{array}{c}
1\\
0
\end{array}
\right)
,
\boldsymbol{e_2} =
\left(
\begin{array}{c}
0\\
1
\end{array}
\right)
\begin{align}
\boldsymbol{a}\cdot\boldsymbol{b} &=(a_1\boldsymbol{e_1}+a_2\boldsymbol{e_2})\cdot(b_1\boldsymbol{e_1} +b_2\boldsymbol{e_2})\\
&=a_1 b_1 \boldsymbol{e_1}\cdot\boldsymbol{e_1}+a_2b_2\boldsymbol{e_2}\cdot\boldsymbol{e_2}\\
&\qquad + a_1 b_2 \boldsymbol{e_1}\cdot\boldsymbol{e_2}+a_2 b_1 \boldsymbol{e_2}\cdot\boldsymbol{e_1}\\
&= a_1b_1 + a_2b_2\\
&=\|\boldsymbol{a}\|\|\boldsymbol{b}\|\ \cos\theta
\end{align}
\boldsymbol{a} =
\left(
\begin{array}{c}
a_1\\
a_2\\
\vdots\\
a_n\\
\end{array}
\right)
,
\boldsymbol{b} =
\left(
\begin{array}{c}
b_1\\
b_2\\
\vdots\\
b_n\\
\end{array}
\right)
\begin{align}
\boldsymbol{a}\cdot\boldsymbol{b} &=\|\boldsymbol{a}\|\|\boldsymbol{b}\|\ \cos\theta\\
&= a_1b_1 + a_2b_2 ... +a_nb_n\\
&= \sum_{i=1}^{n} a_ib_i
\end{align}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment