Created
January 1, 2017 14:10
-
-
Save jwlin/a52feae3d1d9d4032bdaf412ff0488eb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import math | |
from collections import Counter | |
from matplotlib import pyplot as plt | |
def mean(x):
    """Return the arithmetic mean of the values in ``x``.

    ``x`` must support both iteration and ``len()``. An empty ``x`` raises
    ``ZeroDivisionError`` because the total is divided by a length of zero.
    """
    total = sum(x)
    count = len(x)
    return total / count
def de_mean(x):
    """Return a new list holding each value of ``x`` minus the mean of ``x``.

    The returned values therefore have a mean of zero (up to floating-point
    rounding). ``x`` itself is not modified.
    """
    centre = mean(x)
    return [value - centre for value in x]
def variance(x):
    """Return the population variance of ``x``.

    This is the average squared deviation from the mean; the divisor is
    ``len(x)``, not ``len(x) - 1``.
    """
    squared_total = 0
    # Accumulate one term at a time so the floating-point rounding order is
    # exactly that of a plain running sum.
    for deviation in de_mean(x):
        squared_total += deviation ** 2
    return squared_total / len(x)
def dot(x, y):
    """Return the sum of the pairwise products of ``x`` and ``y`` divided by ``len(x)``.

    Note that, despite the name, this is the *averaged* dot product. When it
    is applied to two de-meaned sequences the result is their population
    covariance.

    Pairs are formed with ``zip``, so extra trailing elements of the longer
    argument are ignored, while the divisor is always ``len(x)``.
    """
    products = (left * right for left, right in zip(x, y))
    return sum(products) / len(x)
def correlation(x, y):
    """Return the Pearson correlation coefficient of ``x`` and ``y``.

    The coefficient is the population covariance of the two sequences
    divided by the product of their population standard deviations.

    Args:
        x: sequence of numbers.
        y: sequence of numbers with the same length as ``x``.

    Returns:
        The correlation coefficient as a float.

    Raises:
        ValueError: if ``x`` and ``y`` have different lengths.
        ZeroDivisionError: if the inputs are empty, or if either input has
            zero variance (the coefficient is undefined in that case).
    """
    # Without this check the covariance would be computed over the truncated
    # pairs while the variances use the full sequences, giving a meaningless
    # number instead of an error.
    if len(x) != len(y):
        raise ValueError(
            'correlation requires inputs of equal length, got %d and %d'
            % (len(x), len(y))
        )

    sd_x = math.sqrt(variance(x))
    sd_y = math.sqrt(variance(y))
    if sd_x == 0 or sd_y == 0:
        # Same exception type the bare division would raise, but with a
        # message that names the actual cause.
        raise ZeroDivisionError(
            'correlation is undefined when an input has zero variance'
        )

    covariance = dot(de_mean(x), de_mean(y))
    return covariance / (sd_x * sd_y)
def decile(num):
    """Return ``num`` rounded down to a multiple of 10 (e.g. 32 -> 30).

    Rounding uses floor division, so negative values move away from zero
    (``-1`` becomes ``-10``).
    """
    tens, _remainder = divmod(num, 10)
    return tens * 10
if __name__ == '__main__':
    # Demo: summarise and plot the relationship between the number of images
    # and the number of pushes, using two parallel lists of sample counts.
    #
    # The sample data is hard-coded below. To analyse a JSON dump instead,
    # build the two lists from objects carrying 'num_image' and 'push_count':
    #
    #     with open('example.json', 'r', encoding='utf-8') as f:
    #         data_list = json.load(f)
    #     images = [d['num_image'] for d in data_list]
    #     pushes = [d['push_count'] for d in data_list]
    images = [3, 7, 1, 12, 9, 1, 2, 13, 0, 5, 27, 5, 1, 8, 0, 1, 14, 2, 3, 2, 1, 25, 3, 14, 27, 2]
    pushes = [18, 20, 0, 0, 3, 6, 2, 12, 1, 13, 11, 5, 0, 20, 1, 7, 6, 2, 2, 0, 0, 32, 10, 13, 9, 2]

    # Summary statistics.
    print('圖片數:', images, 'Max:', max(images), 'Min:', min(images))
    print('推文數:', pushes, 'Max:', max(pushes), 'Min:', min(pushes))
    print('平均圖片數:', mean(images), '平均推文數:', mean(pushes))
    print('相關係數:', correlation(images, pushes))

    # Histogram: how many push counts fall into each bucket of ten.
    histogram = Counter(decile(push) for push in pushes)
    print(histogram)

    # Histogram plot. Each bar is 8 wide and centred on its decile so that it
    # lines up with the x ticks at 0, 10, 20, 30 and stays inside the axis
    # limits. The alignment is passed explicitly because matplotlib's default
    # changed from 'edge' to 'center' in 2.0; relying on the default (with a
    # manual -4 shift) misplaces the bars on current versions.
    bar_centres = sorted(histogram)
    bar_heights = [histogram[centre] for centre in bar_centres]
    plt.figure(1)
    plt.bar(bar_centres, bar_heights, 8, align='center')
    plt.axis([-5, 35, 0, 20])
    plt.title('Pushes')
    plt.xlabel('# of pushes')
    plt.ylabel('# of posts')
    plt.xticks([10 * i for i in range(4)])

    # Scatter plot of image count against push count.
    plt.figure(2)
    plt.scatter(images, pushes)
    plt.title('# of image v.s. push')
    plt.xlabel('# of image')
    plt.ylabel('# of push')
    plt.axis('equal')  # same scale on both axes

    plt.show()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment