Skip to content

Instantly share code, notes, and snippets.

@matsuken92
Last active August 29, 2015 14:15
Show Gist options
  • Save matsuken92/156f6a16bddf6f687589 to your computer and use it in GitHub Desktop.
Save matsuken92/156f6a16bddf6f687589 to your computer and use it in GitHub Desktop.
手書き数字をpythonでもてあそぶ その1 ref: http://qiita.com/kenmatsu4/items/79905dadb07b69f182a1
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
size = 28  # each digit image is size x size pixels

# Load the digit table.  skiprows=1 skips the first line of the file
# (presumably a header row).  ndmin=2 keeps the result two-dimensional even
# when the file holds a single data row, so row-wise iteration always works.
raw_data = np.loadtxt('train_small.csv', delimiter=',', skiprows=1, ndmin=2)

# Pair each row's first column (used as the label / sort key) with the
# following size*size columns (the flattened pixel vector).
digit_data = [(row[0], row[1:size * size + 1]) for row in raw_data]
digit_data.sort(key=lambda x: x[0])  # sort array by label
# draw digit images
plt.figure(figsize=(15, 15))

# The coordinate grid is identical for every digit, so build it once.
X, Y = np.meshgrid(range(size), range(size))

for i, (_label, pixels) in enumerate(digit_data):
    Z = pixels.reshape(size, size)  # convert from vector to size x size matrix
    # pcolor puts row 0 at y=0 (the bottom of the axes), so flip the rows to
    # show the digit upright.
    Z = Z[::-1, :]
    plt.subplot(10, 20, i + 1)  # layout 200 cells
    plt.xlim(0, size - 1)
    plt.ylim(0, size - 1)
    # Select the gray colormap directly instead of switching the global
    # colormap (flag, then gray) after every draw.
    plt.pcolor(X, Y, Z, cmap='gray')
    # Booleans are required here: the string "off" is truthy on current
    # matplotlib and would leave the tick labels visible.
    plt.tick_params(labelbottom=False, labelleft=False)
plt.show()
# convert list to ndarray
# Each entry of digit_data is a (label, pixel_vector) pair; only the pixel
# vectors are needed here, one per row of the matrix.
data_mat = [pixels for _, pixels in digit_data]
A = np.array(data_mat)
# np.corrcoef treats each row as one variable, so Z[i, j] is the correlation
# between the pixel vectors of image i and image j.
Z = np.corrcoef(A)  # generate correlation matrix
# Plot the image-by-image correlation matrix, binarised at `thresh`.
area_size = len(digit_data)
X, Y = np.meshgrid(range(area_size), range(area_size))

# Open a fresh figure directly.  A plt.clf() beforehand would either wipe the
# previously drawn figure or create an extra empty one.
plt.figure(figsize=(10, 10))
plt.xlim(0, area_size - 1)
plt.ylim(0, area_size - 1)
plt.title("Correlation matrix of digit charcter vector. (corr>0.5)")

thresh = .5
# 1.0 where the correlation exceeds the threshold, 0.0 elsewhere.  A single
# comparison stays correct for any threshold, unlike overwriting in two steps.
Z1 = (Z > thresh).astype(float)

# Pass the colormap by name; cm.get_cmap() no longer exists in recent
# matplotlib releases.
plt.pcolor(X, Y, Z1, cmap='Blues', alpha=0.6)

# One tick every 20 rows/columns, labelled 0-9 (assumes the label-sorted data
# holds 20 samples per digit - TODO confirm against the input file).
tick_positions = [i * 20 for i in range(10)]
plt.xticks(tick_positions, range(10))
plt.yticks(tick_positions, range(10))
plt.grid(color='deeppink', linestyle='--')
plt.show()
# Average the correlation matrix Z over 20x20 blocks, giving a 10x10 summary
# (assumes Z is ordered by digit with 20 samples per digit - TODO confirm).
summary_Z = np.zeros((10, 10))
for i in range(10):
    i1 = i * 20
    for j in range(10):
        j1 = j * 20
        block = Z[i1:i1 + 20, j1:j1 + 20]
        if i == j:
            # The diagonal entries are always 1, so leave them out of the
            # average to keep it from being inflated: subtract the 20 ones
            # and divide by the remaining 20*20 - 20 = 380 entries.
            summary_Z[i, j] = (np.sum(block) - 20) / 380
        else:
            summary_Z[i, j] = np.sum(block) / 400
# average of each digit's grid
# Open a fresh figure directly.  A plt.clf() beforehand would either wipe the
# previously drawn figure or create an extra empty one.
plt.figure(figsize=(10, 10))
plt.xlim(0, 10)
plt.ylim(0, 10)
# 11 x 11 grid of cell corners for the 10 x 10 summary matrix.
sX, sY = np.meshgrid(range(11), range(11))
plt.title("Correlation matrix of summuation of each digit's cell")
plt.xticks(range(10), range(10))
plt.yticks(range(10), range(10))
# Pass the colormap by name; cm.get_cmap() no longer exists in recent
# matplotlib releases.
plt.pcolor(sX, sY, summary_Z, cmap='Blues', alpha=0.6)
plt.show()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment