# Source: GitHub gist nshelton/c709887375633f2d50aaf41fb068890a
# Author: @nshelton (created May 12, 2019 22:53)
import numpy as np
from sklearn.manifold import TSNE
import glob
import os
import sys
from numpy import genfromtxt
import matplotlib.pyplot as plt
# Module-level state shared by processData and the directory walk.
numFilesRead = 0  # number of feature files loaded so far
# Placeholder array; `data` is rebound to the stacked feature rows before
# the embedding is computed. (144 is presumably the feature count per
# file -- TODO confirm.)
data = np.zeros(shape=(144, 1))
# Parsed per-file arrays and the paths they came from, kept index-aligned.
dataRows, dataFiles = [], []
def processData(fullPath):
    """Load one comma-separated feature file and record it for the embedding.

    The parsed array is appended to the module-level ``dataRows`` list and
    ``fullPath`` to ``dataFiles`` so the two lists stay index-aligned; the
    module-level counter ``numFilesRead`` is then incremented.

    Args:
        fullPath: Path of the file to parse with ``numpy.genfromtxt``
            (``delimiter=','``).

    Returns:
        None. Once ``numFilesRead`` exceeds 10000 the call is a no-op and
        the file is not opened.
    """
    global numFilesRead

    # Cap on how many files get loaded; calls past the cap are ignored.
    if numFilesRead > 10000:
        return

    dataRows.append(genfromtxt(fullPath, delimiter=','))
    # The full path (not just the base name) is kept as the row label.
    dataFiles.append(fullPath)
    numFilesRead += 1
# Walk the current directory tree and load every ``*.features`` file.
for root, _dirs, files in os.walk("."):
    # Stop visiting further directories once the file cap has been passed.
    if numFilesRead > 10000:
        break
    for fileName in files:
        if fileName.endswith(".features"):
            processData(os.path.join(root, fileName))

# np.vstack raises an opaque ValueError on an empty list, so fail clearly.
if not dataRows:
    raise SystemExit("no .features files found under the current directory")

# Stack the per-file arrays into a single matrix for t-SNE.
# NOTE(review): assumes every file parses to one row of the same length,
# so that row i of `data` corresponds to dataFiles[i] -- confirm.
data = np.vstack(dataRows)
print(data)

# Reduce the feature matrix to two dimensions.
X_embedded = TSNE(n_components=2).fit_transform(data)

# Emit one "path,x,y" line per embedded row.
for i, (x, y) in enumerate(X_embedded):
    print(",".join((dataFiles[i], str(x), str(y))))
# [GitHub page footer, not part of the script] Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.