Skip to content

Instantly share code, notes, and snippets.

@Jackzmc
Created November 26, 2024 16:32
Show Gist options
  • Save Jackzmc/717549762e5d7d6714c03bca593f46ad to your computer and use it in GitHub Desktop.
Save Jackzmc/717549762e5d7d6714c03bca593f46ad to your computer and use it in GitHub Desktop.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle as pk
import multiprocessing
import time
# Scale applied to the previous reservoir output before it is reused as the
# starting state for the next input column (see the main loop).
MemoryValue = 0.55
start_time = time.time()
# Reservoir weight matrix; its column labels define the gene order used below.
reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0)
# Expression table; columns are later selected by the names in GeneLegend.
transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0)
# Image table read without a header row; the first CSV column becomes the index.
imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv', header=None, index_col=0)
# Elapsed wall-clock seconds for the three CSV loads.
print(time.time() - start_time)
print(imageData.head())
print(transcriptomicsDF.head())
# exit()
# Ordered gene IDs (column labels of the weight matrix) and the raw weights.
GeneLegend = list(reservoir_weightsDF.columns)
WeightMatrix = reservoir_weightsDF.values
# print(GeneLegend)
# Expression columns restricted to, and ordered like, GeneLegend.
TranscriptomicData = transcriptomicsDF[GeneLegend]
# First row of the expression table, used as the initial reservoir state.
InitialMemory = list(TranscriptomicData.iloc[0])
print("InitialMemory", InitialMemory)
def getMaxExpression(TranscriptomicDF, GeneID):
    """Return the maximum value found in column ``GeneID`` of the table.

    Args:
        TranscriptomicDF: pandas DataFrame indexed by column label.
        GeneID: label of the column to inspect.

    Returns:
        The result of ``.max()`` on that column.

    Raises:
        KeyError: if ``GeneID`` is not a column of ``TranscriptomicDF``.
    """
    return TranscriptomicDF[GeneID].max()
def worker(a, b, result, row):
    """Compute one row of the matrix product ``a @ b`` in place.

    Args:
        a: left operand, indexable as ``a[row][k]``.
        b: right operand, indexable as ``b[k][j]``.
        result: output container, written as ``result[row][j]``.
        row: index of the row of ``a`` (and of ``result``) to compute.

    Returns:
        None. ``result[row]`` is overwritten with the dot products.
    """
    inner = len(b)
    # An empty right operand has no columns, so there is nothing to write.
    n = len(b[0]) if inner else 0
    a_row = a[row]
    out_row = result[row]
    for j in range(n):
        out_row[j] = sum(a_row[k] * b[k][j] for k in range(inner))
def worker_multi(a, b, result, start_index, end_index):
    """Compute rows ``start_index`` .. ``end_index - 1`` of ``a @ b`` in place.

    Args:
        a: left operand, indexable as ``a[row][k]``.
        b: right operand, indexable as ``b[k][j]``.
        result: output container, written as ``result[row][j]``.
        start_index: first row to compute (inclusive).
        end_index: row at which to stop (exclusive).

    Returns:
        None. The selected rows of ``result`` are overwritten.
    """
    inner = len(b)
    # An empty right operand has no columns, so there is nothing to write.
    n = len(b[0]) if inner else 0
    for row in range(start_index, end_index):
        a_row = a[row]
        out_row = result[row]
        for j in range(n):
            out_row[j] = sum(a_row[k] * b[k][j] for k in range(inner))
def rc_output(rc_weights, input_array):
    """Return the rectified product ``max(rc_weights @ input_array, 0)``.

    Args:
        rc_weights: 2-D weight matrix (array-like).
        input_array: vector or matrix compatible with ``rc_weights``.

    Returns:
        numpy array holding the product with negative entries clipped to 0.
    """
    # np.dot accepts plain sequences as well as ndarrays, unlike the .dot method.
    return np.maximum(np.dot(rc_weights, input_array), 0)
# Target number of worker processes for the parallel reservoir update.
NUM_PROCESSES=4
def split(a, n):
    """Lazily split ``a`` into ``n`` contiguous slices of near-equal length.

    The first ``len(a) % n`` slices are one element longer than the rest, so
    slice lengths differ by at most one.

    Args:
        a: sliceable sequence.
        n: number of slices to produce.

    Returns:
        A generator yielding the ``n`` slices in order.

    Raises:
        ZeroDivisionError: if ``n`` is 0.
    """
    base, extra = divmod(len(a), n)
    # Slice i starts at bounds[i] and ends where slice i + 1 begins.
    bounds = [i * base + min(i, extra) for i in range(n + 1)]
    return (a[lo:hi] for lo, hi in zip(bounds, bounds[1:]))
def _rc_fill_rows(shared, shape, weights, columns, start_index, end_index):
    """Worker: store rows ``start_index:end_index`` of ``weights @ columns``.

    The shared Array itself is passed (not a NumPy view of it) so the write
    reaches the parent under every multiprocessing start method.
    """
    out = np.frombuffer(shared.get_obj()).reshape(shape)
    out[start_index:end_index] = np.dot(weights[start_index:end_index], columns)


def parallel_rc_output(rc_weights, input_array, num_processes=None):
    """Return ``max(rc_weights @ input_array, 0)`` computed by worker processes.

    The rows of the product are divided into contiguous chunks, one per
    worker, and written into a shared buffer.

    Args:
        rc_weights: 2-D weight matrix (array-like).
        input_array: 1-D vector or 2-D matrix compatible with ``rc_weights``.
        num_processes: number of worker processes; defaults to the
            module-level ``NUM_PROCESSES``. With a single worker (or an
            empty result) the product is computed in the calling process.

    Returns:
        numpy array with negative entries clipped to 0. A 1-D input gives a
        1-D result; a 2-D input gives a ``(rows, columns)`` result.

    Raises:
        RuntimeError: if a worker process exits with a non-zero code.
    """
    weights = np.asarray(rc_weights, dtype=float)
    inputs = np.asarray(input_array, dtype=float)
    is_vector = inputs.ndim == 1
    # Treat a vector as a single column so both cases share one code path.
    columns = inputs.reshape(-1, 1) if is_vector else inputs
    shape = (weights.shape[0], columns.shape[1])
    rows = shape[0]

    if num_processes is None:
        num_processes = NUM_PROCESSES
    n_workers = max(1, min(int(num_processes), rows))

    if n_workers == 1 or rows * shape[1] == 0:
        # Starting a process for a single chunk would only add overhead.
        product = np.dot(weights, columns)
    else:
        shared = multiprocessing.Array('d', rows * shape[1])
        base, extra = divmod(rows, n_workers)
        processes = []
        start = 0
        for w in range(n_workers):
            # The first `extra` workers take one additional row each.
            stop = start + base + (1 if w < extra else 0)
            processes.append(multiprocessing.Process(
                target=_rc_fill_rows,
                args=(shared, shape, weights, columns, start, stop),
            ))
            start = stop
        for p in processes:
            p.start()
        for p in processes:
            p.join()
        failed = [p.exitcode for p in processes if p.exitcode != 0]
        if failed:
            # Without this check a crashed worker would silently leave zeros.
            raise RuntimeError(
                "parallel_rc_output: worker exit codes %r" % (failed,))
        product = np.frombuffer(shared.get_obj()).reshape(shape)

    activated = np.maximum(product, 0)
    return activated[:, 0] if is_vector else activated
def input_padding(input_geneID, input_value, GeneLegend, initMem):
    """Build a reservoir input vector from a state and a set of driven genes.

    A float copy of ``initMem`` is made (the argument itself is not
    modified), then the entry of each gene in ``input_geneID`` is replaced by
    its input value scaled by that gene's maximum expression, as returned by
    ``getMaxExpression`` on the module-level ``TranscriptomicData``.

    Args:
        input_geneID: sequence of gene IDs to overwrite.
        input_value: values indexed in parallel with ``input_geneID``; only
            the first ``len(input_geneID)`` entries are read.
        GeneLegend: list of gene IDs giving each gene's position in the vector.
        initMem: starting state, one value per entry of ``GeneLegend``.

    Returns:
        1-D float numpy array with the selected entries replaced.

    Raises:
        ValueError: if a gene ID is not present in ``GeneLegend``.
    """
    input_matrix = np.array(initMem, dtype=float)
    for pos, gene in enumerate(input_geneID):
        scale = getMaxExpression(TranscriptomicData, gene)
        input_matrix[GeneLegend.index(gene)] = input_value[pos] * scale
    return input_matrix
# parallel_rc_output starts child processes. Under the "spawn" and
# "forkserver" start methods each child re-imports this module, so the driver
# code must sit behind a __main__ guard or every child would rerun it.
if __name__ == "__main__":
    # Pick 15 random genes from GeneLegend to receive the image values.
    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
    print("InputGene", InputGenes)
    # exit()
    output_matrix = []
    # Only the first image is processed; the full run is kept for reference.
    for i in range(0, 1):
    # for i in range(0, len(imageData)):
        imgTemp = imageData.iloc[i].values
        # normalize the image
        # imgTemp = imgTemp / 255
        # The flat row is reshaped into a square image, so its length must be
        # a perfect square or reshape raises ValueError.
        imgDims = np.sqrt(len(imgTemp)).astype(int)
        img = imgTemp.reshape(imgDims, imgDims)
        print(img)
        # The first column starts from the initial state; each later column
        # starts from the previous output scaled by MemoryValue.
        memory = InitialMemory
        for j in range(0, imgDims):
            valueArray = img[:, j]
            print(valueArray)
            input_array = input_padding(InputGenes, valueArray, GeneLegend, memory)
            Output = parallel_rc_output(WeightMatrix, input_array)
            memory = Output * MemoryValue
        # NOTE(review): one state (after the last column) is stored per image;
        # confirm this matches the intended nesting of the append.
        output_matrix.append(Output)
    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
        pk.dump(output_matrix, f)
    #to csv
    #output_matrixDF = pd.DataFrame(output_matrix)
    #add column names
    #output_matrixDF.columns = GeneLegend
    #print(output_matrixDF.head())
    #output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment