Jackzmc · November 26, 2024 16:32
diff --git a/MNISTGRNN_multi.py b/MNISTGRNN_multi.py
 import numpy as np
 import pandas as pd
 import matplotlib.pyplot as plt
 import pickle as pk
 import multiprocessing
 import time

 # Factor applied to the previous reservoir output before it is reused as the
 # starting state for the next image column (see the main loop further down).
 MemoryValue = 0.55

 # Load the three input tables and report how long the loading took.
 start_time = time.time()
 reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0)
 transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0)
 imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv', header=None, index_col=0)
 print(time.time() - start_time)
 print(imageData.head())

 print(transcriptomicsDF.head())
 # exit()
 # Column labels of the weight table; they define the gene ordering that the
 # rest of the script uses to index vectors.
 GeneLegend = list(reservoir_weightsDF.columns)

 # Plain ndarray view of the weights, used for the reservoir update.
 WeightMatrix = reservoir_weightsDF.values
 # print(GeneLegend)

 # Select the transcriptomic columns in the same order as GeneLegend.
 TranscriptomicData = transcriptomicsDF[GeneLegend]

 # The first transcriptomic row serves as the initial reservoir state.
 InitialMemory = list(TranscriptomicData.iloc[0])
 print("InitialMemory", InitialMemory)



 def getMaxExpression(TranscriptomicDF, GeneID):
    """Return the largest value in column ``GeneID`` of ``TranscriptomicDF``."""
    gene_column = TranscriptomicDF[GeneID]
    return gene_column.max()

 def worker(a, b, result, row):
    """Store row ``row`` of the matrix product ``a @ b`` into ``result[row]``.

    ``a``, ``b`` and ``result`` are indexed as ``x[i][j]``; ``result`` is
    modified in place and nothing is returned.
    """
    n_cols = len(b[0])
    n_inner = len(b)
    for col in range(n_cols):
        result[row][col] = sum(a[row][k] * b[k][col] for k in range(n_inner))

 def worker_multi(a, b, result, start_index, end_index):
    """Store rows ``start_index`` .. ``end_index - 1`` of ``a @ b`` in ``result``.

    ``a``, ``b`` and ``result`` are indexed as ``x[i][j]``; ``result`` is
    modified in place and nothing is returned.
    """
    n_cols = len(b[0])
    for r in range(start_index, end_index):
        for col in range(n_cols):
            result[r][col] = sum(a[r][k] * b[k][col] for k in range(len(b)))

 def rc_output(rc_weights, input_array):
    """Return ``rc_weights.dot(input_array)`` with negative entries set to 0."""
    pre_activation = rc_weights.dot(input_array)
    return np.maximum(pre_activation, 0)

 # Worker-process count for the parallel reservoir update (presumably meant as
 # the cap on concurrently running workers -- confirm against parallel_rc_output).
 NUM_PROCESSES=4

 def split(a, n):
    """Lazily yield ``n`` consecutive slices of ``a`` of near-equal length.

    The first ``len(a) % n`` slices carry one extra element. A generator is
    returned; ``n == 0`` raises ``ZeroDivisionError`` at call time.
    """
    size, extra = divmod(len(a), n)
    bounds = (
        (idx * size + min(idx, extra), (idx + 1) * size + min(idx + 1, extra))
        for idx in range(n)
    )
    return (a[lo:hi] for lo, hi in bounds)

 def _rc_rows_worker(shared, shape, a, b, start_index, end_index):
    """Write rows ``[start_index, end_index)`` of ``a @ b`` into ``shared``.

    The numpy view is rebuilt from the shared ctypes array inside the child
    process, so the write goes to the shared buffer rather than to a pickled
    copy of a parent-side array.
    """
    out = np.frombuffer(shared.get_obj()).reshape(shape)
    out[start_index:end_index] = np.dot(a[start_index:end_index], b)


 def parallel_rc_output(rc_weights, input_array, num_processes=None):
    """Compute ``max(rc_weights @ input_array, 0)`` using worker processes.

    Parameters:
        rc_weights: 2-D weight matrix (anything ``np.asarray`` accepts).
        input_array: 1-D state vector or 2-D matrix whose first dimension
            equals the number of columns of ``rc_weights``.
        num_processes: number of worker processes; defaults to the
            module-level ``NUM_PROCESSES``. Never more workers than rows
            are started.

    Returns:
        A new float array: shape ``(rows,)`` for a 1-D input (same as
        ``rc_output``), shape ``(rows, cols)`` for a 2-D input.

    Raises:
        ValueError: if the inner dimensions do not match.
        RuntimeError: if a worker process exits with a non-zero code.
    """
    # Debug trace kept from the original implementation.
    print(input_array[0])

    a = np.asarray(rc_weights, dtype=float)
    b = np.asarray(input_array, dtype=float)

    # The main loop passes a 1-D state vector; treat it as a single column
    # and flatten the result again before returning.
    is_vector = b.ndim == 1
    if is_vector:
        b = b.reshape(-1, 1)

    if a.shape[1] != b.shape[0]:
        raise ValueError(
            "shape mismatch: rc_weights is %r, input_array is %r"
            % (a.shape, np.shape(input_array))
        )

    rows, cols = a.shape[0], b.shape[1]
    if rows == 0 or cols == 0:
        empty = np.zeros((rows, cols))
        return empty[:, 0] if is_vector else empty

    if num_processes is None:
        num_processes = NUM_PROCESSES
    n_workers = max(1, min(int(num_processes), rows))

    result = multiprocessing.Array('d', rows * cols)

    # Hand each worker one contiguous band of rows; the first ``extra``
    # bands get one additional row so every row is covered exactly once.
    band, extra = divmod(rows, n_workers)
    processes = []
    start_index = 0
    for w in range(n_workers):
        end_index = start_index + band + (1 if w < extra else 0)
        p = multiprocessing.Process(
            target=_rc_rows_worker,
            args=(result, (rows, cols), a, b, start_index, end_index),
        )
        processes.append(p)
        p.start()
        start_index = end_index

    for p in processes:
        p.join()
    failed = [p.exitcode for p in processes if p.exitcode != 0]
    if failed:
        raise RuntimeError("worker process(es) failed with exit codes %r" % failed)

    result_np = np.frombuffer(result.get_obj()).reshape((rows, cols))
    # np.maximum allocates a fresh array, so the caller never holds a view
    # into the shared buffer.
    activated = np.maximum(result_np, 0)
    return activated[:, 0] if is_vector else activated

 def input_padding(input_geneID, input_value, GeneLegend, initMem):
    """Build a state vector from ``initMem`` with the input genes overwritten.

    ``initMem`` is copied into a float array. For the i-th entry of
    ``input_geneID``, the slot at that gene's position in ``GeneLegend`` is
    set to ``input_value[i]`` times the gene's maximum in the module-level
    ``TranscriptomicData``. The new array is returned.
    """
    padded = np.array(initMem, dtype=float)

    for pos, gene in enumerate(input_geneID):
        pixel = input_value[pos]
        peak = getMaxExpression(TranscriptomicData, gene)
        slot = GeneLegend.index(gene)
        padded[slot] = pixel * peak

    return padded


 # Run the pipeline only when executed as a script. multiprocessing's
 # spawn/forkserver start methods re-import this module in every child
 # process; without this guard each child would re-run the loop below and
 # try to start worker processes of its own.
 if __name__ == "__main__":
    #pick 15 random genes from GeneLegend
    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
    print("InputGene", InputGenes)

    output_matrix = []
    # NOTE: only the first image is processed here; iterate over
    # range(len(imageData)) to run the whole image table.
    for i in range(0, 1):
        imgTemp = imageData.iloc[i].values
        # Pixel values are used as stored (no /255 normalisation).
        imgDims = np.sqrt(len(imgTemp)).astype(int)
        img = imgTemp.reshape(imgDims, imgDims)
        print(img)

        # Feed the image column by column. The first column starts from
        # InitialMemory; every later column starts from the previous
        # output scaled by MemoryValue.
        memory = InitialMemory
        for j in range(0, imgDims):
            valueArray = img[:,j]
            print(valueArray)

            input_array = input_padding(InputGenes, valueArray, GeneLegend, memory)
            Output = parallel_rc_output(WeightMatrix, input_array)
            memory = Output*MemoryValue
        output_matrix.append(Output)

    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
        pk.dump(output_matrix, f)

 #to csv
 #output_matrixDF = pd.DataFrame(output_matrix)

 #add column names
 #output_matrixDF.columns = GeneLegend
 #print(output_matrixDF.head())
 #output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv')
	import numpy as np
	import pandas as pd
	import matplotlib.pyplot as plt
	import pickle as pk
	import multiprocessing
	import time

	# Factor applied to the previous reservoir output before it is reused as the
	# starting state for the next image column (see the main loop further down).
	MemoryValue = 0.55

	# Load the three input tables and report how long the loading took.
	start_time = time.time()
	reservoir_weightsDF = pd.read_csv('WeightMatrix.csv', header=0, index_col=0)
	transcriptomicsDF = pd.read_csv('InterpolateData.csv', header=0, index_col=0)
	imageData = pd.read_csv('ImageClassification/ResizedTrainImage.csv', header=None, index_col=0)
	print(time.time() - start_time)
	print(imageData.head())

	print(transcriptomicsDF.head())
	# exit()
	# Column labels of the weight table; they define the gene ordering that the
	# rest of the script uses to index vectors.
	GeneLegend = list(reservoir_weightsDF.columns)

	# Plain ndarray view of the weights, used for the reservoir update.
	WeightMatrix = reservoir_weightsDF.values
	# print(GeneLegend)

	# Select the transcriptomic columns in the same order as GeneLegend.
	TranscriptomicData = transcriptomicsDF[GeneLegend]

	# The first transcriptomic row serves as the initial reservoir state.
	InitialMemory = list(TranscriptomicData.iloc[0])
	print("InitialMemory", InitialMemory)



	def getMaxExpression(TranscriptomicDF, GeneID):
	    """Return the largest value in column ``GeneID`` of ``TranscriptomicDF``."""
	    return TranscriptomicDF[GeneID].max()

	def worker(a, b, result, row):
	    """Store row ``row`` of the matrix product ``a @ b`` into ``result[row]``.

	    ``a``, ``b`` and ``result`` are indexed as ``x[i][j]``; ``result`` is
	    modified in place and nothing is returned.
	    """
	    n = len(b[0])
	    for j in range(n):
	        result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b)))

	def worker_multi(a, b, result, start_index, end_index):
	    """Store rows ``start_index`` .. ``end_index - 1`` of ``a @ b`` in ``result``.

	    ``a``, ``b`` and ``result`` are indexed as ``x[i][j]``; ``result`` is
	    modified in place and nothing is returned.
	    """
	    n = len(b[0])
	    for row in range(start_index, end_index):
	        for j in range(n):
	            result[row][j] = sum(a[row][k] * b[k][j] for k in range(len(b)))

	def rc_output(rc_weights, input_array):
	    """Return ``rc_weights.dot(input_array)`` with negative entries set to 0."""
	    memory = np.maximum(rc_weights.dot(input_array), 0)
	    return memory

	# Worker-process count for the parallel reservoir update (presumably meant as
	# the cap on concurrently running workers -- confirm against parallel_rc_output).
	NUM_PROCESSES=4

	def split(a, n):
	    """Lazily yield ``n`` consecutive slices of ``a`` of near-equal length.

	    The first ``len(a) % n`` slices carry one extra element. A generator is
	    returned; ``n == 0`` raises ``ZeroDivisionError`` at call time.
	    """
	    k, m = divmod(len(a), n)
	    # The multiplication signs were lost in this copy ("ik", "(i+1)k").
	    return (a[i*k+min(i, m):(i+1)*k+min(i+1, m)] for i in range(n))

	def _rc_rows_worker(shared, shape, a, b, start_index, end_index):
	    """Write rows ``[start_index, end_index)`` of ``a @ b`` into ``shared``.

	    The numpy view is rebuilt from the shared ctypes array inside the child
	    process, so the write goes to the shared buffer rather than to a pickled
	    copy of a parent-side array.
	    """
	    out = np.frombuffer(shared.get_obj()).reshape(shape)
	    out[start_index:end_index] = np.dot(a[start_index:end_index], b)


	def parallel_rc_output(rc_weights, input_array, num_processes=None):
	    """Compute ``max(rc_weights @ input_array, 0)`` using worker processes.

	    Parameters:
	        rc_weights: 2-D weight matrix (anything ``np.asarray`` accepts).
	        input_array: 1-D state vector or 2-D matrix whose first dimension
	            equals the number of columns of ``rc_weights``.
	        num_processes: number of worker processes; defaults to the
	            module-level ``NUM_PROCESSES``. Never more workers than rows
	            are started.

	    Returns:
	        A new float array: shape ``(rows,)`` for a 1-D input (same as
	        ``rc_output``), shape ``(rows, cols)`` for a 2-D input.

	    Raises:
	        ValueError: if the inner dimensions do not match.
	        RuntimeError: if a worker process exits with a non-zero code.
	    """
	    # Debug trace kept from the original implementation.
	    print(input_array[0])

	    a = np.asarray(rc_weights, dtype=float)
	    b = np.asarray(input_array, dtype=float)

	    # The main loop passes a 1-D state vector; treat it as a single column
	    # and flatten the result again before returning.
	    is_vector = b.ndim == 1
	    if is_vector:
	        b = b.reshape(-1, 1)

	    if a.shape[1] != b.shape[0]:
	        raise ValueError(
	            "shape mismatch: rc_weights is %r, input_array is %r"
	            % (a.shape, np.shape(input_array))
	        )

	    rows, cols = a.shape[0], b.shape[1]
	    if rows == 0 or cols == 0:
	        empty = np.zeros((rows, cols))
	        return empty[:, 0] if is_vector else empty

	    if num_processes is None:
	        num_processes = NUM_PROCESSES
	    n_workers = max(1, min(int(num_processes), rows))

	    result = multiprocessing.Array('d', rows * cols)

	    # Hand each worker one contiguous band of rows; the first ``extra``
	    # bands get one additional row so every row is covered exactly once.
	    band, extra = divmod(rows, n_workers)
	    processes = []
	    start_index = 0
	    for w in range(n_workers):
	        end_index = start_index + band + (1 if w < extra else 0)
	        p = multiprocessing.Process(
	            target=_rc_rows_worker,
	            args=(result, (rows, cols), a, b, start_index, end_index),
	        )
	        processes.append(p)
	        p.start()
	        start_index = end_index

	    for p in processes:
	        p.join()
	    failed = [p.exitcode for p in processes if p.exitcode != 0]
	    if failed:
	        raise RuntimeError("worker process(es) failed with exit codes %r" % failed)

	    result_np = np.frombuffer(result.get_obj()).reshape((rows, cols))
	    # np.maximum allocates a fresh array, so the caller never holds a view
	    # into the shared buffer.
	    activated = np.maximum(result_np, 0)
	    return activated[:, 0] if is_vector else activated

	def input_padding(input_geneID, input_value, GeneLegend, initMem):
	    """Build a state vector from ``initMem`` with the input genes overwritten.

	    ``initMem`` is copied into a float array. For the i-th entry of
	    ``input_geneID``, the slot at that gene's position in ``GeneLegend`` is
	    set to ``input_value[i]`` times the gene's maximum in the module-level
	    ``TranscriptomicData``. The new array is returned.
	    """
	    input_matrix = np.array(initMem, dtype=float)

	    for ig in range(0, len(input_geneID)):
	        input_matrix[GeneLegend.index(input_geneID[ig])] = input_value[ig]*getMaxExpression(TranscriptomicData, input_geneID[ig])

	    return input_matrix


	# Run the pipeline only when executed as a script. multiprocessing's
	# spawn/forkserver start methods re-import this module in every child
	# process; without this guard each child would re-run the loop below and
	# try to start worker processes of its own.
	if __name__ == "__main__":
	    #pick 15 random genes from GeneLegend
	    InputGenes = np.random.choice(GeneLegend, 15, replace=False)
	    print("InputGene", InputGenes)

	    output_matrix = []
	    # NOTE: only the first image is processed here; iterate over
	    # range(len(imageData)) to run the whole image table.
	    for i in range(0, 1):
	        imgTemp = imageData.iloc[i].values
	        # Pixel values are used as stored (no /255 normalisation).
	        imgDims = np.sqrt(len(imgTemp)).astype(int)
	        img = imgTemp.reshape(imgDims, imgDims)
	        print(img)

	        # Feed the image column by column. The first column starts from
	        # InitialMemory; every later column starts from the previous
	        # output scaled by MemoryValue.
	        memory = InitialMemory
	        for j in range(0, imgDims):
	            valueArray = img[:,j]
	            print(valueArray)

	            input_array = input_padding(InputGenes, valueArray, GeneLegend, memory)
	            Output = parallel_rc_output(WeightMatrix, input_array)
	            memory = Output*MemoryValue
	        output_matrix.append(Output)

	    with open('ImageClassification/output_matrix.pkl', 'wb') as f:
	        pk.dump(output_matrix, f)

	#to csv
	#output_matrixDF = pd.DataFrame(output_matrix)

	#add column names
	#output_matrixDF.columns = GeneLegend
	#print(output_matrixDF.head())
	#output_matrixDF.to_csv('MNISTData/test' + '1' + '.csv')