Syafiq Kamarul Azman syaffers

Machine learning, random code etc.

syaffers / simple_extract.py

Last active March 21, 2020 16:48

Looped window extraction from numpy matrix

	def extract_windows(array, clearing_time_index, max_time, sub_window_size):
	    """Stack ``max_time + 1`` consecutive sliding windows cut from ``array``.

	    Window ``i`` (for ``i`` in ``0..max_time``) is the slice
	    ``array[start + i:start + i + sub_window_size]`` along the first axis,
	    where ``start = clearing_time_index + 2 - sub_window_size``, so the first
	    window ends on index ``clearing_time_index + 1``.

	    Args:
	        array: Sequence or NumPy array sliced along its first axis.
	        clearing_time_index: Index that anchors the first window (see above).
	        max_time: Number of one-step shifts applied after the first window.
	        sub_window_size: Number of elements in every window.

	    Returns:
	        A new array holding the windows stacked along a new leading axis,
	        i.e. shape ``(max_time + 1, sub_window_size, ...)``.

	    Raises:
	        ValueError: If ``max_time`` is negative or any window would fall
	            outside ``array``.
	    """
	    if max_time < 0:
	        raise ValueError(f"max_time must be non-negative, got {max_time}")

	    start = clearing_time_index + 1 - sub_window_size + 1
	    # Exclusive end of the last window.
	    last_stop = start + max_time + sub_window_size

	    # Without this check a negative start wraps around via negative indexing
	    # and an overrun is silently truncated by slicing, giving wrong or ragged
	    # windows instead of a clear error.
	    if start < 0 or last_stop > len(array):
	        raise ValueError(
	            f"windows span indices [{start}, {last_stop}) which fall outside "
	            f"an array of length {len(array)}"
	        )

	    return np.stack(
	        [array[start + i:start + i + sub_window_size] for i in range(max_time + 1)]
	    )

syaffers / vectorized_extract.py

Last active June 18, 2020 21:04

Vectorized version of sliding window extraction

	# Builds a 2-D grid of indices for sliding-window extraction: row i holds
	# start + i, ..., start + i + sub_window_size - 1, for i in 0..max_time.
	# NOTE(review): this listing ends once the index grid is built; how `array`
	# is indexed with it and what is returned are not visible here.
	def extract_windows_vectorized(array, clearing_time_index, max_time, sub_window_size):
	# Index of the first element of the first window.
	start = clearing_time_index + 1 - sub_window_size + 1

	sub_windows = (
	start +
	# expand_dims turns each 1-D arange into a 2-D array: here a
	# (1, sub_window_size) row of within-window offsets ...
	np.expand_dims(np.arange(sub_window_size), 0) +
	# ... and, after the transpose, a (max_time + 1, 1) column of per-window
	# shifts; broadcasting the sum gives shape (max_time + 1, sub_window_size).
	np.expand_dims(np.arange(max_time + 1), 0).T
	)

syaffers / vectorized_stride.py

Last active June 17, 2020 16:57

Vectorized striding window extraction

	# Builds the (max_time + 1, sub_window_size) grid of window indices.
	# NOTE(review): `stride_size` is not used anywhere in the visible part of
	# this listing; presumably it is applied in the lines that are cut off.
	def vectorized_stride_v1(array, clearing_time_index, max_time, sub_window_size,
	stride_size):
	# Index of the first element of the first window.
	start = clearing_time_index + 1 - sub_window_size + 1

	sub_windows = (
	start +
	# (1, sub_window_size) row of within-window offsets.
	np.expand_dims(np.arange(sub_window_size), 0) +
	# (max_time + 1, 1) column of per-window shifts; broadcasting the sum
	# gives one row of indices per window.
	np.expand_dims(np.arange(max_time + 1), 0).T
	)

syaffers / downsampling_windows.py

Last active June 18, 2020 12:18

Downsampled sliding window extractor

	# Prepares index offsets for windows whose samples are spaced
	# `downsampling_ratio` apart.
	# NOTE(review): this listing is cut off in the middle of the `sub_windows`
	# expression, so the final index grid and the return value are not visible.
	def extract_windows_downsampling(array, clearing_time_index, max_time_steps,
	sub_window_size, downsampling_ratio):
	# Chosen so that the last of the sub_window_size samples of the first window,
	# start + (sub_window_size - 1) * downsampling_ratio, is clearing_time_index + 1.
	start = clearing_time_index + 1 - (sub_window_size - 1) * downsampling_ratio

	# Within-window offsets: 0, ratio, 2 * ratio, ... below sub_window_size * ratio.
	K_indices = np.arange(0, sub_window_size * downsampling_ratio,
	step=downsampling_ratio)
	# Per-window shifts, also spaced by the ratio, below (max_time_steps + 1) * ratio.
	T_indices = np.arange(0, (max_time_steps + 1) * downsampling_ratio,
	step=downsampling_ratio)
	# NOTE(review): np.round suggests the ratio may be non-integer — confirm.
	sub_windows = np.round(
	start +

syaffers / load_and_plot.py

Last active June 18, 2020 13:16

Reading and plotting an example from the 9-bus system data

	import matplotlib.pyplot as plt
	import numpy as np
	import pandas as pd

	# Metadata table, indexed by its 'ID' column.
	meta = pd.read_csv('metadata.csv', index_col='ID')

	# ID of the metadata row to load.
	n = 693
	# The row's 'Filename' entry is passed to np.load to read the example.
	data = np.load(meta.loc[n, 'Filename'])
	# The row's 'Clearing Time' entry (units are not shown in this listing).
	clearing_time = meta.loc[n, 'Clearing Time']

syaffers / vcf_to_numpy.py

Last active September 10, 2020 08:15

Converts a phased VCF file to NumPy dataset

	import io
	import subprocess
	from sklearn.model_selection import train_test_split
	import numpy as np
	import tqdm
	import vcf


	# List of coordinates for the MHC gene.
	coordinates = '''

syaffers / hla_dataset.py

Created September 10, 2020 08:56

HLA variants genotype data generator

	import numpy as np
	import matplotlib.pyplot as plt

	# Load samples: the array stored under the 'data' key of the .npz archive.
	samples = np.load('data/hg19_HLA_train.npz')['data']

	# Probability of randomly deleting genotypes.
	noise_prob = 0.05

	# Some variants are multiallelic which means that the genotype numbers

syaffers / reduced_palette.py

Created December 4, 2020 09:18

K-means reduced palette class

	# Palette reducer built around a KMeans model with `num_colors` clusters.
	# NOTE(review): KMeans is not imported in this listing — presumably
	# sklearn.cluster.KMeans; confirm. The listing is cut off inside _preprocess.
	class KMeansReducedPalette:
	def __init__(self, num_colors):
	self.num_colors = num_colors
	# Random state for reproducibility.
	self.kmeans = KMeans(num_colors, random_state=0xfee1600d)
	# Starts as None; nothing in this listing assigns it afterwards.
	self.source_pixels = None

	# Validates the input image; the rest of the method is not visible here.
	def _preprocess(self, image):
	# The last axis must hold exactly three color channels.
	assert image.shape[-1] == 3, 'image must have exactly 3 color channels'
	# Only 8-bit unsigned data is accepted.
	assert image.dtype == 'uint8', 'image must be in np.uint8 type'

syaffers / reduced_palette.py

Created December 4, 2020 09:41

K-means clustering on unique colors

	class UniqueKMeansReducedPalette(KMeansReducedPalette):
	    """Reduced palette fitted on the distinct rows of the preprocessed image.

	    Each distinct row returned by ``_preprocess`` is handed to the parent's
	    ``fit`` exactly once, however many times it occurs in the image.
	    """

	    def __init__(self, num_colors):
	        """Forward ``num_colors`` unchanged to the parent constructor."""
	        super().__init__(num_colors)

	    def fit(self, image):
	        """Fit the parent palette on the de-duplicated pixels of ``image``."""
	        # Preprocess a copy rather than the caller's own array.
	        # NOTE(review): presumably _preprocess yields one row per pixel — confirm.
	        flat_pixels = self._preprocess(image.copy())
	        # axis=0 de-duplicates whole rows instead of individual values.
	        distinct_colors = np.unique(flat_pixels, axis=0)
	        super().fit(distinct_colors)

syaffers / reduced_palette.py

Created December 4, 2020 09:52

Random walk recolor method of the reduced palette class

	class KMeansReducedPalette:
	# ... omitted

	# NOTE(review): only the setup of this method is visible in this listing;
	# how `max_steps`, `start` and `diff` are used afterwards is not shown.
	def random_walk_recolor(self, image, max_steps):
	# Keep the caller's shape before `image` is replaced by its preprocessed form.
	original_shape = image.shape
	image = self._preprocess(image)
	# Cluster index predicted by self.kmeans for each entry of the preprocessed
	# image (presumably one entry per pixel — confirm against _preprocess).
	centroids = self.kmeans.predict(image)
	# Rounded cluster-center value looked up for each predicted cluster index.
	start = np.round(self.kmeans.cluster_centers_[centroids])

	# Zero-filled array with the same shape as the preprocessed image.
	diff = np.zeros(image.shape)