jhumigas · June 5, 2017 19:31
diff --git a/extract_data.py b/extract_data.py
 """
 Small example of function to reorder a data set.
 """
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-

 import numpy as np

 def extract_k(labels, k):
    """Order sample indices so that a fraction k of each class comes first.

    For every distinct label, the first ``int(count * k)`` positions
    holding that label (in order of appearance) are placed in a leading
    section and the remaining positions in a trailing section.  Inside
    both sections the classes follow the sorted order of ``np.unique``.

    Args:
        labels(np.ndarray): Class label of every sample (1-D or n*1)
        k(float): Fraction of each class to put first, between 0 and 1

    Returns:
        np.ndarray: Integer indices, leading section followed by the
            trailing section.  Empty when ``labels`` is empty.

    Raises:
        ValueError: If ``k`` is not between 0 and 1
    """
    if not 0 <= k <= 1:
        raise ValueError('k has to be between 0 and 1')

    labels = np.asarray(labels)
    # return_counts yields the class sizes directly, so no histogram bin
    # edges (and no ``max(labels) + 1``) are needed; this also works for
    # empty input and for non-numeric labels.
    classes, counts = np.unique(labels, return_counts=True)

    head, tail = [], []
    for label, count in zip(classes, counts):
        # Row positions of this class, looked up once and split in two.
        members = np.nonzero(labels == label)[0]
        n_head = int(count * k)
        head.append(members[:n_head])
        tail.append(members[n_head:])

    if not head:
        # np.concatenate refuses an empty list of arrays.
        return np.array([], dtype=int)
    return np.concatenate(head + tail).astype(int)

 def reorder_k(X, y, k):
    """Permute samples and labels with the ordering computed by extract_k.

    The index array returned by ``extract_k(y, k)`` is applied to the rows
    of ``X`` and to ``y`` alike, so samples and labels stay aligned.

    Args:
        X(np.ndarray): n*d matrix where each row represents a sample
        y(np.ndarray): n*1 matrix where each row represents a label
        k: Value forwarded unchanged to ``extract_k``

    Return:
        tuple: Reordered samples and reordered labels
    """
    order = extract_k(y, k)
    samples = X[order, :]
    targets = y[order]
    return samples, targets
	"""
	Small example of function to reorder a data set.
	"""
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-

	import numpy as np

	def extract_k(labels, k):
	    """Order sample indices so that a fraction k of each class comes first.

	    For every distinct label, the first ``int(count * k)`` positions
	    holding that label (in order of appearance) are placed in a leading
	    section and the remaining positions in a trailing section.  Inside
	    both sections the classes follow the sorted order of ``np.unique``.

	    Args:
	        labels(np.ndarray): Class label of every sample (1-D or n*1)
	        k(float): Fraction of each class to put first, between 0 and 1

	    Returns:
	        np.ndarray: Integer indices, leading section followed by the
	            trailing section.  Empty when ``labels`` is empty.

	    Raises:
	        ValueError: If ``k`` is not between 0 and 1
	    """
	    if not 0 <= k <= 1:
	        raise ValueError('k has to be between 0 and 1')

	    labels = np.asarray(labels)
	    # return_counts yields the class sizes directly, so no histogram bin
	    # edges (and no ``max(labels) + 1``) are needed; this also works for
	    # empty input and for non-numeric labels.
	    classes, counts = np.unique(labels, return_counts=True)

	    head, tail = [], []
	    for label, count in zip(classes, counts):
	        # Row positions of this class, looked up once and split in two.
	        members = np.nonzero(labels == label)[0]
	        n_head = int(count * k)
	        head.append(members[:n_head])
	        tail.append(members[n_head:])

	    if not head:
	        # np.concatenate refuses an empty list of arrays.
	        return np.array([], dtype=int)
	    return np.concatenate(head + tail).astype(int)

	def reorder_k(X, y, k):
	    """Order data so the top k percent contain exactly k percent of each class.

	    The index array returned by ``extract_k(y, k)`` is applied to the
	    rows of ``X`` and to ``y`` alike, so samples and labels stay aligned.

	    Args:
	        X(np.ndarray): n*d matrix where each row represents a sample
	        y(np.ndarray): n*1 matrix where each row represents a label
	        k: Value forwarded unchanged to ``extract_k``

	    Return:
	        tuple: Samples and labels ordered
	    """
	    idx = extract_k(y, k)
	    return X[idx, :], y[idx]