wenfahu · November 17, 2017 14:05
diff --git a/skyline.py b/skyline.py
 from collections import defaultdict
 import numpy as np
 import pdb
 from IPython import embed 


 sky = defaultdict(list)

 def find_tuple(pts, tgts):
    idx = []
    for t in tgts:
        idx.append(np.where((pts == t).all(axis=1))[0])
    return idx

 def input_pruning(pts, k):
    '''
    remove the k-dominated points, input preprocessing 

    pts: n x m ndarray, n points each has m attributes
    k: k value in k group skyline
    '''
    pairwise = np.logical_and(np.all(pts[:, None] <= pts, axis=-1), np.any(pts[:, None] < pts, axis=-1))
    num_of_dominance = np.sum(pairwise, axis=1)
    return pts[num_of_dominance < k]

 def is_pareto_efficient(costs):
    '''
    the indices of skyline points
    costs: n x m ndarray
        n : number of input points
        m : number of attributes per point
    '''
    is_efficient = np.ones(costs.shape[0], dtype = bool)
    for i, c in enumerate(costs):
        if is_efficient[i]:
            is_efficient[is_efficient] = np.any(costs[is_efficient]<=c, axis=1)  # Remove dominated points
    return is_efficient

 def pareto_frontier(pts):
    unit_g = np.logical_and(np.all(pts[:, None] <= pts, axis=-1), np.any(pts[:, None] < pts, axis=-1))
    dominated = np.any(unit_g, axis=1)
    return np.logical_not(dominated)

 def compute_skyline(groups):
    '''
    groups: g x k x m ndarray, g specifies the number of candidate k-skyline groups,
        k is the k value of k-skyline, m is the number of attributes of each point

    return the k-skyline groups
    '''
    # aggregate the groups using SUM function 
    # aggregated_groups is g x m ndarray
    aggregated_groups = np.sum(groups, axis=1)

    return groups[pareto_frontier(aggregated_groups)]

 def join_groups(*args):
    '''
    group union of 2d array a and b
    '''
    return np.unique(np.vstack(args) , axis=0)

 # sky is global dict that serves as the table for dynamic programming algorithm
 # for computing k-skyline group.  the key of sky is a tuple (k, n) where k is 
 # the key value of k-skyline, and n is number of points. the value of sky is the
 # corresponding k-skyline in n points

 def sky_group(pts, k, n):
    '''
    recursive method to get the k-skyline in the n points
    returns a p x k x m ndarray,
        p : number of groups
        k : k-skyline
        m : number of attributes of each point
    '''
    # S2 : g x k x m matrix , sky^(n-1)_(k-1) union t_n
    # S1 : g x k x m matrix , sky^(n-1)_k 
    m = pts.shape[-1]
    tail_pt = pts[n-1]
    if np.any(sky[(k, n)]):
        return sky[(k, n)]
    if k==1:
        S2 = np.reshape(tail_pt, (1, k, m))
    else:
        S2 = np.empty([0, k, m])
        pre_k_sky = sky_group(pts, k-1, n-1)
        for g in pre_k_sky:
            # g is a single k-skyline of shape K x M
            candidate_group = join_groups(g, tail_pt[np.newaxis, :])
            S2 = join_groups(S2, candidate_group[np.newaxis, :])
    if k < n:
        S1 = sky_group(pts, k, n-1)
    else:
        S1 = np.empty([0, k, m])
    candidates = join_groups(S1, S2)
    # candidates is the group of candidates of k-skyline of shape GxKxM
    res = compute_skyline(candidates)
    sky[(k, n)] = res
    return res


 if __name__ == "__main__":
    # pts = np.random.randn(10 ,3)
    pts = np.array([ [3, 3], [0, 4], [4, 1], [2, 3],
                    [2, 2], [2, 1] ])
    pts_processed = input_pruning(pts, 3)
    pdb.set_trace()
    solution = sky_group(pts_processed, 3, pts_processed.shape[0]) 
    embed()
    print(solution)
	from collections import defaultdict
	import numpy as np
	import pdb
	from IPython import embed


	sky = defaultdict(list)

	def find_tuple(pts, tgts):
	idx = []
	for t in tgts:
	idx.append(np.where((pts == t).all(axis=1))[0])
	return idx

	def input_pruning(pts, k):
	'''
	remove the k-dominated points, input preprocessing

	pts: n x m ndarray, n points each has m attributes
	k: k value in k group skyline
	'''
	pairwise = np.logical_and(np.all(pts[:, None] <= pts, axis=-1), np.any(pts[:, None] < pts, axis=-1))
	num_of_dominance = np.sum(pairwise, axis=1)
	return pts[num_of_dominance < k]

	def is_pareto_efficient(costs):
	'''
	the indices of skyline points
	costs: n x m ndarray
	n : number of input points
	m : number of attributes per point
	'''
	is_efficient = np.ones(costs.shape[0], dtype = bool)
	for i, c in enumerate(costs):
	if is_efficient[i]:
	is_efficient[is_efficient] = np.any(costs[is_efficient]<=c, axis=1) # Remove dominated points
	return is_efficient

	def pareto_frontier(pts):
	unit_g = np.logical_and(np.all(pts[:, None] <= pts, axis=-1), np.any(pts[:, None] < pts, axis=-1))
	dominated = np.any(unit_g, axis=1)
	return np.logical_not(dominated)

	def compute_skyline(groups):
	'''
	groups: g x k x m ndarray, g specifies the number of candidate k-skyline groups,
	k is the k value of k-skyline, m is the number of attributes of each point

	return the k-skyline groups
	'''
	# aggregate the groups using SUM function
	# aggregated_groups is g x m ndarray
	aggregated_groups = np.sum(groups, axis=1)

	return groups[pareto_frontier(aggregated_groups)]

	def join_groups(*args):
	'''
	group union of 2d array a and b
	'''
	return np.unique(np.vstack(args) , axis=0)

	# sky is global dict that serves as the table for dynamic programming algorithm
	# for computing k-skyline group. the key of sky is a tuple (k, n) where k is
	# the key value of k-skyline, and n is number of points. the value of sky is the
	# corresponding k-skyline in n points

	def sky_group(pts, k, n):
	'''
	recursive method to get the k-skyline in the n points
	returns a p x k x m ndarray,
	p : number of groups
	k : k-skyline
	m : number of attributes of each point
	'''
	# S2 : g x k x m matrix , sky^(n-1)_(k-1) union t_n
	# S1 : g x k x m matrix , sky^(n-1)_k
	m = pts.shape[-1]
	tail_pt = pts[n-1]
	if np.any(sky[(k, n)]):
	return sky[(k, n)]
	if k==1:
	S2 = np.reshape(tail_pt, (1, k, m))
	else:
	S2 = np.empty([0, k, m])
	pre_k_sky = sky_group(pts, k-1, n-1)
	for g in pre_k_sky:
	# g is a single k-skyline of shape K x M
	candidate_group = join_groups(g, tail_pt[np.newaxis, :])
	S2 = join_groups(S2, candidate_group[np.newaxis, :])
	if k < n:
	S1 = sky_group(pts, k, n-1)
	else:
	S1 = np.empty([0, k, m])
	candidates = join_groups(S1, S2)
	# candidates is the group of candidates of k-skyline of shape GxKxM
	res = compute_skyline(candidates)
	sky[(k, n)] = res
	return res


	if __name__ == "__main__":
	# pts = np.random.randn(10 ,3)
	pts = np.array([ [3, 3], [0, 4], [4, 1], [2, 3],
	[2, 2], [2, 1] ])
	pts_processed = input_pruning(pts, 3)
	pdb.set_trace()
	solution = sky_group(pts_processed, 3, pts_processed.shape[0])
	embed()
	print(solution)