Created
November 17, 2017 14:05
-
-
Save wenfahu/9766641df18f403fa51c9fbc7f19b5bb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import defaultdict | |
import numpy as np | |
import pdb | |
from IPython import embed | |
sky = defaultdict(list) | |
def find_tuple(pts, tgts): | |
idx = [] | |
for t in tgts: | |
idx.append(np.where((pts == t).all(axis=1))[0]) | |
return idx | |
def input_pruning(pts, k): | |
''' | |
remove the k-dominated points, input preprocessing | |
pts: n x m ndarray, n points each has m attributes | |
k: k value in k group skyline | |
''' | |
pairwise = np.logical_and(np.all(pts[:, None] <= pts, axis=-1), np.any(pts[:, None] < pts, axis=-1)) | |
num_of_dominance = np.sum(pairwise, axis=1) | |
return pts[num_of_dominance < k] | |
def is_pareto_efficient(costs): | |
''' | |
the indices of skyline points | |
costs: n x m ndarray | |
n : number of input points | |
m : number of attributes per point | |
''' | |
is_efficient = np.ones(costs.shape[0], dtype = bool) | |
for i, c in enumerate(costs): | |
if is_efficient[i]: | |
is_efficient[is_efficient] = np.any(costs[is_efficient]<=c, axis=1) # Remove dominated points | |
return is_efficient | |
def pareto_frontier(pts): | |
unit_g = np.logical_and(np.all(pts[:, None] <= pts, axis=-1), np.any(pts[:, None] < pts, axis=-1)) | |
dominated = np.any(unit_g, axis=1) | |
return np.logical_not(dominated) | |
def compute_skyline(groups): | |
''' | |
groups: g x k x m ndarray, g specifies the number of candidate k-skyline groups, | |
k is the k value of k-skyline, m is the number of attributes of each point | |
return the k-skyline groups | |
''' | |
# aggregate the groups using SUM function | |
# aggregated_groups is g x m ndarray | |
aggregated_groups = np.sum(groups, axis=1) | |
return groups[pareto_frontier(aggregated_groups)] | |
def join_groups(*args): | |
''' | |
group union of 2d array a and b | |
''' | |
return np.unique(np.vstack(args) , axis=0) | |
# sky is global dict that serves as the table for dynamic programming algorithm | |
# for computing k-skyline group. the key of sky is a tuple (k, n) where k is | |
# the key value of k-skyline, and n is number of points. the value of sky is the | |
# corresponding k-skyline in n points | |
def sky_group(pts, k, n): | |
''' | |
recursive method to get the k-skyline in the n points | |
returns a p x k x m ndarray, | |
p : number of groups | |
k : k-skyline | |
m : number of attributes of each point | |
''' | |
# S2 : g x k x m matrix , sky^(n-1)_(k-1) union t_n | |
# S1 : g x k x m matrix , sky^(n-1)_k | |
m = pts.shape[-1] | |
tail_pt = pts[n-1] | |
if np.any(sky[(k, n)]): | |
return sky[(k, n)] | |
if k==1: | |
S2 = np.reshape(tail_pt, (1, k, m)) | |
else: | |
S2 = np.empty([0, k, m]) | |
pre_k_sky = sky_group(pts, k-1, n-1) | |
for g in pre_k_sky: | |
# g is a single k-skyline of shape K x M | |
candidate_group = join_groups(g, tail_pt[np.newaxis, :]) | |
S2 = join_groups(S2, candidate_group[np.newaxis, :]) | |
if k < n: | |
S1 = sky_group(pts, k, n-1) | |
else: | |
S1 = np.empty([0, k, m]) | |
candidates = join_groups(S1, S2) | |
# candidates is the group of candidates of k-skyline of shape GxKxM | |
res = compute_skyline(candidates) | |
sky[(k, n)] = res | |
return res | |
if __name__ == "__main__": | |
# pts = np.random.randn(10 ,3) | |
pts = np.array([ [3, 3], [0, 4], [4, 1], [2, 3], | |
[2, 2], [2, 1] ]) | |
pts_processed = input_pruning(pts, 3) | |
pdb.set_trace() | |
solution = sky_group(pts_processed, 3, pts_processed.shape[0]) | |
embed() | |
print(solution) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment