Skip to content

Instantly share code, notes, and snippets.

@tansey
Last active March 28, 2020 15:20
Show Gist options
  • Save tansey/a2c2f0096303d511524cadf4a140671f to your computer and use it in GitHub Desktop.
Save tansey/a2c2f0096303d511524cadf4a140671f to your computer and use it in GitHub Desktop.
A fast (nlogn time) implementation of the knockoff filter
'''
A O(nlogn) time implementation of the knockoff filter.
Author: Wesley Tansey
Date: 3/27/2020
'''
import numpy as np
def knockoff_filter(knockoff_stats, alpha, offset=1.0, is_sorted=False):
'''Perform the knockoffs selection procedure at the target FDR threshold.
:param knockoff_stats: vector of knockoff statistics
:param alpha: nominal false discovery rate
:param offset: equal to one for strict false discovery rate control
:param is_sorted: if True, assumes knockoff_stats is already sorted.
:return an array of indices selected at the alpha FDR level.
This implementation runs in nlogn time.'''
n = len(knockoff_stats) # Length of the stats array
if is_sorted:
order = np.arange(n)
sorted_stats = knockoff_stats
else:
order = np.argsort(knockoff_stats)
sorted_stats = knockoff_stats[order]
# Edge case: if there are no positive stats, just return empty selections
if sorted_stats[-1] <= 0:
return np.array([], dtype='int64')
ridx = np.searchsorted(sorted_stats, 0, side='right') # find smallest positive value index
lidx = np.searchsorted(sorted_stats, -sorted_stats[ridx], side='left') # find matching negative value index
# Numpy correction: if -sorted_stats[ridx] is less than any number in the list,
# searchsorted returns 0 instead of -1. This is not the desired behavior here.
if lidx == 0 and sorted_stats[lidx] >= -sorted_stats[ridx]:
# If the current ratio isn't good enough, it will never get better.
if (lidx + 1 + offset) / max(1, n - ridx) > alpha:
return np.array([], dtype='int64')
# If we're below the alpha threshold, return everything positive
return order[ridx:]
# If the knockoff ratio is below the threshold, return all stats
# at or above the current value
while (lidx + 1 + offset) / max(1, n - ridx) > alpha:
# If we were at the end of the negative values, we won't get
# a better ratio by going further down the positive value side.
if lidx == -1:
return np.array([], dtype='int64')
# Move to the next smallest positive value
ridx += 1
# Check if we've reached the end of the list
if ridx == n:
break
# Find the matching negative value
while lidx >= 0 and sorted_stats[lidx] > -sorted_stats[ridx]:
lidx -= 1
# Return the set of stats with values above the threshold
return order[ridx:]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment