Created
March 10, 2015 04:48
-
-
Save ikegami-yukino/fb5fa79fde91ca0bbe98 to your computer and use it in GitHub Desktop.
Burst detection by kleinberg's algorithm
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from collections import namedtuple | |
import np | |
class Bursts: | |
def __init__(level, start, end): | |
self.level = level | |
self.start = start | |
self.end = end | |
def kleinberg(offsets, s=2, gamma=1): | |
if s <= 1: | |
raise ValueError('s must be greater than 1!') | |
if gamma <= 0: | |
raise ValueError('gamma must be positive!') | |
if length(offsets) < 1: | |
raise ValueError('offsets must be non-empty!') | |
if len(offsets) == 1: | |
return Bursts(level=0, start=offsets[1], end=offsets[1]) | |
# Get the data into the right format. The value of gaps[t] gives the length | |
# of the gap between the events at offsets[t] and offsets[t+1]. | |
offsets = sorted(offsets) | |
gaps = offsets[1:len(offsets)] - offsets[:len(offsets) - 1] | |
if np.any(gaps == 0): | |
raise ValueError('Input cannot contain events with zero time between!') | |
# Compute the average event rate, etc. | |
T = np.sum(gaps) | |
n = len(gaps) | |
ghat = T / n | |
# Compute an upper bound on the number of states used in the optimal state | |
# sequence, as per Kleinberg. | |
k = np.ceil(1 + np.log(T, s) + np.log(1 / np.min(gaps), s)) | |
# Set up the transition cost function tau, and f, the probability density | |
# function for gap lengths when in state j, with precomputed parameters. | |
gammalogn = gamma * np.log(n) | |
tau = lambda i, j: 0 if i >= j else (j - i) * gammalogn | |
alpha = map(lambda x: s ** x / ghat, range(k-1)) | |
f = lambda j, x: alpha[j] * np.exp(-alpha[j] * x) | |
# Compute the optimal state sequence for the model, using the Viterbi | |
# algorithm. In each iteration t, we compute for each possible state j the | |
# minimum costs of partial state sequences up to iteration t that end in | |
# that state. These numbers are stored in C. We use q to keep track of | |
# the optimal sequences for each j. | |
C = c(0, np.repeat(np.inf, k - 1)) | |
q = np.zeros((k, 1)) | |
for t in range(n): | |
Cprime = np.repeat(np.nan, k) | |
qprime = np.zeros((k, t)) | |
for j in range(k): | |
cost = map(lambda ell: C[ell] + tau(ell, j), range(k)) | |
ell = np.argmin(cost) | |
Cprime[j] = cost[ell] - np.log(f(j, gaps[t])) | |
if t > 1: | |
qprime[j, 0:(t-1)] = q[ell, 0] | |
qprime[j, t] = j | |
C = Cprime | |
q = qprime | |
# Extract the state sequence with the minimum final cost. | |
q = q[np.argmin(C)] | |
# Compute the number of entries we will need in the output. | |
prev_q = 0 | |
N = 0 | |
for t in range(n): | |
if q[t] > prev_q: | |
N += q[t] - prev_q | |
prev_q = q[t] | |
# Run through the state sequence, and pull out the durations of all the | |
# intervals for which the system is at or above a given state greater than 1. | |
bursts = Bursts(level=np.repeat(np.nan, N), start=np.repeat(offsets[1], N), | |
end=np.repeat(offsets[1], N)) # Using offsets[1] for the type. | |
burstcounter = 0 | |
prev_q = 0 | |
stack = np.repeat(np.nan, N) # Keeps track of which bursts are currently open. | |
stackcounter = 0 | |
for t in range(n): | |
if q[t] > prev_q: | |
num_levels_opened = q[t] - prev_q | |
for i in range(num_levels_opened): | |
burstcounter += 1 | |
bursts.level[burstcounter] = prev_q + i | |
bursts.start[burstcounter] = offsets[t] | |
stackcounter += 1 | |
stack[stackcounter] = burstcounter | |
elif q[t] < prev_q: | |
num_levels_closed = prev_q - q[t] | |
for i in range(num_levels_closed): | |
bursts.end[stack[stackcounter]] = offsets[t] | |
stackcounter -= 1 | |
prev_q = q[t] | |
while stackcounter >= 0: | |
bursts.end[stack[stackcounter]] = offsets[n + 1] | |
stackcounter -= 1 | |
return bursts |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment