The Web as a Jungle: Non-Linear Dynamical Systems for Co-evolving Online Activities (WWW 2015)
import numpy as np
import sys
from collections import defaultdict
from lmfit import minimize, Parameters
from sklearn.decomposition import FastICA
import math
# This script is an (incomplete and buggy) implementation of
# "The Web as a Jungle: Non-Linear Dynamical Systems for Co-evolving Online Activities" (WWW 2015).
# http://www.cs.kumamoto-u.ac.jp/~yasuko/PUBLICATIONS/www15-ecoweb.pdf
# See the authors' original code:
# http://www.cs.kumamoto-u.ac.jp/~yasuko/SRC/ecoweb.zip
# Usage:
#   python3 ecoweb.py path_to_train_file time_of_train size_of_trends
# File format (one species per line):
#   species_name \t t:count \t t:count ...
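# Example input line (hypothetical; the name and the weekly counts are made up to illustrate the format):
#   cat_videos\t0:120\t1:98\t2:143\t...
# Example invocation (the file name is hypothetical):
#   python3 ecoweb.py activities.tsv 104 3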
class Ecoweb:
    def __init__(self, trend_size):
        # fixed number of seasonal bins (weekly data, one-year period)
        self.n_p = 52
        self.trend_size = trend_size
        self.x_train = defaultdict(lambda: defaultdict(int))
        self.x_test = defaultdict(lambda: defaultdict(int))
    def read_file(self, input_file, t_train):
        self.t_train = t_train
        self.species_name_id = defaultdict(lambda: len(self.species_name_id))
        self.species_id_name = {}
        for l in open(input_file, 'r'):
            # species \t time:count \t ...
            ary = l.rstrip("\n").split("\t")
            species_name = ary.pop(0)
            species_id = self.species_name_id[species_name]
            self.species_id_name[species_id] = species_name
            for pair in ary:
                t, count = [int(x) for x in pair.split(':')]
                if t < t_train:
                    self.x_train[species_id][t] = count
                else:
                    self.x_test[species_id][t] = count
        # list of target species
        self.d = sorted(self.x_train.keys())
    def initialize(self):
        # initialize model parameters
        # carrying capacity K_i
        self.K = defaultdict(float)
        # initial popularity p_i
        self.p = defaultdict(float)
        # growth rate r_i
        self.r = defaultdict(float)
        # interaction matrix A_ij
        self.A = defaultdict(lambda: defaultdict(float))
        # latent popularity P_i(t)
        self.P = defaultdict(lambda: defaultdict(float))
        # estimated count C_i(t)
        self.C = defaultdict(lambda: defaultdict(float))
        # seasonal effect e_i(t) and seasonal mixing weights W_ik
        self.e = defaultdict(lambda: defaultdict(float))
        self.W = defaultdict(lambda: defaultdict(float))
        # seasonal base patterns (one row per trend), estimated later by ICA
        self.B = np.zeros((self.trend_size, self.n_p))
        # number of periods in the training window, used when building the ICA input
        self.h = math.ceil(self.t_train / self.n_p)
        for i in self.d:
            # fixed initial values
            self.r[i] = 0.1
            self.p[i] = 2000.0
            self.K[i] = 10000.0
            for j in self.d:
                # start with no interaction between different species
                self.A[i][j] = 1.0 if i == j else 0.0
        self.update_P_and_C()
    def calc_P_and_C(self, K, p, r, A, W, use_seasonal = True):
        P = defaultdict(lambda: defaultdict(float))
        C = defaultdict(lambda: defaultdict(float))
        for i in self.d:
            P[i][-1] = p[i]
        for t in range(0, self.t_train):
            for i in self.d:
                tau = t % self.n_p
                # non-linear competing-species update:
                # P_i(t) = P_i(t-1) * (1 + r_i * (1 - sum_j A_ij * P_j(t-1) / K_i))
                sum_a_p = sum([A[i][j] * P[j][t - 1] for j in self.d])
                P[i][t] = P[i][t - 1] * (1 + r[i] * (1 - sum_a_p / K[i]))
                # reconstruct the seasonal effect from the base patterns B and weights W
                e = 0.0
                if use_seasonal:
                    for k in range(0, self.trend_size):
                        e += W[i][k] * self.B[k, tau]
                C[i][t] = P[i][t] * (1 + e)
        return (P, C)

    def update_P_and_C(self):
        self.P, self.C = self.calc_P_and_C(self.K, self.p, self.r, self.A, self.W, True)
    def objective(self):
        # RMSE between observed and estimated counts over the training window
        err = 0.0
        for t in range(0, self.t_train):
            for i in self.d:
                err += pow(self.x_train[i][t] - self.C[i][t], 2)
        return math.sqrt(err / (self.t_train * len(self.d)))
    def objective_for_lmfit(self, params):
        # unpack the lmfit Parameters into plain dictionaries
        K = defaultdict(float)
        p = defaultdict(float)
        r = defaultdict(float)
        A = defaultdict(lambda: defaultdict(float))
        for i in self.d:
            K[i] = params["K_{0}".format(i)].value
            p[i] = params["p_{0}".format(i)].value
            r[i] = params["r_{0}".format(i)].value
            # the self-interaction term is fixed to 1
            A[i][i] = 1.0
            for j in self.d:
                if i == j:
                    continue
                A[i][j] = params["A_{0}_{1}".format(i, j)].value
        P, C = self.calc_P_and_C(K, p, r, A, self.W, True)
        # residual vector (squared errors per point) handed back to lmfit
        errs = []
        for i in self.d:
            for t in range(0, self.t_train):
                e = self.x_train[i][t] - C[i][t]
                errs.append(pow(e, 2))
        return errs
    def levenberg_marquardt(self):
        # register each parameter with lmfit
        params = Parameters()
        for i in self.d:
            params.add("K_{0}".format(i), value = self.K[i], min = 0.0)
            params.add("p_{0}".format(i), value = self.p[i])
            params.add("r_{0}".format(i), value = self.r[i], min = 0.0, max = 1.0)
            # A
            for j in self.d:
                # skip i == j (fixed to 1)
                if i == j:
                    continue
                params.add("A_{0}_{1}".format(i, j), value = self.A[i][j], min = 0.0, max = 1.0)
        ret = minimize(self.objective_for_lmfit, params, maxfev = 500)
        # ret = minimize(self.objective_for_lmfit, params)
        optimized_params = ret.params
        # update parameters
        for i in self.d:
            self.K[i] = optimized_params["K_{0}".format(i)].value
            self.p[i] = optimized_params["p_{0}".format(i)].value
            self.r[i] = optimized_params["r_{0}".format(i)].value
            for j in self.d:
                if i == j:
                    continue
                self.A[i][j] = optimized_params["A_{0}_{1}".format(i, j)].value
        # update P and C
        self.update_P_and_C()
    def objective_for_seasonal_component(self, params):
        # unpack W
        W = defaultdict(lambda: defaultdict(float))
        for i in self.d:
            for k in range(0, self.trend_size):
                W[i][k] = params["W_{0}_{1}".format(i, k)].value
        # calc P and C with the candidate seasonal weights
        P, C = self.calc_P_and_C(self.K, self.p, self.r, self.A, W, True)
        errs = []
        for t in range(0, self.t_train):
            for i in self.d:
                err = pow(self.x_train[i][t] - C[i][t], 2)
                errs.append(err)
        return errs
    def decompose(self):
        # stack one row of length n_p per (species, period) pair
        E = np.zeros((len(self.d) * self.h, self.n_p))
        # in this step, the seasonal component is not used when computing P
        P, C = self.calc_P_and_C(self.K, self.p, self.r, self.A, self.W, False)
        for i in self.d:
            for t in range(0, self.t_train):
                # relative deviation of the observed count from the non-seasonal fit
                v = (self.x_train[i][t] - P[i][t]) / (P[i][t] + 1)
                quotient = t // self.n_p
                tau = t % self.n_p
                pos = (i * self.h) + quotient
                E[pos, tau] = v
        # extract independent seasonal base patterns
        ica = FastICA(n_components = self.trend_size)
        ica.fit(E)
        # each row of B is one base pattern of length n_p
        self.B = np.transpose(ica.mixing_)
    def levenberg_marquardt_seasonal_component(self):
        self.decompose()
        params = Parameters()
        for i in self.d:
            for k in range(0, self.trend_size):
                params.add("W_{0}_{1}".format(i, k), value = self.W[i][k])
        ret = minimize(self.objective_for_seasonal_component, params)
        optimized_params = ret.params
        # update W
        for i in self.d:
            for k in range(0, self.trend_size):
                self.W[i][k] = optimized_params["W_{0}_{1}".format(i, k)].value
        # update e[i][t]
        for i in self.d:
            for t in range(0, self.t_train):
                tau = t % self.n_p
                # reset e[i][t]
                self.e[i][t] = 0.0
                for k in range(0, self.trend_size):
                    self.e[i][t] += self.W[i][k] * self.B[k, tau]
        # update P and C
        self.update_P_and_C()
    def train(self, i = 1):
        print("iteration {0}".format(i))
        print("before: {0}".format(self.objective()))
        self.levenberg_marquardt()
        print("after LM step: {0}".format(self.objective()))
        self.levenberg_marquardt_seasonal_component()
        print("after seasonal step: {0}".format(self.objective()))
if __name__ == '__main__':
    input_file = sys.argv[1]
    t_train = int(sys.argv[2])
    trend_size = int(sys.argv[3])
    model = Ecoweb(trend_size)
    model.read_file(input_file, t_train)
    model.initialize()
    for i in range(0, 100):
        model.train(i)
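To try the script end to end, one option is to generate a small synthetic input file in the tab-separated format documented at the top and feed it to ecoweb.py. The sketch below does exactly that; the file name toy_input.tsv, the species names, and the sinusoidal weekly counts are invented for illustration, and only the file format and command-line arguments are taken from the script above.

# make_toy_input.py -- hypothetical helper, not part of the original gist
import math
import random
import subprocess

n_weeks = 156                          # three years of weekly counts (arbitrary choice)
species = ["alpha", "beta", "gamma"]   # made-up activity names

with open("toy_input.tsv", "w") as f:
    for name in species:
        pairs = []
        for t in range(n_weeks):
            # a rough yearly cycle plus noise, purely synthetic
            count = int(500 + 200 * math.sin(2 * math.pi * t / 52) + random.randint(0, 50))
            pairs.append("{0}:{1}".format(t, count))
        f.write(name + "\t" + "\t".join(pairs) + "\n")

# train on the first 104 weeks with 2 seasonal trend patterns
subprocess.run(["python3", "ecoweb.py", "toy_input.tsv", "104", "2"])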