Created
August 15, 2016 04:32
-
-
Save akelleh/4081035c907ba923fc689f91927f101d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
from sklearn.linear_model import LinearRegression | |
from sklearn.linear_model import Lasso | |
from sklearn.cross_validation import train_test_split | |
from sklearn.metrics import r2_score | |
import networkx as nx | |
import pandas as pd | |
from scipy.optimize import minimize | |
import time | |
class RandomData(object):
    """Synthetic data set sampled from a randomly generated linear DAG.

    A random weighted directed acyclic graph is built over ``variables``
    nodes.  Every node's value is standard-normal noise plus the weighted
    sum of its predecessors' values, the weights being the edge weights.

    Attributes:
        g: the ``networkx.DiGraph``; after construction each node carries a
            ``'name'`` (``'X_<node>'``) and a ``'data'`` list of samples.
        X: ``pandas.DataFrame`` with one column per node, keyed by name.
    """

    def __init__(self, variables=3, p=0.3, n=50):
        """Build the random graph and draw ``n`` samples from it.

        Args:
            variables: number of nodes (columns) to generate.
            p: probability that a below-diagonal adjacency entry becomes
                an edge.
            n: number of samples (rows) to draw.
        """
        # DAG iff there exists a lower triangular form; go ahead and make
        # the edges the linear regression coefficients.
        # NOTE: this reseeds numpy's *global* RNG from the wall clock.
        np.random.seed(int(100 * time.time() % 4294967295))
        adj_matrix = np.array([
            [
                np.random.normal()
                if np.random.binomial(1, p) and i > j
                else 0.
                for j in range(variables)
            ]
            for i in range(variables)
        ])
        self.g = nx.DiGraph(adj_matrix)
        self.generate_data(n=n)
        self.X = pd.DataFrame({
            attributes['name']: attributes['data']
            for node, attributes in self.g.nodes(data=True)
        })

    def generate_data(self, n=1000):
        """Draw ``n`` samples, storing them as lists on the graph's nodes.

        Each node gets a ``'name'`` and a fresh ``'data'`` list.  For every
        sample the roots receive pure noise first, then the remaining nodes
        are filled in an order that guarantees all predecessors already
        hold their value for the current sample.

        Args:
            n: number of samples to append to every node's ``'data'`` list.
        """
        # dict(...) accepts both the plain dict returned by old networkx
        # releases and the degree view returned by newer ones.
        in_degrees = dict(self.g.in_degree())
        roots = [node for node, indegree in in_degrees.items()
                 if indegree == 0]

        # Map node -> its live attribute dict; going through
        # nodes(data=True) avoids the ``g.node`` / ``g.nodes`` attribute
        # difference between networkx versions.
        node_attrs = dict(self.g.nodes(data=True))
        for node, attrs in node_attrs.items():
            attrs['data'] = []
            attrs['name'] = 'X_{}'.format(node)

        traversal = self.bfs_traversal(roots)

        # The parents and weights of a node do not change between samples,
        # so look them up once instead of once per sample.
        incoming = {
            node: [
                (node_attrs[predecessor]['data'],
                 self.g.adj[predecessor][node]['weight'])
                for predecessor in self.g.predecessors(node)
            ]
            for node in traversal
        }

        for _ in range(n):
            for node in roots:
                node_attrs[node]['data'].append(np.random.normal())
            for node in traversal:
                value = np.random.normal()
                for parent_data, weight in incoming[node]:
                    # parent_data[-1] is the parent's value for this sample.
                    value += parent_data[-1] * weight
                node_attrs[node]['data'].append(value)

    def bfs_traversal(self, roots):
        """Return the non-root nodes in an order where parents come first.

        The graph is walked generation by generation starting from the
        nodes with in-degree zero.  A node reachable along paths of several
        lengths shows up in several generations; only its last occurrence
        is kept, so every node appears after all of its predecessors.

        Args:
            roots: accepted for interface compatibility but not used; the
                starting nodes are always recomputed from the graph's
                in-degrees.

        Returns:
            List of every node reachable from an in-degree-zero node,
            excluding those starting nodes themselves.
        """
        frontier = [k for k, v in dict(self.g.in_degree()).items() if v == 0]
        traversal = []
        while frontier:
            # De-duplicate the frontier itself: carrying duplicates forward
            # makes the work grow with the number of paths (exponential on
            # dense DAGs) without changing which nodes are discovered.
            next_generation = set()
            for node in frontier:
                next_generation.update(self.g.successors(node))
            frontier = list(next_generation)
            traversal.extend(frontier)

        # Keep only the last occurrence of each node, preserving order.
        seen = set()
        resolved_traversal = []
        for node in reversed(traversal):
            if node not in seen:
                seen.add(node)
                resolved_traversal.append(node)
        resolved_traversal.reverse()
        return resolved_traversal
# Demo: build a data set with the default settings and show its first rows.
df = RandomData().X
# Call form of print works on both Python 2 and Python 3.
print(df.head())
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment