Last active
March 31, 2020 11:42
-
-
Save sn1p3r46/aaa127996404378c10fdaf0632a07632 to your computer and use it in GitHub Desktop.
Python Implementation of: "Fast Generation of Accurate Synthetic Microdata" https://crises-deim.urv.cat/web/docs/publications/lncs/443.pdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import numpy as np | |
def compute_mean(v):
    """Return the arithmetic mean of the items in *v*.

    The items are added up with the built-in ``sum`` and the total is
    divided by ``len(v)``, so *v* has to be a sized iterable. An empty
    *v* makes the division fail.
    """
    total = sum(v)
    count = len(v)
    return total / count
def subtract_mean_to_col(M, idx):
    """Centre column *idx* of *M* in place by subtracting its mean.

    The column mean is obtained from ``compute_mean`` and the shifted
    values are written back into ``M[:, idx]``. Nothing is returned;
    the caller's matrix is modified.
    """
    column = M[:, idx]
    centred = column - compute_mean(column)
    # Assign through the slice so the update lands in M itself.
    M[:, idx] = centred
def alg_two(n=None, m=None, A=None):
    """Build an n-by-m matrix whose columns are zero-mean, unit-variance and uncorrelated.

    Starting from a seed matrix (random, or the one supplied in *A*), each
    column is made orthogonal to all previous columns and centred, and the
    result is finally scaled so every column has population standard
    deviation 1. For the returned matrix ``R`` this gives
    ``R.T.dot(R) / n`` equal to the identity (up to rounding).

    Parameters
    ----------
    n : int, optional
        Number of rows. Ignored when *A* is given.
    m : int, optional
        Number of columns. Ignored when *A* is given. Defaults to *n* for
        backward compatibility, but see the ``n > m`` requirement below:
        in practice *m* must be passed explicitly.
    A : array-like of shape (n, m), optional
        Seed matrix to use instead of a random one. It is copied (as
        floats) and never modified.

    Returns
    -------
    numpy.ndarray
        A new float array of shape (n, m).

    Raises
    ------
    ValueError
        If neither *n* nor *A* is given, or if ``n <= m``. With ``n <= m``
        there are not enough rows for *m* non-zero, zero-mean, mutually
        orthogonal columns, so the last column would collapse to zero and
        the final scaling would divide by zero.
    numpy.linalg.LinAlgError
        If one of the small linear systems solved during the
        orthogonalisation is singular.
    """
    # 1. Settle the dimensions and the seed matrix.
    if n is None and A is None:
        raise ValueError("At least one parameter among n and A should be passed")
    if A is not None:
        # Private float copy: the steps below write into the matrix, and
        # integer input would otherwise be truncated on assignment.
        A = np.array(A, dtype=float)
        n, m = A.shape
    elif m is None:
        m = n
    if n <= m:
        raise ValueError(
            "n must be greater than m to build %d zero-mean orthogonal "
            "columns (got n=%d, m=%d)" % (m, n, m)
        )
    if A is None:
        A = np.random.rand(n, m)

    # 2. Centre the first column.
    A[:, 0] -= A[:, 0].mean()

    # 3. For every later column, overwrite its last i entries with the
    #    solution of the i-by-i system that makes it orthogonal to columns
    #    0..i-1, then centre it. Centring does not break orthogonality
    #    because the previous columns already sum to zero.
    for i in range(1, m):
        head, tail = A[:-i], A[-i:]
        rhs = -head[:, :i].T.dot(head[:, i])
        A[-i:, i] = np.linalg.solve(tail[:, :i].T, rhs)
        A[:, i] -= A[:, i].mean()

    # 4. Scale every column to unit (population) standard deviation.
    return A / A.std(axis=0)
def alg_one(X):
    """Generate a synthetic data set matching the column means and covariance of *X*.

    A matrix ``A`` with the same shape as *X* is requested from
    ``alg_two`` (its columns are meant to be zero-mean, unit-variance and
    uncorrelated). ``A`` is then multiplied by the upper-triangular
    Cholesky factor of the population covariance of *X* and shifted by
    the column means of *X*.

    Parameters
    ----------
    X : array-like of shape (n, m)
        Original data, one record per row and one attribute per column.

    Returns
    -------
    numpy.ndarray
        Synthetic data of shape (n, m).

    Raises
    ------
    ValueError
        If *X* is not two-dimensional.
    numpy.linalg.LinAlgError
        If the covariance matrix of *X* is not positive definite, so that
        no Cholesky factor exists.
    """
    X = np.asarray(X)
    if X.ndim != 2:
        raise ValueError("X must be a two-dimensional array (records x attributes)")

    # 1. Random matrix with identity covariance, same shape as X.
    A = alg_two(*X.shape)

    # 2. Population covariance of X. np.cov squeezes its result, so a
    #    single-attribute X yields a 0-d array; restore the 1-by-1 matrix
    #    that the Cholesky routine requires.
    C_x = np.atleast_2d(np.cov(X, ddof=0, rowvar=False))

    # 3. Upper-triangular factor U with U.T.dot(U) == C_x.
    U = np.linalg.cholesky(C_x).T

    # 4. Impose the covariance of X on the random matrix.
    X_I = A.dot(U)

    # 5. Restore the column means of X.
    return X_I + X.mean(axis=0)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment