Skip to content

Instantly share code, notes, and snippets.

@wmalarski
Created March 28, 2019 06:40
Show Gist options
  • Save wmalarski/0aba11168a449e7426b61d5c4b681656 to your computer and use it in GitHub Desktop.
Save wmalarski/0aba11168a449e7426b61d5c4b681656 to your computer and use it in GitHub Desktop.
import multiprocessing
import os
import pickle
from os import path, getpid
from typing import Dict, Set, Tuple
import numpy as np
import pandas as pd
import seaborn as sns
from tqdm import tqdm
# Select seaborn's 'whitegrid' style for the whole process; this runs at import time.
sns.set_style('whitegrid')
class Storage:
    """Memoising cache of the wildcard patterns generated from a row.

    For a row of ``gens`` values, each of the ``2 ** gens`` subsets of
    positions is overwritten with the marker value ``2`` and the resulting
    row is rendered with ``str``.  The set of those strings is cached in the
    mapping handed to the constructor, keyed by ``str(row)``.
    """

    def __init__(self, dict_storage: dict, gens: int):
        """Set up the position masks and attach the backing mapping.

        Args:
            dict_storage: mapping used as the cache.  It is used as-is (not
                copied), so it may be a plain ``dict`` or a shared mapping
                such as the ``multiprocessing.Manager().dict()`` proxy that
                ``main`` passes in.
            gens: number of positions in every row.
        """
        # storage config
        self._gens = gens
        self._gens2 = 2 ** gens
        # Row v of comb_pattern holds the `gens`-bit binary expansion of v, so
        # the rows enumerate every subset of positions exactly once.
        comb_pattern = np.array(
            [[int(bit) for bit in format(value, "0" + str(gens) + "b")]
             for value in range(self._gens2)]
        )
        # (row_indices, column_indices) of every set bit, used in `get` as a
        # fancy index to stamp the marker value.
        self._pattern = np.where(comb_pattern)
        self._storage: Dict[str, Set[str]] = dict_storage

    def dump(self, file_path: str):
        """Pickle the cache contents to ``file_path``.

        The mapping is snapshotted into a plain ``dict`` first.  Pickling a
        manager proxy directly would store only a reference to the manager
        process, not the cached data.
        """
        snapshot = dict(self._storage)
        with open(file_path, 'wb') as file:
            pickle.dump(snapshot, file)

    def get(self, row: np.ndarray) -> Set[str]:
        """Return the set of pattern strings for ``row``, computing it on a miss.

        Args:
            row: array of shape ``(1, gens)``; ``single_test`` reshapes each
                population row to this shape before calling.

        Returns:
            The ``str`` renderings of ``row`` with every subset of positions
            replaced by ``2``.
        """
        key = str(row)
        row_set = self._storage.get(key)
        if row_set is None:
            # Fancy-index row 0 `2 ** gens` times: yields a fresh
            # (2 ** gens, gens) copy, so `row` itself is never modified.
            # Builtin `int` replaces the `np.int` alias removed in NumPy 1.24.
            row_map = row[np.zeros((self._gens2,), dtype=int)]
            row_map[self._pattern] = 2
            # A plain comprehension avoids np.apply_along_axis, which fixes
            # the string width from the first row it sees.
            row_set = {str(pattern_row) for pattern_row in row_map}
            self._storage[key] = row_set
        return row_set

    def storage(self):
        """Return the backing mapping itself (not a copy)."""
        return self._storage

    def __len__(self):
        """Return the number of cached rows."""
        return len(self._storage)
def single_test(data) -> Tuple[int, int, Storage]:
    """Draw one random population and count the distinct patterns it covers.

    Args:
        data: ``(n, gens, storage)`` tuple, packed into one argument so the
            function can be mapped over a list of tasks.

    Returns:
        ``(n, number_of_distinct_patterns, storage)``.
    """
    n, gens, storage = data
    # Reseed from OS entropy on every call; presumably so that pool workers
    # do not replay the same random stream.
    seed = int.from_bytes(os.urandom(4), byteorder='little')
    np.random.seed(seed)
    # n random binary rows with duplicates dropped.
    individuals = np.unique(np.random.randint(0, 2, (n, gens)), axis=0)
    # Storage.get expects a (1, gens) row; merge every per-row pattern set.
    covered = set().union(
        *(storage.get(individual.reshape((1, gens))) for individual in individuals)
    )
    return n, len(covered), storage
def main():
    """Run the experiment over a grid of population sizes and save the results.

    Each population size is evaluated by ``single_test`` in a pool of 8
    worker processes that share one pattern cache through a manager dict.
    When all tasks are done, the cache is pickled to ``storage-{gens}.p``
    and the ``(n, unique)`` pairs are written to ``result-{gens}.csv``.
    """
    gens = 10
    file_path = f"storage-{gens}.p"
    # NOTE(review): 500 appears in both of the last two ranges, so it is run
    # twice as often as the other sizes. Confirm whether that is intended.
    ranges = [range(1, 100, 1),
              range(100, 251, 10),
              range(300, 501, 50),
              range(500, 1001, 100)]
    # Every population size is repeated 10 times.
    n_list = np.array([n for rng in ranges for n in rng]).repeat(10)

    # The context manager shuts the manager process down on exit.
    with multiprocessing.Manager() as manager:
        dict_storage = manager.dict()
        storage = Storage(dict_storage, gens)
        tasks = [(n, gens, storage) for n in n_list]
        results = []
        with multiprocessing.Pool(processes=8) as pool, tqdm(total=len(tasks)) as pbar:
            # The Storage returned by each worker wraps the same shared dict,
            # so it is ignored here; the workers have already written to it.
            for n, unique, _ in pool.imap_unordered(single_test, tasks):
                results.append((n, unique))
                pbar.update()
                pbar.set_description(f"n: {n}, unique: {unique}, storage: {len(dict_storage)}")
        # Copy the shared dict into a plain dict while the manager is still
        # alive, so the pickle file holds the data rather than a proxy.
        Storage(dict_storage.copy(), gens).dump(file_path)

    res_df = pd.DataFrame(results)
    res_df.to_csv(f'result-{gens}.csv')
# Script entry point: run the experiment only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment