Skip to content

Instantly share code, notes, and snippets.

@proger
Created December 7, 2023 11:37
Show Gist options
  • Select an option

  • Save proger/c5aa475e4ce1e8380e9e7965bf5fb118 to your computer and use it in GitHub Desktop.

Select an option

Save proger/c5aa475e4ce1e8380e9e7965bf5fb118 to your computer and use it in GitHub Desktop.
import gzip
import numpy as np
from sklearn.cluster import MiniBatchKMeans
def read(filename, dim=None):
    """Load a gzip-compressed dump of raw float32 values.

    Args:
        filename: Path to a gzip file whose decompressed payload is a
            sequence of float32 values in native byte order.
        dim: Optional row width. When given, the flat buffer is viewed as a
            2-D array of shape ``(-1, dim)``; when ``None`` (the default)
            the flat 1-D array is returned.

    Returns:
        A read-only ``numpy.ndarray`` of dtype ``float32`` backed by the
        decompressed bytes.

    Raises:
        OSError: If the file cannot be opened or is not valid gzip data.
        ValueError: If the payload size is not a multiple of 4 bytes, or the
            number of values is not divisible by ``dim``.
    """
    with gzip.open(filename, 'rb') as file:
        # gzip.open decompresses transparently, so these are the raw bytes.
        raw_bytes = file.read()
    data = np.frombuffer(raw_bytes, dtype=np.float32)
    if dim is not None:
        # Reshaping a contiguous buffer is a view, not a copy.
        data = data.reshape(-1, dim)
    return data
# Whitespace-separated list of the gzip-compressed float32 dumps to cluster.
# The underscores are plain: a backslash before them would be an invalid
# escape sequence and would end up as a literal backslash in the path.
filenames = """
./exp/embed/1/data_split_ad/stdout
./exp/embed/1/data_split_an/stdout
./exp/embed/1/data_split_az/stdout
./exp/embed/1/data_split_ap/stdout
./exp/embed/1/data_split_bf/stdout
./exp/embed/1/data_split_ba/stdout
./exp/embed/1/data_split_aw/stdout
./exp/embed/1/data_split_ac/stdout
./exp/embed/1/data_split_ai/stdout
./exp/embed/1/data_split_as/stdout
./exp/embed/1/data_split_ay/stdout
./exp/embed/1/data_split_am/stdout
./exp/embed/1/data_split_ag/stdout
./exp/embed/1/data_split_be/stdout
./exp/embed/1/data_split_bb/stdout
./exp/embed/1/data_split_aj/stdout
./exp/embed/1/data_split_at/stdout
./exp/embed/1/data_split_av/stdout
./exp/embed/1/data_split_ah/stdout
./exp/embed/1/data_split_ab/stdout
./exp/embed/1/data_split_ao/stdout
./exp/embed/1/data_split_ae/stdout
./exp/embed/1/data_split_aq/stdout
./exp/embed/1/data_split_bc/stdout
./exp/embed/1/data_split_aa/stdout
./exp/embed/1/data_split_ak/stdout
./exp/embed/1/data_split_au/stdout
./exp/embed/1/data_split_ax/stdout
./exp/embed/1/data_split_ar/stdout
./exp/embed/1/data_split_af/stdout
./exp/embed/1/data_split_al/stdout
./exp/embed/1/data_split_bd/stdout
"""

# Fit 2**16 (65536) centroids incrementally, one file per partial_fit call,
# so only one file's worth of data is held in memory at a time.
kmeans = MiniBatchKMeans(n_clusters=2**16)
for name in filenames.strip().split():
    print(name)
    # NOTE(review): read() returns a flat 1-D float32 array, while
    # scikit-learn estimators validate input as 2-D (n_samples, n_features).
    # The embedding width is not recorded in this file; the batch presumably
    # needs reshaping to (-1, width) before fitting — TODO confirm.
    kmeans.partial_fit(read(name))

# Persist the learned centroids as a .npy array.
np.save('exp/clusters.npy', kmeans.cluster_centers_)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment