Skip to content

Instantly share code, notes, and snippets.

@cobanov
Created January 18, 2023 10:39
Show Gist options
  • Save cobanov/d1bf36857b1647c4e58d4188c55f87cc to your computer and use it in GitHub Desktop.
Save cobanov/d1bf36857b1647c4e58d4188c55f87cc to your computer and use it in GitHub Desktop.
from cuml.manifold.umap import UMAP
from datetime import datetime
import pandas as pd
import numpy as np
import json
import cupy
import os
import pandas as pd
import utils
# Optional: uncomment the next two lines to select a specific CUDA device
# through CuPy before any GPU work is started.
# GPU_ID = 1
# cupy.cuda.Device(GPU_ID).use()
# n_neighbors values to sweep: the main section runs UMAP once per entry and
# saves one output file for each.
N_NEIGHBORS = [15, 12, 9, 3]
# Path of the embeddings file to load; its extension is passed along to
# utils.read_embeddings.
# NOTE(review): the current value looks like a redacted placeholder — point it
# at a real embeddings file before running.
INPUT_PATH = "/mnt/datauniverse/../.."
def calculate_umap(
    embeddings, n_neighbors, *, n_components=6, min_dist=0.1, spread=1.0
):
    """Reduce ``embeddings`` with cuML's UMAP and return the projection.

    The keyword-only parameters default to the values that were previously
    hard-coded, so existing two-argument calls behave exactly as before.

    Args:
        embeddings: Input data, handed unchanged to ``UMAP.fit_transform``.
        n_neighbors: Value forwarded to UMAP's ``n_neighbors`` parameter.
        n_components: Number of output dimensions requested from UMAP.
        min_dist: Value forwarded to UMAP's ``min_dist`` parameter.
        spread: Value forwarded to UMAP's ``spread`` parameter.

    Returns:
        The result of ``UMAP(...).fit_transform(embeddings)``.
    """
    reducer = UMAP(
        n_components=n_components,
        n_neighbors=n_neighbors,
        min_dist=min_dist,
        spread=spread,
    )
    return reducer.fit_transform(embeddings)
def build_output_path(input_path, n_neighbors, output_dir="./umap"):
    """Return the ``.npy`` file path for one UMAP run.

    Only the base name of ``input_path`` (directories and extension removed)
    goes into the file name. Using the full path would embed directory
    separators in the name and point into directories that do not exist.

    Args:
        input_path: Path of the embeddings file the run was computed from.
        n_neighbors: The n_neighbors value used for this run.
        output_dir: Directory the result file is placed in.

    Returns:
        ``<output_dir>/umap_<stem>_n<n_neighbors>.npy`` as a string.
    """
    stem = os.path.splitext(os.path.basename(input_path))[0]
    # The suffix is spelled out so the reported path matches the file on disk
    # (np.save would otherwise append ".npy" silently).
    return os.path.join(output_dir, f"umap_{stem}_n{n_neighbors}.npy")


if __name__ == "__main__":
    # exist_ok=True replaces the separate exists() check and its race window.
    os.makedirs("./umap", exist_ok=True)

    # The extension is forwarded to the reader alongside the path.
    extension = os.path.splitext(INPUT_PATH)[1]
    embeddings = utils.read_embeddings(INPUT_PATH, extension)

    # One UMAP run and one saved file per n_neighbors value.
    for n_neighbors in N_NEIGHBORS:
        print(
            f'{datetime.now().strftime("%H:%M")} - Calculation started with {n_neighbors} neighbors...'
        )
        umap_out = calculate_umap(embeddings, n_neighbors)
        output_path = build_output_path(INPUT_PATH, n_neighbors)
        np.save(output_path, umap_out)
        print(f'{datetime.now().strftime("%H:%M")} - File Saved: {output_path}')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment