Skip to content

Instantly share code, notes, and snippets.

View alex0dd's full-sized avatar

Alex O. P. alex0dd

View GitHub Profile
@alex0dd
alex0dd / confidence_scores.py
Created September 19, 2023 12:37
Active learning helpers
with torch.no_grad():
results["probabilities"] = torch.nn.functional.sigmoid(results["logits"].cuda()).cpu()
results["confidence"] = (
(results["probabilities"] < 0.5).int() * (1 - results["probabilities"]) +
(results["probabilities"] >= 0.5).int() * (results["probabilities"])
).squeeze()
if "confidence" in results:
sort_idxs = torch.argsort(results["confidence"])
else:
@alex0dd
alex0dd / slice_list.py
Created September 18, 2023 19:45
A list subclass that supports NumPy/PyTorch-style indexing, such as boolean masks.
import torch
import numpy as np
class slice_list(list):
def __getitem__(self, idx):
if (type(idx) == torch.Tensor and idx.dtype == torch.bool) or (type(idx) == np.ndarray and idx.dtype == np.bool):
new_l = []
for l_item, cond in zip(self, idx.tolist()):
if cond:
@alex0dd
alex0dd / load_dataframe.py
Last active March 1, 2023 15:16
Boilerplate to load a JSON Lines file into a pandas DataFrame in chunks and remap the columns to new names.
import pandas as pd
def load_dataframe_in_chunks(df_path, columns_to_load, chunksize=2**19, chunks_limit=None, verbose=0):
"""
Loads a json into a dataframe, by consuming chunks of data and recombining it together
"""
with pd.read_json(df_path, lines=True, chunksize=chunksize) as df_chunked_loader:
dfs_to_concat = []
df_chunked_enumerator = enumerate(df_chunked_loader)
for idx, df_chunk in df_chunked_enumerator:
@alex0dd
alex0dd / sparse_cosine_similarity.py
Created January 31, 2023 12:12
Function to compute sparse row-wise cosine similarity in PyTorch
import tqdm
def sparse_row_wise_cosine_similarity(sparse_matrix, threshold=0.0, eps=1e-8):
"""
Given a matrix, where for each row, we have associated a vector of features (e.g. adjacency matrix, TF-IDF, feature vectors)
Return a similarity matrix of shape (n_rows, n_rows), indicating the cosine similarity score between each pair of input matrix rows.
"""
n_rows = sparse_matrix.shape[0]
# Compute the similarity dot product sparse matrix (numerator of the cosine similarity)
@alex0dd
alex0dd / example.py
Last active April 20, 2023 16:59
Visualizing memory consumption of a PyTorch model.
import torch
from pytorch_memory_profiling import memory_logging
from torchvision.models import resnet18
def training_loop(model, criterion, optimizer, dataloader):
model.train()
for (batch_x, batch_y) in dataloader:
pred_y = model(batch_x)
loss = criterion(pred_y, batch_y)
@alex0dd
alex0dd / dllogger_stdout_parser.py
Created November 10, 2021 16:21
NVIDIA DLLogger stdout parser
import numpy as np
def update_dict(accumulator_data, incoming_data):
"""
Given two dictionaries accumulator_data and incoming_data,
update accumulator's keys according to incoming data keys.
"""
for key in incoming_data.keys():
if key not in accumulator_data:
accumulator_data[key] = [incoming_data[key]]
@alex0dd
alex0dd / ema_update.py
Last active July 9, 2020 20:52
A simple TensorFlow 2.2 function to perform an exponential moving average (EMA) update of a clone model.
def ema_update(model, model_ema, beta=0.9999):
"""
Performs a model update by using exponential moving average (EMA)
of first model's weights to update second model's weights
as defined in "The Unusual Effectiveness of Averaging in GAN Training" (https://arxiv.org/abs/1806.04498),
realizing the following update:
model_ema.weights = beta * model_ema.weights + (1 - beta) * model.weights
:param model: original, gradient descent trained model.