Skip to content

Instantly share code, notes, and snippets.

View pzelasko's full-sized avatar
🌴
Building stuff!

Piotr Żelasko pzelasko

🌴
Building stuff!
View GitHub Profile
@pzelasko
pzelasko / pytorch_shared_memory_disable.py
Created March 1, 2021 16:59
Disable shared memory in PyTorch dataloader
import sys
import torch
from torch.utils.data import dataloader
from torch.multiprocessing import reductions
from multiprocessing.reduction import ForkingPickler
default_collate_func = dataloader.default_collate
def default_collate_override(batch):
@pzelasko
pzelasko / install_k2.sh
Last active July 31, 2024 08:13
Steps needed to install K2 from scratch
#!/usr/bin/env bash
# Common steps
conda create -n k2 python=3.8
conda activate k2
conda install -c nvidia cudnn=7.6.5 cudatoolkit=10.2
conda install -c pytorch pytorch torchaudio
pip install cmake
mkdir build
pushd build
@pzelasko
pzelasko / debug_pickle.py
Created May 23, 2021 02:52 — forked from jneight/debug_pickle.py
Debug pickling errors.
# from http://stackoverflow.com/questions/569754/how-to-tell-for-which-object-attribute-pickle-fails
"""
Show which fields cannot be pickled
"""
import pickle
def get_pickling_errors(obj,seen=None):
if seen is None:
seen = []
@pzelasko
pzelasko / lhotse_datapipes.py
Last active November 5, 2021 16:06
A draft of Lhotse + DataPipes integration
#!/usr/bin/env python
import warnings
from collections import deque, defaultdict
from functools import partial
from pathlib import Path
from typing import Optional
from lhotse import CutSet, load_manifest
from lhotse.utils import Seconds
@pzelasko
pzelasko / download_citations.py
Created July 25, 2023 21:13
Download Google Scholar citation list as a persistent Python dict
# Note: after downloading ~250 citations, further connections might be blocked by Google Scholar.
import time
import shelve
# Make sure to run: pip install scholarly tqdm
from scholarly import scholarly
from tqdm.auto import tqdm
search_query = scholarly.search_author('Piotr Żelasko') # replace the author
@pzelasko
pzelasko / analyze_wer.py
Created April 5, 2024 23:44
Analyze where most errors occur in ASR transcripts, using a NeMo manifest with `text` and `pred_text` keys.
"""
Make sure to first run:
$ pip install click pandas lhotse kaldialign
"""
import click
import pandas as pd
from lhotse.serialization import load_jsonl
from kaldialign import align, bootstrap_wer_ci
EPS = '*'
from io import BytesIO
from lhotse import CutSet, Recording, SupervisionSegment, AudioSource
from lhotse.shar import AudioTarWriter
import soundfile as sf
from lhotse.shar.utils import to_shar_placeholder
from nemo.collections.common.data.lhotse import get_lhotse_dataloader_from_config