Skip to content

Instantly share code, notes, and snippets.

View lucasnewman's full-sized avatar

Lucas Newman lucasnewman

  • San Francisco, CA
View GitHub Profile
import datetime
from pathlib import Path
import torch
import torchaudio
from torchaudio.transforms import MelSpectrogram
from einops import rearrange
from vocos import Vocos
from __future__ import annotations
from typing import Any, Optional
import torch
from torch import nn
import torchaudio
import yaml
from __future__ import annotations
from functools import reduce
from typing import List
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pad_sequence
import einx
import torch
def mel_filterbank(
n_freqs,
f_min,
f_max,
n_mels,
sample_rate,
norm=None,
mel_scale="htk"
from functools import lru_cache
import librosa
import mlx.core as mx
import numpy as np
@lru_cache(maxsize=None)
def hanning(size):
"""
Compute the Hanning window.
import mlx.core as mx
def istft(
x: mx.array, # (freq_bins, num_frames)
window: mx.array,
hop_length: int = 256,
win_length: int = 1024,
):
num_frames = x.shape[1]
total_len = (num_frames - 1) * hop_length + win_length
diff --git a/mlx_audio/tts/models/spark/bicodec.py b/mlx_audio/tts/models/spark/bicodec.py
index 8de944b..1e2a320 100644
--- a/mlx_audio/tts/models/spark/bicodec.py
+++ b/mlx_audio/tts/models/spark/bicodec.py
@@ -1,5 +1,5 @@
from pathlib import Path
-from typing import Any, Dict
+from typing import Any, Dict, Optional
import mlx.core as mx