Created
          August 10, 2020 00:24 
        
      - 
      
 - 
        
Save neelriyer/e2ee03a713f2d842f0fff6ee16f63ca6 to your computer and use it in GitHub Desktop.  
    convert waveform to matrix
  
        
  
    
      This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
      Learn more about bidirectional Unicode characters
    
  
  
    
  | from torch.autograd import Variable | |
| import librosa | |
| import numpy as np | |
| import torch | |
# FFT window size (in samples) handed to librosa.stft by read_audio_spectum.
N_FFT = 2048
def read_audio_spectum(filename):
    """Load an audio file and return its log-magnitude spectrogram.

    Args:
        filename: Path of the audio file, passed straight to ``librosa.load``.

    Returns:
        A ``(S, fs)`` tuple where ``S`` is ``log1p(|STFT|)`` of the waveform,
        computed with an FFT size of ``N_FFT``, and ``fs`` is the sampling
        rate reported by ``librosa.load``.
    """
    x, fs = librosa.load(filename)
    # n_fft must be passed by keyword: recent librosa releases made every
    # stft argument after the signal keyword-only.
    S = librosa.stft(x, n_fft=N_FFT)
    # Keep only the magnitude; log1p compresses the dynamic range while
    # mapping silence (0) to 0. The phase is not needed here.
    S = np.log1p(np.abs(S))
    return S, fs
# Log-magnitude spectrograms (and sampling rates) of the style and content clips.
style_audio, style_sr = read_audio_spectum(style_audio_name)
content_audio, content_sr = read_audio_spectum(content_audio_name)

if content_sr != style_sr:
    # A bare string cannot be raised in Python 3 (it triggers a TypeError
    # instead), so raise a proper exception carrying the same message.
    raise ValueError('Sampling rates are not same')

# Number of frequency rows the arrays are reshaped to (1025 for N_FFT=2048).
n_freq_bins = N_FFT // 2 + 1

# Prepend a singleton axis: (freq_bins, frames) -> (1, freq_bins, frames).
style_audio = style_audio.reshape([1, n_freq_bins, style_audio.shape[1]])
# Each clip keeps its own frame count; reusing the style clip's length here
# would fail whenever the two recordings differ in duration.
content_audio = content_audio.reshape([1, n_freq_bins, content_audio.shape[1]])

# Wrap the arrays as torch tensors, moving them to the GPU when one is present.
if torch.cuda.is_available():
    style_float = Variable(torch.from_numpy(style_audio).cuda())
    content_float = Variable(torch.from_numpy(content_audio).cuda())
else:
    style_float = Variable(torch.from_numpy(style_audio))
    content_float = Variable(torch.from_numpy(content_audio))
  
    Sign up for free
    to join this conversation on GitHub.
    Already have an account?
    Sign in to comment