# all imports
from IPython.display import Javascript
from google.colab import output
from base64 import b64decode
from io import BytesIO
!pip -q install pydub
from pydub import AudioSegment

RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
  const reader = new FileReader()
  reader.onloadend = e => resolve(e.srcElement.result)
  reader.readAsDataURL(blob)
})
var record = time => new Promise(async resolve => {
  stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  recorder = new MediaRecorder(stream)
  chunks = []
  recorder.ondataavailable = e => chunks.push(e.data)
  recorder.start()
  await sleep(time)
  recorder.onstop = async ()=>{
    blob = new Blob(chunks)
    text = await b2text(blob)
    resolve(text)
  }
  recorder.stop()
})
"""

def record(sec=3):
  display(Javascript(RECORD))
  s = output.eval_js('record(%d)' % (sec*1000))
  b = b64decode(s.split(',')[1])
  audio = AudioSegment.from_file(BytesIO(b))
  return audio
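A quick way to check the result in the notebook is to export the returned AudioSegment and play it back (a minimal usage sketch; the file name is just an example):

audio = record(5)                          # record roughly 5 seconds
print(len(audio) / 1000, 'seconds')        # pydub reports length in milliseconds
audio.export('sample.wav', format='wav')   # pydub calls ffmpeg to write the WAV
from IPython.display import Audio
Audio('sample.wav')                        # inline player in the notebook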
# all imports
from IPython.display import Javascript
from google.colab import output
from base64 import b64decode

RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise(resolve => {
  const reader = new FileReader()
  reader.onloadend = e => resolve(e.srcElement.result)
  reader.readAsDataURL(blob)
})
var record = time => new Promise(async resolve => {
  stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  recorder = new MediaRecorder(stream)
  chunks = []
  recorder.ondataavailable = e => chunks.push(e.data)
  recorder.start()
  await sleep(time)
  recorder.onstop = async ()=>{
    blob = new Blob(chunks)
    text = await b2text(blob)
    resolve(text)
  }
  recorder.stop()
})
"""

def record(sec=3):
  display(Javascript(RECORD))
  s = output.eval_js('record(%d)' % (sec*1000))
  b = b64decode(s.split(',')[1])
  with open('audio.wav','wb') as f:
    f.write(b)
  return 'audio.wav'  # or webm ?
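Note that the bytes MediaRecorder returns are usually WebM/Opus rather than real WAV, which is why the comment above hedges on the extension. A small sketch (assuming pydub/ffmpeg are installed, as in the first snippet) that converts the saved file into an actual PCM WAV:

from pydub import AudioSegment
AudioSegment.from_file('audio.wav').export('audio_pcm.wav', format='wav')  # re-encode via ffmpeg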
@the-psychedelic Hi, I got the same problem. Did you ever find a solution? I hope you found a way.
Were you able to find a solution?
For the invalid RIFF-header error:

!pip install ffmpeg-python
import ffmpeg

def fix_riff_header(binary):
  # Re-encode the recorded bytes to WAV via ffmpeg, using pipes instead of temp files.
  process = (
      ffmpeg
      .input('pipe:0')
      .output('pipe:1', format='wav')
      .run_async(pipe_stdin=True, pipe_stdout=True, pipe_stderr=True, quiet=True, overwrite_output=True)
  )
  wav, err = process.communicate(input=binary)
  # When ffmpeg streams WAV to a pipe it cannot seek back to write the final size,
  # so the RIFF chunk size field is a placeholder. Compute the real size here.
  riff_chunk_size = len(wav) - 8
  # Break the chunk size into four little-endian bytes, held in b.
  q = riff_chunk_size
  b = []
  for i in range(4):
    q, r = divmod(q, 256)
    b.append(r)
  # Replace bytes 4:8 of the re-encoded output with the actual size of the RIFF chunk.
  riff = wav[:4] + bytes(b) + wav[8:]
  return riff

def record(sec=3):
  display(Javascript(RECORD))
  s = output.eval_js('record(%d)' % (sec*1000))
  b = b64decode(s.split(',')[1])
  b = fix_riff_header(b)
  with open('audio.wav','wb') as f:
    f.write(b)
  audio = AudioSegment.from_file(BytesIO(b))
  return audio
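To confirm the patch worked, the first 12 bytes of the written file should now form a valid RIFF/WAVE header whose little-endian size field equals the file length minus 8 (a quick sanity check, assuming the audio.wav written above):

import struct
with open('audio.wav', 'rb') as f:
  header = f.read(12)
print(header[:4], header[8:12])             # expect b'RIFF' and b'WAVE'
print(struct.unpack('<I', header[4:8])[0])  # patched RIFF chunk size = file size - 8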
How can we modify seconds in record(sec=3) to record short as well as long clips?
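For what it's worth, the sec parameter of the earlier record() already sets the clip length in seconds, so short and long clips are just different argument values (illustrative calls only):

short_clip = record(2)    # about 2 seconds
long_clip = record(30)    # about 30 seconds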
This script is a basic example of how to continuously record audio until the user has finished speaking and then process that speech. You can customize and expand upon it for your specific use case or application.
# all imports
from IPython.display import Javascript
from google.colab import output
from base64 import b64decode
from io import BytesIO
!pip -q install pydub
from pydub import AudioSegment

RECORD = """
const sleep = time => new Promise(resolve => setTimeout(resolve, time))
const b2text = blob => new Promise((resolve, reject) => {
  const reader = new FileReader()
  reader.onloadend = e => resolve(e.target.result)
  reader.onerror = e => reject(new Error("Failed to read blob"))
  reader.readAsDataURL(blob)
})
var recordUntilSilence = time => new Promise(async (resolve, reject) => {
  let stream, recorder, chunks, blob, text, audioContext, analyser, dataArr, silenceStart, threshold = 50, silenceDelay = 2000
  try {
    stream = await navigator.mediaDevices.getUserMedia({ audio: true })
  } catch (err) {
    return reject(new Error("Failed to get media stream"))
  }
  audioContext = new AudioContext()
  const source = audioContext.createMediaStreamSource(stream)
  analyser = audioContext.createAnalyser()
  analyser.fftSize = 512
  dataArr = new Uint8Array(analyser.frequencyBinCount)
  source.connect(analyser)
  recorder = new MediaRecorder(stream)
  chunks = []
  recorder.ondataavailable = e => chunks.push(e.data)
  recorder.onstop = async () => {
    blob = new Blob(chunks)
    try {
      text = await b2text(blob)
      resolve(text)
    } catch (err) {
      reject(new Error("Failed to convert blob to text"))
    }
  }
  recorder.onerror = e => reject(new Error("Recorder error"))
  recorder.start()
  const checkSilence = () => {
    analyser.getByteFrequencyData(dataArr)
    const avg = dataArr.reduce((p, c) => p + c, 0) / dataArr.length
    if (avg < threshold) {
      if (silenceStart === null) silenceStart = new Date().getTime()
      else if (new Date().getTime() - silenceStart > silenceDelay) {
        recorder.stop()
        audioContext.close()
        return
      }
    } else {
      silenceStart = null
    }
    requestAnimationFrame(checkSilence)
  }
  silenceStart = null
  checkSilence()
})
console.log("JavaScript code executed successfully.")
"""

def record_until_silence():
  try:
    display(Javascript(RECORD))
    s = output.eval_js('recordUntilSilence()')
    b = b64decode(s.split(',')[1])
    audio = AudioSegment.from_file(BytesIO(b))
    return audio
  except Exception as e:
    print(f"An error occurred: {e}")
    return None
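A possible way to call it (the file name and print are only illustrative; the threshold and silenceDelay constants in the JavaScript above control how quiet and how long the pause must be before recording stops):

audio = record_until_silence()
if audio is not None:
  print(f'Recorded {len(audio) / 1000:.1f} s before silence was detected')
  audio.export('speech.wav', format='wav')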
# from https://gist.github.com/korakot/c21c3476c024ad6d56d5f48b0bca92be
from IPython.display import Javascript
from google.colab import output
from base64 import b64decode
# RECORD = """
# const sleep = time => new Promise(resolve => setTimeout(resolve, time))
# const b2text = blob => new Promise(resolve => {
# const reader = new FileReader()
# reader.onloadend = e => resolve(e.srcElement.result)
# reader.readAsDataURL(blob)
# })
# var record = time => new Promise(async resolve => {
# stream = await navigator.mediaDevices.getUserMedia({ audio: true })
# recorder = new MediaRecorder(stream)
# chunks = []
# recorder.ondataavailable = e => chunks.push(e.data)
# recorder.start()
# await sleep(time)
# recorder.onstop = async ()=>{
# blob = new Blob(chunks)
# text = await b2text(blob)
# resolve(text)
# }
# recorder.stop()
# })
# """
RECORD = """
const sleep = time => new Promise(resolve => {
setTimeout(resolve, time)
}, )
const b2text = blob => new Promise(resolve => {
const reader = new FileReader()
reader.onloadend = e => resolve(e.srcElement.result)
reader.readAsDataURL(blob)
})
var espacio = document.querySelector("#output-area")
var record = time => new Promise(async resolve => {
stream = await navigator.mediaDevices.getUserMedia({ audio: true })
recorder = new MediaRecorder(stream)
chunks = []
recorder.ondataavailable = e => chunks.push(e.data)
recorder.start()
var numerillo = (time/1000)-1
for (var i = 0; i < numerillo; i++) {
espacio.appendChild(document.createTextNode(numerillo-i))
await sleep(1000)
espacio.removeChild(espacio.lastChild)
}
recorder.onstop = async ()=>{
blob = new Blob(chunks)
text = await b2text(blob)
resolve(text)
}
recorder.stop()
})
"""
def record(sec, filename='audio.wav'):
  display(Javascript(RECORD))
  print("before s")
  s = output.eval_js('record(%d)' % (sec*1000))  # the JS record() expects milliseconds
  print(s)
  b = b64decode(s.split(',')[1])
  with open(filename, 'wb+') as f:
    f.write(b)

audio = 'audio.wav'
second = 5
print(f"Speak to your microphone {second} sec...")
record(second, audio)
print("Done!")
import librosa
import librosa.display
speech, rate = librosa.load(audio)
librosa.display.waveshow(speech, sr=rate)
import matplotlib.pyplot as plt
plt.show()
import pysndfile
pysndfile.sndio.write('audio_ds.wav', speech, rate=rate, format='wav', enc='pcm16')
from IPython.display import display, Audio
display(Audio(speech, rate=rate))
I am executing the above code, but the cell doesn't stop executing. The output so far is as follows:
Speak to your microphone 5 sec...
before s
It doesn't change even if I use the commented section instead.
I am running this in a Jupyter notebook locally. Is that the problem, or is it something else?
+1
Thanks :)