Skip to content

Instantly share code, notes, and snippets.

@rickt
Created September 25, 2024 22:19
Show Gist options
  • Save rickt/e311d3e9c225abce183a3e0d4e522f3d to your computer and use it in GitHub Desktop.
Save rickt/e311d3e9c225abce183a3e0d4e522f3d to your computer and use it in GitHub Desktop.
simple faster-whisper API server for GCP Cloud Run GPU
from flask import Flask, request, jsonify, Response
from faster_whisper import WhisperModel
import subprocess
# Flask application object serving the transcription API.
app = Flask(__name__)
# Initialize the faster-whisper model
# Loaded once at module import so the (expensive) model load is reused
# across requests on the same Cloud Run instance.
# "small" is the Whisper model size; assumes an NVIDIA GPU is attached.
model = WhisperModel("small", device="cuda") # Use 'cuda' to leverage the GPU
@app.route('/transcribe', methods=['POST'])
def transcribe():
    """Translate an uploaded audio file to English text.

    Expects a multipart/form-data POST with the audio under the form
    key 'audio'. Returns JSON with the audio duration and the list of
    translated segment texts, or HTTP 400 when the file part is missing.
    """
    file = request.files.get('audio')
    if file is None:
        # request.files['audio'] would raise KeyError (an opaque HTTP 500)
        # on a malformed request; reply with an explicit 400 instead.
        return jsonify({"error": "missing 'audio' file part"}), 400
    # Set task to 'translate' for translation to English;
    # language=None lets the model auto-detect the source language.
    segments, info = model.transcribe(file, task="translate", language=None)
    # NOTE: segments is a lazy generator — building the list below is
    # what actually runs the transcription.
    return jsonify({
        "duration": info.duration,
        "translation": [segment.text for segment in segments]
    })
@app.route('/stats', methods=['GET'])
def gpu_stats():
    """Return the raw output of nvidia-smi as plain text.

    Handy for checking GPU availability/utilization on the Cloud Run
    instance. Responds 503 when nvidia-smi is absent or exits nonzero.
    """
    try:
        # capture_output grabs stderr too so failures are visible;
        # text=True decodes the output, avoiding a manual .decode().
        result = subprocess.run(
            ['nvidia-smi'],
            capture_output=True,
            text=True,
            check=True,
        )
    except FileNotFoundError:
        # No NVIDIA binary on this instance (e.g. a CPU-only revision).
        return Response("nvidia-smi not found on this instance\n",
                        status=503, mimetype='text/plain')
    except subprocess.CalledProcessError as exc:
        # nvidia-smi ran but failed; surface its stderr for debugging.
        return Response(exc.stderr or "nvidia-smi failed\n",
                        status=503, mimetype='text/plain')
    # Return the output of nvidia-smi as plain text
    return Response(result.stdout, mimetype='text/plain')
if __name__ == '__main__':
    # Local/dev entry point. Bind 0.0.0.0 so the container accepts
    # external traffic; Cloud Run routes requests to port 8080.
    app.run(host='0.0.0.0', port=8080)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment