Skip to content

Instantly share code, notes, and snippets.

@hamees-sayed
Last active July 10, 2025 08:37
Show Gist options
  • Select an option
  • Save hamees-sayed/de8fd280935415744460c416a728d9cc to your computer and use it in GitHub Desktop.
import requests
import numpy as np
import base64
import json
import argparse
import librosa
import time
from typing import Optional, Dict, Any
class EmotionClient:
    """Client for the Emotion Recognition API.

    Wraps the HTTP endpoints of an emotion-recognition service: audio is
    shipped as base64-encoded little-endian float32 PCM samples in a JSON
    payload, and every endpoint returns a decoded JSON dict.
    """

    def __init__(self, base_url: str = "https://32e949f50eff.ngrok-free.app",
                 timeout: float = 30.0) -> None:
        """Create a client.

        Args:
            base_url: Root URL of the API; a trailing slash is stripped so
                endpoint paths can be appended directly.
            timeout: Per-request timeout in seconds. Without it, a dead or
                unreachable server would hang every call forever.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

    def _audio_to_base64(self, audio: np.ndarray) -> str:
        """Convert a numpy audio array to a base64 string of float32 bytes."""
        audio_bytes = audio.astype(np.float32).tobytes()
        return base64.b64encode(audio_bytes).decode('utf-8')

    def _post(self, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST *payload* as JSON to *endpoint* and return the decoded body.

        Raises:
            requests.HTTPError: on a non-2xx response status.
        """
        response = requests.post(f"{self.base_url}{endpoint}", json=payload,
                                 timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def load_audio(self, file_path: str, sample_rate: int = 16000) -> np.ndarray:
        """Load an audio file and resample it to *sample_rate* Hz."""
        audio, _sr = librosa.load(file_path, sr=sample_rate)
        return audio

    def predict_all_emotions(self, audio: np.ndarray,
                             sample_rate: int = 16000) -> Dict[str, Any]:
        """Predict scores for every emotion the server supports."""
        payload = {
            "audio": self._audio_to_base64(audio),
            "sample_rate": sample_rate,
            "dtype": "float32",
        }
        return self._post("/predict/all", payload)

    def predict_single_emotion(self, audio: np.ndarray, emotion_key: str,
                               sample_rate: int = 16000) -> Dict[str, Any]:
        """Predict the score for the single emotion named by *emotion_key*."""
        payload = {
            "audio": self._audio_to_base64(audio),
            "emotion_key": emotion_key,
            "sample_rate": sample_rate,
            "dtype": "float32",
        }
        return self._post("/predict/single", payload)

    def get_top_emotions(self, audio: np.ndarray, top_k: int = 5,
                         sample_rate: int = 16000) -> Dict[str, Any]:
        """Get the *top_k* highest-scoring emotions for the audio."""
        payload = {
            "audio": self._audio_to_base64(audio),
            "top_k": top_k,
            "sample_rate": sample_rate,
            "dtype": "float32",
        }
        return self._post("/predict/top", payload)

    def get_available_emotions(self) -> Dict[str, Any]:
        """Get the list of emotion keys the server can score."""
        response = requests.get(f"{self.base_url}/emotions/available",
                                timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def health_check(self) -> Dict[str, Any]:
        """Check API health.

        NOTE: unlike the other endpoints this deliberately does not call
        raise_for_status(), so callers see whatever body the server returns
        even on an error status (preserved from the original behavior).
        """
        response = requests.get(f"{self.base_url}/health", timeout=self.timeout)
        return response.json()
def main():
    """CLI entry point: parse options, probe the API, then run one prediction mode."""
    parser = argparse.ArgumentParser(description="Emotion Recognition Inference Client")
    parser.add_argument("--audio", "-a", default='segment_2.wav', help="Path to audio file")
    parser.add_argument("--url", "-u", default="http://localhost:8001", help="API base URL")
    parser.add_argument("--mode", "-m", choices=["all", "single", "top"], default="all",
                        help="Prediction mode")
    parser.add_argument("--emotion", "-e", default='Valence', help="Emotion key for single prediction")
    parser.add_argument("--top-k", "-k", type=int, default=5, help="Number of top emotions to return")
    parser.add_argument("--sample-rate", "-sr", type=int, default=16000, help="Audio sample rate")
    args = parser.parse_args()

    client = EmotionClient(args.url)

    # Bail out early if the API is unreachable.
    try:
        health = client.health_check()
        print(f"API Health: {health}")
    except Exception as exc:
        print(f"Error connecting to API: {exc}")
        return

    # Load and resample the input file before touching any prediction endpoint.
    try:
        print(f"Loading audio from: {args.audio}")
        audio = client.load_audio(args.audio, args.sample_rate)
        print(f"Audio shape: {audio.shape}, Sample rate: {args.sample_rate}")
    except Exception as exc:
        print(f"Error loading audio: {exc}")
        return

    # Show what the server can score; a failure here is also fatal.
    try:
        emotions_info = client.get_available_emotions()
        print(f"Available emotions: {emotions_info['emotions']}")
    except Exception as exc:
        print(f"Error getting emotions: {exc}")
        return

    # Dispatch on the requested mode, timing the round trip end to end.
    try:
        started = time.time()
        if args.mode == "all":
            print("\n--- Predicting All Emotions ---")
            result = client.predict_all_emotions(audio, args.sample_rate)
            print(f"Processing time: {result['processing_time']:.3f}s")
            print("Predictions:")
            for emotion, score in result['predictions'].items():
                print(f" {emotion}: {score}")
        elif args.mode == "single":
            # Guard kept from the original; unreachable with the default
            # --emotion value but harmless.
            if not args.emotion:
                print("Error: --emotion required for single prediction mode")
                return
            print(f"\n--- Predicting Single Emotion: {args.emotion} ---")
            result = client.predict_single_emotion(audio, args.emotion, args.sample_rate)
            print(f"Emotion: {result['emotion_key']}")
            print(f"Score: {result['score']}")
        elif args.mode == "top":
            print(f"\n--- Getting Top {args.top_k} Emotions ---")
            result = client.get_top_emotions(audio, args.top_k, args.sample_rate)
            print(f"Processing time: {result['processing_time']:.3f}s")
            print("Top emotions:")
            for rank, (emotion, score) in enumerate(result['top_emotions'], 1):
                print(f" {rank}. {emotion}: {score}")
        elapsed = time.time() - started
        print(f"\nTotal inference time: {elapsed:.3f}s")
    except Exception as exc:
        print(f"Error during prediction: {exc}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment