import argparse
import base64
import time
from typing import Any, Dict

import librosa
import numpy as np
import requests


class EmotionClient:
    """Client for the Emotion Recognition API"""

    def __init__(self, base_url: str = "https://32e949f50eff.ngrok-free.app"):
        self.base_url = base_url.rstrip('/')

    def _audio_to_base64(self, audio: np.ndarray) -> str:
        """Convert numpy audio array to base64 encoded bytes"""
        audio_bytes = audio.astype(np.float32).tobytes()
        return base64.b64encode(audio_bytes).decode('utf-8')
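    # Server-side decoding sketch (an assumption inferred from the encoding
    # above, not taken from the actual server code): the array can be restored
    # with
    #   raw = base64.b64decode(payload["audio"])
    #   audio = np.frombuffer(raw, dtype=np.float32)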

    def load_audio(self, file_path: str, sample_rate: int = 16000) -> np.ndarray:
        """Load audio file and resample to target sample rate"""
        audio, _ = librosa.load(file_path, sr=sample_rate)
        return audio

    def predict_all_emotions(self, audio: np.ndarray, sample_rate: int = 16000) -> Dict[str, Any]:
        """Predict all emotions from audio"""
        audio_b64 = self._audio_to_base64(audio)
        payload = {
            "audio": audio_b64,
            "sample_rate": sample_rate,
            "dtype": "float32"
        }
        response = requests.post(f"{self.base_url}/predict/all", json=payload)
        response.raise_for_status()
        return response.json()

    def predict_single_emotion(self, audio: np.ndarray, emotion_key: str, sample_rate: int = 16000) -> Dict[str, Any]:
        """Predict a single emotion from audio"""
        audio_b64 = self._audio_to_base64(audio)
        payload = {
            "audio": audio_b64,
            "emotion_key": emotion_key,
            "sample_rate": sample_rate,
            "dtype": "float32"
        }
        response = requests.post(f"{self.base_url}/predict/single", json=payload)
        response.raise_for_status()
        return response.json()

    def get_top_emotions(self, audio: np.ndarray, top_k: int = 5, sample_rate: int = 16000) -> Dict[str, Any]:
        """Get top K emotions from audio"""
        audio_b64 = self._audio_to_base64(audio)
        payload = {
            "audio": audio_b64,
            "top_k": top_k,
            "sample_rate": sample_rate,
            "dtype": "float32"
        }
        response = requests.post(f"{self.base_url}/predict/top", json=payload)
        response.raise_for_status()
        return response.json()

    def get_available_emotions(self) -> Dict[str, Any]:
        """Get list of available emotions"""
        response = requests.get(f"{self.base_url}/emotions/available")
        response.raise_for_status()
        return response.json()

    def health_check(self) -> Dict[str, Any]:
        """Check API health"""
        response = requests.get(f"{self.base_url}/health")
        return response.json()
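

# Response shapes assumed by main() below (inferred from how this client reads
# the JSON, not from server documentation):
#   /predict/all        -> {"predictions": {emotion: score, ...}, "processing_time": float}
#   /predict/single     -> {"emotion_key": str, "score": float}
#   /predict/top        -> {"top_emotions": [[emotion, score], ...], "processing_time": float}
#   /emotions/available -> {"emotions": [...]}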
def main():
    parser = argparse.ArgumentParser(description="Emotion Recognition Inference Client")
    parser.add_argument("--audio", "-a", default='segment_2.wav', help="Path to audio file")
    parser.add_argument("--url", "-u", default="http://localhost:8001", help="API base URL")
    parser.add_argument("--mode", "-m", choices=["all", "single", "top"], default="all",
                        help="Prediction mode")
    parser.add_argument("--emotion", "-e", default='Valence', help="Emotion key for single prediction")
    parser.add_argument("--top-k", "-k", type=int, default=5, help="Number of top emotions to return")
    parser.add_argument("--sample-rate", "-sr", type=int, default=16000, help="Audio sample rate")
    args = parser.parse_args()

    # Initialize client
    client = EmotionClient(args.url)

    # Health check
    try:
        health = client.health_check()
        print(f"API Health: {health}")
    except Exception as e:
        print(f"Error connecting to API: {e}")
        return

    # Load audio
    try:
        print(f"Loading audio from: {args.audio}")
        audio = client.load_audio(args.audio, args.sample_rate)
        print(f"Audio shape: {audio.shape}, Sample rate: {args.sample_rate}")
    except Exception as e:
        print(f"Error loading audio: {e}")
        return

    # Get available emotions
    try:
        emotions_info = client.get_available_emotions()
        print(f"Available emotions: {emotions_info['emotions']}")
    except Exception as e:
        print(f"Error getting emotions: {e}")
        return

    # Perform prediction based on mode
    try:
        start_time = time.time()

        if args.mode == "all":
            print("\n--- Predicting All Emotions ---")
            result = client.predict_all_emotions(audio, args.sample_rate)
            print(f"Processing time: {result['processing_time']:.3f}s")
            print("Predictions:")
            for emotion, score in result['predictions'].items():
                print(f"  {emotion}: {score}")

        elif args.mode == "single":
            if not args.emotion:
                print("Error: --emotion required for single prediction mode")
                return
            print(f"\n--- Predicting Single Emotion: {args.emotion} ---")
            result = client.predict_single_emotion(audio, args.emotion, args.sample_rate)
            print(f"Emotion: {result['emotion_key']}")
            print(f"Score: {result['score']}")

        elif args.mode == "top":
            print(f"\n--- Getting Top {args.top_k} Emotions ---")
            result = client.get_top_emotions(audio, args.top_k, args.sample_rate)
            print(f"Processing time: {result['processing_time']:.3f}s")
            print("Top emotions:")
            for i, (emotion, score) in enumerate(result['top_emotions'], 1):
                print(f"  {i}. {emotion}: {score}")

        total_time = time.time() - start_time
        print(f"\nTotal inference time: {total_time:.3f}s")

    except Exception as e:
        print(f"Error during prediction: {e}")


if __name__ == "__main__":
    main()
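

# Example invocations (a sketch; the filename emotion_client.py is assumed, and
# the server must be reachable at the given URL):
#   python emotion_client.py --audio segment_2.wav --mode all
#   python emotion_client.py -a segment_2.wav -m top -k 3 -u http://localhost:8001
#   python emotion_client.py -a segment_2.wav -m single -e Valence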