Skip to content

Instantly share code, notes, and snippets.

@hamees-sayed
Last active July 10, 2025 08:37
Show Gist options
  • Select an option
  • Save hamees-sayed/de8fd280935415744460c416a728d9cc to your computer and use it in GitHub Desktop.
import requests
import numpy as np
import base64
import json
import argparse
import librosa
import time
from typing import Optional, Dict, Any
class EmotionClient:
    """Client for the Emotion Recognition API.

    Wraps the HTTP endpoints of an emotion-recognition service: audio is
    shipped as base64-encoded little-endian float32 PCM samples in a JSON
    payload, and every endpoint returns a decoded JSON dict.
    """

    def __init__(self, base_url: str = "https://32e949f50eff.ngrok-free.app",
                 timeout: float = 30.0) -> None:
        """Create a client.

        Args:
            base_url: Root URL of the API; a trailing slash is stripped so
                endpoint paths can be appended directly.
            timeout: Per-request timeout in seconds. Without it, a dead or
                unreachable server would hang every call forever.
        """
        self.base_url = base_url.rstrip('/')
        self.timeout = timeout

    def _audio_to_base64(self, audio: np.ndarray) -> str:
        """Convert a numpy audio array to a base64 string of float32 bytes."""
        audio_bytes = audio.astype(np.float32).tobytes()
        return base64.b64encode(audio_bytes).decode('utf-8')

    def _post(self, endpoint: str, payload: Dict[str, Any]) -> Dict[str, Any]:
        """POST *payload* as JSON to *endpoint* and return the decoded body.

        Raises:
            requests.HTTPError: on a non-2xx response status.
        """
        response = requests.post(f"{self.base_url}{endpoint}", json=payload,
                                 timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def load_audio(self, file_path: str, sample_rate: int = 16000) -> np.ndarray:
        """Load an audio file and resample it to *sample_rate* Hz."""
        audio, _sr = librosa.load(file_path, sr=sample_rate)
        return audio

    def predict_all_emotions(self, audio: np.ndarray,
                             sample_rate: int = 16000) -> Dict[str, Any]:
        """Predict scores for every emotion the server supports."""
        payload = {
            "audio": self._audio_to_base64(audio),
            "sample_rate": sample_rate,
            "dtype": "float32",
        }
        return self._post("/predict/all", payload)

    def predict_single_emotion(self, audio: np.ndarray, emotion_key: str,
                               sample_rate: int = 16000) -> Dict[str, Any]:
        """Predict the score for the single emotion named by *emotion_key*."""
        payload = {
            "audio": self._audio_to_base64(audio),
            "emotion_key": emotion_key,
            "sample_rate": sample_rate,
            "dtype": "float32",
        }
        return self._post("/predict/single", payload)

    def get_top_emotions(self, audio: np.ndarray, top_k: int = 5,
                         sample_rate: int = 16000) -> Dict[str, Any]:
        """Get the *top_k* highest-scoring emotions for the audio."""
        payload = {
            "audio": self._audio_to_base64(audio),
            "top_k": top_k,
            "sample_rate": sample_rate,
            "dtype": "float32",
        }
        return self._post("/predict/top", payload)

    def get_available_emotions(self) -> Dict[str, Any]:
        """Get the list of emotion keys the server can score."""
        response = requests.get(f"{self.base_url}/emotions/available",
                                timeout=self.timeout)
        response.raise_for_status()
        return response.json()

    def health_check(self) -> Dict[str, Any]:
        """Check API health.

        NOTE: unlike the other endpoints this deliberately does not call
        raise_for_status(), so callers see whatever body the server returns
        even on an error status (preserved from the original behavior).
        """
        response = requests.get(f"{self.base_url}/health", timeout=self.timeout)
        return response.json()
def main():
    """CLI entry point: parse options, probe the API, then run one prediction mode."""
    parser = argparse.ArgumentParser(description="Emotion Recognition Inference Client")
    parser.add_argument("--audio", "-a", default='segment_2.wav', help="Path to audio file")
    parser.add_argument("--url", "-u", default="http://localhost:8001", help="API base URL")
    parser.add_argument("--mode", "-m", choices=["all", "single", "top"], default="all",
                        help="Prediction mode")
    parser.add_argument("--emotion", "-e", default='Valence', help="Emotion key for single prediction")
    parser.add_argument("--top-k", "-k", type=int, default=5, help="Number of top emotions to return")
    parser.add_argument("--sample-rate", "-sr", type=int, default=16000, help="Audio sample rate")
    args = parser.parse_args()

    client = EmotionClient(args.url)

    # Bail out early if the API is unreachable.
    try:
        health = client.health_check()
        print(f"API Health: {health}")
    except Exception as exc:
        print(f"Error connecting to API: {exc}")
        return

    # Load and resample the input file before touching any prediction endpoint.
    try:
        print(f"Loading audio from: {args.audio}")
        audio = client.load_audio(args.audio, args.sample_rate)
        print(f"Audio shape: {audio.shape}, Sample rate: {args.sample_rate}")
    except Exception as exc:
        print(f"Error loading audio: {exc}")
        return

    # Show what the server can score; a failure here is also fatal.
    try:
        emotions_info = client.get_available_emotions()
        print(f"Available emotions: {emotions_info['emotions']}")
    except Exception as exc:
        print(f"Error getting emotions: {exc}")
        return

    # Dispatch on the requested mode, timing the round trip end to end.
    try:
        started = time.time()
        if args.mode == "all":
            print("\n--- Predicting All Emotions ---")
            result = client.predict_all_emotions(audio, args.sample_rate)
            print(f"Processing time: {result['processing_time']:.3f}s")
            print("Predictions:")
            for emotion, score in result['predictions'].items():
                print(f" {emotion}: {score}")
        elif args.mode == "single":
            # Guard kept from the original; unreachable with the default
            # --emotion value but harmless.
            if not args.emotion:
                print("Error: --emotion required for single prediction mode")
                return
            print(f"\n--- Predicting Single Emotion: {args.emotion} ---")
            result = client.predict_single_emotion(audio, args.emotion, args.sample_rate)
            print(f"Emotion: {result['emotion_key']}")
            print(f"Score: {result['score']}")
        elif args.mode == "top":
            print(f"\n--- Getting Top {args.top_k} Emotions ---")
            result = client.get_top_emotions(audio, args.top_k, args.sample_rate)
            print(f"Processing time: {result['processing_time']:.3f}s")
            print("Top emotions:")
            for rank, (emotion, score) in enumerate(result['top_emotions'], 1):
                print(f" {rank}. {emotion}: {score}")
        elapsed = time.time() - started
        print(f"\nTotal inference time: {elapsed:.3f}s")
    except Exception as exc:
        print(f"Error during prediction: {exc}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment