Created
August 20, 2024 22:26
-
-
Save Hamid-K/04cce1aa9d28d609c16f509ed8b65546 to your computer and use it in GitHub Desktop.
Whisper: Transcribe Audio to Text
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Sample script to use the OpenAI Whisper API.
# This script demonstrates how to convert input audio files to text for further processing.
# The code can still be improved and optimized in many ways. Feel free to modify and use it
# for your own needs.
#
import argparse
import os

import openai
from openai import OpenAI
# NOTE: this import shadows the builtin print with rich's version.
from rich import print
from rich.console import Console
from rich.traceback import install

# Set your API key: prefer the OPENAI_API_KEY environment variable so the
# secret never has to be written into this file. The placeholder below is only
# a fallback and must be replaced (or the variable set) before any API call
# can succeed.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "sk-proj-...."))

# Shared console used for all styled status output in this script.
console = Console()

# Install rich's traceback handler for uncaught exceptions.
install()
def transcribe_audio(file_path, language):
    """Transcribe the audio using the Whisper API.

    Args:
        file_path: Path of the audio file to upload; it is opened in binary mode.
        language: Language code forwarded to the API as ``language``
            (the API expects ISO-639-1, e.g. ``"en"``).

    Returns:
        The transcribed text, or ``None`` when the request fails or the
        response carries no ``text`` field. Errors are reported on the
        console instead of being raised.
    """
    console.print(f"[bold blue]Starting transcription for file:[/bold blue] {file_path}")
    try:
        with open(file_path, "rb") as audio_file:
            console.print("[bold blue]Sending audio file to Whisper API...[/bold blue]")
            # The 1.x client (``from openai import OpenAI``) exposes
            # transcription as ``audio.transcriptions.create``; the older
            # ``audio.transcribe`` name is not available on this client.
            response = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="json",
                language=language,
            )
        console.print("[bold blue]Transcription received.[/bold blue]")

        # The client returns an object exposing ``text``; a plain mapping is
        # accepted as well so either response shape yields the transcript.
        if isinstance(response, dict):
            text = response.get("text")
        else:
            text = getattr(response, "text", None)

        if text is None:
            console.print("[bold red]Unexpected response format received from Whisper API.[/bold red]")
            return None
        return text
    except openai.OpenAIError as e:
        message = str(e)
        if "Invalid language" in message and "ISO-639-1 format" in message:
            console.print(f"[bold red]An API error occurred during transcription: Invalid language '{language}'. Language parameter must be specified in ISO-639-1 format.[/bold red]")
        else:
            console.print(f"[bold red]An API error occurred during transcription:[/bold red] {e}")
        return None
    except Exception as e:
        # Best-effort boundary: report anything else (e.g. an unreadable file)
        # and signal failure with None rather than crashing the CLI.
        console.print(f"[bold red]An unexpected error occurred during transcription:[/bold red] {e}")
        return None
def summarize_text(text):
    """Ask the GPT-4 chat model for a summary of ``text``.

    Returns the content of the first completion choice, or ``None`` if the
    request (or reading its result) raised; failures are printed to the
    console rather than propagated.
    """
    console.print("[bold blue]Starting text summarization...[/bold blue]")
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Summarize the following text:\n\n{text}"},
    ]
    result = None
    try:
        completion = client.chat.completions.create(model="gpt-4", messages=conversation)
        console.print("[bold blue]Summary received.[/bold blue]")
        result = completion.choices[0].message.content
    except openai.OpenAIError as api_err:
        console.print(f"[bold red]An API error occurred during summarization:[/bold red] {api_err}")
    except Exception as err:
        console.print(f"[bold red]An unexpected error occurred during summarization:[/bold red] {err}")
    return result
def generate_bullet_points(text):
    """Ask the GPT-4 chat model to turn ``text`` into bullet points.

    Returns the content of the first completion choice, or ``None`` if the
    request (or reading its result) raised; failures are printed to the
    console rather than propagated.
    """
    console.print("[bold blue]Starting bullet points generation...[/bold blue]")
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Create bullet points from the following text:\n\n{text}"},
    ]
    result = None
    try:
        completion = client.chat.completions.create(model="gpt-4", messages=conversation)
        console.print("[bold blue]Bullet points received.[/bold blue]")
        result = completion.choices[0].message.content
    except openai.OpenAIError as api_err:
        console.print(f"[bold red]An API error occurred while generating bullet points:[/bold red] {api_err}")
    except Exception as err:
        console.print(f"[bold red]An unexpected error occurred while generating bullet points:[/bold red] {err}")
    return result
def generate_mind_map_description(text):
    """Ask the GPT-4 chat model to describe a mind map of ``text``.

    Returns the content of the first completion choice, or ``None`` if the
    request (or reading its result) raised; failures are printed to the
    console rather than propagated.
    """
    console.print("[bold blue]Starting mind map description generation...[/bold blue]")
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": f"Describe how a mind map of the following text would look:\n\n{text}"},
    ]
    result = None
    try:
        completion = client.chat.completions.create(model="gpt-4", messages=conversation)
        console.print("[bold blue]Mind map description received.[/bold blue]")
        result = completion.choices[0].message.content
    except openai.OpenAIError as api_err:
        console.print(f"[bold red]An API error occurred while generating the mind map description:[/bold red] {api_err}")
    except Exception as err:
        console.print(f"[bold red]An unexpected error occurred while generating the mind map description:[/bold red] {err}")
    return result
def process_audio(file_path, options):
    """Process the audio file according to the specified options.

    Transcribes ``file_path`` and then prints each output that was requested
    on ``options``.

    Args:
        file_path: Path of the audio file to transcribe.
        options: Object with the attributes ``language``, ``full_text``,
            ``summary``, ``bullet_points`` and ``mind_map`` (the parsed
            command-line namespace in this script).

    Returns:
        None. Results are written to the console; when no transcript is
        available a notice is printed and nothing else is generated.
    """
    # Local import so this function carries its own dependency; rich is
    # already a dependency of this script.
    from rich.markup import escape

    console.print(f"[bold blue]Processing audio file:[/bold blue] {file_path}")
    transcript = transcribe_audio(file_path, options.language)
    if not transcript:
        console.print("[bold yellow]No transcript available.[/bold yellow]")
        return

    # Transcripts and model output are free-form text. They are escaped before
    # printing so bracketed fragments (e.g. "[music]") are shown literally
    # instead of being parsed as console markup, which could drop text or
    # raise on an unmatched closing tag.
    if options.full_text:
        console.print("[bold green]Full Text:[/bold green]\n", escape(transcript))

    if options.summary:
        console.print("[bold blue]Generating summary...[/bold blue]")
        summary = summarize_text(transcript)
        if summary:
            console.print("[bold green]Summary:[/bold green]\n", escape(summary))

    if options.bullet_points:
        console.print("[bold blue]Generating bullet points...[/bold blue]")
        bullet_points = generate_bullet_points(transcript)
        if bullet_points:
            console.print("[bold green]Bullet Points:[/bold green]\n", escape(bullet_points))

    if options.mind_map:
        console.print("[bold blue]Generating mind map description...[/bold blue]")
        mind_map = generate_mind_map_description(transcript)
        if mind_map:
            console.print("[bold green]Mind Map Description:[/bold green]\n", escape(mind_map))
def main():
    """Parse the command-line arguments and hand them to ``process_audio``."""
    cli = argparse.ArgumentParser(
        description="Transcribe audio and generate various outputs using OpenAI Whisper API."
    )
    cli.add_argument("audio_file", help="Path to the input audio file (mp3 format).")

    # Boolean output switches, registered in the order they appear in --help.
    switches = (
        ("--summary", "Generate a summary of the conversation."),
        ("--bullet_points", "Generate bullet points from the most talked about topics."),
        ("--mind_map", "Generate a mind-map graph of subjects discussed during the conversation."),
        ("--full_text", "Get the full text of the conversation."),
    )
    for flag, description in switches:
        cli.add_argument(flag, action="store_true", help=description)

    cli.add_argument(
        "--language",
        default="en",
        help="Specify the language of the audio for transcription (default: English). Language must be in ISO-639-1 format.",
    )

    parsed = cli.parse_args()
    process_audio(parsed.audio_file, parsed)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment