Created
November 10, 2023 10:33
-
-
Save thekie/1469ebe2622882ae8f78b11c7012946e to your computer and use it in GitHub Desktop.
The code I used in this video: https://youtu.be/lNdpu6u9ZYM
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import copy | |
import json | |
import os | |
from openai import OpenAI | |
# Shared OpenAI client used for both transcription and summarization.
# The key is taken from the OPENAI_API_KEY environment variable so that a real
# credential never has to be committed to source; the original placeholder is
# kept as the fallback so editing the key in place still works.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY", "<Your api key here>"),
)
# Context hint passed to the transcription request below (as its `prompt`
# argument). The leading and trailing newlines are part of the value.
prompt = (
    "\n"
    'Transcribe the following audio from videos of the youtube channel "Kie Codes" into English.'
    "\n"
)
# (url, title) for every video. ORDER MATTERS: the rest of the script looks an
# entry up as metadata[int(name) - 1], where `name` is the audio/transcript
# file name without extension, so "1.mp3" belongs to the first row, and so on.
_VIDEO_TABLE = (
    ("https://youtu.be/KdelZvfcPfk?si=IZVJVsinerSj6tN8",
     "5 Productivity Tips for Programmers"),
    ("https://youtu.be/Hlp-9cdImSM?si=HO2B24MerwuS7B0b",
     "How to commit better with Git"),
    ("https://youtu.be/EvpZkdkp-v0?si=4gFG9XB7Th9I42kY",
     "How to use GIT HOOKS for better COMMITS (PYTHON CODE INCLUDED)"),
    ("https://youtu.be/T0qWZ7St_GE?si=yGyhL-9XpqVtrHr3",
     "What Programming Language should I learn first 2022?"),
    ("https://youtu.be/211tiIqZ-58?si=mvKAcpmqVd5juaFX",
     "Learn faster to code in a new programming language"),
    ("https://youtu.be/JF4z2u2ftv0?si=jsflpLbFDCry91nG",
     "Learn programming efficiently (5 Tips)"),
    ("https://youtu.be/zombLkjem00?si=Fe1-2vX0LzQGVNi_",
     "5 Things I wish I knew before becoming a software engineer"),
    ("https://youtu.be/uQj5UNhCPuo?si=VfJYXM7JPgP2zQG6",
     "Genetic Algorithms Explained By Example"),
    ("https://youtu.be/nhT56blfRpE?si=gd9jZQsxTjtnWWVt",
     "Genetic Algorithm from Scratch in Python (tutorial with code)"),
    ("https://youtu.be/aOsET8KapQQ?si=yC9o95_5iapNJxli",
     "Genetic Algorithm in Python generates Music (code included)"),
    ("https://youtu.be/5y7pQaP-5Qw?si=A3b7488gx30mUpQU",
     "Why you should use Type Hints in Python - Are type hints worth it?"),
    ("https://youtu.be/a1NLvZ5rgvQ?si=zJuviwEn9d0MsJVr",
     "How to create a pre-commit git hook for your python type checker?"),
    ("https://youtu.be/yScuF1UgGU0?si=kH0NoV-a69KTeKYx",
     "How to use python type hinting?"),
    ("https://youtu.be/51EoNgwoaTo?si=JgIqDZC2SbHKeHjr",
     "Neural Network from Scratch in Python"),
    ("https://youtu.be/P8Xrj70qtyo?si=D8dsXi-sPU9FNJ72",
     "Neural Networks Matrix Math and NumPy"),
    ("https://youtu.be/pdyyQ-w_x0I?si=BRk6Si0JJZCC1ste",
     "When did you learn to program? (Ask Kie #1)"),
    ("https://youtu.be/GAFh2Z5VtgM?si=Ff4fFfDDpnCoKVvq",
     "Coding an NFT crypto collectible in 3 days (DAY 1)"),
    ("https://youtu.be/75D0JjX7EZg?si=zRE4sPjnJ92OmuOJ",
     "How to implement an ERC721 Token and connect it to OpenSea (DAY 2)"),
    ("https://youtu.be/EnIrWNFwN-U?si=VNoOUYSQgWEbmo3K",
     "Creating a dApp and migrating to the Ethereum Mainnet (DAY 3)"),
    ("https://youtu.be/LW1i-axSoYE?si=N8g0HNtBw-U3PDb5",
     "Random NFT pictures in under 100 lines of JavaScript"),
)

# One dict per video, with "url" first and "title" second so the key order in
# the JSON output stays the same.
metadata = [
    {"url": video_url, "title": video_title}
    for video_url, video_title in _VIDEO_TABLE
]
# Transcribe the audio
#
# For every "<n>.mp3" / "<n>.mp4" in ./audio (n = 1-based position of the video
# in `metadata`):
#   * transcribe it with Whisper unless transcript/<n>.txt already exists,
#   * append title / URL / transcript to the combined text in `result`,
#   * store the transcript on the matching entry of `result_json`,
#   * write final/video-<n>.json with title, URL and transcript.
print("Transcribing audio")

# Create the output directories up front so a fresh checkout does not fail on
# the first write.
os.makedirs("transcript", exist_ok=True)
os.makedirs("final", exist_ok=True)

result_json = copy.deepcopy(metadata)

# Collect the usable audio files first. os.listdir() returns entries in
# arbitrary order, so they are sorted by video number to make the combined
# text file deterministic.
audio_files = []
for filename in os.listdir("audio"):
    name, ext = os.path.splitext(filename)
    if ext not in [".mp3", ".mp4"]:
        continue
    try:
        index = int(name) - 1
    except ValueError:
        index = -1
    # Reject names that are not a valid 1-based position: a non-numeric name
    # would raise, and 0 / negative numbers would silently wrap around to the
    # end of `metadata`.
    if not 0 <= index < len(metadata):
        print("Skipping file without matching metadata entry: " + filename)
        continue
    audio_files.append((index, name, filename))
audio_files.sort()

sections = []
for index, name, filename in audio_files:
    print("Transcribing: " + filename)
    transcript_path = "transcript/" + name + ".txt"
    if os.path.isfile(transcript_path):
        # Cached result from an earlier run; avoids paying for the API again.
        print("File already transcribed: " + name + ".txt -> Skipping")
    else:
        # `with` guarantees the audio handle is closed even if the request
        # raises.
        with open("audio/" + filename, "rb") as audio_file:
            transcript = client.audio.transcriptions.create(
                model="whisper-1",
                file=audio_file,
                response_format="text",
                prompt=prompt,
            )
        with open(transcript_path, "w") as f:
            f.write(transcript)

    # Always read back from disk so cached and fresh transcripts take the
    # same path.
    with open(transcript_path, "r") as f:
        transcript = f.read()

    video = metadata[index]
    sections.append(f"""
Video Title: {video["title"]}
Video URL: {video["url"]}
Transcript: {transcript}
---
""")
    result_json[index]["transcript"] = transcript

    with open(f"final/video-{name}.json", "w") as f:
        json.dump(
            {
                "title": video["title"],
                "url": video["url"],
                "transcript": transcript,
            },
            f,
            indent=2,
        )

# Combined text of all videos, consumed by the file-writing step that follows.
result = "".join(sections)
# Put all transcripts into one text file with urls and titles
with open("kiecodes-transcripts.txt", "w") as text_out:
    text_out.write(result)

# Put all transcripts into one json file with urls and titles, once compact
# (indent=None is json.dump's default) and once beautified (indent=2).
# Leaving each `with` block closes the file, which also flushes it.
for json_path, json_indent in (
    ("kiecodes-videos.json", None),
    ("kiecodes-videos-beautified.json", 2),
):
    with open(json_path, "w") as json_out:
        json.dump(result_json, json_out, indent=json_indent)
# Summarize the transcript
#
# For every transcript/<n>.txt that has no summary/<n>.txt yet, ask the chat
# model for a two sentence summary and store it under the same file name in
# ./summary.
print("Summarizing transcripts")

# Make sure the output directory exists before the first write.
os.makedirs("summary", exist_ok=True)

# System message sent with every summarization request.
summary_system_prompt = """
You are a youtube script summarizer, which outputs a two sentence summary of a youtube video transcript.
The host in the video is Kie and is male. The channel name is Kie Codes.
The user will only provide you with a transcript of the full video.
You only output the two sentences that summary.
"""

for filename in sorted(os.listdir("transcript")):
    # Only the .txt transcripts are summarized; stray files in the directory
    # (e.g. ".DS_Store") must not be sent to the model.
    if os.path.splitext(filename)[1] != ".txt":
        continue
    print(f"Summarizing: {filename}")
    if os.path.isfile(f"summary/{filename}"):
        print(f"File already summarized: {filename} -> Skipping")
        continue

    with open(f"transcript/{filename}", "r") as f:
        transcript = f.read()
    response = client.chat.completions.create(
        model="gpt-3.5-turbo-16k",
        messages=[
            {"role": "system", "content": summary_system_prompt},
            {"role": "user", "content": transcript},
        ],
    )
    summary = response.choices[0].message.content
    if summary is None:
        # Check before opening the output file: an empty summary file would
        # make later runs treat this video as "already summarized".
        print(f"No summary returned for: {filename} -> Skipping")
        continue
    with open(f"summary/{filename}", "w") as f:
        f.write(summary)
# Create a summary file with all titles, urls and summaries of all videos
#
# Reads every summary/<n>.txt (n = 1-based position of the video in
# `metadata`) and writes:
#   * kiecodes-summary.txt             - title / URL / summary per video
#   * kiecodes-summary-beautified.json - metadata plus "summary" and
#                                        "transcript_file" per video
result_json = copy.deepcopy(metadata)

# os.listdir() order is arbitrary and the directory may contain unrelated
# files, so keep only names that map to a metadata entry and sort them by
# video number for a deterministic output.
summary_files = []
for summary_filename in os.listdir("summary"):
    name = os.path.splitext(summary_filename)[0]
    try:
        index = int(name) - 1
    except ValueError:
        index = -1
    # 0 / negative numbers would silently wrap around to the end of `metadata`
    # and too large numbers would raise IndexError, so reject both.
    if not 0 <= index < len(metadata):
        print(f"Skipping summary without matching metadata entry: {summary_filename}")
        continue
    summary_files.append((index, name, summary_filename))
summary_files.sort()

sections = []
for index, name, summary_filename in summary_files:
    with open(f"summary/{summary_filename}", "r") as f:
        summary = f.read()
    video = metadata[index]
    sections.append(f"""
Video Title: {video["title"]}
Video URL: {video["url"]}
Summary: {summary}
---
""")
    result_json[index]["summary"] = summary
    # Name of the per-video JSON file written to ./final by the transcription
    # step.
    result_json[index]["transcript_file"] = f"video-{name}.json"

result = "".join(sections)

with open("kiecodes-summary.txt", "w") as f:
    f.write(result)
with open("kiecodes-summary-beautified.json", "w") as f:
    json.dump(result_json, f, indent=2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment