dbreunig · December 14, 2022 19:43
diff --git a/podscriber.py b/podscriber.py
 import feedparser
 import whisper
 import sqlite3
 import requests

 # Podscriber: fetch a podcast RSS feed, store its episodes in SQLite,
 # transcribe each episode's audio with Whisper, and store the segments.
 #
 # Connect to the database and create the tables. IF NOT EXISTS lets the
 # script be re-run against an existing podscribe.db instead of crashing.
 con = sqlite3.connect("podscribe.db")
 cur = con.cursor()
 cur.execute("""
     CREATE TABLE IF NOT EXISTS episodes(
         id TEXT PRIMARY KEY,
         title TEXT,
         pub_date TEXT,
         link TEXT,
         summary TEXT,
         audio_link TEXT,
         processed INTEGER DEFAULT 0
     )
     """)
 # episode_id is TEXT so that it matches the type of episodes.id.
 # NOTE: by default SQLite only enforces foreign keys (and ON DELETE CASCADE)
 # on connections that have executed `PRAGMA foreign_keys = ON`.
 cur.execute("""
     CREATE TABLE IF NOT EXISTS segments(
         episode_id TEXT,
         seek REAL,
         start REAL,
         end REAL,
         text TEXT,
         FOREIGN KEY (episode_id)
             REFERENCES episodes (id)
                 ON DELETE CASCADE
                 ON UPDATE NO ACTION
     )
     """)

 # Load the model
 model = whisper.load_model("tiny")

 # Download an RSS feed
 feed_url = "https://feed.xml"  # DUMMY FEED, REPLACE WITH YOUR OWN
 feed = feedparser.parse(feed_url)

 # Load the episodes into the database
 episodes = []
 for e in feed.entries:
     # Find the audio link. It is reset for every entry so that an entry
     # without an enclosure cannot inherit the previous episode's audio.
     audio_link = None
     for link in e.get('links', []):
         if link.get('rel') == 'enclosure':
             audio_link = link.get('href')
     if not audio_link:
         print(f"Skipping {e.get('title')}: no audio enclosure")
         continue
     # Load the metadata we need; fields missing from the feed become NULL.
     # The enclosure URL stands in for the id when the feed provides none.
     episodes.append((
         e.get('id') or audio_link,
         e.get('title'),
         e.get('published'),
         e.get('link'),
         e.get('summary'),
         audio_link,
         0,
     ))
 # OR IGNORE keeps episodes stored by an earlier run (and their processed
 # flag) instead of failing on the primary key.
 cur.executemany(
     "INSERT OR IGNORE INTO episodes"
     " (id, title, pub_date, link, summary, audio_link, processed)"
     " VALUES(?, ?, ?, ?, ?, ?, ?)",
     episodes,
 )
 con.commit()

 # Kick off the transcription of every episode not yet marked as processed,
 # so an interrupted run can be resumed without repeating finished work.
 pending = cur.execute(
     "SELECT id, title, audio_link FROM episodes WHERE processed = 0"
 ).fetchall()
 for episode_id, title, audio_link in pending:
     print(f"Starting {title}")
     # Download. The query string is dropped from the local file name and
     # ".mp3" is only appended when the name does not already end with it.
     name = audio_link.split('?', 1)[0].split('/')[-1]
     filename = name if name.endswith(".mp3") else f"{name}.mp3"
     response = requests.get(audio_link, timeout=60)
     # Fail loudly rather than transcribing an HTTP error page.
     response.raise_for_status()
     with open(filename, "wb") as audio_file:
         audio_file.write(response.content)
     # Transcribe
     transcription = model.transcribe(filename)
     # Load
     segments = [
         (episode_id, s['seek'], s['start'], s['end'], s['text'])
         for s in transcription['segments']
     ]
     cur.executemany("INSERT INTO segments VALUES(?, ?, ?, ?, ?)", segments)
     # The segments and the processed flag are committed together.
     cur.execute("UPDATE episodes SET processed = 1 WHERE id = ?", (episode_id,))
     con.commit()
     # Report progress
     print(f"Loaded {title}")

 con.close()
	import feedparser
	import whisper
	import sqlite3
	import requests

	# Podscriber: fetch a podcast RSS feed, store its episodes in SQLite,
	# transcribe each episode's audio with Whisper, and store the segments.
	#
	# Connect to the database and create the tables. IF NOT EXISTS lets the
	# script be re-run against an existing podscribe.db instead of crashing.
	con = sqlite3.connect("podscribe.db")
	cur = con.cursor()
	cur.execute("""
	    CREATE TABLE IF NOT EXISTS episodes(
	        id TEXT PRIMARY KEY,
	        title TEXT,
	        pub_date TEXT,
	        link TEXT,
	        summary TEXT,
	        audio_link TEXT,
	        processed INTEGER DEFAULT 0
	    )
	    """)
	# episode_id is TEXT so that it matches the type of episodes.id.
	# NOTE: by default SQLite only enforces foreign keys (and ON DELETE CASCADE)
	# on connections that have executed `PRAGMA foreign_keys = ON`.
	cur.execute("""
	    CREATE TABLE IF NOT EXISTS segments(
	        episode_id TEXT,
	        seek REAL,
	        start REAL,
	        end REAL,
	        text TEXT,
	        FOREIGN KEY (episode_id)
	            REFERENCES episodes (id)
	                ON DELETE CASCADE
	                ON UPDATE NO ACTION
	    )
	    """)

	# Load the model
	model = whisper.load_model("tiny")

	# Download an RSS feed
	feed_url = "https://feed.xml"  # DUMMY FEED, REPLACE WITH YOUR OWN
	feed = feedparser.parse(feed_url)

	# Load the episodes into the database
	episodes = []
	for e in feed.entries:
	    # Find the audio link. It is reset for every entry so that an entry
	    # without an enclosure cannot inherit the previous episode's audio.
	    audio_link = None
	    for link in e.get('links', []):
	        if link.get('rel') == 'enclosure':
	            audio_link = link.get('href')
	    if not audio_link:
	        print(f"Skipping {e.get('title')}: no audio enclosure")
	        continue
	    # Load the metadata we need; fields missing from the feed become NULL.
	    # The enclosure URL stands in for the id when the feed provides none.
	    episodes.append((
	        e.get('id') or audio_link,
	        e.get('title'),
	        e.get('published'),
	        e.get('link'),
	        e.get('summary'),
	        audio_link,
	        0,
	    ))
	# OR IGNORE keeps episodes stored by an earlier run (and their processed
	# flag) instead of failing on the primary key.
	cur.executemany(
	    "INSERT OR IGNORE INTO episodes"
	    " (id, title, pub_date, link, summary, audio_link, processed)"
	    " VALUES(?, ?, ?, ?, ?, ?, ?)",
	    episodes,
	)
	con.commit()

	# Kick off the transcription of every episode not yet marked as processed,
	# so an interrupted run can be resumed without repeating finished work.
	pending = cur.execute(
	    "SELECT id, title, audio_link FROM episodes WHERE processed = 0"
	).fetchall()
	for episode_id, title, audio_link in pending:
	    print(f"Starting {title}")
	    # Download. The query string is dropped from the local file name and
	    # ".mp3" is only appended when the name does not already end with it.
	    name = audio_link.split('?', 1)[0].split('/')[-1]
	    filename = name if name.endswith(".mp3") else f"{name}.mp3"
	    response = requests.get(audio_link, timeout=60)
	    # Fail loudly rather than transcribing an HTTP error page.
	    response.raise_for_status()
	    with open(filename, "wb") as audio_file:
	        audio_file.write(response.content)
	    # Transcribe
	    transcription = model.transcribe(filename)
	    # Load
	    segments = [
	        (episode_id, s['seek'], s['start'], s['end'], s['text'])
	        for s in transcription['segments']
	    ]
	    cur.executemany("INSERT INTO segments VALUES(?, ?, ?, ?, ?)", segments)
	    # The segments and the processed flag are committed together.
	    cur.execute("UPDATE episodes SET processed = 1 WHERE id = ?", (episode_id,))
	    con.commit()
	    # Report progress
	    print(f"Loaded {title}")

	con.close()