Skip to content

Instantly share code, notes, and snippets.

@NeraSnow
Created August 20, 2022 01:08
Show Gist options
  • Save NeraSnow/5662d55de1b837e2838b5df41db04cb7 to your computer and use it in GitHub Desktop.
Save NeraSnow/5662d55de1b837e2838b5df41db04cb7 to your computer and use it in GitHub Desktop.
Podcast Fetching Scripts
import feedparser
import urllib
import requests
import sys
import os
import re
import subprocess
# Destination directory for downloaded episodes. It must end with a slash
# because episode paths are built from it by plain string concatenation.
ParentPath = '/u/USER_ID/public_html/Podcast/九八新闻台/'
# RSS feed that lists the podcast's episodes.
RSSFeed = 'https://feeds.soundcloud.com/users/soundcloud:users:496937352/sounds.rss'
# Fetch and parse the feed exactly once; parsing it a second time only
# repeats the network request and discards the first result.
feed = feedparser.parse(RSSFeed)
def get_valid_filename(s):
    """
    Turn an arbitrary value into a string that is safe to use as a filename.

    The value is converted with ``str``, surrounding whitespace is stripped,
    every remaining space becomes an underscore, and every character that is
    not a word character, a dash or a dot is dropped.

    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    trimmed = str(s).strip()
    # Splitting on a literal space and re-joining with "_" maps each space
    # to exactly one underscore, runs of spaces included.
    underscored = '_'.join(trimmed.split(' '))
    return re.sub(r'(?u)[^-\w.]', '', underscored)
def _enclosure_url(episode):
    """Return the audio download URL for one feed entry.

    The link marked rel="enclosure" is preferred. When no link carries that
    marker, the second link is used, which is the position this script
    relied on before.
    """
    links = episode['links']
    for link in links:
        if link.get('rel') == 'enclosure':
            return link['href']
    return links[1]['href']


print('Number of RSS posts: {}'.format(len(feed.entries)))
for episode in feed.entries:
    # "/" would be read as a directory separator in the output path.
    episode_title = episode['title'].replace("/", " ")
    published = episode['published_parsed']
    # The date prefix is deliberately left unpadded so that files saved by
    # earlier runs are still recognised as already downloaded.
    date = "{}_{}_{}_".format(published.tm_year,
                              published.tm_mon,
                              published.tm_mday)
    full_path = ParentPath + date + episode_title + ".mp3"
    if os.path.isfile(full_path):
        print("Skipping episode: {}".format(episode_title))
        continue

    print("Downloading episode: {}".format(episode_title))
    # Download to a side file first: wget creates its output file even when
    # the transfer fails, and a stub at the final path would make every later
    # run skip this episode.
    partial_path = full_path + ".part"
    result = subprocess.run([
        "wget",
        "-q",
        "--output-document=" + partial_path,
        # End of options, so a feed-supplied URL is never parsed as a flag.
        "--",
        _enclosure_url(episode),
    ])
    if result.returncode == 0:
        os.replace(partial_path, full_path)
    else:
        print("Download failed (wget exit code {}): {}".format(
            result.returncode, episode_title))
        try:
            os.remove(partial_path)
        except FileNotFoundError:
            # wget never got as far as creating the file.
            pass
import feedparser
import urllib
import requests
import sys
import os
import re
# Directory that receives the downloaded episode files.
ParentPath = "/u/USER_ID/public_html/Podcast/股癌/"
# Address of the podcast's RSS feed.
RSSFeed = "https://feeds.soundon.fm/podcasts/954689a5-3096-43a4-a80b-7810b219cef3.xml"
# Parsed feed; its entries are the episodes handled by the download loop.
feed = feedparser.parse(RSSFeed)
def get_valid_filename(s):
    """
    Build a clean filename from the given value.

    Leading and trailing whitespace is removed, inner spaces are replaced by
    underscores, and any character other than a word character, a dash or a
    dot is discarded.

    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    disallowed = re.compile(r'(?u)[^-\w.]')
    candidate = str(s).strip()
    candidate = candidate.replace(' ', '_')
    return disallowed.sub('', candidate)
def _enclosure_url(episode):
    """Return the audio download URL for one feed entry.

    The link marked rel="enclosure" is preferred. When no link carries that
    marker, the second link is used, which is the position this script
    relied on before.
    """
    links = episode['links']
    for link in links:
        if link.get('rel') == 'enclosure':
            return link['href']
    return links[1]['href']


print('Number of RSS posts :{}'.format(len(feed.entries)))
for episode in feed.entries:
    # "/" would be read as a directory separator in the output path.
    episode_title = episode['title'].replace("/", " ")
    full_path = ParentPath + episode_title + ".mp3"
    if os.path.isfile(full_path):
        print("Skipping episode: {}".format(episode_title))
        continue

    print("Downloading episode: {}".format(episode_title))
    # Write to a side file and rename only after a complete, successful
    # transfer, so an interrupted or failed download is retried on the next
    # run instead of being mistaken for a finished episode.
    partial_path = full_path + ".part"
    try:
        # stream=True avoids holding the whole audio file in memory; the
        # timeout keeps a stalled server from hanging the script forever.
        with requests.get(_enclosure_url(episode), stream=True,
                          timeout=30) as response:
            # Refuse to save an HTTP error page as if it were audio.
            response.raise_for_status()
            with open(partial_path, 'wb') as partial_file:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    partial_file.write(chunk)
    except (requests.RequestException, OSError) as error:
        print("Download failed: {} ({})".format(episode_title, error))
        try:
            os.remove(partial_path)
        except FileNotFoundError:
            # The request failed before the side file was created.
            pass
    else:
        os.replace(partial_path, full_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment