Created
August 20, 2022 01:08
-
-
Save NeraSnow/5662d55de1b837e2838b5df41db04cb7 to your computer and use it in GitHub Desktop.
Podcast Fetching Scripts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import feedparser | |
import urllib | |
import requests | |
import sys | |
import os | |
import re | |
import subprocess | |
# Directory that downloaded episodes are saved into.
ParentPath = '/u/USER_ID/public_html/Podcast/九八新闻台/'
# Address of the podcast's RSS feed.
RSSFeed = 'https://feeds.soundcloud.com/users/soundcloud:users:496937352/sounds.rss'
# Parse the feed exactly once: every call to feedparser.parse() on a URL
# performs a network fetch, so repeating it only wastes a request.
feed = feedparser.parse(RSSFeed)
# print(feed)
def get_valid_filename(s):
    """Convert ``s`` into a string that is safe to use as a file name.

    The value is passed through ``str()``, surrounding whitespace is
    stripped, every remaining space becomes an underscore, and any character
    that is not a word character, a dash, or a dot is dropped.

    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    trimmed = str(s).strip()
    underscored = trimmed.replace(' ', '_')
    # Everything outside [-\w.] is removed rather than substituted.
    disallowed = re.compile(r'(?u)[^-\w.]')
    return disallowed.sub('', underscored)
print('Number of RSS posts: {}'.format(len(feed.entries)))
for episode in feed.entries:
    # "/" in a title would otherwise be read as a directory separator.
    episode_title = episode['title'].replace("/", " ")
    # valid_filename = get_valid_filename(episode_title)
    published = episode['published_parsed']
    date = "{}_{}_{}_".format(published.tm_year,
                              published.tm_mon,
                              published.tm_mday)
    full_path = ParentPath + date + episode_title + ".mp3"
    if os.path.isfile(full_path):
        print("Skipping episode: {}".format(episode_title))
        continue

    print("Downloading episode: {}".format(episode_title))
    # Download under a temporary name and rename only on success. Writing
    # straight to full_path would leave a truncated file behind when wget
    # fails, and the isfile() check above would then skip that episode on
    # every later run.
    partial_path = full_path + ".part"
    # NOTE(review): assumes the second link of each entry is the audio
    # enclosure -- confirm against the feed.
    result = subprocess.run(["wget", episode['links'][1]['href'],
                             "--output-document=" + partial_path,
                             "-q", "-P", ParentPath])
    if result.returncode == 0:
        os.replace(partial_path, full_path)
    else:
        print("Download failed (wget exit code {}): {}".format(
            result.returncode, episode_title))
        if os.path.exists(partial_path):
            os.remove(partial_path)
# print(episode['links'][1]['href']) | |
# requested_url = requests.get(episode['links'][1]['href'], timeout=5) | |
# open((full_path), 'wb').write(requested_url.content) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import feedparser | |
import urllib | |
import requests | |
import sys | |
import os | |
import re | |
# Directory that downloaded episodes are saved into.
ParentPath = "/u/USER_ID/public_html/Podcast/股癌/"
# Address of the podcast's RSS feed.
RSSFeed = "https://feeds.soundon.fm/podcasts/954689a5-3096-43a4-a80b-7810b219cef3.xml"
# Parsed feed; the download loop iterates over feed.entries.
feed = feedparser.parse(RSSFeed)
# print(feed)
def get_valid_filename(s):
    """Return ``s`` as a clean file name.

    Leading and trailing whitespace is stripped, the remaining spaces are
    turned into underscores, and every character other than a word
    character, a dash, or a dot is removed. Non-string input is converted
    with ``str()`` first.

    >>> get_valid_filename("john's portrait in 2004.jpg")
    'johns_portrait_in_2004.jpg'
    """
    unsafe_chars = r'(?u)[^-\w.]'
    candidate = str(s).strip()
    candidate = candidate.replace(' ', '_')
    return re.sub(unsafe_chars, '', candidate)
print('Number of RSS posts :{}'.format(len(feed.entries)))
for episode in feed.entries:
    # "/" in a title would otherwise be read as a directory separator.
    episode_title = episode['title'].replace("/", " ")
    # valid_filename = get_valid_filename(episode_title)
    full_path = ParentPath + episode_title + ".mp3"
    if os.path.isfile(full_path):
        print("Skipping episode: {}".format(episode_title))
        continue

    print("Downloading episode: {}".format(episode_title))
    # Write to a temporary name and rename only once the download has
    # finished, so an interrupted transfer is retried on the next run
    # instead of being mistaken for a complete episode by the isfile() check.
    partial_path = full_path + ".part"
    try:
        # NOTE(review): assumes the second link of each entry is the audio
        # enclosure -- confirm against the feed.
        # stream=True avoids holding the whole file in memory; the timeout
        # (connect, read) keeps a stalled server from hanging the script.
        with requests.get(episode['links'][1]['href'],
                          stream=True, timeout=(10, 60)) as response:
            # Do not save an HTTP error page as if it were audio.
            response.raise_for_status()
            with open(partial_path, 'wb') as out_file:
                for chunk in response.iter_content(chunk_size=64 * 1024):
                    out_file.write(chunk)
    except requests.RequestException as err:
        print("Download failed: {} ({})".format(episode_title, err))
        if os.path.exists(partial_path):
            os.remove(partial_path)
        continue
    os.replace(partial_path, full_path)
# print(feed.entries[0]['title_detail']) | |
# title = feed.entries[0]['title'] | |
# print(title) | |
# print(type(title)) | |
# print(feed.entries[0]['links'][1]['href']) | |
# requested_url = requests.get(feed.entries[0]['links'][1]['href']) | |
# open((feed.entries[0]['title']+".mp3").encode(sys.getfilesystemencoding()), 'wb').write(requested_url.content) | |
# real_mp3 = requested_url. | |
# print(r) | |
# urllib.request.Request(feed.entries[0]['links'][1]['href']) | |
# urllib.request.urlretrieve(feed.entries[0]['links'][1]['href'], "{}.mp3".format(feed.entries[0]['title'])) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment