Skip to content

Instantly share code, notes, and snippets.

@Yuffster
Created February 18, 2017 23:26
Show Gist options
  • Save Yuffster/279e448193942ba9cf28ad181fe235dd to your computer and use it in GitHub Desktop.
Save Yuffster/279e448193942ba9cf28ad181fe235dd to your computer and use it in GitHub Desktop.
Morse Sample Scraper
from bs4 import BeautifulSoup
import requests
import os
def fetch(url):
    """Download *url* into the current working directory, once.

    The local file name is the last ``/``-separated component of the URL.
    If a file with that name already exists the download is skipped.
    Progress and failures are reported with ``print``; nothing is returned.

    Args:
        url: Absolute URL, or a site-relative path that is resolved
            against ``http://www.arrl.org``.
    """
    # Anything that does not look absolute is treated as site-relative.
    if not url.startswith("http"):
        url = 'http://www.arrl.org' + url
    fname = url.split("/")[-1]
    print("Fetching [{}] from {}".format(fname, url))
    if not fname:
        # URL ended in "/": there is no file name to save under.
        print("\tERROR")
        return
    if os.path.isfile(fname):
        print("\tCached.")
        return
    try:
        # stream=True so iter_content() below really reads in chunks
        # instead of buffering the whole body in memory first; the
        # timeout keeps a stalled server from hanging the script.
        res = requests.get(url, stream=True, timeout=30)
    except requests.RequestException:
        print("\tERROR")
        return
    try:
        if not res.ok:
            print("\tERROR")
            return
        # Write to a side file and rename at the end, so an interrupted
        # download is never mistaken for a cached file on the next run.
        partial = fname + '.part'
        try:
            with open(partial, 'wb') as f:
                for block in res.iter_content(1024):
                    f.write(block)
        except requests.RequestException:
            # Connection dropped mid-body: discard the incomplete data.
            if os.path.exists(partial):
                os.remove(partial)
            print("\tERROR")
            return
        os.replace(partial, fname)
    finally:
        # A streamed response holds its connection until closed.
        res.close()
def getSamples(speed):
    """Fetch every mp3 linked from one ARRL code-archive page.

    Requests ``http://www.arrl.org/<speed>-wpm-code-archive``, and for each
    anchor whose ``href`` ends in ``mp3`` calls ``fetch`` on that link and
    on the link found in the table cell that follows the mp3's cell.

    Args:
        speed: String substituted into the archive page URL
            (for example ``'5'`` or ``'7-5'``).
    """
    page_url = 'http://www.arrl.org/{}-wpm-code-archive'.format(speed)
    try:
        r = requests.get(page_url, timeout=30)
    except requests.RequestException:
        print("Could not load {}".format(page_url))
        return
    if not r.ok:
        # An error page would just yield zero links; say so explicitly.
        print("Could not load {}".format(page_url))
        return
    soup = BeautifulSoup(r.text, 'html.parser')
    for a in soup.find_all('a'):
        href = a.get('href')
        if not href or not href.endswith('mp3'):
            continue
        fetch(href)
        # The companion link is looked up in the sibling element right
        # after the <td> holding the mp3 anchor. Each lookup can come
        # back as None if the page layout differs, so check every step
        # rather than crash partway through the archive.
        cell = a.find_parent('td')
        neighbour = cell.find_next_sibling() if cell is not None else None
        link = neighbour.find('a') if neighbour is not None else None
        text = link.get('href') if link is not None else None
        if text:
            fetch(text)
        else:
            print("\tNo companion link found for {}".format(href))
# Walk each archive page in turn; every entry is the value substituted
# into the "<speed>-wpm-code-archive" page URL by getSamples.
for n in (
    '5', '7-5', '10', '13', '15',
    '20', '25', '30', '35', '40',
):
    getSamples(n)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment