mfitzp · April 17, 2020 18:01
diff --git a/34632838.py b/34632838.py
 # Download every .csv/.xls/.xlsx file linked from the RBI page below.
 # Each file is saved under its own name, relative to the current directory.
 from bs4 import BeautifulSoup
 # Python 3.x
 from urllib.request import urlopen, urlretrieve

 # Removed the trailing / from the URL
 url = 'https://www.rbi.org.in/Scripts/bs_viewcontent.aspx?Id=2009'

 # Extensions worth downloading. They are compared against the lower-cased
 # link so that e.g. ".XLS" is not silently skipped.
 extensions = ('.csv', '.xls', '.xlsx')

 # The response object is a context manager, so the connection is closed
 # even if reading or decoding raises.
 with urlopen(url) as u:
     html = u.read().decode('utf-8')

 soup = BeautifulSoup(html, "html.parser")

 # Select all A elements that have an href attribute, starting with http://
 for link in soup.select('a[href^="http://"]'):
     href = link.get('href')
     # str.endswith accepts a tuple of suffixes: one call instead of a loop.
     if not href.lower().endswith(extensions):
         continue

     # The local file name is the last path component of the link.
     filename = href.rsplit('/', 1)[-1]

     # You don't need to join + quote as URLs in the HTML are absolute.
     # However, we need a https:// URL (in spite of what the link says: check request in your web browser's developer tools)
     # Only the leading scheme is rewritten (count=1); any later "http://"
     # inside the URL is left untouched.
     href = href.replace('http://', 'https://', 1)

     print("Downloading %s to %s..." % (href, filename))
     urlretrieve(href, filename)
     print("Done.")
	# Download every .csv/.xls/.xlsx file linked from the RBI page below.
	# Each file is saved under its own name, relative to the current directory.
	from bs4 import BeautifulSoup
	# Python 3.x
	from urllib.request import urlopen, urlretrieve

	# Removed the trailing / from the URL
	url = 'https://www.rbi.org.in/Scripts/bs_viewcontent.aspx?Id=2009'

	# Extensions worth downloading. They are compared against the lower-cased
	# link so that e.g. ".XLS" is not silently skipped.
	extensions = ('.csv', '.xls', '.xlsx')

	# The response object is a context manager, so the connection is closed
	# even if reading or decoding raises.
	with urlopen(url) as u:
		html = u.read().decode('utf-8')

	soup = BeautifulSoup(html, "html.parser")

	# Select all A elements that have an href attribute, starting with http://
	for link in soup.select('a[href^="http://"]'):
		href = link.get('href')
		# str.endswith accepts a tuple of suffixes: one call instead of a loop.
		if not href.lower().endswith(extensions):
			continue

		# The local file name is the last path component of the link.
		filename = href.rsplit('/', 1)[-1]

		# You don't need to join + quote as URLs in the HTML are absolute.
		# However, we need a https:// URL (in spite of what the link says: check request in your web browser's developer tools)
		# Only the leading scheme is rewritten (count=1); any later "http://"
		# inside the URL is left untouched.
		href = href.replace('http://', 'https://', 1)

		print("Downloading %s to %s..." % (href, filename))
		urlretrieve(href, filename)
		print("Done.")