Created
April 26, 2023 18:09
-
-
Save AreebaYousuf/c6c7a6063cb10897518b1c1466247581 to your computer and use it in GitHub Desktop.
In this assignment you will write a Python program that expands on http://www.py4e.com/code3/urllinks.py. The program will use urllib to read the HTML from the data files below, extract the href= values from the anchor tags, scan for a tag that is in a particular position relative to the first name in the list, follow that link and repeat the process…
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Follow a chain of links: repeatedly open a page and jump to its N-th anchor.

Prompts for a starting URL, a repeat count and a 1-based anchor position,
then follows the anchor at that position `count` times and prints the
final URL reached.
"""
import collections
import collections.abc
import ssl
import urllib.error
import urllib.parse
import urllib.request

from bs4 import BeautifulSoup

# Compatibility shim: re-expose collections.Callable as an alias of
# collections.abc.Callable (presumably for an older BeautifulSoup release
# that still looks the name up on `collections` -- confirm before removing).
collections.Callable = collections.abc.Callable

# Ignore SSL certificate errors.
# NOTE(review): this turns off hostname and certificate verification for
# every request made through `ctx`; acceptable for a classroom exercise,
# not for fetching anything that must be trusted.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE


def next_link(url, position):
    """Return the absolute URL of the anchor at 1-based `position` on `url`.

    Args:
        url: Address of the page to download.
        position: 1-based index of the anchor tag to follow.

    Returns:
        The anchor's href, resolved against the page's final URL so that
        relative links can be opened on the next hop.

    Raises:
        ValueError: If `position` is below 1, the page has fewer than
            `position` anchors, or the selected anchor has no href.
        urllib.error.URLError: If the page cannot be retrieved.
    """
    if position < 1:
        # A zero/negative value would silently index from the end of the list.
        raise ValueError(f'position must be >= 1, got {position}')

    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url, context=ctx) as response:
        html = response.read()
        base_url = response.geturl()  # final URL after any redirects

    soup = BeautifulSoup(html, 'html.parser')
    # Retrieve all of the anchor tags
    tags = soup('a')
    if position > len(tags):
        raise ValueError(
            f'{url} has only {len(tags)} anchor tag(s); '
            f'cannot follow position {position}'
        )

    href = tags[position - 1].get('href', None)
    if not href:
        raise ValueError(f'anchor {position} on {url} has no href attribute')

    # Resolve relative links; absolute hrefs pass through unchanged.
    return urllib.parse.urljoin(base_url, href)


def main():
    """Prompt for URL, count and position, then follow the link chain."""
    url = input('Enter URL: ')
    count = int(input('Enter count: '))
    position = int(input('Enter position: '))

    for _ in range(count):
        print('Retrieving:', url)
        url = next_link(url, position)

    print('Last Url:', url)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment