Last active
February 10, 2020 19:42
-
-
Save EkremDincel/a2fdaf919a8050f3dac51a2504872afd to your computer and use it in GitHub Desktop.
urllib code example
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from urllib.parse import quote_plus
from urllib.request import urlopen, Request

from bs4 import BeautifulSoup
# Root of the website; relative hrefs found on its pages are appended to this.
rootlink = 'https://www.transfermarkt.pl'

# Quick-search URL prefix; the search term is appended right after 'query='.
link = (
    'https://www.transfermarkt.pl'
    '/schnellsuche/ergebnis/schnellsuche?query='
)
def create_request(url):
    """Build a body-less request for *url* with a desktop-browser User-Agent.

    urllib's default User-Agent identifies the client as Python-urllib.
    According to the original author, the target site treats such clients
    as bots, so a Chrome-on-macOS User-Agent string is sent instead.

    Args:
        url: Absolute URL to request.

    Returns:
        A ``urllib.request.Request`` ready to be passed to ``urlopen``.
    """
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.1916.47 Safari/537.36',
    }
    # data=None means no request body, so urllib issues a GET.
    return Request(url, data=None, headers=browser_headers)
def get_request(req):
    """Open *req* and return the complete response body as bytes.

    Args:
        req: A URL string or a ``urllib.request.Request`` instance.

    Returns:
        The raw, undecoded response body.
    """
    # The context manager closes the response (and its underlying connection)
    # even if read() raises; the original left the response open.
    with urlopen(req) as response:
        return response.read()
# Read the name to search for and URL-encode it for the query string.
# quote_plus() turns spaces into '+' (as the previous str.replace did) and
# additionally percent-encodes non-ASCII letters and reserved characters such
# as '&' or '#', which would otherwise corrupt the query or make the HTTP
# request fail with a UnicodeEncodeError.
data = quote_plus(input('Enter name: '))
print(data)

search = link + data + '&x=0&y=0'
print(search)

# Download the search-results page and parse the returned HTML.
soup = BeautifulSoup(
    get_request(create_request(search)),
    features="lxml",  # parse with the lxml backend
)

# Take the first <a> element whose class is "spielprofil_tooltip";
# presumably this is the top search hit's profile link (verify against the
# site's current markup).
anchor = soup.find("a", {"class": "spielprofil_tooltip"})

# find() returns None when nothing matches, and the anchor may lack an href;
# stop with a readable message instead of an AttributeError/TypeError.
# The href is kept in its own name so the 'link' search prefix is not clobbered.
profile_path = anchor.get("href") if anchor is not None else None
if not profile_path:
    raise SystemExit('No profile link found for: ' + data)

# The href is relative to the site root, so prefix it with rootlink.
original_link = rootlink + profile_path
print(original_link)  # can be pasted into a browser to check the result
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment