Skip to content

Instantly share code, notes, and snippets.

@Xnuvers007
Created June 26, 2023 01:31
Show Gist options
  • Save Xnuvers007/ef5818089cf23b05825f721f1b8e5d18 to your computer and use it in GitHub Desktop.
Save Xnuvers007/ef5818089cf23b05825f721f1b8e5d18 to your computer and use it in GitHub Desktop.
This code collects every href link from a web page and prints the final URL that each link redirects to.
from html.parser import HTMLParser
from urllib.parse import urljoin

import requests
def get_redirected_url(url, timeout=10):
    """Return the final URL reached after following every redirect from *url*.

    A HEAD request is issued so that no response body is downloaded.

    Args:
        url: Absolute HTTP(S) URL to resolve.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout, requests will wait indefinitely on an unresponsive host.

    Returns:
        The URL of the last response in the redirect chain.

    Raises:
        requests.RequestException: If the request cannot be completed
            (connection error, timeout, too many redirects, ...).
    """
    response = requests.head(url, allow_redirects=True, timeout=timeout)
    return response.url
class _HrefCollector(HTMLParser):
    """HTML parser that records the value of every ``href`` attribute."""

    def __init__(self):
        super().__init__()
        self.hrefs = []

    def handle_starttag(self, tag, attrs):
        # The parser hands over lower-cased attribute names and values with
        # quotes removed and entities decoded, so '&amp;' arrives as '&'.
        for name, value in attrs:
            if name == "href" and value is not None:
                self.hrefs.append(value.strip())


def get_all_links(url, timeout=10):
    """Print every link found on the page at *url* and where it redirects to.

    The page is fetched, each ``href`` attribute is resolved against the
    page's final URL, and one line ``"<absolute> -> <final>"`` is printed per
    link. Links that are not HTTP(S) (``mailto:``, ``javascript:``, ...) are
    skipped, and a link that cannot be resolved is reported without aborting
    the remaining links.

    Args:
        url: Address of the page to scan.
        timeout: Seconds to wait for the page itself before giving up.

    Raises:
        requests.RequestException: If the page at *url* cannot be fetched.
    """
    response = requests.get(url, timeout=timeout)
    if response.status_code != 200:
        print(f"Failed to retrieve content from {url} (Status code: {response.status_code})")
        return

    # Use the post-redirect URL as the base so relative links resolve correctly.
    base_url = response.url
    collector = _HrefCollector()
    collector.feed(response.text)
    collector.close()

    for link_url in collector.hrefs:
        try:
            absolute_url = urljoin(base_url, link_url)
        except ValueError:
            # Malformed href (e.g. a broken IPv6 literal); nothing to request.
            continue
        if not absolute_url.lower().startswith(("http://", "https://")):
            # requests cannot fetch mailto:, javascript:, tel:, ... links.
            continue
        try:
            redirected_url = get_redirected_url(absolute_url)
        except requests.RequestException as exc:
            # One dead link must not stop the scan of the remaining ones.
            print(f"{absolute_url} -> failed ({exc})")
            continue
        print(f"{absolute_url} -> {redirected_url}")
# Example run: list every link on a Halodoc search page with its redirect target.
get_all_links(url='https://www.halodoc.com/artikel/search/Pusing')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment