Skip to content

Instantly share code, notes, and snippets.

@alexcg1
Created February 8, 2025 15:56
Show Gist options
  • Select an option

  • Save alexcg1/ef9a50ffe9a34ce717afb26c36118ef3 to your computer and use it in GitHub Desktop.

Select an option

Save alexcg1/ef9a50ffe9a34ce717afb26c36118ef3 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
import argparse
def scrape_urls(source_url, output_file):
    """Fetch a web page and write every link found under `.story_list` elements to a file.

    Args:
        source_url: URL of the page to fetch.
        output_file: Path of the text file to write; one URL per line.

    Raises:
        requests.HTTPError: if the server responds with an error status.
        requests.Timeout: if the request does not complete within the timeout.
    """
    # requests has NO default timeout: without one, an unresponsive host
    # hangs the script forever.
    response = requests.get(source_url, timeout=30)
    response.raise_for_status()

    soup = BeautifulSoup(response.text, "html.parser")

    # Collect the href of every anchor nested inside elements with
    # class="story_list"; href=True skips anchors without an href attribute.
    urls = [
        link["href"]
        for story in soup.find_all(class_="story_list")
        for link in story.find_all("a", href=True)
    ]

    # Explicit encoding keeps output byte-identical across platforms
    # (the default text encoding is platform-dependent).
    with open(output_file, "w", encoding="utf-8") as f:
        f.writelines(url + "\n" for url in urls)
def main():
    """Command-line entry point: parse CLI arguments and run the scraper."""
    arg_parser = argparse.ArgumentParser(description="Scrape URLs from a webpage")
    arg_parser.add_argument("source_url", help="URL of the webpage to scrape")
    arg_parser.add_argument("output_file", help="File to store the extracted URLs")
    cli_args = arg_parser.parse_args()
    scrape_urls(cli_args.source_url, cli_args.output_file)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment