Skip to content

Instantly share code, notes, and snippets.

@emresaglam
Created June 10, 2021 20:54
Show Gist options
  • Save emresaglam/8da2a417b47fd169ed529f4fbc456dbb to your computer and use it in GitHub Desktop.
Save emresaglam/8da2a417b47fd169ed529f4fbc456dbb to your computer and use it in GitHub Desktop.
Crude scraper for the security conferences page
import requests
import json
from bs4 import BeautifulSoup as bs
# The URL for the conf page
url = "https://infosec-conferences.com/"
# header needed as their nginx blocks other User Agents...
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.90 Safari/537.36'}


def parse_conferences(html):
    """Parse the conference listing HTML into a list of dicts.

    Each dict carries the keys ``link``, ``text``, ``date`` and
    ``location``. Entries missing any of those sub-elements are
    skipped instead of crashing the whole scrape (the original code
    raised TypeError/AttributeError on the first malformed entry).

    :param html: raw HTML of the conference listing page
    :return: list of conference dicts (possibly empty)
    """
    soup = bs(html, features="html.parser")
    conf_list = soup.find("div", {"class": "conf_list"})
    conferences = []
    if conf_list is None:
        # Page layout changed (or we got an error page) -- nothing to parse.
        return conferences
    for p in conf_list.find_all("p"):
        link = p.find("a", {"rel": "ugc"})
        date_blob = p.find("span", {"class": "e74c3c"})
        loc_blob = p.find("a", {"rel": "tag"})
        if link is None or date_blob is None or loc_blob is None:
            continue  # malformed listing entry; skip rather than raise
        # Build the record in one place; the original appended the dict
        # before filling in "location", relying on dict mutability.
        conferences.append({
            "link": link["href"],
            "text": link.text,
            "date": date_blob.text,
            "location": loc_blob.text,
        })
    return conferences


def main():
    """Fetch the conference page and print its entries as JSON."""
    # Single request with the spoofed User-Agent. The original code made
    # an extra header-less request first (blocked by their nginx, per the
    # author's own comment) and discarded its response.
    r = requests.get(url, headers=headers)
    r.raise_for_status()  # fail loudly on HTTP errors instead of parsing an error page
    # Printing, but it's up to you to do whatever...
    print(json.dumps(parse_conferences(r.text)))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment