Last active
August 10, 2019 06:15
-
-
Save tomrandle/09ad2e2ec2aa0dea74c834abe107b477 to your computer and use it in GitHub Desktop.
Scraping team member info from old site and creating markdown file for each
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape team member info from the old site's saved HTML ("raw.html") and
# create one markdown file per member, each holding a YAML front-matter block.
#
# Written to run unchanged on both Python 2 and Python 3.
from bs4 import BeautifulSoup
from yaml import load, dump

# Social networks looked up for every member. Each name doubles as the CSS
# class searched for inside the member's markup and as the front-matter key.
SOCIAL_NETWORKS = ('twitter', 'linkedin', 'dribbble', 'github', 'medium')

# NOTE(review): the keys below "social-links:" sit at column 0 in the copy of
# this script that was reviewed; they were presumably indented (nested under
# "social-links") in the original -- confirm before relying on the output.
# NOTE(review): values are interpolated verbatim, so a bio containing ": " or
# a line break will produce invalid YAML; consider quoting or yaml.dump.
FRONT_MATTER_TEMPLATE = """---
name: %s
title: %s
bio: %s
photo: %s
social-links:
twitter: %s
dribbble: %s
linkedin: %s
github: %s
medium: %s
---
"""


def _social_links(member):
    """Return a dict mapping each social network name to the member's link.

    A network maps to '' when the member has no element with that CSS class,
    or when that element carries no ``href`` attribute.
    """
    socialLinks = dict.fromkeys(SOCIAL_NETWORKS, '')
    for network in SOCIAL_NETWORKS:
        anchor = member.find(class_=network)
        # find() returns None when nothing matches; check explicitly rather
        # than swallowing every exception with a bare ``except``.
        if anchor is not None:
            socialLinks[network] = anchor.get("href", '')
    return socialLinks


# Parsing happens inside the constructor, so the input can be closed as soon
# as the soup has been built.
with open('raw.html') as html_doc:
    soup = BeautifulSoup(html_doc, 'html.parser')

teamMembers = soup.find_all(class_="t-member")

for member in teamMembers:
    # .string is None when the element has more than one child node; the
    # name is required (it becomes the file name), so that case still fails
    # loudly on the .lower() call below.
    name = member.find(class_="team-list__name").string
    title = member.find(class_="team-list__title").string
    bio = member.find(class_="bio").string
    photo = member.find("img")["src"]

    socialLinks = _social_links(member)

    # "Jane Doe" -> "jane-doe.markdown"
    filename = name.lower().replace(" ", "-") + ".markdown"
    print(filename)

    docHead = FRONT_MATTER_TEMPLATE % (
        name,
        title,
        bio,
        photo,
        socialLinks['twitter'],
        socialLinks['dribbble'],
        socialLinks['linkedin'],
        socialLinks['github'],
        socialLinks['medium'],
    )
    print(docHead)

    # Binary mode because the text is encoded by hand; this works on both
    # Python 2 and 3, and the context manager guarantees flush and close.
    with open(filename, "wb") as out:
        out.write(docHead.encode('utf8'))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment