Last active
June 21, 2018 17:05
-
-
Save AO8/64e32f1ec0b1328d0800293f614c92bb to your computer and use it in GitHub Desktop.
Using Python and BeautifulSoup, pull top stories from Medium on the topic of your choice.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Get unlimited access to the smartest writers and biggest ideas by becoming | |
| # a Medium member for just $5 / month. Visit https://medium.com/membership | |
| import sys | |
| import requests | |
| from bs4 import BeautifulSoup | |
# Medium's search endpoint; the keyword is sent as the "q" query parameter.
SEARCH_URL = "https://medium.com/search"

# Seconds to wait for Medium before giving up, so the script cannot hang
# indefinitely on a stalled connection.
REQUEST_TIMEOUT = 10


def strip_query(raw_url):
    """Return ``raw_url`` without its query string.

    Everything from the first "?" onward is dropped. A URL that contains no
    "?" is returned unchanged (slicing up to ``str.find``'s -1 would
    silently cut off its last character).
    """
    return raw_url.split("?", 1)[0]


def format_story(title, raw_url):
    """Return one report entry: title, cleaned URL and a "***" divider."""
    return title + "\n\n" + strip_query(raw_url) + "\n\n***\n\n"


def fetch_search_page(keyword):
    """Download Medium's search results page for ``keyword``; return its HTML.

    Raises:
        requests.RequestException: on a network failure, a timeout, or an
            HTTP error status.
    """
    # Passing the keyword via ``params`` lets requests percent-encode it, so
    # keywords containing spaces, "&" or "#" still form a valid query.
    response = requests.get(
        SEARCH_URL, params={"q": keyword}, timeout=REQUEST_TIMEOUT
    )
    # Treat 4xx/5xx responses as errors instead of parsing an error page.
    response.raise_for_status()
    return response.text


def extract_stories(html):
    """Return a list of formatted report entries for the stories in ``html``.

    Each story is a ``div`` with class ``postArticle-content``. A story is
    skipped when it has no ``h3`` title or when its first link has no
    ``href``.
    """
    soup = BeautifulSoup(html, "html.parser")
    # Original author's note: this list usually has 10 entries.
    contents = soup.find_all("div", {"class": "postArticle-content"})

    top_stories = []
    for story in contents:
        # Original author's note: h3 appears to be Medium's norm for titles;
        # in rare cases an h2 is used instead, and those stories are skipped.
        heading = story.find("h3")
        link = story.find("a")
        if heading is None or link is None or "href" not in link.attrs:
            continue
        top_stories.append(
            format_story(heading.get_text(), link.attrs["href"])
        )
    return top_stories


def main():
    """Prompt for a keyword and print the top Medium stories matching it.

    If the page cannot be fetched, the error is printed and the script exits
    with status 1.
    """
    keyword = input("Enter a search keyword: ").lower().strip()

    # Keep the try body to the one call that can fail on the network.
    try:
        html = fetch_search_page(keyword)
    except requests.RequestException as e:
        print(f"The following error occured: {e}")
        print("Unable to process this request. Please try again later.")
        sys.exit(1)

    print("".join(extract_stories(html)))


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment