Skip to content

Instantly share code, notes, and snippets.

@jheiselman
Created May 10, 2016 17:37
Show Gist options
  • Select an option

  • Save jheiselman/4fd5933c3d6895a21a23a22eaf553620 to your computer and use it in GitHub Desktop.

Select an option

Save jheiselman/4fd5933c3d6895a21a23a22eaf553620 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
from html.parser import HTMLParser
import requests
class MyHTMLParser(HTMLParser):
    """Extract a book title from the "deal of the day" block of an HTML page.

    The parser waits for a ``<div>`` whose ``class`` attribute contains
    ``dotd-title``; the first text chunk following the next ``<h2>`` start
    tag is cleaned up and stored in ``book_title``.
    """

    # Class-level defaults; reset() re-establishes them on each instance.
    book_title: str = ""
    start_title_check: bool = False  # True once the title <div> has been seen
    grab_book_title: bool = False  # True once the <h2> after that <div> opened

    def reset(self):
        """Reset the base parser and the title-tracking state.

        ``HTMLParser.__init__`` calls ``reset()``, so this also initialises
        the per-instance state and makes a parser object safely reusable.
        """
        super().reset()
        self.book_title = ""
        self.start_title_check = False
        self.grab_book_title = False

    def handle_starttag(self, tag, attrs):
        """Track entry into the title ``<div>`` and the ``<h2>`` inside it.

        Args:
            tag: Lower-cased tag name.
            attrs: List of ``(name, value)`` pairs; ``value`` may be ``None``
                for attributes written without a value.
        """
        if tag == "div":
            # Match "dotd-title" as one of possibly several class tokens
            # rather than requiring the attribute to equal it exactly.
            for name, value in attrs:
                if name == "class" and "dotd-title" in (value or "").split():
                    self.start_title_check = True
        elif self.start_title_check and tag == "h2":
            self.grab_book_title = True

    def handle_data(self, data):
        """Store the first text chunk after the title ``<h2>`` as the title.

        Args:
            data: Text content reported by the parser.
        """
        if not self.grab_book_title:
            return
        # Callers that feed str(bytes) pass the two-character sequences
        # backslash-n / backslash-t instead of real whitespace, so drop those
        # first, then strip genuine surrounding whitespace as well.
        cleaned = data.replace("\\n", "").replace("\\t", "")
        self.book_title = cleaned.strip()
        # One-shot capture: stop looking once a title has been recorded.
        self.grab_book_title = False
        self.start_title_check = False
# Fetch the Packt free-learning offer page and print today's book title.
url = "https://www.packtpub.com/packt/offers/free-learning"
parser = MyHTMLParser()
# A timeout keeps the script from hanging forever on a stalled connection.
r = requests.get(url, timeout=30)
# Fail loudly on an HTTP error instead of silently parsing an error page.
r.raise_for_status()
# Feed the decoded text: str(r.content) would pass the bytes repr
# ("b'...'"), turning newlines, quotes and non-ASCII characters into
# backslash escape sequences inside the parsed data.
parser.feed(r.text)
# Decoded markup carries real whitespace around the title; trim it here.
print(parser.book_title.strip())
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment