Skip to content

Instantly share code, notes, and snippets.

@sproctor
Last active March 31, 2018 15:52
Show Gist options
  • Save sproctor/4e1f9f4b1cec8572ca2b4e8148ac85b1 to your computer and use it in GitHub Desktop.
Save sproctor/4e1f9f4b1cec8572ca2b4e8148ac85b1 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
import codecs
import sys
import urllib2
from xml.sax.saxutils import escape

from bs4 import BeautifulSoup
# URL of the yard pricing page that is scraped.
page_url = 'https://iscrapapp.com/yards/us-pennsylvania-catasauqua-catasauqua-scrap-yard-premium/pricing/'

# Request headers sent with the fetch; they mimic a desktop browser.
hdr = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Referer': 'https://cssspritegenerator.com',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}


def fetch_soup(url, headers):
    """Download *url* using *headers* and return it parsed with html5lib.

    The HTTP response is always closed, even if parsing raises.
    """
    request = urllib2.Request(url, headers=headers)
    page = urllib2.urlopen(request)
    try:
        return BeautifulSoup(page, 'html5lib')
    finally:
        page.close()


def table_title(pricing_table):
    """Return the section heading text for *pricing_table*, or None.

    The heading is the <h3> inside the closest preceding sibling
    ``div.yard-section-header`` of the table's parent element. None is
    returned when any link of that chain is missing, instead of raising
    AttributeError.
    """
    parent = pricing_table.parent
    if parent is None:
        return None
    header = parent.find_previous_sibling("div", {"class": "yard-section-header"})
    if header is None:
        return None
    heading = header.find("h3")
    if heading is None:
        return None
    return heading.get_text()


def extract_rows(pricing_table):
    """Return a list of (name, price) text pairs from *pricing_table*.

    Rows lacking either a ``td.material-name`` or a ``td.material-price``
    cell (header rows, spacers, ...) are skipped.
    """
    rows = []
    for row in pricing_table.find_all('tr'):
        name_cell = row.find("td", {"class": "material-name"})
        price_cell = row.find("td", {"class": "material-price"})
        if name_cell is None or price_cell is None:
            continue
        rows.append((name_cell.get_text(), price_cell.get_text()))
    return rows


def render_table_lines(title, rows):
    """Return the HTML output lines for one pricing table.

    title -- heading text, or None to omit the <h3> line.
    rows  -- iterable of (material_name, material_price) text pairs.

    All text is HTML-escaped because it comes from a scraped page and may
    contain '&', '<' or '>'.
    """
    lines = []
    if title is not None:
        lines.append("<h3>" + escape(title) + "</h3>")
    # The header row is explicitly closed with </tr>.
    lines.append("<table><tr><th>Material Name</th><th>Material Price</th></tr>")
    for name, price in rows:
        lines.append("<tr><td class=\"material-name\">")
        lines.append(escape(name))
        lines.append("</td><td class=\"material-price\">")
        lines.append(escape(price))
        lines.append("</td></tr>")
    lines.append("</table>")
    return lines


def main():
    """Fetch the pricing page and print its pricing tables as simple HTML."""
    # Wrap stdout so unicode text is written as UTF-8 (Python 2 script:
    # the fetch relies on urllib2).
    sys.stdout = codecs.getwriter('utf8')(sys.stdout)
    soup = fetch_soup(page_url, hdr)

    pricing_top = soup.find("div", {"class": "pricing-top"})
    if pricing_top is not None:
        # decode() renders the tag as text; printing the tag object itself
        # would go through str(), which on Python 2 yields bytes that the
        # UTF-8 writer may fail to re-encode when non-ASCII is present.
        print(pricing_top.decode())

    for pricing_table in soup.find_all("table", {"class": "pricing-table"}):
        title = table_title(pricing_table)
        for line in render_table_lines(title, extract_rows(pricing_table)):
            print(line)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment