-
-
Save jcsalterego/170851 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pprint import pprint | |
import re | |
import sys | |
import urllib2 | |
# Page that main() downloads (and caches) before scraping it.
URL = "http://www.modifiedcartrader.com/information/newestCars.aspx"

# Maps (depth, control name) -> output field name used by info_transform().
# ``depth`` is the pass number main() passes in (1 for the first regex pass,
# 2 for the second); control names without an entry are kept unchanged.
TRANSLATIONS = {
    # first pass
    (1, "Label1"): "mileage",
    (1, "Label2"): "hp",
    (1, "Panel1"): "make",
    # second pass
    (2, "Label1"): "hp",
    (2, "Label2"): "hpx",
    (2, "Panel1"): "transmission",
}
def info_transform(item, depth=1):
    """Turn one scraped ``(element id, text)`` pair into ``[row, field, value]``.

    ``item[0]`` is an HTML element id and ``item[1]`` the text captured for
    it.  Every ``"ctl"`` substring is removed from the id, and its last two
    underscore-separated parts are used as the row number and control name
    (presumably ids look like ``GridView1_ctl02_Label1`` -- confirm against
    the page).  The control name is replaced by its ``TRANSLATIONS`` entry
    for ``(depth, name)`` when one exists.

    The text has runs of spaces collapsed to single spaces.  If nothing is
    left, ``None`` is returned.  Text made only of digits once commas are
    removed is converted to ``int``.

    Raises ``ValueError`` when the id has fewer than two underscore-separated
    parts or its row part is not an integer.
    """
    row_text, field = item[0].replace("ctl", "").split("_")[-2:]
    row = int(row_text)

    # Fall back to the raw control name when there is no translation.
    field = TRANSLATIONS.get((depth, field), field)

    # Splitting on a single space yields empty strings for repeated or
    # leading/trailing spaces; dropping them collapses the whitespace.
    words = [chunk for chunk in item[1].split(" ") if chunk]
    value = " ".join(words)
    if not value:
        return None

    # Numbers may carry thousands separators, e.g. "12,345".
    digits = value.replace(",", "")
    if digits.isdigit():
        return [row, field, int(digits)]
    return [row, field, value]
def _extract_rows(doc, pattern, wanted_ids, depth):
    """Return the transformed rows for every ``pattern`` match with a wanted id.

    ``pattern`` must yield ``(element id, text)`` pairs from ``findall``.
    Matches whose id is not in ``wanted_ids`` are skipped, as are matches
    for which ``info_transform`` returns nothing (empty text).
    """
    rows = []
    for tag in pattern.findall(doc):
        if tag[0] not in wanted_ids:
            continue
        # Transform once, at the requested depth, and reuse the result for
        # both the emptiness check and the output.
        row = info_transform([part.strip() for part in tag], depth=depth)
        if row:
            rows.append(row)
    return rows


def main(argv):
    """Scrape the newest-cars page and pretty-print the extracted fields.

    The page is read from ``cache.html`` in the current directory when that
    file can be read; otherwise it is downloaded from ``URL`` and written to
    ``cache.html`` for later runs.  Two passes are made over the HTML and
    each result list is pretty-printed:

    1. depth 1 -- text that directly follows the tag carrying the id;
    2. depth 2 -- text found after one further tag following the id's tag.

    Only elements whose id contains ``GridView1_ctl`` are considered.

    ``argv`` is accepted for the conventional entry-point signature and is
    not used.  Returns 0, suitable as a process exit status.
    """
    try:
        with open('cache.html') as cache:
            doc = cache.read()
    except IOError:
        # No usable cache (missing or unreadable): fetch the page and cache
        # it.  Only I/O errors trigger the download, so unrelated failures
        # and Ctrl-C are not silently swallowed.
        response = urllib2.urlopen(URL)
        try:
            doc = response.read()
        finally:
            response.close()
        with open('cache.html', 'w') as cache:
            cache.write(doc)

    # Flatten the document so the non-greedy ``.*?`` in the patterns below
    # can span what used to be line breaks.
    doc = doc.replace("\n", "").replace("\r", "")

    # A set makes the per-match membership test cheap.
    ids = set(tag for tag in re.compile(r'id="(.+?)"').findall(doc)
              if 'GridView1_ctl' in tag)

    # Pass 1: text directly after the tag that carries the id.
    info_re = re.compile(r'id="(.+?)".*?>([^<]+)</')
    pprint(_extract_rows(doc, info_re, ids, depth=1))

    # Pass 2: text after one additional tag following the id's tag.
    info_re = re.compile(r'id="(.+?)".*?><.*?>([^<]+)</')
    pprint(_extract_rows(doc, info_re, ids, depth=2))

    return 0
# Run the scraper only when executed as a script; main()'s return value
# becomes the process exit status.
if __name__ == '__main__':
    exit_status = main(sys.argv)
    sys.exit(exit_status)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment