Skip to content

Instantly share code, notes, and snippets.

@Vegasq
Last active May 19, 2017 20:13
Show Gist options
  • Save Vegasq/de1a83883cde10975903148baf7f9cdb to your computer and use it in GitHub Desktop.
Save Vegasq/de1a83883cde10975903148baf7f9cdb to your computer and use it in GitHub Desktop.
import requests
from lxml import html
def get_downtowns(timeout=30):
    """Collect the "Downtown_<name>" article links from Wikipedia.

    Downloads the English Wikipedia "Downtown" article and scans every link
    in it for paths that start with ``/wiki/Downtown_``.

    Args:
        timeout: Seconds to wait for the HTTP response before giving up.

    Returns:
        A dict mapping the part of the link path that follows
        ``/wiki/Downtown_`` to the absolute article URL.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    prefix = "/wiki/Downtown_"
    result = {}
    # Without a timeout a stalled connection would block this call forever.
    page = requests.get(
        "https://en.wikipedia.org/wiki/Downtown", timeout=timeout
    )
    # Fail loudly instead of parsing an error page into an empty result.
    page.raise_for_status()
    phtml = html.fromstring(page.content)
    # iterlinks() yields (element, attribute, link, pos) tuples.
    for _element, _attribute, link, _pos in phtml.iterlinks():
        if not link.startswith(prefix):
            continue
        # Strip only the leading prefix; str.replace would also remove any
        # later occurrence of the same text inside the path.
        key = link[len(prefix):]
        # Some sub-districts
        if "Historic_District" in key:
            continue
        # Some subcategories (titles carrying a parenthesised qualifier)
        if "(" in key:
            continue
        # Presumably the "Downtown music" article, which is not a place.
        if key == "music":
            continue
        result[key] = "https://en.wikipedia.org%s" % link
    return result
def mirror_mirror_on_the_wall_who_is_me_and_who_is_wrong(timeout=30):
    """Print how many "Downtown_<name>" articles mention a business district.

    For every URL returned by ``get_downtowns()`` the page is downloaded and
    the first 1000 characters of the text of its ``mw-content-text`` element
    are searched, case-insensitively, for "business district" or
    "financial district". One "Success"/"Fail" line is printed per entry,
    followed by a summary line with the totals and the success percentage.

    Args:
        timeout: Seconds to wait for each article download before giving up.

    Returns:
        None. All results are written to standard output.
    """
    downtowns = get_downtowns()
    bdist = 0
    for city, url in downtowns.items():
        page = requests.get(url, timeout=timeout)
        phtml = html.fromstring(page.content)
        content = phtml.get_element_by_id("mw-content-text")
        # Only the opening of the article is inspected; lower-case it once.
        intro = content.text_content()[0:1000].lower()
        if "business district" in intro or "financial district" in intro:
            print("%s: Success" % city)
            bdist += 1
        else:
            print("%s: Fail" % city)
    total = len(downtowns)
    # float() forces true division on every interpreter version, and the
    # guard avoids a ZeroDivisionError when no articles were found.
    percent = float(bdist) / total * 100 if total else 0.0
    print("We checked %s cities. %s from them defined their Downtowns as "
          "Bussines District, what is %s percent from all checked cities." %
          (total, bdist, percent))
# Run the survey. This call sits at module level, so the network requests
# start as soon as the file is executed or imported.
mirror_mirror_on_the_wall_who_is_me_and_who_is_wrong()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment