Skip to content

Instantly share code, notes, and snippets.

@ishan-marikar
Created May 10, 2015 06:09
Show Gist options
  • Save ishan-marikar/3143d8ba2c524814a063 to your computer and use it in GitHub Desktop.
Save ishan-marikar/3143d8ba2c524814a063 to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
import collections
import urlparse
def get_last_record_number(parsed_html):
    """Return the ``start`` offset carried by the pager's "End" link.

    Finds the anchor whose ``title`` attribute is ``End`` and reads the
    ``start`` parameter from the query string of its ``href``.

    Args:
        parsed_html: Parsed results page. It must offer ``find(name, attrs)``
            the way a BeautifulSoup document does.

    Returns:
        The first ``start`` value as a string, exactly as it appears in the
        URL, or ``None`` when the page has no "End" link, the link has no
        ``href``, the ``href`` cannot be parsed, or it carries no ``start``
        parameter.
    """
    end_link = parsed_html.find('a', {'title': 'End'})
    if end_link is None:
        # A page without an "End" anchor is an ordinary outcome of the
        # lookup, so it is answered with None instead of an exception report.
        return None

    href = end_link.get('href')
    if not href:
        return None

    try:
        query = urlparse.urlparse(href).query
    except ValueError as e:
        # Malformed URL in the page markup: report it and carry on.
        print('[!] Handling Exception: %s' % (e,))
        return None

    # parse_qs maps each parameter to a list of values; blank values are
    # dropped, so a bare "start=" is treated the same as a missing parameter.
    start_values = urlparse.parse_qs(query).get('start')
    if not start_values:
        return None
    return start_values[0]
def open_site(query, start=0, timeout=30):
    """Fetch one page of RainbowPages search results and parse it.

    Args:
        query: Search text, sent as the ``search`` query parameter.
        start: Record offset of the requested page, sent as the ``start``
            query parameter.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A ``BeautifulSoup`` document built from the response body.

    Raises:
        requests.exceptions.RequestException: if the request fails or times
            out. The HTTP status code is not checked.
    """
    url = "http://rainbowpages.lk/search-directory/"
    payload = {'search': query, 'start': start}
    response = requests.get(url, params=payload, timeout=timeout)
    # Name the parser explicitly so the resulting tree does not depend on
    # which optional parser libraries happen to be installed.
    return BeautifulSoup(response.text, 'html.parser')
# One directory listing. Defined once at module level so every page yields
# records of the same type.
Person = collections.namedtuple('Person', 'name address telephone')


def _node_text(node):
    """Return ``node.text``, or '' when the lookup that produced it found nothing."""
    return node.text if node is not None else ''


def extract_records(parsed_html):
    """Collect the name, address and telephone of every listing on a page.

    Each ``div`` with class ``jd-item`` is treated as one listing. A field
    whose element is missing is stored as an empty string, so one incomplete
    listing no longer discards the rest of the page.

    Args:
        parsed_html: Parsed results page offering ``find_all(name, attrs)``;
            each item it returns must offer ``find(name, attrs)``.

    Returns:
        A list of ``Person(name, address, telephone)`` tuples in page order;
        empty when the page has no listings.
    """
    details = []
    for item in parsed_html.find_all("div", {"class": "jd-item"}):
        # NOTE(review): 'jd-itemTtile' looks misspelled but is kept as-is; it
        # presumably mirrors the class name used by the site. Confirm.
        record_name = _node_text(item.find('div', {'class': 'jd-itemTtile'}))

        # The address element's text includes an "Address" label; drop it.
        address_node = item.find('div', {'class': 'jd-itemAddress'})
        record_address = _node_text(address_node).replace('Address', '').strip()

        # The number sits in a value span inside the first 'jd-block-row'.
        phone_row = item.find('div', {'class': 'jd-block-row'})
        phone_node = None
        if phone_row is not None:
            phone_node = phone_row.find('span', {'class': 'jd-fields-li-value'})
        record_telephone = _node_text(phone_node)

        details.append(Person(name=record_name,
                              address=record_address,
                              telephone=record_telephone))
    return details
def lookup_name(name):
    """Fetch every results page for ``name`` and return all listings found.

    Pages are requested at offsets 0, 15, 30, ... up to the largest offset
    reported by a page's "End" link. A page that reports no usable offset
    leaves the known last offset unchanged, so a search that fits on one page
    costs exactly one request.

    Args:
        name: Search text passed on to ``open_site``.

    Returns:
        A list with the records of every page, in page order. A page whose
        extraction fails contributes nothing.
    """
    print("[*] Looking up name '%s' from SLT RainbowPages .." % (name))
    page_size = 15  # offset step between consecutive result pages
    current_record = 0
    last_record = 0
    complete_results = []
    while True:
        parsed_html = open_site(name, current_record)

        # The offset arrives as a string (or None). Convert it before
        # comparing so the loop never relies on mixed-type ordering.
        try:
            last_record = max(last_record, int(get_last_record_number(parsed_html)))
        except (TypeError, ValueError):
            pass

        try:
            current_page_results = extract_records(parsed_html)
        except Exception as e:
            print('[!] Handling Exception: %s' % (e,))
            # Start from an empty page so a failure never re-appends the
            # previous page's records.
            current_page_results = []

        complete_results.extend(current_page_results)
        print("[*] Appending %s of %s" % (current_record, last_record))

        # ">=" also stops when the offset lands exactly on the last page.
        if current_record >= last_record:
            break
        current_record += page_size
    return complete_results
def main():
    """Prompt for a name, run the lookup and print each matching entry."""
    wanted = raw_input("Enter name to search: ")
    divider = "*" * 30
    for entry in lookup_name(wanted):
        # Every entry is preceded by a row of asterisks.
        print(divider)
        print(entry.name)
        print(entry.address)
        print(entry.telephone)
# Run the interactive lookup only when executed as a script, not on import.
if "__main__" == __name__:
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment