Created
February 22, 2021 18:41
-
-
Save niyumard/db3f18b138f357d3f225376573f6b7d4 to your computer and use it in GitHub Desktop.
Reads a text file of URLs and extracts house-rental ad details from divar.ir into a CSV file.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Scrape house-rental ads from divar.ir into ``divar.csv``.

Reads ad URLs (one per line) from ``url.lst``, downloads each page, and
appends one CSV row per ad: the page title, the values found in the info
rows, the description text, and finally the URL itself.
"""
from bs4 import BeautifulSoup
import requests
import convert_numbers
from csv import writer


def _odd_children_text(soup, css_class):
    """Return the text of every odd-indexed child of each matching ``<div>``.

    NOTE(review): the odd-indexed children are presumably the values that
    sit next to their labels in divar's markup -- confirm against a live page.
    """
    values = []
    for block in soup.find_all('div', class_=css_class):
        for index, child in enumerate(block.children):
            if index % 2 != 0:
                values.append(child.get_text())
    return values


# `with` guarantees the URL list is closed; blank lines are not URLs.
with open('url.lst', 'r') as URL_List_as_txt:
    URLs = [line.strip() for line in URL_List_as_txt.read().splitlines() if line.strip()]

for URL in URLs:
    print("\n-------------------\nLet's start!")
    # Row data sits here and finally gets appended into a csv file as another row
    row_data = []
    try:
        # Downloads the webpage; the timeout keeps one dead host from
        # hanging the whole run.
        ad_webpage = requests.get(URL, timeout=30)
    except requests.RequestException as error:
        # No response object exists here, so report the exception itself
        # and move on to the next URL instead of parsing a stale page.
        print("Error (" + str(error) + "): " + URL)
        continue
    if not ad_webpage.ok:
        # An HTTP error page has none of the ad markup; skip it.
        print("Error Number "+str(ad_webpage.status_code)+": "+URL)
        continue
    print("Got it! Successful with code "+str(ad_webpage.status_code)+":'"+URL+"'")

    soup = BeautifulSoup(ad_webpage.content, 'html.parser')
    # Ad Title is the first item in our row (kept as an empty cell when the
    # page has no <title>, so later columns stay aligned).
    title = soup.title.string if soup.title is not None else ''
    print(title)
    row_data.append(title)
    print(row_data)

    row_data.extend(_odd_children_text(
        soup, 'kt-group-row-item kt-group-row-item--info-row'))
    row_data.extend(_odd_children_text(
        soup, 'kt-base-row kt-base-row--large kt-unexpandable-row'))

    # Columns 1-5 are run through the Persian-to-English digit converter;
    # the bound protects ads that expose fewer than five fields.
    for i in range(1, min(6, len(row_data))):
        row_data[i] = convert_numbers.persian_to_english(row_data[i])

    description = soup.find(
        'p',
        class_='kt-description-row__text post-description kt-description-row__text--primary')
    row_data.append(description.get_text() if description is not None else '')
    row_data.append(URL)
    print(row_data)

    # Appends the row to the end of a csv file named divar.csv.
    # newline='' is what the csv module expects; utf-8 keeps the Persian
    # text writable regardless of the platform's default encoding.
    with open('divar.csv', 'a', newline='', encoding='utf-8') as f_object:
        writer(f_object).writerow(row_data)
    print("Okay! Next!\n-------------------\n")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment