@kperry2215
Last active November 6, 2022 04:06
web_scrape
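
This snippet pages through a Jefferson County foreclosure search results table with Selenium, scrapes each row into a pandas DataFrame, and writes the records to jefferson_county_foreclosures.csv.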
import time

import pandas as pd
from selenium.webdriver.common.by import By

# Assumes `driver` is an already-initialized Selenium WebDriver sitting on
# the first page of the search results (see the setup sketch below).
# Rows are collected in a list and the DataFrame is built once at the end,
# since DataFrame.append was removed in pandas 2.0.
columns = ["FC #", "Owner Name", "Street", "Zip", "Subdivision", "Balance Due", "Status"]
rows = []
# Flip through result pages 2-14 and save the records on each
for n in range(2, 15):
    # Retry up to three times in case the table hasn't loaded yet
    for _ in range(3):
        try:
            mytable = driver.find_element(
                By.CSS_SELECTOR, "table[id='ctl00_ContentPlaceHolder1_gvSearchResults']")
            # Read every row of the results table into the row list
            for row in mytable.find_elements(By.CSS_SELECTOR, "tr"):
                row_list = [cell.text for cell in row.find_elements(By.CSS_SELECTOR, "td")]
                # Keep the row only if it matches the expected columns;
                # header and pager rows won't, so they are skipped
                if len(row_list) == len(columns):
                    rows.append(row_list)
                else:
                    print("Could not append: " + str(row_list))
            break  # page scraped successfully; stop retrying
        except Exception:
            time.sleep(5)  # give the page a moment to load before retrying
    if n % 10 == 1:
        # The pager shows ten page links at a time; click "..." to advance
        # to the next block (the second "..." once past page 20)
        if n < 20:
            driver.find_elements(By.XPATH, "//td/a[text()='...']")[0].click()
        else:
            driver.find_elements(By.XPATH, "//td/a[text()='...']")[1].click()
    else:
        driver.find_element(By.XPATH, "//td/a[text()='" + str(n) + "']").click()
    # Pause three seconds between pages so the site isn't overloaded
    # (time.sleep actually pauses; implicitly_wait only sets a lookup timeout)
    time.sleep(3)
# Write the results to a csv
df = pd.DataFrame(rows, columns=columns)
df.to_csv("jefferson_county_foreclosures.csv", index=False)
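
The snippet assumes `driver` has already been created and navigated to the first page of the search results before the paging loop starts. A minimal setup sketch, assuming Chrome; the URL below is a placeholder, and any search-form interaction needed to bring up the results table is outside the scope of this gist:

from selenium import webdriver

# Hypothetical setup: the real search URL and the steps that produce the
# results table are not part of this gist.
driver = webdriver.Chrome()
driver.get("https://example.com/jefferson-county-foreclosure-search")  # placeholder URL
# ... run the search here so the results table and its pager are visible ...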