Skip to content

Instantly share code, notes, and snippets.

@salgo60
Created October 26, 2019 22:56
Show Gist options
  • Select an option

  • Save salgo60/9b01ee10eecae54f277305f02d3cf1a1 to your computer and use it in GitHub Desktop.

Select an option

Save salgo60/9b01ee10eecae54f277305f02d3cf1a1 to your computer and use it in GitHub Desktop.
webscraper
import time
import urllib.request
from urllib.parse import parse_qs, urlsplit

import requests
from bs4 import BeautifulSoup
#url = 'https://www.alvin-portal.org/alvin/resultList.jsf?faces-redirect=true&searchType=PLACE&sortString=name_sort_asc&noOfRows=1000&af=%5B%5D&query=&aq=%5B%5B%7B"free_text"%3A"Turkiet"%7D%5D%5D&aqe=%5B%5D&dswid=-2660'
# Result-list pages of the Alvin portal PLACE search, 1000 rows per page.
# The first page uses a different query string from the follow-up pages, so
# it is spelled out; the remaining pages differ only in the start offset ``p``.
FIRST_PAGE_URL = (
    'https://www.alvin-portal.org/alvin/resultList.jsf?faces-redirect=true'
    '&includeViewParams=true&query=&searchType=PLACE&dswid=-2660&noOfRows=1000'
)
NEXT_PAGE_URL = (
    'https://www.alvin-portal.org/alvin/resultList.jsf?dswid=-2660&p={start}'
    '&searchType=PLACE&sortString=name_sort_asc&noOfRows=1000'
    '&af=%5B%5D&query=&aq=%5B%5B%5D%5D&aqe=%5B%5D'
)

# Same eight URLs, in the same order, as the original hand-written list
# (start offsets 1001, 2001, ..., 7001 for the follow-up pages).
urls = [FIRST_PAGE_URL] + [
    NEXT_PAGE_URL.format(start=start) for start in range(1001, 7002, 1000)
]

# Seconds to wait for the server before giving up on a page.
REQUEST_TIMEOUT = 60

# Substring that marks an anchor as a link to a place record.
PLACE_MARKER = "alvin-place"


def extract_pid(href):
    """Return the decoded ``pid`` query parameter of *href*, or None.

    The parameter is looked up by name, so the result does not depend on
    where ``pid`` sits in the query string. Percent-escapes are decoded
    (``%3A`` becomes ``:``).
    """
    values = parse_qs(urlsplit(href).query).get("pid")
    return values[0] if values else None


def iter_places(soup):
    """Yield ``(link_text, pid)`` for every place link in a parsed page.

    Anchors without an ``href`` attribute, anchors whose ``href`` does not
    contain ``alvin-place``, and links that carry no ``pid`` parameter are
    skipped.
    """
    # One pass over the anchors; href=True filters out <a> tags that have
    # no href, which would otherwise raise KeyError on lookup.
    for anchor in soup.find_all('a', href=True):
        href = anchor['href']
        if PLACE_MARKER not in href:
            continue
        pid = extract_pid(href)
        if pid is not None:
            yield anchor.text, pid


def main():
    """Fetch every result page and print ``name ; pid`` for each place link.

    Raises:
        requests.RequestException: if a page cannot be fetched or the
            server answers with an HTTP error status.
    """
    for url in urls:
        response = requests.get(url, timeout=REQUEST_TIMEOUT)
        # Fail loudly instead of silently parsing an error page.
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        for name, pid in iter_places(soup):
            print(name, ";", pid)
        # Pause between page requests to go easy on the server.
        time.sleep(1)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment