Created
March 26, 2018 11:08
-
-
Save slavikme/78e6012bb414f5e3b7eee29838c86e0e to your computer and use it in GitHub Desktop.
Scrape all clubs and their info from the FreeFit.co.il service.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import datetime
import json

import requests
from requests_html import HTMLSession

# One HTML session shared by every club detail-page request (see getMoreInfo).
session = HTMLSession()
# Captured once at start-up; used to date-stamp the output file name.
now = datetime.datetime.now()
def getMoreInfo(id):
    """Fetch one club's detail page and extract its descriptive fields.

    Args:
        id: Integer club identifier; it is interpolated into the page URL
            with ``%d``.  (The name shadows the ``id`` builtin but is kept
            so existing callers keep working.)

    Returns:
        A dict with the keys ``about``, ``address``, ``phone``, ``parking``,
        ``parking_for_disabled``, ``website``, ``contact``, ``open_hours``
        and ``notes``.  ``about`` and ``notes`` fall back to ``''`` when the
        corresponding element is absent; the XPath-based fields are ``None``
        when nothing matches or when the ``#details`` container is missing.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    url = 'https://freefit.co.il/CLUBS/?CLUB=%d&SUBCLUBCATEGORY=-1' % id
    r = session.get(url, timeout=30)
    # Fail loudly on an HTTP error instead of scraping an error page.
    r.raise_for_status()

    about = r.html.find('#textAbout', first=True)
    details = r.html.find('#details', first=True)

    def first_match(xpath):
        # Return the first XPath hit inside #details, or None if unavailable.
        if details is None:
            return None
        return details.xpath(xpath, first=True)

    # Look the notes element up once rather than once for the test and once
    # for the value.
    notes = None
    if details is not None:
        notes = details.find('div:nth-child(11) .binder', first=True)

    return {
        'about': about.text if about is not None else '',
        'address': first_match('//div[3]/text()'),
        'phone': first_match('//div[4]/text()'),
        'parking': first_match('//div[5]/text()'),
        'parking_for_disabled': first_match('//div[6]/text()'),
        'website': first_match('//div[7]/a/text()'),
        'contact': first_match('//div[8]/text()'),
        'open_hours': first_match('//div[10]/text()'),
        'notes': notes.text if notes else '',
    }
def getList():
    """Request the basic club listing from the FreeFit search endpoint.

    Posts a fixed JSON search payload (company 0, sub-category "-1",
    area "-1", empty free text) with browser-like AJAX headers.

    Returns:
        The value stored under the ``'d'`` key of the JSON response.  The
        driver code in this file iterates it and reads ``Id``, ``Name`` and
        ``LogoPath`` from each entry.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        requests.Timeout: if the server does not answer within 30 seconds.
    """
    url = 'https://freefit.co.il/Master.asmx/SearchClubList'
    headers = {
        'Referer': 'https://freefit.co.il/',
        'Content-Type': 'application/json; charset=UTF-8',
        'Origin': 'https://freefit.co.il',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/604.5.6 (KHTML, like Gecko) Version/11.0.3 Safari/604.5.6',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
    }
    # Sent as a pre-serialised string so the body matches the declared
    # Content-Type exactly.
    payload = '{"CompanyID":0,"subcategoryId":"-1","area":"-1","freeText":""}'
    r = requests.post(url, data=payload, headers=headers, timeout=30)
    # Surface HTTP failures directly rather than as a JSON decoding error.
    r.raise_for_status()
    return r.json()['d']
# Driver: fetch the basic listing, enrich every entry with the fields from
# its detail page, then dump all records to a date-stamped JSON file.
clubs_basic = getList()
clubs_count = len(clubs_basic)
clubs_full = []

for position, entry in enumerate(clubs_basic, start=1):
    cid = entry['Id']
    # Progress line: running index / total, club id, club name.
    print('%d/%d [%d] %s' % (position, clubs_count, cid, entry['Name']))

    record = getMoreInfo(cid)
    logo_path = entry['LogoPath']
    record.update({
        'name': entry['Name'],
        # LogoPath is site-relative; leave the logo blank when it is empty.
        'logo': ('https://freefit.co.il' + logo_path) if logo_path else '',
        'id': cid,
    })
    clubs_full.append(record)

output_name = now.strftime('freefit-clubs_%Y-%m-%d.json')
with open(output_name, 'w') as outfile:
    json.dump(clubs_full, outfile)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment