Skip to content

Instantly share code, notes, and snippets.

@slavikme
Created March 26, 2018 11:08
Show Gist options
  • Save slavikme/78e6012bb414f5e3b7eee29838c86e0e to your computer and use it in GitHub Desktop.
Save slavikme/78e6012bb414f5e3b7eee29838c86e0e to your computer and use it in GitHub Desktop.
Scrape all clubs and their info from the FreeFit.co.il service
import requests
from requests_html import HTMLSession
import json
import datetime
# Shared requests_html session; getMoreInfo() uses it to fetch club pages.
session = HTMLSession()
# Timestamp taken once at start-up; its date goes into the output file name.
now = datetime.datetime.now()
def getMoreInfo(id):
    """Fetch one club's page and extract its descriptive fields.

    Args:
        id: Integer club identifier, interpolated into the ``CLUB`` query
            parameter with ``%d``.  (The name shadows the ``id`` builtin; it
            is kept so existing callers are unaffected.)

    Returns:
        A dict with the keys ``about``, ``address``, ``phone``, ``parking``,
        ``parking_for_disabled``, ``website``, ``contact``, ``open_hours``
        and ``notes``.  ``about`` and ``notes`` are the text of their
        elements, or ``''`` when the element is absent.  The other fields are
        the first XPath match inside ``#details``, or ``None`` when nothing
        matches or ``#details`` itself is missing.
    """
    url = 'https://freefit.co.il/CLUBS/?CLUB=%d&SUBCLUBCATEGORY=-1' % id
    # A timeout keeps one stalled connection from hanging the whole scrape.
    page = session.get(url, timeout=30).html

    about = page.find('#textAbout', first=True)
    details = page.find('#details', first=True)

    def first_match(expression):
        # Mirrors the "no match" result (None) when the container is absent,
        # instead of raising AttributeError on None.
        if details is None:
            return None
        return details.xpath(expression, first=True)

    # Look the notes element up once rather than once for the test and once
    # for the value.
    notes = None
    if details is not None:
        notes = details.find('div:nth-child(11) .binder', first=True)

    # NOTE(review): fields are selected purely by the position of each <div>;
    # presumably this mirrors the page layout -- verify if the site changes.
    return {
        'about': about.text if about else '',
        'address': first_match('//div[3]/text()'),
        'phone': first_match('//div[4]/text()'),
        'parking': first_match('//div[5]/text()'),
        'parking_for_disabled': first_match('//div[6]/text()'),
        'website': first_match('//div[7]/a/text()'),
        'contact': first_match('//div[8]/text()'),
        'open_hours': first_match('//div[10]/text()'),
        'notes': notes.text if notes else '',
    }
def getList():
    """Request the club list from the FreeFit ``SearchClubList`` endpoint.

    Sends a JSON POST carrying the same headers a browser XHR would send
    (``X-Requested-With: XMLHttpRequest``, a Safari user agent, and the site
    as ``Referer``/``Origin``).

    Returns:
        The value stored under the ``'d'`` key of the JSON response.  The
        calling script iterates over it and reads ``'Id'``, ``'Name'`` and
        ``'LogoPath'`` from each entry.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.Timeout: If the server does not respond within the timeout.
    """
    url = 'https://freefit.co.il/Master.asmx/SearchClubList'
    headers = {
        'Referer': 'https://freefit.co.il/',
        'Content-Type': 'application/json; charset=UTF-8',
        'Origin': 'https://freefit.co.il',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) AppleWebKit/604.5.6 (KHTML, like Gecko) Version/11.0.3 Safari/604.5.6',
        'Accept': 'application/json, text/javascript, */*; q=0.01',
        'X-Requested-With': 'XMLHttpRequest',
    }
    # NOTE(review): the -1 / empty values presumably mean "no filter", i.e.
    # return every club -- confirm against the site's own search request.
    payload = '{"CompanyID":0,"subcategoryId":"-1","area":"-1","freeText":""}'
    # Without a timeout requests waits forever on an unresponsive server.
    r = requests.post(url, data=payload, headers=headers, timeout=30)
    # Fail with a clear HTTP error instead of a JSON/KeyError on an error page.
    r.raise_for_status()
    return r.json()['d']
# Script body: fetch the club list, enrich every entry with the details
# scraped from its own page, and dump the result to a dated JSON file.
clubs_full = []
clubs_basic = getList()
clubs_count = len(clubs_basic)

for i, item in enumerate(clubs_basic, start=1):
    cid = item['Id']
    # Progress line: position/total, club id, club name.
    print('%d/%d [%d] %s' % (i, clubs_count, cid, item['Name']))
    club = getMoreInfo(cid)
    club['name'] = item['Name']
    # LogoPath is appended to the site root; clubs without one get ''.
    club['logo'] = ('https://freefit.co.il' + item['LogoPath']) if item['LogoPath'] else ''
    club['id'] = cid
    clubs_full.append(club)

# json.dump escapes non-ASCII characters by default, so the file content is
# the same as before; the explicit encoding only removes the dependency on
# the platform's default.
with open(now.strftime('freefit-clubs_%Y-%m-%d.json'), 'w', encoding='utf-8') as outfile:
    json.dump(clubs_full, outfile)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment