Skip to content

Instantly share code, notes, and snippets.

@lucasgautheron
Last active April 5, 2018 17:42
Show Gist options
  • Save lucasgautheron/1c467b61a37c32c5e011030033a631b8 to your computer and use it in GitHub Desktop.
Save lucasgautheron/1c467b61a37c32c5e011030033a631b8 to your computer and use it in GitHub Desktop.
from urllib.request import Request, urlopen
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import re
import numpy as np
import json
urls = ['https://www.lepotcommun.fr/pot/qwgkeart', 'https://www.leetchi.com/fr/Cagnotte/31978353/a8a95db7', 'https://www.lepotcommun.fr/pot/w6md18bt']
# for loop
data = []
for url in urls:
req = Request(url, headers={'User-Agent': 'Mozilla/5.0'})
page = urlopen(req).read()
soup = BeautifulSoup(page, 'html.parser')
participants = '0'
funds = '0'
domain = urlparse(url).netloc
if domain == 'www.lepotcommun.fr' :
funds_box = soup.findAll('span', attrs={'class': 'pink-color'})
participants = funds_box[0].text.strip()
funds = funds_box[1].text.strip()
elif domain == 'www.leetchi.com' :
funds_box = soup.findAll('h1', attrs={'class': 'o-article-status__heading'})
funds = funds_box[0].text.strip()
p_box = soup.findAll('span', attrs={'class': 'c-status__counter'})
participants = p_box[1].text.strip()
else:
pass
try:
participants = int(participants)
funds = float(re.sub('[^\d\.]', '', funds.replace(',', '.')))
data.append([participants, funds])
except:
print ('err')
sums = np.sum(data, axis=0)
output = {}
output['list'] = data
output['sums'] = sums.tolist()
print(output)
with open('data.json', 'w') as f:
json.dump(output, f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment