Last active
July 29, 2024 12:27
-
-
Save romanejaquez/4a5a7703681a0bbacdb842173d7498a2 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Deploy this as a first-generation Cloud Function with Python 3.10 as its runtime
# and with an HTTP trigger; this function will use the list of profile URLs from CloudSkillsBoost
# and scrape the necessary information.
# Add a "main.py" with the code below,
# plus a requirements.txt with the following packages:
# ---
# beautifulsoup4
# requests
# Replace the profile_urls array with the
# list of profile URLs of your attendees from CloudSkillsBoost,
# and then just deploy the function.
# main.py
import json
import logging

import functions_framework
import requests
from bs4 import BeautifulSoup
# Add the public profile URLs of all your members here.
# Future versions will pull this list from Firestore.
profile_urls = [
    'https://www.cloudskillsboost.google/public_profiles/13d2fc34-8aff-44b4-adcc-77032ccf8cb2',
    'https://www.cloudskillsboost.google/public_profiles/e770c4c3-5a42-495c-80cf-f9db5b4371e4',
    'https://www.cloudskillsboost.google/public_profiles/1978d95d-b6af-4a61-9961-761e2f7cd45f',
    'https://www.cloudskillsboost.google/public_profiles/82344445-515c-44f0-86ae-68d8d000e328',
]
@functions_framework.http
def get_badges(request):
    """HTTP entry point: scrape every URL in ``profile_urls`` and return JSON.

    Args:
        request: The request object passed in by the functions_framework HTTP
            trigger. It is not inspected; every request gets the same payload.

    Returns:
        A ``(body, status, headers)`` tuple. ``body`` is a JSON array holding
        one ``{'badges': [...], 'profile': {...}}`` object per profile that
        was fetched successfully and has a non-empty name. The status is
        always 200; profiles that cannot be fetched are logged and skipped.
    """
    payload = []
    # A single session lets the HTTP connection be reused across profiles
    # (the listed profile URLs all point at the same host).
    with requests.Session() as session:
        for url in profile_urls:
            try:
                # Without a timeout a stalled upstream would block the whole
                # function; raise_for_status keeps error pages from being
                # parsed as if they were profiles.
                response = session.get(url, timeout=10)
                response.raise_for_status()
            except requests.RequestException as exc:
                logging.warning('Skipping profile %s: %s', url, exc)
                continue

            soup = BeautifulSoup(response.text, 'html.parser')
            user_profile = process_user(soup, url)
            if user_profile['name'] == '':
                # No name means the page was not a usable profile.
                continue

            payload.append({
                'badges': process_badges(soup),
                'profile': user_profile,
            })

    return json.dumps(payload), 200, {
        'Access-Control-Allow-Origin': '*',
        'Content-Type': 'application/json',
    }
def process_user(soup, url):
    """Extract the profile header (name, join date, avatar) from a parsed page.

    Args:
        soup: Parsed profile page; only its ``find`` method is used.
        url: The profile URL, stored as a string under ``'profile_link'``.

    Returns:
        A dict with the keys ``'name'``, ``'member_since'``, ``'avatar'`` and
        ``'profile_link'``. When the expected page structure is missing, the
        affected text fields stay ``''`` instead of raising, so the caller can
        detect an unusable profile through an empty ``'name'``. When the
        header is present but the avatar element or its ``src`` is missing,
        ``'avatar'`` falls back to a generic anonymous-avatar image.
    """
    user = {
        'name': '',
        'member_since': '',
        'avatar': '',
        'profile_link': str(url),
    }

    root_container = soup.find('main', attrs={'id': 'jump-content'})
    if root_container is None:
        return user
    avatar_container = root_container.find('div', {'class': 'text--center'})
    if avatar_container is None:
        return user

    name_tag = avatar_container.find('h1', {'class': 'ql-display-small'})
    if name_tag is not None:
        user['name'] = name_tag.text.strip()

    member_tag = avatar_container.find('p', {'class': 'ql-body-large'})
    if member_tag is not None:
        user['member_since'] = member_tag.text.strip()

    avatar_tag = avatar_container.find('ql-avatar', {'class': 'profile-avatar'})
    try:
        user['avatar'] = avatar_tag['src']
    except (TypeError, KeyError):
        # TypeError: no avatar element was found (None is not subscriptable);
        # KeyError: the element exists but carries no 'src' attribute.
        user['avatar'] = 'https://www.gstatic.com/images/branding/product/2x/avatar_anonymous_512dp.png'
    return user
def process_badges(soup):
    """Collect the badges listed on a parsed profile page.

    Args:
        soup: Parsed profile page; only its ``find`` method is used.

    Returns:
        A list of dicts, one per badge, each with the keys ``'badgeTitle'``,
        ``'link'`` and ``'earned'``. The list is empty when the page has no
        ``profile-badges`` container. A badge that lacks one of the expected
        elements is skipped on its own, so the remaining badges are kept.
    """
    container = soup.find('div', attrs={'class': 'profile-badges'})
    if container is None:
        return []

    badges = []
    for badge in container.find_all('div', {'class': 'profile-badge'}):
        try:
            entry = {
                'badgeTitle': badge.find('span', {'class': 'ql-title-medium'}).text.strip(),
                'link': badge.find('a', {'class': 'badge-image'})['href'],
                'earned': badge.find('span', {'class': 'ql-body-medium'}).text.strip(),
            }
        except (AttributeError, TypeError, KeyError):
            # A lookup returned None or the link has no 'href': drop only
            # this badge instead of discarding everything collected so far.
            continue
        badges.append(entry)
    return badges
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment