Skip to content

Instantly share code, notes, and snippets.

@romanejaquez
Last active July 29, 2024 12:27
Show Gist options
  • Save romanejaquez/4a5a7703681a0bbacdb842173d7498a2 to your computer and use it in GitHub Desktop.
Save romanejaquez/4a5a7703681a0bbacdb842173d7498a2 to your computer and use it in GitHub Desktop.
# Deploy this as a first-generation Cloud Function with Python 3.10 as its runtime
# and with an HTTP trigger; the function takes the list of profile URLs from
# Cloud Skills Boost and scrapes the necessary information from each one.
# Add a "main.py" with the code below,
# plus a requirements.txt listing the following packages:
# ---
# beautifulsoup4
# requests
# Replace the profile_urls list with the
# Cloud Skills Boost profile URLs of your attendees,
# and then just deploy the function.
# main.py
import json
import logging

import functions_framework
import requests
from bs4 import BeautifulSoup
# Public Cloud Skills Boost profile pages to scrape, one URL per member.
# Edit this list so it contains the profiles of your own members; a future
# version is meant to pull it from Firestore instead of hard-coding it here.
profile_urls = [
    "https://www.cloudskillsboost.google/public_profiles/13d2fc34-8aff-44b4-adcc-77032ccf8cb2",
    "https://www.cloudskillsboost.google/public_profiles/e770c4c3-5a42-495c-80cf-f9db5b4371e4",
    "https://www.cloudskillsboost.google/public_profiles/1978d95d-b6af-4a61-9961-761e2f7cd45f",
    "https://www.cloudskillsboost.google/public_profiles/82344445-515c-44f0-86ae-68d8d000e328",
]
@functions_framework.http
def get_badges(request):
    """HTTP entry point: scrape every configured profile and return JSON.

    Each URL in ``profile_urls`` is fetched and parsed; the badges and the
    profile details of every usable page are collected into one list.

    Args:
        request: The incoming HTTP request. It is not inspected; every call
            returns the data for all configured profiles.

    Returns:
        A ``(body, status, headers)`` tuple: the JSON-encoded list of
        ``{'badges': [...], 'profile': {...}}`` entries, status 200, and
        headers declaring a JSON body that any origin may read.
    """
    payload = []
    for url in profile_urls:
        try:
            # Without a timeout a single stalled profile page would block
            # the whole function until the platform kills it.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
        except requests.RequestException as err:
            # One unreachable profile must not fail the response for
            # every other member.
            logging.warning('Skipping %s: request failed (%s)', url, err)
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        try:
            badges = process_badges(soup)
            user_profile = process_user(soup, url)
        except (AttributeError, TypeError, KeyError) as err:
            # Raised when the page lacks the expected profile markup.
            logging.warning('Skipping %s: unexpected page layout (%s)', url, err)
            continue

        # An empty name marks a page that did not yield a usable profile.
        if user_profile['name'] != '':
            payload.append({
                'badges': badges,
                'profile': user_profile
            })

    return json.dumps(payload), 200, {
        'Access-Control-Allow-Origin': '*',
        'Content-Type': 'application/json'
    }
def process_user(soup, url):
    """Extract the public profile details from a parsed profile page.

    Args:
        soup: The parsed profile page; it is queried with ``find``.
        url: Address the page came from; stored as ``profile_link``.

    Returns:
        A dict with the keys ``name``, ``member_since``, ``avatar`` and
        ``profile_link``. When the expected profile markup is missing the
        record is returned with an empty ``name`` (and empty ``avatar``)
        instead of raising, so a caller can detect an unusable page.
    """
    user = {
        'name': '',
        'member_since': '',
        'avatar': '',
        'profile_link': str(url)
    }

    # Each lookup may come back empty (private profile, error page, changed
    # layout); stop early rather than dereferencing None.
    root_container = soup.find('main', attrs={'id': 'jump-content'})
    if root_container is None:
        return user

    avatar_container = root_container.find('div', {'class': 'text--center'})
    if avatar_container is None:
        return user

    name_tag = avatar_container.find('h1', {'class': 'ql-display-small'})
    if name_tag is None:
        return user
    user['name'] = name_tag.text.strip()

    member_since_tag = avatar_container.find('p', {'class': 'ql-body-large'})
    if member_since_tag is not None:
        user['member_since'] = member_since_tag.text.strip()

    avatar_tag = avatar_container.find('ql-avatar', {'class': 'profile-avatar'})
    try:
        user['avatar'] = avatar_tag['src']
    except (TypeError, KeyError):
        # No avatar element (TypeError on None) or no ``src`` attribute
        # (KeyError): fall back to the generic anonymous avatar.
        user['avatar'] = 'https://www.gstatic.com/images/branding/product/2x/avatar_anonymous_512dp.png'
    return user
def process_badges(soup):
    """Collect the badges listed on a parsed profile page.

    Args:
        soup: The parsed profile page; it is queried with ``find``.

    Returns:
        A list of dicts, one per badge, each with the keys ``badgeTitle``,
        ``link`` and ``earned``. The list is empty when the page has no
        badge section. A badge whose markup is incomplete is skipped
        without discarding the badges that parsed correctly.
    """
    profile_badges_container = soup.find('div', attrs={'class': 'profile-badges'})
    if profile_badges_container is None:
        return []

    profile_badges_list = []
    for badge in profile_badges_container.find_all('div', {'class': 'profile-badge'}):
        try:
            badge_dic = {
                'badgeTitle': badge.find('span', {'class': 'ql-title-medium'}).text.strip(),
                'link': badge.find('a', {'class': 'badge-image'})['href'],
                'earned': badge.find('span', {'class': 'ql-body-medium'}).text.strip(),
            }
        except (AttributeError, TypeError, KeyError):
            # A missing element (None) or a missing href attribute: drop
            # only this badge and keep going.
            continue
        profile_badges_list.append(badge_dic)
    return profile_badges_list
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment