Skip to content

Instantly share code, notes, and snippets.

@vzakharov
Last active October 26, 2024 13:59
Show Gist options
  • Save vzakharov/50ce3265a019eacf7ce9c82e00be442d to your computer and use it in GitHub Desktop.
Save vzakharov/50ce3265a019eacf7ce9c82e00be442d to your computer and use it in GitHub Desktop.
Snippet from actual code to get Open Graph (og) tags from Suno. Doesn’t include the Player model, but you’ll get the gist (lol).
import re
from typing import cast
import requests
from bs4 import BeautifulSoup, Tag
from elo.models import Player
from rand.errors import BadRequestError
from .models import Player
def parse_og_tags(url: str, soup: BeautifulSoup | None = None):
    """Collect the page's ``<meta property=... content=...>`` tags into a dict.

    Every ``<meta>`` element whose ``property`` and ``content`` attributes are
    both strings is included. A leading ``og:`` is stripped from the property
    name (``og:title`` -> ``title``); properties without that prefix are kept
    under their full name. When the same key occurs more than once, the last
    occurrence in document order wins.

    Args:
        url: Page address; only used to fetch the document when ``soup`` is
            not supplied.
        soup: Already-parsed document. Pass it to avoid a second download.

    Returns:
        A ``dict`` mapping property names to their content, or ``None`` when
        no matching ``<meta>`` tag was found.
    """
    if soup is None:
        soup = get_soup(url)

    og_tags: dict[str, str | None] = {}
    for meta in soup.find_all('meta'):
        name = meta.get('property')
        content = meta.get('content')
        # Tags lacking either attribute (or carrying non-string values) are
        # skipped.
        if isinstance(name, str) and isinstance(content, str):
            # Strip only a *leading* 'og:' so that names which merely contain
            # the substring (e.g. 'blog:title') are left intact.
            og_tags[name.removeprefix('og:')] = content

    # Callers test the result for truthiness, so an empty dict becomes None.
    return og_tags or None
def get_soup(url: str, timeout: float = 10.0):
    """Download ``url`` and parse the response body with ``html.parser``.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` waits indefinitely, so a stalled server
            would hang the caller.

    Returns:
        A ``BeautifulSoup`` document built from the response text.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html.parser')
def update_player_og_tags(player: Player, og_tags: dict[str, str | None], ignore_parsed_fields: list[str] = []):
map = dict(
name=['title'],
description=['description'],
image_url=['image'],
audio_url=['audio', 'audio:url']
)
for field, tags in map.items():
if field in ignore_parsed_fields:
continue
for tag in tags:
value = og_tags.get(tag)
if value and not getattr(player, field):
setattr(player, field, value)
def parse_url(player: Player):
    """Populate ``player`` from the page at ``player.url`` and save it.

    Three sources are applied in order, each skipping any field listed in
    ``player.project.ignore_parsed_fields``:

    1. Open Graph ``<meta>`` tags (only fill fields that are still empty).
    2. ``player.project.parsing_regexes``: field -> pattern, searched in the
       page HTML; the first capture group is stored.
    3. ``player.project.parsing_selectors``: field -> mapping with a
       ``'selector'`` CSS selector and an optional ``'attribute'``; the
       attribute value, or the element text when no attribute is given, is
       stored.

    Afterwards ``player.url_parsed`` is set to ``True`` and ``player.save()``
    is called.

    Args:
        player: The player to update; must have a non-empty ``url``.

    Returns:
        ``player.data`` as it stands after saving.

    Raises:
        BadRequestError: If the player has no URL.
    """
    if not player.url:
        raise BadRequestError('Player does not have a URL')

    project = player.project
    soup = get_soup(player.url)

    # NOTE(review): the regex/selector settings are guarded against being
    # empty below, so the ignore list is assumed to be nullable as well;
    # normalise it so the membership tests cannot fail on None.
    ignore_parsed_fields = list(project.ignore_parsed_fields or [])

    og_tags = parse_og_tags(player.url, soup)
    if og_tags:
        update_player_og_tags(player, og_tags, ignore_parsed_fields)

    regexes = project.parsing_regexes
    if regexes:
        # Serialise the document only when there is a pattern to run on it.
        html = str(soup)
        for field, regex in regexes.items():
            if field in ignore_parsed_fields:
                continue
            found = re.search(regex, html)
            if found:
                # Patterns are expected to define at least one capture group.
                setattr(player, field, found.group(1))

    selectors = project.parsing_selectors
    if selectors:
        for field, selector in selectors.items():
            if field in ignore_parsed_fields:
                continue
            element = soup.select_one(selector['selector'])
            if element:
                attribute = selector.get('attribute')
                value = element.get(attribute) if attribute else element.text
                setattr(player, field, value)

    player.url_parsed = True
    player.save()
    return player.data
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment