Skip to content

Instantly share code, notes, and snippets.

@mpereira
Created May 12, 2024 21:55
Show Gist options
  • Save mpereira/053b2497a7a33aae016dbac2405b5ecd to your computer and use it in GitHub Desktop.
Save mpereira/053b2497a7a33aae016dbac2405b5ecd to your computer and use it in GitHub Desktop.
import json
import os
import urllib.parse
import requests
# Endpoint that scrape() sends its GET request to; the request parameters are
# appended to it as a query string.
WEBSCRAPERAPI_BASE_URL = "https://api.webscraperapi.ai/v1/scrape"
def scrape(
    url: str,
    prompt: str,
    narrowing_css_selector: str,
    timeout: float = 120.0,
) -> requests.Response:
    """Send one scrape request to the WebScraperAPI endpoint.

    The request is a GET to ``WEBSCRAPERAPI_BASE_URL`` with every option passed
    as a query-string parameter. ``use_proxy`` is always ``"premium"`` and
    ``render_js`` is always ``"true"``.

    Args:
        url: Address of the page to scrape, sent as the ``url`` parameter.
        prompt: Instruction text sent as the ``prompt`` parameter.
        narrowing_css_selector: CSS selector sent as the
            ``narrowing_css_selector`` parameter.
        timeout: Seconds to wait for the API before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled
            connection.

    Returns:
        The raw ``requests.Response``. The HTTP status is not checked here;
        callers decide how to handle error responses.

    Raises:
        RuntimeError: If the ``WEBSCRAPERAPI_API_KEY`` environment variable is
            not set.
        requests.exceptions.RequestException: If the request fails or times
            out.
    """
    api_key = os.getenv("WEBSCRAPERAPI_API_KEY")
    if api_key is None:
        # Fail early with an actionable message instead of the opaque
        # TypeError that percent-encoding ``None`` would raise.
        raise RuntimeError(
            "The WEBSCRAPERAPI_API_KEY environment variable is not set"
        )

    query_params = {
        "api_key": api_key,
        "url": url,
        "narrowing_css_selector": narrowing_css_selector,
        "prompt": prompt,
        "use_proxy": "premium",
        "render_js": "true",
    }
    # ``quote`` with "/" kept safe reproduces the encoding previously built by
    # hand, so the request URL on the wire is unchanged.
    query_string = urllib.parse.urlencode(
        query_params,
        safe="/",
        quote_via=urllib.parse.quote,
    )
    return requests.get(
        f"{WEBSCRAPERAPI_BASE_URL}?{query_string}",
        timeout=timeout,
    )
################################################################################
# 1. Scrape asset store categories from Unreal Engine Marketplace ##############
################################################################################
url = "https://www.unrealengine.com/marketplace/en-US/store"
# Selector forwarded to the API as the narrowing_css_selector parameter.
narrowing_css_selector = ".store-top-categories ul"
prompt = 'Return a JSON array of objects with the "name" and "url" keys. Make sure the "url" is an absolute URL.'
categories_response = scrape(url, prompt, narrowing_css_selector)
# Decode the response body once and reuse the result for printing and indexing.
categories = categories_response.json()
print(json.dumps(categories, indent=4))
# Show the third entry, which is the category scraped in step 2.
print(categories[2])
################################################################################
# 2. Scrape asset list items in Unreal Engine Marketplace asset store category #
################################################################################
# Scrape the page of the third category returned by step 1.
url = categories[2]["url"]
# Selector forwarded to the API as the narrowing_css_selector parameter.
narrowing_css_selector = "article.asset"
prompt = (
    'Return a JSON array of objects with the "creatorName", "creatorUrl", '
    '"assetUrl", "assetStatus", "assetRating", "assetPrice" and '
    '"assetDetails" keys. Make sure all URLs are absolute URLs relative to '
    'https://www.unrealengine.com/marketplace/en-US/store'
)
category_assets_response = scrape(
    url=url,
    prompt=prompt,
    narrowing_css_selector=narrowing_css_selector,
)
# Keep the decoded payload around: step 3 reads the first item's "assetUrl".
asset_list_items = category_assets_response.json()
print(json.dumps(asset_list_items, indent=4))
################################################################################
# 3. Scrape detailed asset in Unreal Engine Marketplace asset ##################
################################################################################
# Scrape the page of the first asset listed in step 2.
url = asset_list_items[0]["assetUrl"]
# Selector forwarded to the API as the narrowing_css_selector parameter.
narrowing_css_selector = ".asset-details-container"
prompt = (
    'Return a JSON array of objects with the "creatorName", "creatorUrl", '
    '"assetUrl", "assetStatus", "assetRating", "assetPrice", "assetDetails", '
    '"assetSupportedPlatforms", "assetSupportedEngineVersions", '
    '"assetDownloadType", "assetDescription" and "assetTags" keys. '
    'Make sure all URLs are absolute URLs'
)
asset_response = scrape(
    url=url,
    prompt=prompt,
    narrowing_css_selector=narrowing_css_selector,
)
asset = asset_response.json()
print(json.dumps(asset, indent=4))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment