Created
May 12, 2024 21:55
-
-
Save mpereira/053b2497a7a33aae016dbac2405b5ecd to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import urllib.parse | |
import requests | |
# Endpoint that every scrape request in this script is sent to.
WEBSCRAPERAPI_BASE_URL = "https://api.webscraperapi.ai/v1/scrape"


def scrape(
    url: str,
    prompt: str,
    narrowing_css_selector: str,
    *,
    timeout: float = 120.0,
) -> requests.Response:
    """Send one scrape request to WEBSCRAPERAPI_BASE_URL and return the response.

    The API key is read from the ``WEBSCRAPERAPI_API_KEY`` environment
    variable. Every request is sent with ``use_proxy=premium`` and
    ``render_js=true``.

    Args:
        url: Address of the page to scrape (sent as the ``url`` parameter).
        prompt: Instruction text forwarded as the ``prompt`` parameter.
        narrowing_css_selector: CSS selector forwarded as the
            ``narrowing_css_selector`` parameter (presumably restricts the
            part of the page the service looks at -- confirm with API docs).
        timeout: Seconds to wait for the service before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The unmodified ``requests.Response``; the HTTP status is not checked
        here, callers decide how to handle errors.

    Raises:
        RuntimeError: If ``WEBSCRAPERAPI_API_KEY`` is unset or empty.
    """
    api_key = os.getenv("WEBSCRAPERAPI_API_KEY")
    if not api_key:
        # Without this check a missing key surfaces as an opaque TypeError
        # from the URL-quoting step.
        raise RuntimeError(
            "The WEBSCRAPERAPI_API_KEY environment variable is not set"
        )

    query_params = {
        "api_key": api_key,
        "url": url,
        "narrowing_css_selector": narrowing_css_selector,
        "prompt": prompt,
        "use_proxy": "premium",
        "render_js": "true",
    }
    # quote_via=quote keeps spaces encoded as %20 (not "+"), matching the
    # percent-encoding style the request used before.
    query_string = urllib.parse.urlencode(
        query_params, quote_via=urllib.parse.quote
    )
    return requests.get(
        f"{WEBSCRAPERAPI_BASE_URL}?{query_string}", timeout=timeout
    )
################################################################################
# 1. Scrape asset store categories from Unreal Engine Marketplace ##############
################################################################################
# Request the store front page and ask for its top-category list as JSON
# objects with "name" and "url" keys.
url = "https://www.unrealengine.com/marketplace/en-US/store"
narrowing_css_selector = ".store-top-categories ul"
prompt = 'Return a JSON array of objects with the "name" and "url" keys. Make sure the "url" is an absolute URL.'
categories_response = scrape(url, prompt, narrowing_css_selector)
# Stop on an HTTP error instead of treating an error body as the category list.
categories_response.raise_for_status()
categories = categories_response.json()
# Print the already-parsed payload; no need to decode the body a second time.
print(json.dumps(categories, indent=4))
# Show the entry at index 2, the category whose "url" is scraped in step 2.
print(categories[2])
################################################################################
# 2. Scrape asset list items in Unreal Engine Marketplace asset store category #
################################################################################
# Follow the "url" of the category at index 2 from step 1 and request one JSON
# object per asset card on that page.
url = categories[2]["url"]
narrowing_css_selector = "article.asset"
prompt = 'Return a JSON array of objects with the "creatorName", "creatorUrl", "assetUrl", "assetStatus", "assetRating", "assetPrice" and "assetDetails" keys. Make sure all URLs are absolute URLs relative to https://www.unrealengine.com/marketplace/en-US/store'
category_assets_response = scrape(url, prompt, narrowing_css_selector)
# Stop on an HTTP error instead of treating an error body as the asset list.
category_assets_response.raise_for_status()
asset_list_items = category_assets_response.json()
print(json.dumps(asset_list_items, indent=4))
################################################################################
# 3. Scrape detailed asset in Unreal Engine Marketplace asset ##################
################################################################################
# Follow the "assetUrl" of the first list item from step 2 and request the
# extended set of fields from that asset's detail page.
url = asset_list_items[0]["assetUrl"]
narrowing_css_selector = ".asset-details-container"
prompt = 'Return a JSON array of objects with the "creatorName", "creatorUrl", "assetUrl", "assetStatus", "assetRating", "assetPrice", "assetDetails", "assetSupportedPlatforms", "assetSupportedEngineVersions", "assetDownloadType", "assetDescription" and "assetTags" keys. Make sure all URLs are absolute URLs'
asset_response = scrape(url, prompt, narrowing_css_selector)
# Stop on an HTTP error instead of treating an error body as asset details.
asset_response.raise_for_status()
asset = asset_response.json()
print(json.dumps(asset, indent=4))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment