Created
January 6, 2023 22:01
-
-
Save VIRUXE/857296b465d6f0e34299dd37c6506cd6 to your computer and use it in GitHub Desktop.
Scrape GTABase's GTA V Vehicle URLs using their search form
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape GTABase's GTA V vehicle page URLs.
#
# Each model id listed in "models.ini" (one per line) is typed into the site's
# search form; the link of every result card is collected, and the unique
# links are written to "gtabase_model_urls.json" as a JSON list.

import requests
import time
import json
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

base_url = "https://www.gtabase.com/grand-theft-auto-v/vehicles/"

options = Options()
options.headless = False
options.add_argument("--window-size=1440,900")

# Raw string so the backslashes in the Windows path are never read as escapes.
# NOTE(review): `executable_path` and `options.headless` are deprecated in
# recent Selenium 4 releases -- confirm against the installed version.
driver = webdriver.Chrome(
    options=options,
    executable_path=r"C:\chromedriver\chromedriver.exe",
)

urls = []    # unique result URLs, in discovery order
failed = []  # model ids for which no usable result was found

try:
    driver.get(base_url)

    with open("models.ini", "r") as f:
        # Skip blank lines so an empty search is never submitted.
        models = [line.strip() for line in f if line.strip()]

    for current_model, model in enumerate(models, start=1):
        print(f"\n[{len(urls)}] ({current_model}/{len(models)}) Searching for {model}...")

        # The model id search input is identified by its name attribute.
        search_input = driver.find_element(by="name", value="attr.ct348.value")
        search_input.clear()
        search_input.send_keys(model)
        # u'\ue007' is the WebDriver key code for Enter; it submits the search.
        search_input.send_keys(u'\ue007')
        # Fixed pause to give the result list time to refresh.
        time.sleep(1)

        # Scroll down to y=1000 (presumably to bring the results into view).
        driver.execute_script("window.scrollTo(0, 1000);")

        # Each search result is rendered in an element with the class "product".
        divs = driver.find_elements(by="class name", value="product")
        if not divs:
            if model not in failed:
                failed.append(model)
            print(f"\033[91mFailed to find {model}\033[0m")
            continue

        print(f"\033[92mFound {len(divs)} result(s) for {model}\033[0m")

        for div in divs:
            # find_elements returns an empty list when nothing matches, whereas
            # find_element raises, so the "no anchor" case is actually reachable.
            anchors = div.find_elements(by="tag name", value="a")
            if not anchors:
                if model not in failed:
                    failed.append(model)
                print(div.text)
                print(f"\033[91mFailed to find {model}\033[0m")
                continue

            href = anchors[0].get_attribute("href")
            if href not in urls:
                urls.append(href)
                print(f"\033[1m{href}\033[0m")
            else:
                print(f"\033[93m{href} already in list\033[0m")

    # Save URLs to a JSON file
    with open("gtabase_model_urls.json", "w") as f:
        json.dump(urls, f)

    # Report the models that could not be resolved so they can be retried.
    if failed:
        print(f"\n\033[91m{len(failed)} model(s) not found: {', '.join(failed)}\033[0m")
finally:
    # Always close the browser, even if the scrape aborts with an exception.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment