Skip to content

Instantly share code, notes, and snippets.

@VIRUXE
Created January 6, 2023 22:01
Show Gist options
  • Save VIRUXE/857296b465d6f0e34299dd37c6506cd6 to your computer and use it in GitHub Desktop.
Save VIRUXE/857296b465d6f0e34299dd37c6506cd6 to your computer and use it in GitHub Desktop.
Scrape GTABase's GTA V Vehicle URLs using their search form
import requests
import time
import json
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
# Scrape GTABase vehicle page URLs: type every model id listed in models.ini
# into the site's search form, collect the links of the result cards, and save
# the unique URLs to gtabase_model_urls.json.
base_url = "https://www.gtabase.com/grand-theft-auto-v/vehicles/"

options = Options()
options.headless = False
options.add_argument("--window-size=1440,900")
# Raw string: "\c" is not a valid escape sequence, so the plain literal only
# produced the intended path by accident and warns on newer Python versions.
# NOTE(review): `executable_path` is kept as in the original; confirm it is
# still accepted by the installed Selenium version.
driver = webdriver.Chrome(options=options, executable_path=r"C:\chromedriver\chromedriver.exe")

urls = []    # unique result hrefs, in discovery order
failed = []  # model ids for which no usable result was found

# try/finally guarantees the browser is closed even if a lookup raises
# part-way through the run.
try:
    driver.get(base_url)

    with open("models.ini", "r") as f:
        # One model id per line; blank lines are skipped so they do not turn
        # into empty searches.
        models = [line.strip() for line in f if line.strip()]

    for current_model, model in enumerate(models, start=1):
        print(f"\n[{len(urls)}] ({current_model}/{len(models)}) Searching for {model}...")

        # The model id search input is identified by its name attribute.
        search_input = driver.find_element(by="name", value="attr.ct348.value")
        search_input.clear()
        search_input.send_keys(model)
        # u'\ue007' is the WebDriver key code for Enter; it submits the search.
        search_input.send_keys(u'\ue007')

        # Fixed pause; presumably gives the page time to refresh the results.
        time.sleep(1)

        # Scroll to a vertical offset of 1000px; presumably this brings the
        # result list into view -- TODO confirm.
        driver.execute_script("window.scrollTo(0, 1000);")

        # Each search result is rendered in an element with the class "product".
        divs = driver.find_elements(by="class name", value="product")

        if not divs:
            # No results at all for this model id.
            if model not in failed:
                failed.append(model)
            print(f"\033[91mFailed to find {model}\033[0m")
            continue

        print(f"\033[92mFound {len(divs)} result(s) for {model}\033[0m")

        for div in divs:
            # find_elements returns an empty list when nothing matches, whereas
            # find_element raises -- so this is the check that can actually
            # detect a result card without a link.
            anchors = div.find_elements(by="tag name", value="a")

            if not anchors:
                if model not in failed:
                    failed.append(model)
                print(div.text)
                print(f"\033[91mFailed to find {model}\033[0m")
                continue

            href = anchors[0].get_attribute("href")

            if href not in urls:
                urls.append(href)
                print(f"\033[1m{href}\033[0m")
            else:
                print(f"\033[93m{href} already in list\033[0m")

    # Save URLs to a JSON file
    with open("gtabase_model_urls.json", "w") as f:
        json.dump(urls, f)

    # Report the models that produced no usable result so they can be retried.
    if failed:
        print(f"\n\033[91m{len(failed)} model(s) without a result: {', '.join(failed)}\033[0m")
finally:
    driver.quit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment