Created
August 23, 2025 03:07
-
-
Save JJTech0130/042109e61a6f292405187c568a6c066b to your computer and use it in GitHub Desktop.
Look up Ford parts by scraping parts.ford.com
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
import json | |
import rich | |
from bs4 import BeautifulSoup, Tag | |
# Store ID sent as `storeId` with every parts.ford.com request.
# You can pretty much pick any dealer.
DEALER_STORE_ID = 1405
# Catalog identifier; its purpose is not known (the original author was not sure either).
CATALOG_ID = 251
def get_wc_cookies():
    """Obtain the ``WC_*`` cookies that parts.ford.com hands out for a store.

    Equivalent to::

        curl -L 'https://parts.ford.com/shop/AjaxManageUserVehicles' \
            -H 'Content-Type: application/x-www-form-urlencoded' -d 'storeId=1405'

    Returns:
        A dict mapping cookie name to value for every cookie on the response
        whose name starts with ``WC_`` (expected to include the
        WC_AUTHENTICATION and WC_USERACTIVITY cookies).

    Raises:
        requests.exceptions.RequestException: if the request fails or does
            not complete within the timeout.
    """
    response = requests.post(
        'https://parts.ford.com/shop/AjaxManageUserVehicles',
        headers={'Content-Type': 'application/x-www-form-urlencoded'},
        data={'storeId': DEALER_STORE_ID},
        # Without a timeout requests waits forever on a stalled connection.
        timeout=30,
    )
    return {
        name: value
        for name, value in response.cookies.items()
        if name.startswith('WC_')
    }
def _strip_values(obj):
    """Return a copy of *obj* with every string value stripped, recursing into dicts and lists."""
    if isinstance(obj, dict):
        # Only values are stripped; keys are kept exactly as received.
        return {key: _strip_values(value) for key, value in obj.items()}
    if isinstance(obj, list):
        return [_strip_values(item) for item in obj]
    if isinstance(obj, str):
        return obj.strip()
    return obj


def _parse_usage_items(script: str):
    """Extract the ``usageItemsList = [...]`` assignment from *script* and parse it as JSON."""
    # The script first declares an empty list; drop that declaration so the
    # marker below matches the assignment that actually carries the data.
    script = script.replace('var usageItemsList = [];', '', 1)
    _, marker, remainder = script.partition('usageItemsList = [')
    if not marker:
        # Previously this case surfaced as an IndexError from split(...)[1].
        raise ValueError("Could not find usageItemsList in script")
    data = remainder.split('];', 1)[0]
    if not data:
        raise ValueError("Could not find usageItemsList in script")
    # The brackets were consumed by the split, so put them back before parsing.
    return _strip_values(json.loads("[" + data + "]"))


def search_ui_single(term: str, cookies: dict):
    """Search parts.ford.com for *term* and return the usage items of the matching part.

    Equivalent to::

        curl -L 'https://parts.ford.com/webapp/wcs/stores/servlet/en/us/SearchDisplay?storeId=1405&searchTerm=NU5Z-14G490-AT'

    Args:
        term: Part number to search for.
        cookies: Cookies to send with the request (see ``get_wc_cookies``).

    Returns:
        The parsed ``usageItemsList`` from the part page, with all string
        values stripped of surrounding whitespace.

    Raises:
        ValueError: if the search does not redirect to a single part page, or
            the expected div/script/``usageItemsList`` data cannot be found or
            parsed (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        requests.exceptions.RequestException: if the request fails or times out.
    """
    response = requests.get(
        'https://parts.ford.com/webapp/wcs/stores/servlet/en/us/SearchDisplay',
        # Let requests URL-encode the values instead of splicing them into the URL.
        params={'storeId': DEALER_STORE_ID, 'searchTerm': term},
        cookies=cookies,
        timeout=30,
    )
    redirected = response.url
    # A hit redirects to https://parts.ford.com/shop/ (single part page)
    # rather than staying on the search results page.
    if not redirected.startswith('https://parts.ford.com/shop/'):
        raise ValueError(f"Part {term} not found")

    # The data lives in the <script> inside <div id="Usages_pdp">.
    soup = BeautifulSoup(response.text, 'html.parser')
    usages_div = soup.find('div', id='Usages_pdp')
    if not isinstance(usages_div, Tag):
        raise ValueError("Could not find Usages_pdp div in the response")
    script_tag = usages_div.find('script')
    if not isinstance(script_tag, Tag):
        raise ValueError("Could not find script tag in Usages_pdp div")
    script = script_tag.string
    if not script:
        raise ValueError("Could not find script content in script tag")

    return _parse_usage_items(script)
def lookup_service_part(part: str):
    """Look up the service part number(s) corresponding to *part*.

    Equivalent to::

        curl -L 'https://www.fordservicecontent.dealerconnection.com/Ford_Content/PublicationRuntimeRefreshPTS//wiring/GetServicePartNumber?book=null&bookType=svg&Terminalpartnumber=PU5T-14H474-CFE'

    Example response::

        {'ServicePart': {'ServicePartList': [{'Prefix': ' PU5Z', 'Base': ' 14G490',
         'Suffix': 'AU ', 'ShortDesc': 'MODULE ', 'LongDesc': 'MODULE '}]}}

    Args:
        part: Part number passed as the ``Terminalpartnumber`` query parameter.

    Returns:
        A list of part numbers trimmed to e.g. ``"PU5Z-14G490-AU"``; empty when
        the response carries no service parts.

    Raises:
        ValueError: if the server does not answer with HTTP 200.
        requests.exceptions.RequestException: if the request fails or times out.
    """
    response = requests.get(
        'https://www.fordservicecontent.dealerconnection.com/Ford_Content/PublicationRuntimeRefreshPTS//wiring/GetServicePartNumber',
        # Let requests URL-encode the part number instead of splicing it into the URL.
        params={'book': 'null', 'bookType': 'svg', 'Terminalpartnumber': part},
        timeout=30,
    )
    if response.status_code != 200:
        raise ValueError(f"Failed to retrieve service part information for {part}")

    resp = response.json()
    # Either level may be missing or null; treat both the same as "no parts".
    service_part = resp.get('ServicePart') or {}
    entries = service_part.get('ServicePartList') or []
    return [
        f"{entry['Prefix'].strip()}-{entry['Base'].strip()}-{entry['Suffix'].strip()}"
        for entry in entries
    ]
def main(part: str, verbose: bool):
    """Resolve *part* to a service part if possible, search for it, and print the results.

    Args:
        part: Part number given by the user.
        verbose: When true, also dump the raw search results at the end.
    """
    # If the service-part lookup returns anything, search for its first entry instead.
    candidates = lookup_service_part(part)
    if len(candidates) > 0:
        rich.print(f"Found service parts for {part}: {candidates}")
        part = candidates[0]

    search_results = search_ui_single(part, get_wc_cookies())

    for entry in search_results:
        assert isinstance(entry, dict)

        # Print each longDescription item on its own line, else the display description.
        long_description = entry.get("longDescription")
        if long_description:
            for line in long_description:
                rich.print(line)
        else:
            rich.print(entry.get("displayDescription"))

        # Illustration URL, followed on the same line by the callout when there is one.
        illustration = entry.get("xillustration_full")
        if illustration and isinstance(illustration, str):
            rich.print("https://parts.ford.com/images/section-images/" + illustration, end=' ')
            callout = entry.get("xads_3callout")
            if callout and isinstance(callout, str):
                rich.print("(" + callout + ")")
            else:
                # Terminate the line left open by end=' ' above.
                rich.print()

        # Blank line between results.
        rich.print()

    if verbose:
        rich.print(search_results)
if __name__ == "__main__":
    # Command-line entry point: one positional part number plus an optional
    # --verbose flag, both handed straight to main().
    import argparse

    cli = argparse.ArgumentParser(description="Find Ford parts")
    cli.add_argument("part", type=str, help="The part number to search for")
    cli.add_argument("--verbose", action="store_true", help="Enable verbose output")
    options = cli.parse_args()
    main(options.part, options.verbose)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example output:
