Created
September 27, 2023 13:12
-
-
Save kaenova/a8e7d4ebed392289d59490bb64683a81 to your computer and use it in GitHub Desktop.
Example of Glints Data Scraping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import csv | |
| import requests | |
| from bs4 import BeautifulSoup | |
# --- Scrape Glints job listings page by page until `num_data` records ---
csv_file_path = 'jobs.csv'
num_data = 500      # how many job records to collect before stopping
page_number = 1     # Glints paginates via a ?page= query parameter
job_data = []       # list of {"Job Title", "Company", "Location"} dicts

while len(job_data) < num_data:
    # Build the URL for the current results page.
    url = f"https://glints.com/id/lowongan-kerja?page={page_number}"

    # Fetch the page.  The timeout prevents the loop from hanging forever
    # on a stalled connection; raise_for_status surfaces HTTP errors
    # (403/429/5xx) instead of silently parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # The job cards live inside the Next.js root element.  Guard against
    # None: without this, results.find_all() raises AttributeError when
    # the page layout changes or an error page comes back.
    results = soup.find(id="__next")
    if results is None:
        break

    # NOTE(review): these hashed CSS class names are build artifacts that
    # change whenever Glints redeploys — confirm selectors before reuse.
    job_elements = results.find_all("div", class_="JobCardsc__JobcardContainer-sc-hmqj50-0 kWccWU CompactOpportunityCardsc__CompactJobCardWrapper-sc-dkg8my-0 kwAlsu compact_job_card")

    # No cards on this page means we ran past the last page (or were
    # blocked).  Without this guard the while-loop would spin forever,
    # since the commented-out pagination check never runs.
    if not job_elements:
        break

    for job_element in job_elements:
        # Guard each field lookup: a card missing one element is skipped
        # instead of crashing the whole run with an AttributeError.
        title_el = job_element.find("h3", class_="CompactOpportunityCardsc__JobTitle-sc-dkg8my-7 jJvzUD")
        company_el = job_element.find("a", class_="CompactOpportunityCardsc__CompanyLink-sc-dkg8my-8 btWyBR")
        location_el = job_element.find("span", class_="CompactOpportunityCardsc__HierarchicalLocationSpan-sc-dkg8my-26 gWoWBv")
        if title_el is None or company_el is None or location_el is None:
            continue

        # Add the job information to the job_data list.
        job_data.append({
            "Job Title": title_el.text.strip(),
            "Company": company_el.text.strip(),
            "Location": location_el.text.strip(),
        })

        # Stop as soon as the desired number of records is reached.
        if len(job_data) >= num_data:
            break

    print(page_number)

    # Increment the page number for the next iteration.
    page_number += 1
# --- Persist the scraped records to CSV ---
# encoding="utf-8" is explicit so Indonesian job titles / locations with
# non-ASCII characters don't raise UnicodeEncodeError on platforms whose
# default encoding is not UTF-8 (e.g. cp1252 on Windows).  newline="" is
# required by the csv module to avoid blank rows on Windows.
with open(csv_file_path, "w", newline="", encoding="utf-8") as csvfile:
    # Field names must match the keys used when building job_data above.
    writer = csv.DictWriter(csvfile, fieldnames=["Job Title", "Company", "Location"])
    # Write the header row, then one row per scraped job.
    writer.writeheader()
    writer.writerows(job_data)

print("Data saved to jobs.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment