Skip to content

Instantly share code, notes, and snippets.

@kaenova
Created September 27, 2023 13:12
Show Gist options
  • Save kaenova/a8e7d4ebed392289d59490bb64683a81 to your computer and use it in GitHub Desktop.
Example of Glints Data Scraping
import csv
import requests
from bs4 import BeautifulSoup
# --- Scrape Glints job listings page by page until num_data records exist ---
csv_file_path = "jobs.csv"  # output path consumed by the CSV-writing step below
num_data = 500              # stop once this many job records are collected
page_number = 1
job_data = []

while len(job_data) < num_data:
    # Build the listing URL for the current results page.
    url = f"https://glints.com/id/lowongan-kerja?page={page_number}"

    # Fetch the page. A timeout keeps a stalled connection from hanging the
    # loop forever (the original request had none), and raise_for_status
    # stops us from silently parsing an HTTP error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # NOTE(review): these class names are emitted by styled-components and
    # change whenever Glints redeploys — expect to refresh them periodically.
    results = soup.find(id="__next")
    if results is None:
        break  # page structure changed entirely; nothing to scrape
    job_elements = results.find_all("div", class_="JobCardsc__JobcardContainer-sc-hmqj50-0 kWccWU CompactOpportunityCardsc__CompactJobCardWrapper-sc-dkg8my-0 kwAlsu compact_job_card")

    if not job_elements:
        # No job cards on this page: either we ran past the last page or the
        # markup changed. Stop here — the original looped forever in this
        # case because the pagination check was commented out.
        break

    for job_element in job_elements:
        title_el = job_element.find("h3", class_="CompactOpportunityCardsc__JobTitle-sc-dkg8my-7 jJvzUD")
        company_el = job_element.find("a", class_="CompactOpportunityCardsc__CompanyLink-sc-dkg8my-8 btWyBR")
        location_el = job_element.find("span", class_="CompactOpportunityCardsc__HierarchicalLocationSpan-sc-dkg8my-26 gWoWBv")

        # Skip incomplete cards. The original called .text on the result of
        # find() unconditionally and crashed with AttributeError whenever a
        # card was missing one of these elements.
        if title_el is None or company_el is None or location_el is None:
            continue

        job_data.append({
            "Job Title": title_el.text.strip(),
            "Company": company_el.text.strip(),
            "Location": location_el.text.strip(),
        })

        # Stop as soon as the requested number of records is reached.
        if len(job_data) >= num_data:
            break

    print(page_number)  # progress indicator: last page fetched
    page_number += 1
# Open the CSV file in write mode
with open(csv_file_path, "w", newline="") as csvfile:
# Create a CSV writer
writer = csv.DictWriter(csvfile, fieldnames=["Job Title", "Company", "Location"])
# Write the header row
writer.writeheader()
# Write the job data rows
writer.writerows(job_data)
print("Data saved to jobs.csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment