Created
September 27, 2023 13:12
-
-
Save kaenova/a8e7d4ebed392289d59490bb64683a81 to your computer and use it in GitHub Desktop.
Example of Glints Data Scraping
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import csv | |
| import requests | |
| from bs4 import BeautifulSoup | |
# --- Scrape Glints job listings page by page until `num_data` records ---
csv_file_path = 'jobs.csv'
num_data = 500      # how many job records to collect before stopping
page_number = 1     # Glints paginates via a ?page= query parameter
job_data = []       # list of {"Job Title", "Company", "Location"} dicts

while len(job_data) < num_data:
    # Build the URL for the current results page.
    url = f"https://glints.com/id/lowongan-kerja?page={page_number}"

    # Fetch the page.  The timeout prevents the loop from hanging forever
    # on a stalled connection; raise_for_status surfaces HTTP errors
    # (403/429/5xx) instead of silently parsing an error page.
    response = requests.get(url, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # The job cards live inside the Next.js root element.  Guard against
    # None: without this, results.find_all() raises AttributeError when
    # the page layout changes or an error page comes back.
    results = soup.find(id="__next")
    if results is None:
        break

    # NOTE(review): these hashed CSS class names are build artifacts that
    # change whenever Glints redeploys — confirm selectors before reuse.
    job_elements = results.find_all("div", class_="JobCardsc__JobcardContainer-sc-hmqj50-0 kWccWU CompactOpportunityCardsc__CompactJobCardWrapper-sc-dkg8my-0 kwAlsu compact_job_card")

    # No cards on this page means we ran past the last page (or were
    # blocked).  Without this guard the while-loop would spin forever,
    # since the commented-out pagination check never runs.
    if not job_elements:
        break

    for job_element in job_elements:
        # Guard each field lookup: a card missing one element is skipped
        # instead of crashing the whole run with an AttributeError.
        title_el = job_element.find("h3", class_="CompactOpportunityCardsc__JobTitle-sc-dkg8my-7 jJvzUD")
        company_el = job_element.find("a", class_="CompactOpportunityCardsc__CompanyLink-sc-dkg8my-8 btWyBR")
        location_el = job_element.find("span", class_="CompactOpportunityCardsc__HierarchicalLocationSpan-sc-dkg8my-26 gWoWBv")
        if title_el is None or company_el is None or location_el is None:
            continue

        # Add the job information to the job_data list.
        job_data.append({
            "Job Title": title_el.text.strip(),
            "Company": company_el.text.strip(),
            "Location": location_el.text.strip(),
        })

        # Stop as soon as the desired number of records is reached.
        if len(job_data) >= num_data:
            break

    print(page_number)

    # Increment the page number for the next iteration.
    page_number += 1
# --- Persist the scraped records to CSV ---
# encoding="utf-8" is explicit so Indonesian job titles / locations with
# non-ASCII characters don't raise UnicodeEncodeError on platforms whose
# default encoding is not UTF-8 (e.g. cp1252 on Windows).  newline="" is
# required by the csv module to avoid blank rows on Windows.
with open(csv_file_path, "w", newline="", encoding="utf-8") as csvfile:
    # Field names must match the keys used when building job_data above.
    writer = csv.DictWriter(csvfile, fieldnames=["Job Title", "Company", "Location"])
    # Write the header row, then one row per scraped job.
    writer.writeheader()
    writer.writerows(job_data)

print("Data saved to jobs.csv")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment