Skip to content

Instantly share code, notes, and snippets.

@Ashivudhi
Last active May 23, 2023 13:56
Show Gist options
  • Save Ashivudhi/16f90beb7b7c1f6c43f7791b4174f43c to your computer and use it in GitHub Desktop.
Save Ashivudhi/16f90beb7b7c1f6c43f7791b4174f43c to your computer and use it in GitHub Desktop.
Script for crawling job posts on the NIEIS site
<?php
namespace App\Console\Commands;
use Illuminate\Console\Command;
use DOMDocument;
use DOMXPath;
use Illuminate\Support\Facades\Log;
/**
 * Artisan command that downloads the NIEIS job search results page,
 * extracts the title and description of each job post, and writes the
 * result to `jobs.json` as pretty-printed JSON.
 */
class CrawlJobs extends Command
{
    /** Name used to invoke the command: `php artisan jobs:crawling`. */
    protected $signature = 'jobs:crawling';

    /** Short description shown in the Artisan command list. */
    protected $description = 'Crawl job posts and save as JSON';

    /**
     * Run the crawl: fetch the page, parse the job posts, write the JSON file.
     *
     * @return int Exit code: self::SUCCESS when the file was written,
     *             self::FAILURE when fetching, encoding or writing failed.
     */
    public function handle()
    {
        // URL of the job search results page
        $url = 'https://nieis.namibiaatwork.gov.na/search-results-jobs';

        $html = $this->fetchHtml($url);
        if ($html === null) {
            $this->error('Failed to fetch job listings from ' . $url);
            return self::FAILURE;
        }

        $jobs = $this->extractJobs($html);

        // Scraped text may contain invalid UTF-8; substitute instead of letting
        // json_encode() fail and silently writing an empty file.
        $json = json_encode($jobs, JSON_PRETTY_PRINT | JSON_INVALID_UTF8_SUBSTITUTE);
        if ($json === false) {
            $this->error('Failed to encode job data as JSON: ' . json_last_error_msg());
            return self::FAILURE;
        }

        // Relative path: the file is written to the current working directory.
        $file = 'jobs.json';
        if (file_put_contents($file, $json) === false) {
            $this->error('Failed to write job data to ' . $file);
            return self::FAILURE;
        }

        $this->info('Job data saved to ' . $file);
        return self::SUCCESS;
    }

    /**
     * Download the body of the given URL with cURL.
     *
     * Implemented as a method rather than a function declared inside handle():
     * a nested named function is registered globally on first execution, so a
     * second handle() call in the same process would fail with
     * "Cannot redeclare function".
     *
     * @param string $url Absolute URL to fetch.
     * @return string|null Response body, or null when the request failed or
     *                     returned an empty body (the reason is logged).
     */
    private function fetchHtml(string $url): ?string
    {
        $curl = curl_init();
        if ($curl === false) {
            Log::error('Unable to initialise cURL for ' . $url);
            return null;
        }

        curl_setopt_array($curl, [
            CURLOPT_URL => $url,
            CURLOPT_RETURNTRANSFER => true,
            CURLOPT_FOLLOWLOCATION => true,
            // Treat HTTP status >= 400 as a failure instead of parsing an error page.
            CURLOPT_FAILONERROR => true,
            // Bound the request so a stalled server cannot hang the command.
            CURLOPT_CONNECTTIMEOUT => 10,
            CURLOPT_TIMEOUT => 30,
        ]);

        $html = curl_exec($curl);

        // curl_exec() returns false on failure; an empty body is equally unusable
        // and would make DOMDocument::loadHTML() throw a ValueError.
        if (!is_string($html) || $html === '') {
            Log::error('Failed to fetch ' . $url . ': ' . curl_error($curl));
            return null;
        }

        // Log only the size; dumping the whole page floods the log file.
        // No curl_close(): it has no effect since PHP 8.0, the handle is freed
        // when it goes out of scope.
        Log::info('content returned (' . strlen($html) . ' bytes)');

        return $html;
    }

    /**
     * Parse the search results HTML and collect the job posts.
     *
     * Every `div` whose class contains "panel-heading" is treated as a job
     * post. Posts without an `h2/a` title link are skipped; a missing
     * description block yields a null description.
     *
     * @param string $html Non-empty HTML document.
     * @return list<array{title: string, description: string|null}>
     */
    private function extractJobs(string $html): array
    {
        // Real-world HTML is rarely valid; collect libxml warnings internally
        // instead of emitting them, then restore the caller's setting.
        $previousSetting = libxml_use_internal_errors(true);
        $dom = new DOMDocument();
        $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($previousSetting);

        $xpath = new DOMXPath($dom);
        $jobPosts = $xpath->query('//div[contains(@class, "panel-heading")]');

        $jobs = [];
        if ($jobPosts === false) {
            return $jobs;
        }

        foreach ($jobPosts as $jobPost) {
            $titleNode = $xpath->query('.//h2/a', $jobPost)->item(0);
            if ($titleNode === null) {
                // Not a job entry (no title link) - nothing useful to record.
                continue;
            }

            $descriptionNode = $xpath->query('.//div[@class="col-md-8"]', $jobPost)->item(0);

            $jobs[] = [
                // Markup indentation leaks into textContent, so trim it.
                'title' => trim($titleNode->textContent),
                'description' => $descriptionNode === null
                    ? null
                    : trim($descriptionNode->textContent),
            ];
        }

        return $jobs;
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment