Skip to content

Instantly share code, notes, and snippets.

@kundancool
Created August 21, 2017 13:34
Show Gist options
  • Save kundancool/ca1fcd38851f600696f37ccf0d657d49 to your computer and use it in GitHub Desktop.
Save kundancool/ca1fcd38851f600696f37ccf0d657d49 to your computer and use it in GitHub Desktop.
[PHP] dawnwing.co.za Tracking scraper
<?php
/**
* Tracking package via cURL from www.dawnwing.co.za
*
* @author Kundan K Singh <[email protected]>
*/
// Endpoint that receives the waybill search form POST.
const DOMAIN_REQUEST_URL = 'http://www.dawnwing.co.za/business-tools/online-parcel-tracking/';

// HTML parsing helper library; provides str_get_html() used by the functions below.
include_once 'simple_html_dom.php';

// Have libxml collect parse errors internally instead of emitting PHP warnings.
libxml_use_internal_errors(true);
/*
|--------------------------------------------------------------------------
| Helper functions
|--------------------------------------------------------------------------
*/
/**
 * Turn a list of "Label : Value" text lines into an associative array.
 *
 * Only lines containing the exact " : " separator are considered; a bare
 * colon (for example inside a time such as "12:30") is not a separator.
 * The line is split on the first separator only, so a value that itself
 * contains " : " is kept intact.
 *
 * @param array $html List of text lines to inspect.
 * @return array Map of trimmed label => trimmed value. Lines without the
 *               separator, or with an empty label, are skipped.
 */
function getDelimiterValues($html)
{
    $response = array();
    foreach ($html as $line) {
        $line = trim($line);
        if (strpos($line, ' : ') === false) {
            continue;
        }
        // Limit of 2: everything after the first separator belongs to the value.
        $parts = explode(' : ', $line, 2);
        $label = trim($parts[0]);
        if ($label === '') {
            continue;
        }
        $response[$label] = trim($parts[1]);
    }
    return $response;
}
/**
 * Collect the inner HTML of every <td> element found in an HTML fragment.
 *
 * The cells are taken from the parsed DOM rather than from a count of "<td"
 * substrings, so the number of iterations always matches the number of
 * elements that actually exist.
 *
 * @param string $html HTML fragment to parse (callers pass the inner HTML of a table row).
 * @return array List of innertext strings in document order; empty when the
 *               fragment cannot be parsed or contains no <td> element.
 */
function extractTd($html)
{
    $response = array();
    $content = str_get_html($html);
    if (!$content) {
        // Nothing could be parsed from the fragment.
        return $response;
    }
    // find() without an index returns all matching nodes in a single pass.
    foreach ($content->find('td') as $cell) {
        $response[] = $cell->innertext;
    }
    return $response;
}
/**
 * Collect the inner HTML of every <th> element found in an HTML fragment.
 *
 * The cells are taken from the parsed DOM rather than from a count of "<th"
 * substrings; a substring count would also match tags such as <thead> and
 * lead to look-ups of header cells that do not exist.
 *
 * @param string $html HTML fragment to parse (callers pass the inner HTML of a table row).
 * @return array List of innertext strings in document order; empty when the
 *               fragment cannot be parsed or contains no <th> element.
 */
function extractTh($html)
{
    $response = array();
    $content = str_get_html($html);
    if (!$content) {
        // Nothing could be parsed from the fragment.
        return $response;
    }
    // find() without an index returns all matching nodes in a single pass.
    foreach ($content->find('th') as $cell) {
        $response[] = $cell->innertext;
    }
    return $response;
}
/**
 * Build a header => value map from the delivery information table.
 *
 * The header cells are read from the second <tr> (index 1) and the values
 * from the third <tr> (index 2) of the fragment. Row 0 is not used —
 * presumably a title row; confirm against the live page markup.
 *
 * @param string $html HTML fragment containing the delivery table.
 * @return array Map of header innertext => cell innertext. A header with no
 *               matching cell maps to null. Empty when the fragment cannot
 *               be parsed or either expected row is missing.
 */
function getTableDelivery($html)
{
    $response = array();
    $content = str_get_html($html);
    if (!$content) {
        return $response;
    }
    $header_row = $content->find('tr', 1);
    $value_row = $content->find('tr', 2);
    if (!$header_row || !$value_row) {
        // The table does not have the expected rows; nothing to pair up.
        return $response;
    }
    $th = extractTh($header_row->innertext);
    $td = extractTd($value_row->innertext);
    foreach ($th as $key => $label) {
        // Guard against a value row that has fewer cells than the header row.
        $response[$label] = isset($td[$key]) ? $td[$key] : null;
    }
    return $response;
}
/**
 * Convert the detailed track & trace table into a list of keyed rows.
 *
 * The first <tr> supplies the column headers (its <th> cells); every
 * following <tr> becomes one entry whose keys are those headers and whose
 * values are the row's <td> cells, matched by position.
 *
 * @param string $html HTML fragment containing the table rows.
 * @return array Zero-based list of header => cell maps, one per data row.
 *               A header with no matching cell maps to null. Empty when the
 *               fragment cannot be parsed or contains no rows.
 */
function getDetailedTrackTrace($html)
{
    $response = array();
    $content = str_get_html($html);
    if (!$content) {
        return $response;
    }
    // Fetch all rows once instead of re-running the selector for each index.
    $rows = $content->find('tr');
    if (count($rows) === 0) {
        return $response;
    }
    $th = extractTh($rows[0]->innertext);
    // array_slice() re-indexes from 0, so the first data row becomes entry 0.
    foreach (array_slice($rows, 1) as $row_index => $row) {
        $td = extractTd($row->innertext);
        foreach ($th as $key => $label) {
            // Guard against rows that have fewer cells than there are headers.
            $response[$row_index][$label] = isset($td[$key]) ? $td[$key] : null;
        }
    }
    return $response;
}
/*
|--------------------------------------------------------------------------
| Prepare request data and fetch tracking information
|--------------------------------------------------------------------------
*/
// Entry point: look up the waybill given in the "id" request parameter and
// print the scraped tracking details as JSON. An array-valued "id" (id[]=...)
// is treated the same as a missing one because it cannot be sent as a waybill.
if (isset($_REQUEST['id']) && is_string($_REQUEST['id'])) {
    $waybill_no = $_REQUEST['id'];
    // Prepare request data
    $post_data = array(
        'WaybillNo' => $waybill_no,
        'parcel-search' => 'go',
    );
    // Get tracking information from server.
    // NOTE(review): passing an array to CURLOPT_POSTFIELDS sends
    // multipart/form-data, and on PHP < 5.6 a value starting with "@" is
    // treated as a file upload. Kept as-is to preserve the request format;
    // consider http_build_query() if the endpoint accepts urlencoded bodies.
    $ch = curl_init();
    curl_setopt($ch, CURLOPT_URL, DOMAIN_REQUEST_URL);
    curl_setopt($ch, CURLOPT_POST, true);
    curl_setopt($ch, CURLOPT_POSTFIELDS, $post_data);
    curl_setopt($ch, CURLOPT_HEADER, false);
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    // Bound the request so an unresponsive upstream cannot hang this script.
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
    curl_setopt($ch, CURLOPT_TIMEOUT, 30);
    $html_dom = curl_exec($ch);
    // curl_close() has no effect from PHP 8.0 on; only call it where it
    // actually releases the handle.
    if (PHP_VERSION_ID < 80000) {
        curl_close($ch);
    }

    /*
    |--------------------------------------------------------------------------
    | Filter Tracking data
    |--------------------------------------------------------------------------
    */
    $http_status = 200;
    $response_data = array();

    if (!is_string($html_dom) || $html_dom === '') {
        // Transport failure (curl_exec() returned false) or an empty body.
        $http_status = 502;
        $response_data = array('message' => 'Unable to retrieve tracking information');
    } else {
        $page = str_get_html($html_dom);
        $track_node = $page ? $page->find('div#track-trace', 0) : null;
        $html = $track_node ? str_get_html($track_node->innertext) : false;

        if (!$html) {
            // The page has no usable tracking block — presumably an unknown
            // waybill, or the upstream page layout changed.
            $http_status = 404;
            $response_data = array('message' => 'No tracking information found');
        } else {
            // The first two <div>s hold "Label : Value" summary lines.
            $status = array();
            foreach (array(0, 1) as $div_index) {
                $summary_div = $html->find('div', $div_index);
                if ($summary_div) {
                    $summary_lines = explode("\n", trim(strip_tags($summary_div->innertext)));
                    $status = array_merge($status, getDelimiterValues($summary_lines));
                }
            }
            $response_data['status'] = $status;

            // The third <div> wraps the delivery information table.
            $delivery_div = $html->find('div', 2);
            $response_data['delivery_information'] = $delivery_div
                ? getTableDelivery($delivery_div->innertext)
                : array();

            // The second <table> holds the detailed track & trace rows.
            $trace_table = $html->find('table', 1);
            $response_data['delivery_trace'] = $trace_table
                ? getDetailedTrackTrace($trace_table->innertext)
                : array();
        }
    }

    if ($http_status !== 200) {
        http_response_code($http_status);
    }
    header('Cache-Control: no-cache, must-revalidate');
    header('Expires: Mon, 26 Jul 1997 05:00:00 GMT');
    header('Content-type: application/json');
    echo json_encode($response_data, JSON_PRETTY_PRINT);
} else {
    // Declare the body as JSON here too, so clients can parse the message.
    header('Content-type: application/json');
    echo json_encode(['message' => 'Sorry no ID Specified'], JSON_PRETTY_PRINT);
    exit();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment