Created
August 21, 2017 13:34
-
-
Save kundancool/ca1fcd38851f600696f37ccf0d657d49 to your computer and use it in GitHub Desktop.
[PHP] dawnwing.co.za Tracking scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php
/**
 * Scrapes parcel tracking details from www.dawnwing.co.za via cURL and
 * prints them as JSON.
 *
 * @author Kundan K Singh <[email protected]>
 */

// Page that receives the waybill search form POST.
define('DOMAIN_REQUEST_URL', 'http://www.dawnwing.co.za/business-tools/online-parcel-tracking/');

// Provides str_get_html(). Required rather than included so that a missing
// library stops the script here instead of failing later with an
// "undefined function" error in the middle of a request.
require_once 'simple_html_dom.php';

// Collect libxml errors internally instead of emitting them as warnings.
libxml_use_internal_errors(true);
/*
|--------------------------------------------------------------------------
| Helper functions
|--------------------------------------------------------------------------
*/
/**
 * Turn a list of "Label : Value" text lines into an associative array.
 *
 * Only lines containing the exact " : " separator are used; everything
 * before the first separator becomes the key and everything after it the
 * value, so values that themselves contain " : " are kept intact. Lines
 * without the separator (including ones that merely contain a colon, such
 * as a time like "13:34") are ignored.
 *
 * @param array $html List of plain-text lines.
 * @return array Map of trimmed label => trimmed value. When a label occurs
 *               more than once the last occurrence wins.
 */
function getDelimiterValues($html)
{
    $response = array();
    foreach ($html as $line) {
        $line = trim($line);
        if (strpos($line, ' : ') === false) {
            continue;
        }
        // Limit of 2: split on the first separator only.
        $parts = explode(' : ', $line, 2);
        $response[trim($parts[0])] = trim($parts[1]);
    }
    return $response;
}
/**
 * Collect the inner HTML of every <td> element in an HTML fragment.
 *
 * @param string $html HTML fragment, e.g. the inner HTML of a table row.
 * @return array List of cell contents in the order the parser returns them;
 *               empty when the fragment could not be parsed or has no <td>.
 */
function extractTd($html)
{
    $response = array();
    $content = str_get_html($html);
    if (!$content) {
        // str_get_html() returns false when it refuses the input
        // (e.g. an empty string), so there is nothing to search.
        return $response;
    }
    // Let the parser enumerate the cells. Counting "<td" substrings can
    // overcount and then dereference a null lookup result.
    foreach ($content->find('td') as $cell) {
        $response[] = $cell->innertext;
    }
    return $response;
}
/**
 * Collect the inner HTML of every <th> element in an HTML fragment.
 *
 * @param string $html HTML fragment, e.g. the inner HTML of a header row.
 * @return array List of header cell contents in the order the parser
 *               returns them; empty when the fragment could not be parsed
 *               or has no <th>.
 */
function extractTh($html)
{
    $response = array();
    $content = str_get_html($html);
    if (!$content) {
        // str_get_html() returns false when it refuses the input
        // (e.g. an empty string), so there is nothing to search.
        return $response;
    }
    // Let the parser enumerate the header cells. Counting "<th" substrings
    // also matches "<thead", which made the indexed lookup run past the
    // last real <th> and dereference null.
    foreach ($content->find('th') as $cell) {
        $response[] = $cell->innertext;
    }
    return $response;
}
/**
 * Read a one-record table as a header => value map.
 *
 * The second <tr> (index 1) supplies the <th> labels and the third <tr>
 * (index 2) supplies the matching <td> values.
 * NOTE(review): presumably row 0 is a title/caption row - confirm against
 * the live page markup.
 *
 * @param string $html HTML fragment containing the table.
 * @return array Map of header text => cell HTML. A header with no matching
 *               cell maps to null. Empty when the fragment cannot be parsed
 *               or either row is missing.
 */
function getTableDelivery($html)
{
    $response = array();
    $content = str_get_html($html);
    if (!$content) {
        return $response;
    }

    $header_row = $content->find('tr', 1);
    $value_row = $content->find('tr', 2);
    if (!$header_row || !$value_row) {
        // find() with an index returns null when that row does not exist.
        return $response;
    }

    $th = extractTh($header_row->innertext);
    $td = extractTd($value_row->innertext);
    foreach ($th as $key => $label) {
        // Tolerate a value row that is shorter than the header row.
        $response[$label] = isset($td[$key]) ? $td[$key] : null;
    }
    return $response;
}
/**
 * Convert a table with one header row and many data rows into a list of
 * header => value maps.
 *
 * The first <tr> supplies the <th> labels; every following <tr> becomes one
 * entry whose values are that row's <td> contents, matched by position.
 *
 * @param string $html HTML fragment containing the table rows.
 * @return array Zero-based list with one map per data row. A header with no
 *               matching cell maps to null. Empty when the fragment cannot
 *               be parsed, has no rows, or the first row has no <th>.
 */
function getDetailedTrackTrace($html)
{
    $response = array();
    $content = str_get_html($html);
    if (!$content) {
        return $response;
    }

    // Let the parser enumerate rows. Counting "<tr" substrings also matches
    // other tags that start with "tr" and then indexes past the last row.
    $rows = $content->find('tr');
    if (count($rows) === 0) {
        return $response;
    }

    $th = extractTh($rows[0]->innertext);
    if (count($th) === 0) {
        // Without labels no entry can be built for any row.
        return $response;
    }

    $row_count = count($rows);
    for ($i = 1; $i < $row_count; $i++) {
        $td = extractTd($rows[$i]->innertext);
        foreach ($th as $key => $label) {
            // Tolerate rows that are shorter than the header row.
            $response[$i - 1][$label] = isset($td[$key]) ? $td[$key] : null;
        }
    }
    return $response;
}
/*
|--------------------------------------------------------------------------
| Prepare request data and fetch tracking information
|--------------------------------------------------------------------------
*/
/**
 * Print a JSON document, preceded by no-cache and content-type headers.
 *
 * Headers are skipped when output has already started, so the body is
 * still delivered in that case.
 *
 * @param mixed $payload     Value handed to json_encode().
 * @param int   $status_code HTTP status code to send.
 * @return void
 */
function sendJsonResponse($payload, $status_code = 200)
{
    if (!headers_sent()) {
        http_response_code($status_code);
        header('Cache-Control: no-cache, must-revalidate');
        header('Expires: Mon, 26 Jul 1997 05:00:00 GMT');
        header('Content-type: application/json');
    }
    echo json_encode($payload, JSON_PRETTY_PRINT);
}

// Accept only a non-empty string id. An array-valued "id" (id[]=...) is
// treated the same as a missing one.
$waybill_no = (isset($_REQUEST['id']) && is_string($_REQUEST['id'])) ? trim($_REQUEST['id']) : '';
if ($waybill_no === '') {
    // Status stays 200 so existing clients that only inspect "message"
    // keep working.
    sendJsonResponse(['message' => 'Sorry no ID Specified']);
    exit();
}

// Prepare request data
$post_data = array(
    'WaybillNo' => $waybill_no,
    'parcel-search' => 'go',
);

// Get tracking information from server
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, DOMAIN_REQUEST_URL);
curl_setopt($ch, CURLOPT_POST, true);
// Send a urlencoded string, not an array: with an array, PHP versions
// before 5.6 treat a value starting with "@" as a local file to upload,
// and the waybill number comes straight from the client.
curl_setopt($ch, CURLOPT_POSTFIELDS, http_build_query($post_data));
curl_setopt($ch, CURLOPT_HEADER, false);
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
// Bound the wait so an unresponsive remote site cannot hang this request.
curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 10);
curl_setopt($ch, CURLOPT_TIMEOUT, 30);
$html_dom = curl_exec($ch);
curl_close($ch);

if ($html_dom === false || $html_dom === '') {
    sendJsonResponse(['message' => 'Unable to fetch tracking information'], 502);
    exit();
}

/*
|--------------------------------------------------------------------------
| Filter Tracking data
|--------------------------------------------------------------------------
*/
// Every lookup below can come back empty (str_get_html() returns false,
// find() with an index returns null), so resolve all nodes first and only
// dereference them once they are known to exist.
$page = str_get_html($html_dom);
$track_node = $page ? $page->find('div#track-trace', 0) : null;
$html = $track_node ? str_get_html($track_node->innertext) : false;

$summary_first = $html ? $html->find('div', 0) : null;
$summary_second = $html ? $html->find('div', 1) : null;
$delivery_node = $html ? $html->find('div', 2) : null;
$trace_table = $html ? $html->find('table', 1) : null;

if (!$summary_first || !$summary_second || !$delivery_node || !$trace_table) {
    // NOTE(review): assumes the expected markup is missing because the
    // waybill is unknown; a changed page layout would also end up here.
    sendJsonResponse(['message' => 'Tracking information not found'], 404);
    exit();
}

$track_summary = explode("\n", trim(strip_tags($summary_first->innertext)));
$track_summary2 = explode("\n", trim(strip_tags($summary_second->innertext)));

$response_data = array();
$response_data['status'] = array_merge(getDelimiterValues($track_summary), getDelimiterValues($track_summary2));
$response_data['delivery_information'] = getTableDelivery($delivery_node->innertext);
$response_data['delivery_trace'] = getDetailedTrackTrace($trace_table->innertext);

sendJsonResponse($response_data);
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment