Created
August 18, 2020 01:50
-
-
Save vuthaihoc/87271218a9f45e3f050f582b4accaff8 to your computer and use it in GitHub Desktop.
Parse link dạng range
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/**
 * Expand a URL containing a numeric range placeholder into concrete URLs.
 *
 * The placeholder has the form {MIN-MAX} or {MIN-MAX,FLAG}, for example
 * http://example.com/listing?page={1-100,0}:
 *  - MIN-MAX is the page range to loop over. MIN is inclusive and MAX is
 *    exclusive, so {1-100} produces 1..99.
 *  - FLAG is a single optional character. "0" (or no flag at all) yields every
 *    generated URL unchecked. Any other character makes the method send a HEAD
 *    request to each URL first, skip URLs whose request throws, and yield the
 *    effective URI reported for the request instead of the generated one.
 *
 * Every placeholder occurrence in the URL is substituted with the same number;
 * the bounds and flag are taken from the first occurrence. A URL without a
 * placeholder is yielded unchanged as the only item.
 *
 * @param string $url URL, optionally containing a range placeholder.
 *
 * @return \Generator Yields URL strings.
 */
public static function parseLinks($url)
{
    // Single source of truth for the placeholder syntax. The third group holds
    // only the flag character (without the comma) so that ",0" can be told
    // apart from ",1"; casting the old ",0" capture to bool was always true.
    $pattern = '/\{(\d+)-(\d+)(?:,([^},]))?\}/';

    if (!preg_match($pattern, $url, $matches)) {
        yield $url;

        return;
    }

    $max = (int) $matches[2];
    // An unmatched trailing optional group is omitted from $matches.
    $flag = $matches[3] ?? '';
    $checkAvailable = $flag !== '' && $flag !== '0';

    $client = null;
    if ($checkAvailable) {
        $client = new Client([
            // NOTE(review): TLS certificate verification is disabled here,
            // as in the original code; confirm this is intentional.
            'verify' => false,
            'headers' => [
                'User-Agent' => 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36',
            ],
        ]);
    }

    // Split around the placeholder(s) instead of going through a textual
    // marker such as "__1__", which would collide with URLs that already
    // contain that marker.
    $parts = preg_split($pattern, $url);

    for ($page = (int) $matches[1]; $page < $max; $page++) {
        $candidate = implode((string) $page, $parts);

        if ($client === null) {
            yield $candidate;
            continue;
        }

        $effectiveUri = null;
        try {
            $client->head($candidate, [
                // Keep the URI reported by the transfer statistics; the last
                // report wins if the callback fires more than once.
                'on_stats' => function (TransferStats $stats) use (&$effectiveUri) {
                    $effectiveUri = $stats->getEffectiveUri();
                },
            ]);
        } catch (\Exception $ex) {
            // Best effort: a URL whose HEAD request fails is treated as dead
            // and skipped.
            continue;
        }

        // Fall back to the generated URL if no statistics were reported,
        // rather than calling a method on a plain string.
        yield $effectiveUri !== null ? (string) $effectiveUri : $candidate;
    }
}
Ví dụ 2
https://123doc.net/document/{418400-418420,1}-bang-cong-thuc-tich-phan-dao-ham-mu-logarit.htm
Output
"https://123doc.net/document/418400-bang-cong-thuc-tich-phan-dao-ham-mu-logarit.htm"
"https://123doc.net/document/418401-bang-cong-thuc-luong-giac-dung-cho-10-11-12.htm"
"https://123doc.net/document/418402-an-toan-dien.htm"
"https://123doc.net/document/418403-hack-wifi-wpa2.htm"
"https://123doc.net/document/418404-tin-hoc.htm"
"https://123doc.net/document/418405-chuong-iv-bai-3-hinh-cau-dien-tich-mat-cau-va-the-tich-hinh-cau.htm"
"https://123doc.net/document/418406-tin-hoc.htm"
"https://123doc.net/document/418407-tin-hoc.htm"
"https://123doc.net/document/418408-tin-hoc.htm"
"https://123doc.net/document/418409-tin-hoc.htm"
"https://123doc.net/document/418410-tin-hoc.htm"
"https://123doc.net/document/418411-cau-hoi-trac-nghiem-phan-uddt-vao-chon-giong.htm"
"https://123doc.net/document/418412-tom-tat-van-ban.htm"
"https://123doc.net/document/418413-tin-hoc.htm"
"https://123doc.net/document/418414-tin-hoc.htm"
"https://123doc.net/document/418415-bai-tap-dao-ham-ham-so-luong-giac.htm"
"https://123doc.net/document/418416-giao-an-vat-ly-6-tiet-26.htm"
"https://123doc.net/document/418417-adn.htm"
"https://123doc.net/document/418418-giao-an-vat-ly-6-tiet-27.htm"
"https://123doc.net/document/418419-giao-an-vat-ly-6-tiet-29.htm"
https://123doc.net/document/{418400-418420,1}-bang-cong-thuc-tich-phan-dao-ham-mu-logarit.htm
anh ơi cái đoạn này
$client->head($_url, [
'on_stats' => function (TransferStats $stats) use (&$_url) {
$_url = $stats->getEffectiveUri();
}
]);
sao có thể check ra các link sống như kia nhỉ ?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Ví dụ
Output