Last active
September 6, 2019 00:40
-
-
Save dimmduh/d4a4da4e25e82526da915fe4ced3bb4c to your computer and use it in GitHub Desktop.
[Possibly DEPRECATED] Parsing Google Play search results via the web (search_results_cluster_apps)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
| require 'vendor/autoload.php'; | |
| require 'utf8_replace.php'; | |
// Entry script: fetches the first page of Google Play search results, extracts
// the paging tokens embedded in that page, requests the next result cluster
// and prints the combined list of unique package names.

echo "start \n";

$url = "https://play.google.com/store/search?q=racing&c=apps&authuser=0";

$client = new \GuzzleHttp\Client([
    // NOTE(review): hard-coded local debugging proxy; remove or make it
    // configurable before running outside the original developer's network.
    'proxy' => '192.168.0.127:8888',
    // SECURITY: TLS certificate verification is disabled, presumably so an
    // intercepting proxy can be used. Do not ship this setting.
    'verify' => false,
    // Default headers must live at the top level of the client config.
    // The Guzzle 5 style 'defaults' => ['headers' => ...] wrapper is ignored
    // by the Guzzle 6+ API used below (request() / form_params), so the
    // headers were never actually sent.
    'headers' => [
        'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'content-type' => 'application/x-www-form-urlencoded;charset=UTF-8',
        'accept-language' => 'ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4',
        // The HTTP header is spelled "Referer"; "referrer" is not a header
        // any server looks at.
        'Referer' => $url,
    ],
]);

// Step 1: initial search page.
$res = $client->request('POST', $url, [
    'form_params' => [
        'ipf' => 1,
        'xhr' => 1,
    ],
]);

// getBody() returns a stream object; cast once so the helpers get a string.
$html = (string) $res->getBody();
$packageNames = getPackageNames($html);

// Step 2: request the follow-up cluster using the tokens found in step 1.
$tokens = getTokens($html);
$nextUrl = 'https://play.google.com/store/apps/collection/search_results_cluster_apps?authuser=0';
$res = $client->request('POST', $nextUrl, [
    'form_params' => [
        'start' => 0,
        'num' => 0,
        'numChildren' => 0,
        'pagTok' => $tokens['pagTok'],
        'clp' => $tokens['clp'],
        'pagtt' => 3,
        'cctcss' => 'square-cover',
        'cllayout' => 'NORMAL',
        'ipf' => 1,
        'xhr' => 1,
    ],
]);

$html = (string) $res->getBody();
$packageNames = array_merge($packageNames, getPackageNames($html));
print_r($packageNames);
/**
 * Builds the absolute URL behind the first "see more" button in a page.
 *
 * Looks for the first anchor whose class attribute starts with
 * "see-more play-button" and prefixes its href with the Play Store origin.
 *
 * @param string $html Markup to search.
 *
 * @return string Absolute URL on https://play.google.com.
 *
 * @throws \RuntimeException When no matching anchor is present (or the
 *                           input is not valid UTF-8, which the /u pattern
 *                           rejects).
 */
function getNextUrl($html)
{
    $re = '/<a class="see-more play-button.*?href="(.*?)"/u';

    // Only the first match is used, so preg_match is sufficient.
    if (preg_match($re, (string) $html, $matches) !== 1) {
        throw new \RuntimeException('No "see-more play-button" link found in the given HTML.');
    }

    // Attribute values are HTML-escaped ("&amp;" etc.); decode them so the
    // query string of the resulting URL is usable as-is.
    $path = html_entity_decode($matches[1], ENT_QUOTES, 'UTF-8');

    return 'https://play.google.com' . $path;
}
/**
 * Decodes JavaScript "\xNN" escape sequences found in raw JS source text.
 *
 * Each escape denotes the code point U+00NN. Values below 0x80 become the
 * matching ASCII byte; values 0x80-0xFF are emitted as their two-byte UTF-8
 * encoding so the result stays valid UTF-8 (a lone high byte would make a
 * subsequent json_decode() fail).
 *
 * @param string $str Text containing literal backslash-x escapes.
 *
 * @return string Text with every "\xNN" replaced by the character it denotes.
 */
function js2php($str)
{
    return preg_replace_callback(
        // Literal backslash, "x", then exactly two hex digits (any case).
        '/\\\\x([0-9a-f]{2})/i',
        static function (array $m) {
            $codePoint = hexdec($m[1]);

            if ($codePoint < 0x80) {
                return chr($codePoint);
            }

            // U+0080..U+00FF: 110xxxxx 10xxxxxx
            return chr(0xC0 | ($codePoint >> 6)) . chr(0x80 | ($codePoint & 0x3F));
        },
        $str
    );
}
/**
 * Extracts the paging tokens from the "var nbp='...'" assignment in a page.
 *
 * The captured JS string literal is unescaped with js2php() and
 * simple_unicode_decode(), parsed as JSON, and two of its elements are
 * returned for use as the next request's form fields.
 *
 * @param string $html Page source containing the "var nbp=" assignment.
 *
 * @return array Map with keys 'clp' (element 6 of the payload) and
 *               'pagTok' (element 1 of the payload).
 *
 * @throws \RuntimeException When the assignment is missing, the payload is
 *                           not valid JSON, or the expected elements are
 *                           absent.
 */
function getTokens($html)
{
    // Matches up to the literal backslash-n that ends the JS string literal.
    $re = '/var nbp=\'(.+?)\\\\n\'/';

    if (preg_match($re, (string) $html, $matches) !== 1) {
        throw new \RuntimeException('Paging tokens not found: no "var nbp=" assignment in the page.');
    }

    // Decode once and reuse; debug dumps of the intermediate values were
    // removed so the function no longer writes to stdout.
    $json = simple_unicode_decode(js2php($matches[1]));
    $result = json_decode($json);

    if (!is_array($result)) {
        throw new \RuntimeException('Could not decode the "nbp" payload: ' . json_last_error_msg());
    }

    if (!isset($result[1], $result[6])) {
        throw new \RuntimeException('The "nbp" payload does not contain the expected elements 1 and 6.');
    }

    return [
        'clp' => $result[6],
        'pagTok' => $result[1],
    ];
}
/**
 * Collects the distinct values of every data-docid="..." attribute.
 *
 * @param string $html Markup to scan.
 *
 * @return array Zero-indexed list of unique attribute values, in order of
 *               first appearance.
 */
function getPackageNames($html)
{
    $found = [];
    preg_match_all('/data-docid="(.*?)"/u', $html, $found);

    // array_unique keeps the first occurrence but leaves gaps in the keys;
    // array_values turns the result back into a plain list.
    $distinct = array_unique($found[1]);

    return array_values($distinct);
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| <?php | |
/**
 * Replaces double-backslash "\\uXXXX" escapes with the characters they denote.
 *
 * Only a fixed set of code points is translated:
 *  - U+0001..U+001F become "?" (except 0007, 000F, 0014, 0015, which map to
 *    the glyphs "•", "¤", "¶", "§");
 *  - U+0020..U+007E become the corresponding ASCII character;
 *  - U+007F is removed;
 *  - a handful of extra symbols (… ≤ ≥ ˜ ¢ £ ¤ € ¥).
 * In addition, "\\u0026quot;", "\\u0026gt;" and "\\u0026lt;" (an escaped
 * ampersand starting an HTML entity) become '"', '>' and '<'.
 * Matching is case-insensitive. Escapes for any other code point, and
 * escapes preceded by a single backslash only, are left untouched.
 *
 * Differences from the previous chain of str_ireplace() calls:
 *  - "\\u0026lt;" now yields "<" (it used to be mapped to ">");
 *  - the three entity rules are actually reachable (they used to run after
 *    "\\u0026" had already been turned into "&", so they never matched);
 *  - U+2264 / U+2265 decode to "≤" / "≥" instead of "=";
 *  - the string is scanned once, so text produced by one replacement is
 *    never re-interpreted as the start of another escape.
 *
 * @param string $str Text containing "\\uXXXX" sequences.
 *
 * @return string Text with the supported sequences replaced.
 */
function simple_unicode_decode($str)
{
    // Lookup table keyed by the four upper-case hex digits; built once.
    static $table = null;
    // HTML entity names that may follow an escaped ampersand.
    static $entities = [
        'quot' => '"',
        'gt' => '>',
        'lt' => '<',
    ];

    if ($table === null) {
        $table = [];

        // Control characters are replaced by a placeholder.
        for ($cp = 0x01; $cp <= 0x1F; $cp++) {
            $table[sprintf('%04X', $cp)] = '?';
        }
        // Printable ASCII maps to itself.
        for ($cp = 0x20; $cp <= 0x7E; $cp++) {
            $table[sprintf('%04X', $cp)] = chr($cp);
        }

        // Control characters with a dedicated glyph.
        $table['0007'] = '•';
        $table['000F'] = '¤';
        $table['0014'] = '¶';
        $table['0015'] = '§';

        // DEL is dropped.
        $table['007F'] = '';

        // Additional symbols.
        $table['00A2'] = '¢';
        $table['00A3'] = '£';
        $table['00A4'] = '¤';
        $table['00A5'] = '¥';
        $table['02DC'] = '˜';
        $table['2026'] = '…';
        $table['20AC'] = '€';
        $table['2264'] = '≤';
        $table['2265'] = '≥';
    }

    return preg_replace_callback(
        // Two literal backslashes, "u", then either an escaped-ampersand
        // entity (tried first) or four hex digits.
        '/\\\\\\\\u(?:0026(quot|gt|lt);|([0-9a-f]{4}))/i',
        static function (array $m) use ($table, $entities) {
            if ($m[1] !== '') {
                return $entities[strtolower($m[1])];
            }

            $key = strtoupper($m[2]);

            // Unknown code points are passed through unchanged.
            return isset($table[$key]) ? $table[$key] : $m[0];
        },
        $str
    );
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment