Skip to content

Instantly share code, notes, and snippets.

@dimmduh
Last active September 6, 2019 00:40
Show Gist options
  • Save dimmduh/d4a4da4e25e82526da915fe4ced3bb4c to your computer and use it in GitHub Desktop.
Save dimmduh/d4a4da4e25e82526da915fe4ced3bb4c to your computer and use it in GitHub Desktop.
[Possibly DEPRECATED] Parsing Google Play search results via the web (search_results_cluster_apps)
<?php
require 'vendor/autoload.php';
require 'utf8_replace.php';
echo "start \n";

// Search results page used as the entry point; it is also sent as the Referer.
$url = "https://play.google.com/store/search?q=racing&c=apps&authuser=0";

// Guzzle 6+ (the API that provides request() and 'form_params') reads default
// request options from the top level of the constructor array. A 'defaults'
// wrapper is Guzzle 5 syntax and is ignored here, so headers nested under it
// are never sent. They therefore live directly under 'headers'.
$client = new \GuzzleHttp\Client([
    // NOTE(review): hard-coded proxy address combined with disabled TLS
    // verification looks like a local debugging setup -- confirm before
    // running this anywhere else.
    'proxy' => '192.168.0.127:8888',
    'verify' => false,
    'headers' => [
        'User-Agent' => 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
        'content-type' => 'application/x-www-form-urlencoded;charset=UTF-8',
        'accept-language' => 'ru-RU,ru;q=0.8,en-US;q=0.6,en;q=0.4',
        // The HTTP header is spelled "Referer"; "referrer" is not a standard header.
        'Referer' => $url,
    ],
]);

// Step 1: fetch the first page of search results.
$res = $client->request('POST', $url, [
    'form_params' => [
        'ipf' => 1,
        'xhr' => 1,
    ],
]);
// getBody() returns a stream object; cast it once so the regex helpers
// receive a plain string.
$html = (string) $res->getBody();
$packageNames = getPackageNames($html);

// Step 2: request the follow-up cluster using the tokens embedded in page 1.
$tokens = getTokens($html);
$nextUrl = 'https://play.google.com/store/apps/collection/search_results_cluster_apps?authuser=0';
$res = $client->request('POST', $nextUrl, [
    'form_params' => [
        'start' => 0,
        'num' => 0,
        'numChildren' => 0,
        'pagTok' => $tokens['pagTok'],
        'clp' => $tokens['clp'],
        'pagtt' => 3,
        'cctcss' => 'square-cover',
        'cllayout' => 'NORMAL',
        'ipf' => 1,
        'xhr' => 1,
    ],
]);
$html = (string) $res->getBody();

// Append the second batch to the first and print the combined list.
$packageNames = array_merge($packageNames, getPackageNames($html));
print_r($packageNames);
/**
 * Builds the absolute URL behind the first "see more" button in a page.
 *
 * Looks for the first anchor whose class attribute starts with
 * "see-more play-button" and prefixes its href with the Play Store host.
 * The href is HTML-entity-decoded (e.g. "&amp;" becomes "&") because
 * attribute values in markup are entity-encoded.
 *
 * @param string $html Page markup to search.
 * @return string Absolute URL on https://play.google.com.
 * @throws \RuntimeException When the markup contains no such link.
 */
function getNextUrl($html){
    $re = '/<a class="see-more play-button.*?href="(.*?)"/u';
    // Only the first hit is used, so a single preg_match is sufficient.
    if (preg_match($re, $html, $matches) !== 1) {
        throw new \RuntimeException('No "see more" link found in the supplied HTML');
    }
    return 'https://play.google.com' . html_entity_decode($matches[1], ENT_QUOTES, 'UTF-8');
}
/**
 * Expands JavaScript-style "\xHH" escapes into the bytes they denote.
 *
 * Every backslash + "x" + two hex digits (matched case-insensitively) is
 * replaced by the single byte with that value; all other text is returned
 * untouched.
 *
 * @param string $str Text that may contain "\xHH" escapes.
 * @return string Text with the escapes expanded.
 */
function js2php($str){
    // The outer parentheses are the regex delimiters, so capture group 1 is
    // the pair of hex digits.
    $hexEscape = "(\\\\x([0-9a-f]{2}))i";

    $toByte = static function ($hit) {
        $codePoint = hexdec($hit[1]);
        return chr($codePoint);
    };

    return preg_replace_callback($hexEscape, $toByte, $str);
}
/**
 * Extracts the pagination tokens embedded in a search results page.
 *
 * The page is expected to contain a script assignment of the form
 * var nbp='...\n' whose payload, once "\xHH" and "\\uXXXX" escapes are
 * expanded, is a JSON array. Element 1 of that array is returned as
 * 'pagTok' and element 6 as 'clp'.
 *
 * @param string $html Page markup to search.
 * @return array Map with the keys 'clp' and 'pagTok'.
 * @throws \RuntimeException When the assignment is missing or its payload
 *                           is not a JSON array holding both elements.
 */
function getTokens($html){
    $re = '/var nbp=\'(.+?)\\\\n\'/';
    if (preg_match($re, $html, $matches) !== 1) {
        throw new \RuntimeException('Pagination data (var nbp) not found in the supplied HTML');
    }

    // Expand the escapes once and reuse the result.
    $json = simple_unicode_decode(js2php($matches[1]));
    $result = json_decode($json);
    if (!is_array($result)) {
        throw new \RuntimeException('Pagination data did not decode to a JSON array');
    }
    if (!array_key_exists(1, $result) || !array_key_exists(6, $result)) {
        throw new \RuntimeException('Pagination data is missing element 1 or element 6');
    }

    return [
        'clp' => $result[6],
        'pagTok' => $result[1],
    ];
}
/**
 * Collects the distinct data-docid attribute values found in a page.
 *
 * Values are returned in order of first appearance, as a list re-indexed
 * from zero.
 *
 * @param string $html Page markup to scan.
 * @return array List of unique data-docid values.
 */
function getPackageNames($html){
    $docIdPattern = '/data-docid="(.*?)"/u';
    preg_match_all($docIdPattern, $html, $found);

    // Group 1 holds the attribute values; drop repeats, then close the gaps
    // that de-duplication leaves in the keys.
    $distinct = array_unique($found[1]);

    return array_values($distinct);
}
<?php
/**
 * Replaces doubly-escaped "\\uXXXX" sequences with literal characters.
 *
 * Each rule looks for two backslashes, the letter "u" and the listed code
 * (case-insensitively) and substitutes the mapped text. Rules run one after
 * another in table order, each on the output of the previous one. Codes
 * that are not in the table are left untouched, as are single-backslash
 * "\uXXXX" sequences.
 *
 * The three entity rules ("\\u0026quot;", "\\u0026gt;", "\\u0026lt;") run
 * first: if the bare "\\u0026" rule ran before them it would rewrite their
 * prefix to "&" and they could never match. "&lt;" yields "<", and
 * U+2264 / U+2265 yield "≤" / "≥".
 *
 * NOTE(review): the control codes U+0001..U+001F map to "?" or to a symbol
 * and U+007F maps to the empty string rather than to the real characters.
 * That looks like placeholder or encoding-damaged data; it is preserved
 * as-is -- confirm the intended values.
 *
 * @param string $str Text that may contain "\\uXXXX" sequences.
 * @return string Text with the known sequences replaced.
 */
function simple_unicode_decode($str) {
    // Code (the text following the "\\u" prefix) => replacement.
    // Order matters because later rules see the output of earlier ones.
    $table = [
        // Escaped HTML entities: must precede the bare '0026' rule.
        '0026quot;' => '"', '0026gt;' => '>', '0026lt;' => '<',

        // Control codes (see the review note in the docblock).
        '0001' => '?', '0002' => '?', '0003' => '?', '0004' => '?',
        '0005' => '?', '0006' => '?', '0007' => '•', '0008' => '?',
        '0009' => '?', '000A' => '?', '000B' => '?', '000C' => '?',
        '000D' => '?', '000E' => '?', '000F' => '¤', '0010' => '?',
        '0011' => '?', '0012' => '?', '0013' => '?', '0014' => '¶',
        '0015' => '§', '0016' => '?', '0017' => '?', '0018' => '?',
        '0019' => '?', '001A' => '?', '001B' => '?', '001C' => '?',
        '001D' => '?', '001E' => '?', '001F' => '?',

        // Punctuation and digits.
        '0020' => ' ', '0021' => '!', '0022' => '"', '0023' => '#',
        '0024' => '$', '0025' => '%', '0026' => '&', '0027' => '\'',
        '0028' => '(', '0029' => ')', '002A' => '*', '002B' => '+',
        '002C' => ',', '002D' => '-', '002E' => '.', '2026' => '…',
        '002F' => '/',
        '0030' => '0', '0031' => '1', '0032' => '2', '0033' => '3',
        '0034' => '4', '0035' => '5', '0036' => '6', '0037' => '7',
        '0038' => '8', '0039' => '9',
        '003A' => ':', '003B' => ';', '003C' => '<', '003D' => '=',
        '003E' => '>', '2264' => '≤', '2265' => '≥', '003F' => '?',
        '0040' => '@',

        // Upper-case letters.
        '0041' => 'A', '0042' => 'B', '0043' => 'C', '0044' => 'D',
        '0045' => 'E', '0046' => 'F', '0047' => 'G', '0048' => 'H',
        '0049' => 'I', '004A' => 'J', '004B' => 'K', '004C' => 'L',
        '004D' => 'M', '004E' => 'N', '004F' => 'O', '0050' => 'P',
        '0051' => 'Q', '0052' => 'R', '0053' => 'S', '0054' => 'T',
        '0055' => 'U', '0056' => 'V', '0057' => 'W', '0058' => 'X',
        '0059' => 'Y', '005A' => 'Z',
        '005B' => '[', '005C' => '\\', '005D' => ']', '005E' => '^',
        '005F' => '_', '0060' => '`',

        // Lower-case letters.
        '0061' => 'a', '0062' => 'b', '0063' => 'c', '0064' => 'd',
        '0065' => 'e', '0066' => 'f', '0067' => 'g', '0068' => 'h',
        '0069' => 'i', '006A' => 'j', '006B' => 'k', '006C' => 'l',
        '006D' => 'm', '006E' => 'n', '006F' => 'o', '0070' => 'p',
        '0071' => 'q', '0072' => 'r', '0073' => 's', '0074' => 't',
        '0075' => 'u', '0076' => 'v', '0077' => 'w', '0078' => 'x',
        '0079' => 'y', '007A' => 'z',
        '007B' => '{', '007C' => '|', '007D' => '}', '02DC' => '˜',
        '007E' => '~', '007F' => '',

        // Currency symbols.
        '00A2' => '¢', '00A3' => '£', '00A4' => '¤', '20AC' => '€',
        '00A5' => '¥',
    ];

    // All-digit codes without a leading zero (e.g. '2026') become integer
    // array keys; string concatenation turns them back into the same text.
    $search = [];
    foreach (array_keys($table) as $code) {
        $search[] = '\\\\u' . $code;
    }

    // With array arguments str_ireplace applies the pairs sequentially, in
    // order, exactly like a chain of individual calls.
    return str_ireplace($search, array_values($table), $str);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment