Skip to content

Instantly share code, notes, and snippets.

@zhenghaoz
Created July 30, 2017 06:53
Show Gist options
  • Save zhenghaoz/ccd1382794decd4e214f5a5967dae94b to your computer and use it in GitHub Desktop.
Save zhenghaoz/ccd1382794decd4e214f5a5967dae94b to your computer and use it in GitHub Desktop.
import io
import json
import sys

import requests
from bs4 import BeautifulSoup
FILE = 'comments.txt'
PAGE = 50
TYPE = ''
def get_comments(appid, max_pages=100):
    """Append the comments of one app to FILE, one comment per line.

    Requests pages 1..max_pages of the comment API (the URL asks for 10
    entries per page via ``ps=10``) and writes every non-empty ``content``
    field to FILE as UTF-8 text. One dot is written to stdout per page
    requested. Pages that do not answer with HTTP 200 are skipped.

    Args:
        appid: Value substituted into the ``id`` query parameter with ``%s``.
        max_pages: Number of pages to request; defaults to the previously
            hard-coded 100.
    """
    # io.open accepts an encoding on both Python 2 and 3, so text is written
    # directly. The earlier form concatenated encoded bytes with a str
    # newline, which raises TypeError on Python 3.
    with io.open(FILE, 'a', encoding='utf8') as out:
        for page in range(1, max_pages + 1):
            url = 'http://apk.hiapk.com/web/api.do?qt=1701&id=%s&pi=%d&ps=10' % (appid, page)
            sys.stdout.write('.')
            sys.stdout.flush()
            response = requests.get(url)
            if response.status_code != 200:
                continue
            # strict=False lets the parser accept raw control characters
            # inside JSON strings.
            payload = json.loads(response.text, strict=False)
            # A missing or null 'data' field is treated as an empty page
            # instead of crashing the whole crawl.
            for row in payload.get('data') or []:
                content = row.get('content')
                if content:
                    out.write(content + u'\n')
def get_appid(package):
    """Return the ``value`` attribute of the ``hidAppId`` element on an app page.

    Fetches ``http://apk.hiapk.com/appinfo/<package>``, parses it with the
    lxml parser and looks up the element whose id is ``hidAppId``.

    Args:
        package: Value substituted into the app-info URL with ``%s``
            (presumably the package name; verify against caller).
    """
    info_page = requests.get('http://apk.hiapk.com/appinfo/%s' % package)
    hidden_field = BeautifulSoup(info_page.text, 'lxml').find(id='hidAppId')
    return hidden_field['value']
def get_packages(page):
    """Return one identifier per ``li.list_item`` entry on a listing page.

    The listing URL is built from the module-level TYPE and the given page
    number. For each list item, the href of its first ``<a>`` is split on
    '/' and the segment at index 2 is collected.

    Args:
        page: Integer page number substituted into the ``pi`` query parameter.

    Returns:
        A list of the extracted path segments, in document order.
    """
    listing = requests.get('http://apk.hiapk.com/%s?sort=5&pi=%d' % (TYPE, page))
    document = BeautifulSoup(listing.text, 'lxml')
    return [
        entry.find('a')['href'].split('/')[2]
        for entry in document.find_all('li', {'class': 'list_item'})
    ]
# Crawl the 'apps' listings first, then the 'games' listings. get_packages()
# reads the module-level TYPE, so the loop variable is deliberately that
# global name.
for TYPE in ('apps', 'games'):
    for page in range(1, PAGE + 1):
        for package in get_packages(page):
            # Progress line: page number and package, then the dots printed
            # by get_comments(), then a newline.
            sys.stdout.write('%-2d %-32s' % (page, package))
            get_comments(get_appid(package))
            sys.stdout.write('\n')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment