Created
May 23, 2022 16:50
-
-
Save lennier1/85ce30de1409b9d05570bb4db122d43d to your computer and use it in GitHub Desktop.
Scrape apps from app store when you already know the IDs
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from itunes_app_scraper.scraper import AppStoreScraper | |
from itunes_app_scraper.scraper import AppStoreCollections | |
import sys | |
import time | |
countryCode = "kr"


def load_app_ids(path):
    """Return the set of app IDs listed one per line in the file at *path*.

    Surrounding whitespace (including the line terminator) is stripped from
    every line and blank lines are skipped, so each ID is a clean token no
    matter whether the file ends with a newline. Duplicate IDs collapse into
    a single entry because the result is a set.
    """
    with open(path) as id_file:
        stripped_lines = [line.strip() for line in id_file]
    return {app_id for app_id in stripped_lines if app_id}


def scrape_apps(app_ids, scraper, country, details_file, processed_file,
                errors_file):
    """Query every ID in *app_ids* once and record the outcome.

    Args:
        app_ids: iterable of app ID strings; duplicates are queried once.
        scraper: object exposing ``get_app_details(app_id, country=...)``.
        country: country code forwarded to the scraper.
        details_file: writable text file; receives one line per successful
            query holding ``str()`` of the returned details.
        processed_file: writable text file; receives one successfully
            queried ID per line.
        errors_file: writable text file; receives one failed ID per line.

    Returns:
        A ``(succeeded, failed)`` tuple of counts.

    Each file is flushed after every write so that progress survives an
    interrupted run.
    """
    succeeded = 0
    failed = 0
    for app_id in set(app_ids):
        try:
            app_details = scraper.get_app_details(app_id, country=country)
        except Exception as exc:
            # Catch Exception rather than using a bare except so Ctrl-C
            # (KeyboardInterrupt) and SystemExit still stop the run.
            print("ERROR: Exception getting app details for app ID "
                  + str(app_id) + " (" + repr(exc) + ")")
            errors_file.write(str(app_id) + "\n")
            errors_file.flush()
            failed += 1
            continue

        # NOTE: this drops every non-ASCII character from the record (for
        # example any Korean text), leaving a pure-ASCII line. Kept as-is
        # so the output format does not change.
        app_str = str(app_details).encode("utf-8").decode("ascii", "ignore")
        details_file.write(app_str + "\n")
        details_file.flush()
        processed_file.write(str(app_id) + "\n")
        processed_file.flush()
        succeeded += 1
    return succeeded, failed


def main():
    """Scrape details for the IDs in krIds.txt into the Kr output files."""
    app_ids = load_app_ids("krIds.txt")
    scraper = AppStoreScraper()
    # Append mode: repeated runs add to the existing output files.
    with open("appDetailsKr.txt", "a") as details_file, \
            open("processedAppIdsKr.txt", "a") as processed_file, \
            open("appsWithErrorsKr.txt", "a") as errors_file:
        scrape_apps(app_ids, scraper, countryCode, details_file,
                    processed_file, errors_file)


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment