Last active
May 1, 2020 18:44
-
-
Save palindrom615/d16082ad8d9dd6841eaf20b6e9c1f6f7 to your computer and use it in GitHub Desktop.
Script for downloading Naver × Dispatch photos
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
.vscode | |
img | |
.DS_Store |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
"""Download photos from a Naver entertainment photo issue.

Producer threads page through the issue listing and queue the image URLs
whose title contains a search term; consumer threads download them.
"""
import os
import subprocess
import sys
import threading
import time
from queue import Queue

# import aiohttp
# import asyncio

try:
    import requests
except ImportError:
    # pip offers no supported in-process API, so run it as a subprocess of the
    # current interpreter, and only when the package is actually missing.
    subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'requests'])
    import requests

# Shared work queue: producers put image URLs on it, consumers take them off.
dlqueue = Queue()
class ProducerThread(threading.Thread):
    """Thread that scans listing pages and queues matching photo URLs.

    Each producer starts at page ``this_thread_num`` and advances by
    ``total_thread`` pages per request, so several producers started with
    distinct numbers cover interleaved pages of the same listing.
    """

    def __init__(self, cid, search_query, total_thread, this_thread_num):
        """Store the listing id, the title filter and the page stride."""
        super().__init__()
        self.this_thread_num = this_thread_num
        self.total_thread = total_thread
        self.search_query = search_query
        self.cid = cid

    def run(self):
        """Fetch pages until one has no thumbnails, queueing matching URLs."""
        endpoint = "https://entertain.naver.com/photo/issueItemList.json"
        issue_id = self.cid
        keyword = self.search_query
        current = self.this_thread_num
        while True:
            print('page: ' + str(current), end="\r")
            response = requests.get(
                endpoint, params={'cid': issue_id, 'page': current})
            thumbnails = response.json()['results'][0]['thumbnails']
            if not thumbnails:
                # An empty page ends this producer's share of the listing.
                return
            current += self.total_thread
            # Keep only entries whose title contains the search term, and
            # drop the query string from each thumbnail URL.
            matches = [item['thumbUrl'].partition('?')[0]
                       for item in thumbnails if keyword in item['title']]
            for link in matches:
                dlqueue.put(link)
class ConsumerThread(threading.Thread):
    """Thread that downloads queued URLs until the queue is empty.

    The thread stops the first time it finds ``dlqueue`` empty; it does not
    wait for producers that may still add URLs later.
    """

    def __init__(self, download_path):
        """Remember the directory that downloaded files are written to."""
        super().__init__()
        self.download_path = download_path

    def run(self):
        """Take URLs off ``dlqueue`` and download each one."""
        # Function-scope import: the file's top level only imports Queue.
        from queue import Empty

        while True:
            # Checking empty() and then calling a blocking get() is a race:
            # another consumer can take the last item in between, leaving
            # this thread blocked forever. get_nowait() does both atomically.
            try:
                url = dlqueue.get_nowait()
            except Empty:
                break
            download(url, self.download_path)
def download(url, download_path):
    """Fetch ``url`` and save it inside ``download_path``.

    The file is named after the last path segment of ``url``. Returns the
    path of the written file.
    """
    target = download_path + '/' + url.rsplit('/', 1)[-1]
    with requests.get(url) as response, open(target, 'wb') as out:
        # Iterating the response yields the body in chunks of bytes.
        out.writelines(response)
    return target
async def download_async(url, download_path):
    """Coroutine variant of ``download`` built on aiohttp.

    Saves ``url`` inside ``download_path`` under the last path segment of
    the URL and returns the path of the written file.
    """
    # NOTE(review): `import aiohttp` is commented out at the top of this file;
    # it has to be restored before this coroutine can run.
    filename = download_path + '/' + url.split('/')[-1]
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as res:
            with open(filename, 'wb') as file:
                # An aiohttp response cannot be iterated with a plain `for`;
                # the body is streamed asynchronously from `res.content`.
                async for chunk in res.content.iter_chunked(1024):
                    file.write(chunk)
    return filename
if __name__ == '__main__':
    # Entry point: download every photo in listing CID whose title contains
    # the name given as the first command-line argument.
    if len(sys.argv) < 2:
        # Exit with a usage message instead of a bare IndexError.
        sys.exit('usage: ' + sys.argv[0] + ' <idol_name>')
    # cid = sys.argv[1]
    CID = "1047153"
    idol_name = sys.argv[1]
    # Photos are stored under img/<idol_name> next to this script.
    download_path = os.path.dirname(
        os.path.realpath(__file__)) + '/img/' + idol_name
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(download_path, exist_ok=True)
    total_thread = 8
    workers = []
    for i in range(total_thread):
        p = ProducerThread(cid=CID, search_query=idol_name,
                           total_thread=total_thread, this_thread_num=i)
        c = ConsumerThread(download_path=download_path)
        # Thread starts are staggered by two seconds, as before.
        p.start()
        time.sleep(2)
        c.start()
        time.sleep(2)
        workers.extend((p, c))
    for worker in workers:
        worker.join()
    # A consumer stops as soon as it sees an empty queue, so URLs queued after
    # the last consumer exited are still pending. Every thread has finished by
    # now, so draining here on the main thread is free of races.
    while not dlqueue.empty():
        download(dlqueue.get(), download_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment