liuderchi · January 28, 2016 06:38
diff --git a/line_sticker_printer.py b/line_sticker_printer.py
 #!/usr/bin/env python3
 # Web Crawler for line stickers
 # USAGE:
 #     $ python line_sticker_printer.py <url>
 # example url:
 #    https://store.line.me/stickershop/home/user/zh-Hant
 #    https://store.line.me/stickershop/home/general/zh-Hant
 #
 # credit:
 #     jminh@github and hour of code
 #
 # Reference:
 #     main: http://pycontw.blogspot.tw/2015/12/hour-of-code-90.html
 #     code: https://github.com/jminh/hour_of_code_python_2015
 #     hackpad notes: https://hocpython.hackpad.com/Hour-of-Code-Python--oQL8j5m00dp
 #
 # Revised by D. Liu

 from bs4 import BeautifulSoup as BS
 import os
 import requests
 import sys

 def download_file(url, dir='.'):
    """Download ``url`` into directory ``dir`` and return the saved file name.

    The file is named after the third-from-last path segment of the URL.
    For the thumbnail URLs this script targets, e.g.
    ``http://.../products/0/0/1/1239040/LINEStorePC/thumbnail_shop.png``,
    that segment is ``1239040``; no file extension is appended.

    Raises ``requests.HTTPError`` when the server answers with an error
    status, so an error page is never stored as if it were an image.
    """
    local_filename = url.split('/')[-3]

    # stream=True defers fetching the body so it can be written to disk
    # chunk by chunk instead of being loaded into memory at once.
    r = requests.get(url, stream=True)
    try:
        r.raise_for_status()
        with open(os.path.join(dir, local_filename), 'wb') as f:
            for chunk in r.iter_content(chunk_size=1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)
    finally:
        # With stream=True the connection is only released once the body
        # is fully consumed or the response is closed explicitly.
        r.close()
    return local_filename

 def main():
    """Download every sticker thumbnail listed on the page given in argv[1].

    Fetches the page, collects the ``src`` of the image inside each
    ``<div class="mdCMN05Img">``, creates a folder named after the page
    title (the text before the first '-') in the current working
    directory, and saves each image there through ``download_file``.

    Raises ``requests.HTTPError`` if the page request fails.
    """
    # make request and get response
    res = requests.get(sys.argv[1])
    res.raise_for_status()

    # parse txt content of response
    # NOTE check response_snippet
    soup = BS(res.text, 'html.parser')

    # NOTE this way is customized to get div tags
    divs = soup.find_all('div', 'mdCMN05Img')

    # get image link from src attributes; a div without an <img>, or an
    # <img> without a non-empty src, is skipped instead of raising
    stickers = []
    for div in divs:
        img = div.img
        src = img.get('src') if img is not None else None
        if src:
            stickers.append(src)

    # make folder named as webpage title; fall back to a fixed name when
    # the page has no <title> or the part before '-' is empty
    raw_title = soup.title.text if soup.title is not None else ''
    title = raw_title.split('-')[0].strip() or 'stickers'
    # exist_ok avoids the check-then-create race of exists() + mkdir()
    os.makedirs(title, exist_ok=True)

    # download image
    download_dir = os.path.join(os.getcwd(), title)
    for url in stickers:
        print(download_file(url, download_dir), "downloaded!")

 if __name__ == '__main__':
    if len(sys.argv) < 2:
        # sys.exit() with a string writes it to stderr and exits with
        # status 1, so a missing argument is reported as a failure.
        sys.exit("Usage: line_sticker_printer.py [URL]")
    main()

    # Reference sample of the page markup that main() looks for
    # (div.mdCMN05Img > img[src]); nothing in the visible code reads it.
    response_snippet = u"""
    <div class="MdCMN02List">
      <ul class="mdCMN02Ul">

        <li class="mdCMN02Li">
          <a href="/stickershop/product/1236945/zh-Hant" data-gcl="sticker|click|sticker_item_1236945">
          <div class="MdCMN05Item mdCMN05Sticker">
            <div class="mdCMN05Img">
              <img src="https://sdl-stickershop.line.naver.jp/products/0/0/1/1236945/LINEStorePC/thumbnail_shop.png" height="120" width="120">
            </div>
            <p class="mdCMN05Ttl">title of the stickers</p>
          </div></a>
        </li>

        <li class="mdCMN02Li">
          <a href="/stickershop/product/1238025/zh-Hant" data-gcl="sticker|click|sticker_item_1238025">
          <div class="MdCMN05Item mdCMN05Sticker">
            <div class="mdCMN05Img">
              <img src="https://sdl-stickershop.line.naver.jp/products/0/0/1/1238025/LINEStorePC/thumbnail_shop.png" height="120" width="120">
            </div>
            <p class="mdCMN05Ttl">title of the stickers</p>
          </div></a>
        </li>

        <!--..More li tags here...-->

      </ul>
    </div>
    """
	#!/usr/bin/env python3
	# Web Crawler for line stickers
	# USAGE:
	# $ python line_sticker_printer.py <url>
	# example url:
	# https://store.line.me/stickershop/home/user/zh-Hant
	# https://store.line.me/stickershop/home/general/zh-Hant
	#
	# credit:
	# jminh@github and hour of code
	#
	# Reference:
	# main: http://pycontw.blogspot.tw/2015/12/hour-of-code-90.html
	# code: https://github.com/jminh/hour_of_code_python_2015
	# hackpad notes: https://hocpython.hackpad.com/Hour-of-Code-Python--oQL8j5m00dp
	#
	# Revised by D. Liu

	from bs4 import BeautifulSoup as BS
	import os
	import requests
	import sys

	def download_file(url, dir='.'):
	    """Download ``url`` into directory ``dir`` and return the saved file name.

	    The file is named after the third-from-last path segment of the URL.
	    For the thumbnail URLs this script targets, e.g.
	    ``http://.../products/0/0/1/1239040/LINEStorePC/thumbnail_shop.png``,
	    that segment is ``1239040``; no file extension is appended.

	    Raises ``requests.HTTPError`` when the server answers with an error
	    status, so an error page is never stored as if it were an image.
	    """
	    local_filename = url.split('/')[-3]

	    # stream=True defers fetching the body so it can be written to disk
	    # chunk by chunk instead of being loaded into memory at once.
	    r = requests.get(url, stream=True)
	    try:
	        r.raise_for_status()
	        with open(os.path.join(dir, local_filename), 'wb') as f:
	            for chunk in r.iter_content(chunk_size=1024):
	                if chunk:  # filter out keep-alive new chunks
	                    f.write(chunk)
	    finally:
	        # With stream=True the connection is only released once the body
	        # is fully consumed or the response is closed explicitly.
	        r.close()
	    return local_filename

	def main():
	    """Download every sticker thumbnail listed on the page given in argv[1].

	    Fetches the page, collects the ``src`` of the image inside each
	    ``<div class="mdCMN05Img">``, creates a folder named after the page
	    title (the text before the first '-') in the current working
	    directory, and saves each image there through ``download_file``.

	    Raises ``requests.HTTPError`` if the page request fails.
	    """
	    # make request and get response
	    res = requests.get(sys.argv[1])
	    res.raise_for_status()

	    # parse txt content of response
	    # NOTE check response_snippet
	    soup = BS(res.text, 'html.parser')

	    # NOTE this way is customized to get div tags
	    divs = soup.find_all('div', 'mdCMN05Img')

	    # get image link from src attributes; a div without an <img>, or an
	    # <img> without a non-empty src, is skipped instead of raising
	    stickers = []
	    for div in divs:
	        img = div.img
	        src = img.get('src') if img is not None else None
	        if src:
	            stickers.append(src)

	    # make folder named as webpage title; fall back to a fixed name when
	    # the page has no <title> or the part before '-' is empty
	    raw_title = soup.title.text if soup.title is not None else ''
	    title = raw_title.split('-')[0].strip() or 'stickers'
	    # exist_ok avoids the check-then-create race of exists() + mkdir()
	    os.makedirs(title, exist_ok=True)

	    # download image
	    download_dir = os.path.join(os.getcwd(), title)
	    for url in stickers:
	        print(download_file(url, download_dir), "downloaded!")

	if __name__ == '__main__':
	    if len(sys.argv) < 2:
	        # sys.exit() with a string writes it to stderr and exits with
	        # status 1, so a missing argument is reported as a failure.
	        sys.exit("Usage: line_sticker_printer.py [URL]")
	    main()

	    # Reference sample of the page markup that main() looks for
	    # (div.mdCMN05Img > img[src]); nothing in the visible code reads it.
	    # The data-gcl values use plain '|': a backslash before it would be
	    # an invalid escape sequence in this non-raw string literal.
	    response_snippet = u"""
	<div class="MdCMN02List">
	<ul class="mdCMN02Ul">

	<li class="mdCMN02Li">
	<a href="/stickershop/product/1236945/zh-Hant" data-gcl="sticker|click|sticker_item_1236945">
	<div class="MdCMN05Item mdCMN05Sticker">
	<div class="mdCMN05Img">
	<img src="https://sdl-stickershop.line.naver.jp/products/0/0/1/1236945/LINEStorePC/thumbnail_shop.png" height="120" width="120">
	</div>
	<p class="mdCMN05Ttl">title of the stickers</p>
	</div></a>
	</li>

	<li class="mdCMN02Li">
	<a href="/stickershop/product/1238025/zh-Hant" data-gcl="sticker|click|sticker_item_1238025">
	<div class="MdCMN05Item mdCMN05Sticker">
	<div class="mdCMN05Img">
	<img src="https://sdl-stickershop.line.naver.jp/products/0/0/1/1238025/LINEStorePC/thumbnail_shop.png" height="120" width="120">
	</div>
	<p class="mdCMN05Ttl">title of the stickers</p>
	</div></a>
	</li>

	<!--..More li tags here...-->

	</ul>
	</div>
	"""