Skip to content

Instantly share code, notes, and snippets.

@aajjbb
Created December 30, 2015 01:16
Show Gist options
  • Save aajjbb/89d1cc8d3d5a87137f79 to your computer and use it in GitHub Desktop.
"""
aajjbb
python3 script based on ricbit script to dowload all books from computer science section of Springer
It has BeautifulSoup as dependency, easy installed with 'pip install beautifulsoup4'
"""
import re, subprocess, time, sys
from urllib.request import urlopen
from bs4 import BeautifulSoup
pmax = 456
target_file = open("test.txt", "w")
ROOT_URL = 'http://link.springer.com/search/page/%d?facet-discipline="Computer+Science"&showAll=false&facet-language="En"&facet-content-type="Book'
def getBookInfo(url):
    """Extract title and relative page URL from one search-result link.

    Accepts a bs4 Tag (or anything whose str() is an <a> element) and
    returns a dict with 'book_title' and 'page_url'.  When the anchor
    carries no text, the href doubles as the title.
    """
    anchor = BeautifulSoup(str(url), "html.parser").a
    href = anchor['href']
    title = anchor.string or href
    return {'book_title': title, 'page_url': href}
def getDownloadUrl(url):
    """Return the absolute PDF download URL for a book page, or None.

    Fetches *url*, scans its anchors for the first one whose ``id``
    starts with "toc-download-book-pdf", and returns that anchor's href
    prefixed with the Springer host.  Returns None when no such anchor
    exists (e.g. the book has no downloadable PDF).
    """
    page = BeautifulSoup(urlopen(url), "html.parser")
    # The original also extracted the <h1 id="title"> text into an
    # unused local; that dead code is dropped here.
    for link in page.find_all("a"):
        # Not every anchor has an id attribute; .get() returns None
        # instead of raising, replacing the original bare `except: pass`.
        tag_id = link.get("id")
        if tag_id and str(tag_id).startswith("toc-download-book-pdf"):
            return "http://link.springer.com" + str(link["href"])
    return None
# Crawl every search-result page, resolve each book's PDF link, and
# download it with wget (saved under the title with spaces dashed out).
for i in range(1, pmax):
    index = ROOT_URL % i
    page = urlopen(index)
    soup = BeautifulSoup(page, 'html.parser')
    urls = soup.find_all("a", class_="title")
    for url in urls:
        book_info = getBookInfo(url)
        download_url = getDownloadUrl('http://link.springer.com' + book_info['page_url'])
        if download_url:
            # Pass argv as a list: the original formatted one string and
            # .split() it, which breaks if the URL or the dash-joined
            # title contains whitespace-sensitive characters.
            filename = str(book_info['book_title']).replace(' ', '-')
            ret = subprocess.Popen(["wget", download_url, "-O", filename])
            ret.wait()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment