Last active
August 29, 2015 14:25
-
-
Save mickeypash/c189adfd2e2071af07a8 to your computer and use it in GitHub Desktop.
Scraping practice.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
import requests | |
from bs4 import BeautifulSoup | |
def make_soup(url):
    """Fetch *url* over HTTP and return its body parsed as a BeautifulSoup tree.

    Uses the raw response bytes with the 'lxml' parser backend.
    """
    response = requests.get(url)
    return BeautifulSoup(response.content, 'lxml')
def get_chapters_links(url):
    """Return the href of every chapter link on the book index page at *url*.

    Links are taken from the <div class="inside"> navigation block; anchors
    whose text is 'Index', 'Errata' or 'References' are skipped.
    """
    ignored_chaps = ('Index', 'Errata', 'References')
    nav = make_soup(url).find('div', class_='inside')
    hrefs = []
    for anchor in nav.find_all('a'):
        if anchor.get_text() not in ignored_chaps:
            hrefs.append(anchor.get('href'))
    return hrefs
def get_sections(url): | |
chapter = make_soup(url).find('div', class_='chapter') | |
print chapter.h1.get_text() | |
section_list = chapter.find_all('div', class_='section') | |
section_titles = [sec.h2.get_text() for sec in section_list] | |
for section in section_list: | |
section_title = section.h2.get_text() | |
print " %s" % section_title | |
def get_paragraph():
    """Placeholder for paragraph extraction; not yet implemented, returns None."""
    pass
def save_to_file(filename='book_sections.txt'):
    """Placeholder writer: creates/truncates *filename* but writes nothing yet."""
    with open(filename, 'w+') as out_file:
        pass  # TODO: write the scraped sections into out_file
if __name__ == '__main__':
    # Scrape the book's index page, then print section listings for the
    # first three chapters only (a small sample while practicing).
    book_url = 'http://searchuserinterfaces.com/book/'
    for chapter_url in get_chapters_links(book_url)[:3]:
        get_sections(chapter_url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment