# Gist by @onlurking, created April 17, 2018 03:08
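# Scrapes the Stanford Encyclopedia of Philosophy: reads the table of
# contents, converts each entry's HTML to Markdown with tomd, and writes
# it to a local .md file. Requires requests, beautifulsoup4, html5lib, tomd.
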
from requests import get
from bs4 import BeautifulSoup
from tomd import Tomd
from collections import namedtuple


def parse_page(url):
    """Fetch a page and return its parsed BeautifulSoup tree."""
    html = get(url).content
    soup = BeautifulSoup(html, "html5lib")
    return soup


def get_toc(url):
    """Collect absolute URLs for every entry linked from the table of contents."""
    base = "https://plato.stanford.edu/"
    links = [base + link['href']
             for link in parse_page(url)
             .find('div', {'id': 'content'})
             .find_all('a')
             if link.has_attr('href') and 'entries/' in link['href']]
    return links


def process_page(url):
    """Parse a single entry page into an Article namedtuple."""
    soup = parse_page(url)
    article = soup.find('div', {'id': 'article'})
    Article = namedtuple('Article', 'title markdown author pubdate')
    return Article(title=article.find('h1').text,
                   markdown=Tomd(str(article)).markdown,
                   author=[info for info in soup
                           .find('div', {'id': 'article-copyright'})
                           .text.strip()
                           .split('\n') if len(info) > 0][1],
                   pubdate=soup.find('div', {'id': 'pubinfo'}).text)


def write_article(article):
    """Write the converted Markdown to '<title>.md'."""
    with open("{}.md".format(article.title), 'w') as file:
        file.write(article.markdown)


toc_links = get_toc("https://plato.stanford.edu/contents.html")

# Lazy pipeline: each next() call fetches, converts, and writes one entry.
# next(work) below processes only the first entry in the table of contents.
work = (write_article(process_page(page)) for page in toc_links)
next(work)
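
# To convert every entry rather than just the first, exhaust the generator
# (note: this fetches every article in the encyclopedia):
#
#     for _ in work:
#         pass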