Skip to content

Instantly share code, notes, and snippets.

@vucalur
Created September 17, 2018 10:45
Show Gist options
  • Save vucalur/fce130d9a686fd5774d7994f1b4babcb to your computer and use it in GitHub Desktop.
Save vucalur/fce130d9a686fd5774d7994f1b4babcb to your computer and use it in GitHub Desktop.
from calibre.web.feeds.recipes import BasicNewsRecipe
class JavaTutorialsRecipe(BasicNewsRecipe):
    """Calibre recipe for the Concurrency lesson of the Oracle Java Tutorials.

    The article list is built from the "Essential Classes" trail's table of
    contents (``TOC.html`` under ``url_prefix``); only entries whose link
    starts with ``concurrency/`` are included.
    """

    title = 'Java Tutorials. Trail: Essential Classes - Concurrency'
    description = ''
    cover_url = 'https://docs.oracle.com/javase/tutorial/images/oracle-java-logo.png'
    # Base URL of the trail; TOC hrefs are appended to it verbatim.
    url_prefix = 'https://docs.oracle.com/javase/tutorial/essential/'
    no_stylesheets = True

    # Remove sideNav and other stuff repeated on every single page:
    keep_only_tags = [dict(id=['MainFlow'])]
    remove_tags = [
        {'name': 'div', 'attrs': {'id': 'BreadCrumbs'}},
        {'name': 'div', 'attrs': {'class': 'NavBit'}},
        {'name': 'div', 'attrs': {'class': 'Banner'}},
    ]

    def get_title(self, a):
        """Return the visible text of the TOC anchor ``a``, stripped.

        The text is collected from the whole anchor rather than from its
        first child only, so an anchor that is empty or whose label is
        wrapped in a nested tag (e.g. ``<code>``) no longer raises.
        """
        return self.tag_to_string(a).strip()

    def parse_index(self):
        """Build the feed list from the trail's table of contents.

        Returns:
            A one-element list ``[('Java Tutorials', articles)]`` where
            ``articles`` is a list of ``{'title': ..., 'url': ...}`` dicts,
            in TOC order, one per ``concurrency/`` link.
        """
        toc = self.index_to_soup(self.url_prefix + 'TOC.html')
        main_div = toc.find('div', attrs={'id': 'PageContent'})
        if main_div is None:
            # The wrapper div is absent: search the whole document instead
            # of failing with an AttributeError on None.
            main_div = toc

        articles = []
        for li in main_div.findAll('li', attrs={'class': 'tocli'}):
            a = li.find('a')
            if a is None:
                # TOC entry without a link: nothing to download.
                continue
            href = a.get('href') or ''
            if not href.startswith('concurrency/'):
                continue
            articles.append({
                'title': self.get_title(a),
                'url': self.url_prefix + href,
            })

        return [('Java Tutorials', articles)]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment