Created
September 17, 2018 10:45
-
-
Save vucalur/fce130d9a686fd5774d7994f1b4babcb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from calibre.web.feeds.recipes import BasicNewsRecipe | |
class JavaTutorialsRecipe(BasicNewsRecipe):
    """Calibre recipe collecting the Concurrency pages of the Java Tutorials.

    The article list is built from the table of contents found at
    ``url_prefix + 'TOC.html'``; only entries whose link starts with
    ``concurrency/`` are kept.
    """

    title = 'Java Tutorials. Trail: Essential Classes - Concurrency'
    description = ''
    cover_url = 'https://docs.oracle.com/javase/tutorial/images/oracle-java-logo.png'
    # Base URL that the relative links found in the TOC are appended to.
    url_prefix = 'https://docs.oracle.com/javase/tutorial/essential/'
    # Relative-link prefix identifying the TOC entries to include.
    section_prefix = 'concurrency/'
    no_stylesheets = True

    # Remove sideNav and other stuff repeated on every single page:
    keep_only_tags = [dict(id=['MainFlow'])]
    remove_tags = [
        {'name': 'div', 'attrs': {'id': 'BreadCrumbs'}},
        {'name': 'div', 'attrs': {'class': 'NavBit'}},
        {'name': 'div', 'attrs': {'class': 'Banner'}},
    ]

    def get_title(self, a):
        """Return the text of anchor tag ``a`` with surrounding whitespace removed.

        ``tag_to_string`` is used instead of ``a.contents[0].strip()`` so that
        an anchor with no children, or whose first child is a nested tag
        rather than plain text, still yields a title instead of raising.
        """
        return self.tag_to_string(a).strip()

    def parse_index(self):
        """Build the feed list from the tutorial's table of contents.

        Returns:
            A one-element list ``[('Java Tutorials', articles)]`` where
            ``articles`` is a list of ``{'title': ..., 'url': ...}`` dicts,
            one per TOC entry linking into ``section_prefix``.

        Raises:
            ValueError: if the TOC page has no ``<div id="PageContent">``.
        """
        toc_url = self.url_prefix + 'TOC.html'
        toc = self.index_to_soup(toc_url)
        main_div = toc.find('div', attrs={'id': 'PageContent'})
        if main_div is None:
            # Fail with an explicit message rather than an AttributeError
            # on None when the page layout is not the expected one.
            raise ValueError(
                'No <div id="PageContent"> found in %s' % toc_url)

        articles = []
        for li in main_div.findAll('li', attrs={'class': 'tocli'}):
            a = li.find('a')
            if a is None:
                # TOC item without a link: nothing to download.
                continue
            href = a.get('href') or ''
            if not href.startswith(self.section_prefix):
                continue
            articles.append({
                'title': self.get_title(a),
                'url': self.url_prefix + href,
            })
        return [('Java Tutorials', articles)]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment