Last active
June 29, 2023 08:32
-
-
Save gerald-kim/264ad62c9ef0ed1b1175b21375956cd6 to your computer and use it in GitHub Desktop.
Agile blog recipe for Calibre
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.web.feeds.news import BasicNewsRecipe
class AgileRecipe(BasicNewsRecipe):
    """Calibre recipe that gathers the posts of the Agile blog on egloos.

    Instead of RSS, the recipe walks the blog's monthly archive pages listed
    in ``feeds``. Each archive page becomes one section of the e-book, named
    after its ``YYYY/MM`` suffix, and every link found inside the page's
    ``POST_BODY`` container becomes an article of that section.
    """

    title = u'Agile Blog'
    __author__ = 'Gerald Kim'
    publisher = 'agile.egloos.com'
    category = 'info'
    # Effectively "no limit": the goal is to archive the whole blog.
    oldest_article = 9999
    max_articles_per_feed = 9999
    no_stylesheet = True
    auto_cleanup = True
    remove_javascript = True

    # Monthly archive pages, newest first. Months without an entry here are
    # simply not crawled.
    feeds = [
        u'http://agile.egloos.com/archives/2021/03',
        u'http://agile.egloos.com/archives/2020/05',
        u'http://agile.egloos.com/archives/2020/02',
        u'http://agile.egloos.com/archives/2019/09',
        u'http://agile.egloos.com/archives/2018/12',
        u'http://agile.egloos.com/archives/2018/04',
        u'http://agile.egloos.com/archives/2018/03',
        u'http://agile.egloos.com/archives/2018/02',
        u'http://agile.egloos.com/archives/2017/07',
        u'http://agile.egloos.com/archives/2017/05',
        u'http://agile.egloos.com/archives/2017/04',
        u'http://agile.egloos.com/archives/2017/03',
        u'http://agile.egloos.com/archives/2016/12',
        u'http://agile.egloos.com/archives/2016/07',
        u'http://agile.egloos.com/archives/2015/12',
        u'http://agile.egloos.com/archives/2015/11',
        u'http://agile.egloos.com/archives/2015/09',
        u'http://agile.egloos.com/archives/2015/08',
        u'http://agile.egloos.com/archives/2015/06',
        u'http://agile.egloos.com/archives/2015/05',
        u'http://agile.egloos.com/archives/2015/04',
        u'http://agile.egloos.com/archives/2015/03',
        u'http://agile.egloos.com/archives/2015/02',
        u'http://agile.egloos.com/archives/2014/12',
        u'http://agile.egloos.com/archives/2014/11',
        u'http://agile.egloos.com/archives/2014/09',
        u'http://agile.egloos.com/archives/2014/08',
        u'http://agile.egloos.com/archives/2014/03',
        u'http://agile.egloos.com/archives/2014/01',
        u'http://agile.egloos.com/archives/2013/12',
        u'http://agile.egloos.com/archives/2013/10',
        u'http://agile.egloos.com/archives/2013/08',
        u'http://agile.egloos.com/archives/2013/06',
        u'http://agile.egloos.com/archives/2013/05',
        u'http://agile.egloos.com/archives/2013/04',
        u'http://agile.egloos.com/archives/2013/02',
        u'http://agile.egloos.com/archives/2012/09',
        u'http://agile.egloos.com/archives/2012/08',
        u'http://agile.egloos.com/archives/2012/06',
        u'http://agile.egloos.com/archives/2012/05',
        u'http://agile.egloos.com/archives/2012/03',
        u'http://agile.egloos.com/archives/2011/12',
        u'http://agile.egloos.com/archives/2011/11',
        u'http://agile.egloos.com/archives/2011/10',
        u'http://agile.egloos.com/archives/2011/09',
        u'http://agile.egloos.com/archives/2011/04',
        u'http://agile.egloos.com/archives/2011/03',
        u'http://agile.egloos.com/archives/2011/02',
        u'http://agile.egloos.com/archives/2011/01',
        u'http://agile.egloos.com/archives/2010/12',
        u'http://agile.egloos.com/archives/2010/10',
        u'http://agile.egloos.com/archives/2010/09',
        u'http://agile.egloos.com/archives/2010/08',
        u'http://agile.egloos.com/archives/2010/07',
        u'http://agile.egloos.com/archives/2010/06',
        u'http://agile.egloos.com/archives/2010/05',
        u'http://agile.egloos.com/archives/2010/04',
        u'http://agile.egloos.com/archives/2010/03',
        u'http://agile.egloos.com/archives/2010/02',
        u'http://agile.egloos.com/archives/2010/01',
        u'http://agile.egloos.com/archives/2009/12',
        u'http://agile.egloos.com/archives/2009/11',
        u'http://agile.egloos.com/archives/2009/10',
        u'http://agile.egloos.com/archives/2009/09',
        u'http://agile.egloos.com/archives/2009/08',
        u'http://agile.egloos.com/archives/2009/07',
        u'http://agile.egloos.com/archives/2009/06',
        u'http://agile.egloos.com/archives/2009/05',
        u'http://agile.egloos.com/archives/2009/04',
        u'http://agile.egloos.com/archives/2009/03',
        u'http://agile.egloos.com/archives/2009/02',
        u'http://agile.egloos.com/archives/2009/01',
        u'http://agile.egloos.com/archives/2008/12',
        u'http://agile.egloos.com/archives/2008/11',
        u'http://agile.egloos.com/archives/2008/10',
        u'http://agile.egloos.com/archives/2008/09',
        u'http://agile.egloos.com/archives/2008/08',
        u'http://agile.egloos.com/archives/2008/07',
        u'http://agile.egloos.com/archives/2008/06',
        u'http://agile.egloos.com/archives/2008/05',
        u'http://agile.egloos.com/archives/2008/04',
        u'http://agile.egloos.com/archives/2008/03',
        u'http://agile.egloos.com/archives/2008/02',
        u'http://agile.egloos.com/archives/2008/01',
        u'http://agile.egloos.com/archives/2007/12',
        u'http://agile.egloos.com/archives/2007/11',
        u'http://agile.egloos.com/archives/2007/10',
        u'http://agile.egloos.com/archives/2007/09',
        u'http://agile.egloos.com/archives/2007/08',
        u'http://agile.egloos.com/archives/2007/07',
        u'http://agile.egloos.com/archives/2007/06',
        u'http://agile.egloos.com/archives/2007/05',
        u'http://agile.egloos.com/archives/2007/04',
        u'http://agile.egloos.com/archives/2007/03',
        u'http://agile.egloos.com/archives/2007/02',
        u'http://agile.egloos.com/archives/2007/01',
        u'http://agile.egloos.com/archives/2006/12',
        u'http://agile.egloos.com/archives/2006/11',
        u'http://agile.egloos.com/archives/2006/10',
        u'http://agile.egloos.com/archives/2006/09',
        u'http://agile.egloos.com/archives/2006/08',
        u'http://agile.egloos.com/archives/2006/07',
        u'http://agile.egloos.com/archives/2006/06',
        u'http://agile.egloos.com/archives/2006/05',
        u'http://agile.egloos.com/archives/2006/04',
        u'http://agile.egloos.com/archives/2006/03',
        u'http://agile.egloos.com/archives/2006/02',
    ]

    def parse_index(self):
        """Build the section/article index from the monthly archive pages.

        Archive pages are visited in the reverse of the order in which
        ``get_feeds()`` returns them, and the links collected from each page
        are reversed as well, so the book runs in the opposite order to the
        listings (presumably oldest first - the ``feeds`` list is written
        newest first).

        Returns:
            A list of ``(section_title, articles)`` tuples, where
            ``section_title`` is the last seven characters of the archive URL
            (``YYYY/MM``) and ``articles`` is a list of dicts with ``url`` and
            ``title`` keys.
        """
        self.log.warning("parse_index")
        site_root = 'http://agile.egloos.com'
        totalfeeds = []

        # Iterate over a reversed *copy*: get_feeds() may hand back the
        # class-level ``feeds`` list itself, and reversing that in place would
        # flip the order again on any later call.
        # (For a quick test run, slice this down to a single archive page.)
        for feedurl in reversed(list(self.get_feeds())):
            self.report_progress(0, 'Fetching feed %s...' % feedurl)
            soup = self.index_to_soup(feedurl)
            body = soup.find('div', attrs={'class': 'POST_BODY'})
            if body is None:
                # Layout changed or the page failed to load properly; skip
                # this month rather than abort the whole crawl.
                self.log.warning("no POST_BODY found in " + feedurl)
                continue

            articles = []
            for item in body.findAll('a'):
                href = item.get('href')
                if not href:
                    # Named anchors and similar carry no target to fetch.
                    continue
                if href.startswith('/archives'):
                    self.log.warning("skipping archives" + href)
                    continue
                url = site_root + href
                # tag_to_string flattens nested markup to plain text; fall
                # back to the URL so the article never ends up untitled.
                title = self.tag_to_string(item).strip() or url
                articles.append({'url': url, 'title': title})

            articles.reverse()
            totalfeeds.append((feedurl[-7:], articles))

        return totalfeeds

    # NOTE: disabled alternative to ``auto_cleanup`` kept for reference - it
    # keeps only the POST_BODY container and empties the tag list inside it.
    #
    # def preprocess_raw_html(self, raw_html, url):
    #     soup = BeautifulSoup(raw_html)
    #     post = soup.find('div', attrs={'class':'POST_BODY'})
    #     posttaglist = post.find('div', attrs={'class':'posttaglist'})
    #     if posttaglist:
    #         posttaglist.clear()
    #     soup = BeautifulSoup(u'''<html><body></body></html>''')
    #     soup.body.insert(0, post)
    #     return str(soup)
이걸 해야 하다니.. 우울하네요. 이글루스 밉다..
PDF가 158MB네요. 코드를 안 적어둔 이유가 있었군요.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
macOS(OS X)가 아닌 다른 OS 사용자는 Calibre 설치 후 ebook-convert 실행 파일을 찾아서 실행하시면 됩니다.