uchida · December 24, 2015 09:59
diff --git a/aozora_epub.py b/aozora_epub.py
 #!/usr/bin/env python
 # -*- coding: utf-8 -*-
 # by Akihiro Uchida, CC0 dedicated to the public domain
 # see http://creativecommons.org/publicdomain/zero/1.0/
 import os.path
 import time
 import re
 import subprocess
 import atexit
 import cPickle as pickle
 from datetime import datetime
 from zipfile import ZipFile
 from StringIO import StringIO
 import requests
 import unicodecsv

 basedir = os.path.dirname(__file__)
 pkl_fpath = os.path.join(basedir, 'exit.pkl')
 list_all_url = 'http://www.aozora.gr.jp/index_pages/list_person_all_extended_utf8.zip'
 datefmt = '%Y-%m-%d'


 def normalize_name(family, given):
    """Combine a family name and a given name into one display name.

    When the concatenated name starts with katakana, the non-empty parts
    are joined with a middle dot; otherwise the result is simply
    ``family + given``.

    :param family: family name as a unicode string (may be empty)
    :param given: given name as a unicode string (may be empty)
    :returns: the combined unicode name
    """
    name = family + given
    # ``u'...'`` rather than ``ur'...'``: the pattern contains no backslashes,
    # so the value is the same, and ``ur`` is a syntax error on Python 3.
    # match() anchors only at the start, so the leading characters alone
    # decide whether the name is treated as katakana.
    if re.match(u'[ァ-ヶー]+', name):
        return u'・'.join([part for part in (family, given) if part])
    return name


 class AozoraList(object):
    """Iterator over the catalogue entries that have a ZIP text file.

    Construction downloads the catalogue archive from ``list_all_url`` and,
    when ``pkl_fpath`` exists, loads the time stored there. Each iteration
    step returns a dict with the keys ``author``, ``title``, ``url``,
    ``modified`` (a ``datetime``), ``sylab`` and ``exited``.
    """

    def __init__(self):
        """Load the stored exit time and read the catalogue CSV.

        Errors from ``requests.get``, ``ZipFile`` and ``pickle.load`` are
        not caught here and propagate to the caller.
        """
        # The epoch stands in for "no previous run recorded".
        self.last_exited = datetime.fromtimestamp(0)
        if os.path.exists(pkl_fpath):
            # The open mode is left as it was so it stays consistent with
            # whatever mode the file was written in.
            # NOTE: pickle.load must only ever see files this script wrote.
            with open(pkl_fpath) as f:
                self.last_exited = pickle.load(f)
        req = requests.get(list_all_url)
        # The CSV inside the archive is named after the archive itself.
        basename = os.path.splitext(os.path.basename(list_all_url))[0]
        # 'cp932' must be a string; as a bare name it raised NameError.
        encoding = 'utf-8' if basename.endswith('utf8') else 'cp932'
        # Read the CSV into memory while the archive is open. Previously the
        # reader stayed attached to handles the ``with`` blocks had closed.
        with ZipFile(StringIO(req.content)) as zf:
            csv_bytes = zf.read(basename + '.csv')
        self.csv_reader = unicodecsv.DictReader(StringIO(csv_bytes),
                                                encoding=encoding)

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def next(self):
        """Return the next catalogue row that has a ZIP text file.

        Rows with any empty field, or whose text-file URL does not end in
        ``.zip``, are skipped.

        :returns: dict describing one work (see the class docstring)
        :raises StopIteration: when the CSV reader is exhausted
        """
        while True:
            row = next(self.csv_reader)
            item = {'author': normalize_name(row[u'姓'], row[u'名']),
                    'title': row[u'作品名'],
                    'url': row[u'テキストファイルURL'],
                    'modified': row[u'テキストファイル最終更新日'],
                    # [:1] instead of [0]: an empty sort key becomes an empty
                    # string, which the all() filter below skips, rather
                    # than raising IndexError.
                    'sylab': row[u'姓読みソート用'][:1],
                    'exited': self.last_exited}
            if all(item.values()) and os.path.splitext(item['url'])[-1] == '.zip':
                item['modified'] = datetime.strptime(item['modified'], datefmt)
                return item

    # Python 3 looks for ``__next__``; keep ``next`` for Python 2 callers.
    __next__ = next


 class ZipData(object):
    """One work's downloaded ZIP archive and its EPUB conversion.

    ``fpath`` is the location of the ZIP under ``basedir/zip``; the EPUB is
    written to the matching directory under ``basedir/epub``.
    """

    def __init__(self, item):
        """Download the work's ZIP unless an up-to-date copy already exists.

        :param item: dict with the keys ``sylab``, ``author``, ``title``,
            ``url``, ``modified`` and ``exited``
        """
        # Keep the item on the instance: to_epub needs the same fields and
        # must not depend on a module-level variable that happens to be
        # called ``item``.
        self.item = item
        self.fpath = os.path.join(basedir, 'zip', item['sylab'],
                                  item['author'], item['title'] + '.zip')
        if os.path.exists(self.fpath) and item['exited'] > item['modified']:
            return
        r = requests.get(item['url'])
        # Pause after every request -- presumably to go easy on the server.
        time.sleep(30)
        if r.status_code != 200:
            return
        parent_dir = os.path.dirname(self.fpath)
        if not os.path.isdir(parent_dir):
            os.makedirs(parent_dir)
        # Binary mode: the payload is a ZIP archive, not text.
        with open(self.fpath, 'wb') as f:
            f.write(r.content)

    def to_epub(self):
        """Convert the downloaded ZIP to EPUB by running AozoraEpub3.

        Does nothing when the ZIP is missing (the download failed) or when
        an up-to-date EPUB already exists.

        :raises subprocess.CalledProcessError: if the java process exits
            with a non-zero status
        """
        item = self.item
        if not os.path.exists(self.fpath):
            # Nothing was downloaded, so there is nothing to convert.
            return
        fpath = os.path.join(basedir, 'epub', item['sylab'],
                             item['author'], item['title'] + '.epub')
        if os.path.exists(fpath) and item['exited'] > item['modified']:
            return
        parent_dir = os.path.dirname(fpath)
        if not os.path.isdir(parent_dir):
            os.makedirs(parent_dir)
        # NOTE(review): the jar and preset paths are relative, so this
        # presumably has to run from the directory containing AozoraEpub3/
        # -- confirm.
        subprocess.check_call(['java', '-Dfile.encoding=UTF-8',
                               '-cp', 'AozoraEpub3/AozoraEpub3.jar', 'AozoraEpub3',
                               '-i', 'AozoraEpub3/presets/reader.ini',
                               '-d', parent_dir, '-of', self.fpath])


 @atexit.register
 def save_exit_date():
    """Pickle the current local time into ``pkl_fpath``.

    Registered with ``atexit``, so it runs when the interpreter exits.
    """
    now = datetime.now()
    with open(pkl_fpath, 'w') as out:
        pickle.dump(now, out)

 if __name__ == '__main__':
    # Walk the catalogue: fetch each work's ZIP, then convert it to EPUB.
    # NOTE(review): the loop variable is deliberately a module-level name
    # called ``item``; check that ZipData.to_epub does not look it up as a
    # global before renaming it or moving this loop into a function.
    for item in AozoraList():
        ZipData(item).to_epub()
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	# by Akihiro Uchida, CC0 dedicated to the public domain
	# see http://creativecommons.org/publicdomain/zero/1.0/
	import os.path
	import time
	import re
	import subprocess
	import atexit
	import cPickle as pickle
	from datetime import datetime
	from zipfile import ZipFile
	from StringIO import StringIO
	import requests
	import unicodecsv

	basedir = os.path.dirname(__file__)
	pkl_fpath = os.path.join(basedir, 'exit.pkl')
	list_all_url = 'http://www.aozora.gr.jp/index_pages/list_person_all_extended_utf8.zip'
	datefmt = '%Y-%m-%d'


	def normalize_name(family, given):
	    """Combine a family name and a given name into one display name.

	    When the concatenated name starts with katakana, the non-empty parts
	    are joined with a middle dot; otherwise the result is simply
	    ``family + given``.

	    :param family: family name as a unicode string (may be empty)
	    :param given: given name as a unicode string (may be empty)
	    :returns: the combined unicode name
	    """
	    name = family + given
	    # ``u'...'`` rather than ``ur'...'``: the pattern contains no
	    # backslashes, so the value is the same, and ``ur`` is a syntax error
	    # on Python 3. match() anchors only at the start, so the leading
	    # characters alone decide whether the name is treated as katakana.
	    if re.match(u'[ァ-ヶー]+', name):
	        return u'・'.join([part for part in (family, given) if part])
	    return name


	class AozoraList(object):
	    """Iterator over the catalogue entries that have a ZIP text file.

	    Construction downloads the catalogue archive from ``list_all_url``
	    and, when ``pkl_fpath`` exists, loads the time stored there. Each
	    iteration step returns a dict with the keys ``author``, ``title``,
	    ``url``, ``modified`` (a ``datetime``), ``sylab`` and ``exited``.
	    """

	    def __init__(self):
	        """Load the stored exit time and read the catalogue CSV.

	        Errors from ``requests.get``, ``ZipFile`` and ``pickle.load`` are
	        not caught here and propagate to the caller.
	        """
	        # The epoch stands in for "no previous run recorded".
	        self.last_exited = datetime.fromtimestamp(0)
	        if os.path.exists(pkl_fpath):
	            # The open mode is left as it was so it stays consistent with
	            # whatever mode the file was written in.
	            # NOTE: pickle.load must only ever see files this script wrote.
	            with open(pkl_fpath) as f:
	                self.last_exited = pickle.load(f)
	        req = requests.get(list_all_url)
	        # The CSV inside the archive is named after the archive itself.
	        basename = os.path.splitext(os.path.basename(list_all_url))[0]
	        # 'cp932' must be a string; as a bare name it raised NameError.
	        encoding = 'utf-8' if basename.endswith('utf8') else 'cp932'
	        # Read the CSV into memory while the archive is open. Previously
	        # the reader stayed attached to handles the ``with`` blocks had
	        # closed.
	        with ZipFile(StringIO(req.content)) as zf:
	            csv_bytes = zf.read(basename + '.csv')
	        self.csv_reader = unicodecsv.DictReader(StringIO(csv_bytes),
	                                                encoding=encoding)

	    def __iter__(self):
	        """Return self; the object is its own iterator."""
	        return self

	    def next(self):
	        """Return the next catalogue row that has a ZIP text file.

	        Rows with any empty field, or whose text-file URL does not end in
	        ``.zip``, are skipped.

	        :returns: dict describing one work (see the class docstring)
	        :raises StopIteration: when the CSV reader is exhausted
	        """
	        while True:
	            row = next(self.csv_reader)
	            item = {'author': normalize_name(row[u'姓'], row[u'名']),
	                    'title': row[u'作品名'],
	                    'url': row[u'テキストファイルURL'],
	                    'modified': row[u'テキストファイル最終更新日'],
	                    # [:1] instead of [0]: an empty sort key becomes an
	                    # empty string, which the all() filter below skips,
	                    # rather than raising IndexError.
	                    'sylab': row[u'姓読みソート用'][:1],
	                    'exited': self.last_exited}
	            if all(item.values()) and os.path.splitext(item['url'])[-1] == '.zip':
	                item['modified'] = datetime.strptime(item['modified'], datefmt)
	                return item

	    # Python 3 looks for ``__next__``; keep ``next`` for Python 2 callers.
	    __next__ = next


	class ZipData(object):
	    """One work's downloaded ZIP archive and its EPUB conversion.

	    ``fpath`` is the location of the ZIP under ``basedir/zip``; the EPUB
	    is written to the matching directory under ``basedir/epub``.
	    """

	    def __init__(self, item):
	        """Download the work's ZIP unless an up-to-date copy already exists.

	        :param item: dict with the keys ``sylab``, ``author``, ``title``,
	            ``url``, ``modified`` and ``exited``
	        """
	        # Keep the item on the instance: to_epub needs the same fields
	        # and must not depend on a module-level variable that happens to
	        # be called ``item``.
	        self.item = item
	        self.fpath = os.path.join(basedir, 'zip', item['sylab'],
	                                  item['author'], item['title'] + '.zip')
	        if os.path.exists(self.fpath) and item['exited'] > item['modified']:
	            return
	        r = requests.get(item['url'])
	        # Pause after every request -- presumably to go easy on the server.
	        time.sleep(30)
	        if r.status_code != 200:
	            return
	        parent_dir = os.path.dirname(self.fpath)
	        if not os.path.isdir(parent_dir):
	            os.makedirs(parent_dir)
	        # Binary mode: the payload is a ZIP archive, not text.
	        with open(self.fpath, 'wb') as f:
	            f.write(r.content)

	    def to_epub(self):
	        """Convert the downloaded ZIP to EPUB by running AozoraEpub3.

	        Does nothing when the ZIP is missing (the download failed) or
	        when an up-to-date EPUB already exists.

	        :raises subprocess.CalledProcessError: if the java process exits
	            with a non-zero status
	        """
	        item = self.item
	        if not os.path.exists(self.fpath):
	            # Nothing was downloaded, so there is nothing to convert.
	            return
	        fpath = os.path.join(basedir, 'epub', item['sylab'],
	                             item['author'], item['title'] + '.epub')
	        if os.path.exists(fpath) and item['exited'] > item['modified']:
	            return
	        parent_dir = os.path.dirname(fpath)
	        if not os.path.isdir(parent_dir):
	            os.makedirs(parent_dir)
	        # NOTE(review): the jar and preset paths are relative, so this
	        # presumably has to run from the directory containing
	        # AozoraEpub3/ -- confirm.
	        subprocess.check_call(['java', '-Dfile.encoding=UTF-8',
	                               '-cp', 'AozoraEpub3/AozoraEpub3.jar', 'AozoraEpub3',
	                               '-i', 'AozoraEpub3/presets/reader.ini',
	                               '-d', parent_dir, '-of', self.fpath])


	@atexit.register
	def save_exit_date():
	    """Pickle the current local time into ``pkl_fpath``.

	    Registered with ``atexit``, so it runs when the interpreter exits.
	    """
	    with open(pkl_fpath, 'w') as f:
	        pickle.dump(datetime.now(), f)

	if __name__ == '__main__':
	    # Walk the catalogue: fetch each work's ZIP, then convert it to EPUB.
	    # NOTE(review): the loop variable is deliberately a module-level name
	    # called ``item``; check that ZipData.to_epub does not look it up as
	    # a global before renaming it or moving this loop into a function.
	    for item in AozoraList():
	        zipped = ZipData(item)
	        zipped.to_epub()
No results found