Created
November 22, 2010 08:53
-
-
Save oquno/709699 to your computer and use it in GitHub Desktop.
zip2mobi
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# -*- coding: utf-8 -*- | |
require 'cgi' | |
require 'open-uri' | |
require 'rubygems' | |
require 'zipruby' | |
require 'RMagick' | |
require 'kconv' | |
require 'logger' | |
require 'amazon/aws' | |
require 'amazon/aws/search' | |
# Directory exported as AMAZONRCDIR for the amazon/aws library
# (presumably where it looks for its rc file -- confirm against the library).
ENV['AMAZONRCDIR'] = './'
# Credentials used by Zip2mobi#get_author to look up the author of the book.
ASSOCIATES_ID = "YOUR_ID"
AWS_SECRET = 'YOUR_SECRET'
AWS_KEY_ID = "YOUR_AWS_KEY_ID"
# Directory that contains zip2mobi.rb, log/* and the kindlegen executable.
# Must end with a slash: paths are built by plain string concatenation.
SCRIPTS_DIR = "/path/to/scripts/dir/"
# Directory where the generated .epub and .mobi files are saved
# (trailing slash required for the same reason).
MOBI_DIR = "/path/to/mobidir/"
# Archive entries whose names match this pattern are converted.
# Case-insensitive, accepts both .jpg and .jpeg, and is anchored at the very
# end of the name so that "a.png\nfoo" does not match.
SUPPORT_FORMAT = /\.(png|jpe?g|gif)\z/i
# Target page size in pixels: ROW is the height, COL the width
# (they are passed to resize_to_fit! as columns, rows).
ROW = 1024
COL = 768
# Global logger shared by the whole script: writes to
# log/zip2mobi.log under SCRIPTS_DIR, rotated daily, at DEBUG level.
$log = Logger.new("#{SCRIPTS_DIR}log/zip2mobi.log", 'daily')
$log.level = Logger::DEBUG
class Zip2mobi | |
include Amazon::AWS | |
include Amazon::AWS::Search | |
# Converts the images inside the zip at +path+ into an EPUB written to
# +epubname+.
#
# Entries whose names match SUPPORT_FORMAT are ordered by the numbers embedded
# in their names; each one is trimmed, scaled to fit COL x ROW and stored under
# OEBPS/ together with a one-image XHTML page. An image that is not taller than
# it is wide is split into two pages, right half first. content.opf and
# toc.ncx are written last from the collected page list.
#
# path     - location of the source zip, handed to Kernel#open.
# epubname - path of the EPUB archive to create.
def convertZip(path, epubname)
  # The mimetype entry is written first, in its own pass opened with
  # Zip::NO_COMPRESSION, before any other entry is added.
  Zip::Archive.open(epubname, Zip::CREATE, Zip::NO_COMPRESSION) do |epub|
    epub.add_or_replace_buffer "mimetype", "application/epub+zip"
  end
  Zip::Archive.open(epubname) do |epub|
    epub.add_or_replace_buffer "META-INF/container.xml", container_xml
    xhtmls = [] # [xhtml file name, image file name] pairs in reading order
    open(path) do |zip_file|
      Zip::Archive.open_buffer(zip_file.read) do |zip|
        # Each supported entry becomes [name, key, key, ...]: one key per digit
        # run in the name, left-padded with zeros to 16 characters and followed
        # by the next word character (or a space), so that "2.jpg" sorts
        # before "10.jpg".
        keyed = zip.map do |f|
          next nil unless f.name.match(SUPPORT_FORMAT)
          keys = f.name.scan(/(\d+)(\w?)/).map do |num, letter|
            "0000000000000000#{num}"[-16, 16] + (letter.empty? ? " " : letter)
          end
          [f.name] + keys
        end
        # The first differing key decides; entries that agree on all keys they
        # both have compare as equal.
        sorted_images = keyed.compact.sort do |a, b|
          order = nil
          [a.length, b.length].min.times do |k|
            order = a[k + 1] <=> b[k + 1] if order.nil? && a[k + 1] != b[k + 1]
          end
          order || 0
        end
        sorted_images.each_with_index do |entry, i|
          source_name = entry.first
          # Replace every character outside [A-Za-z0-9_/.-] in the entry name.
          nname = source_name.gsub(/[^\w\/\.\-]/, '_')
          $log.debug source_name
          image = Magick::Image.from_blob(zip.fopen(source_name).read)[0]
          if image.rows > image.columns
            # comment out if like to change contrast
            # image.contrast#.contrast.contrast
            image = trim_margins(image)
            image.resize_to_fit!(COL, ROW)
            epub.add_or_replace_buffer "OEBPS/#{nname}", image.to_blob
            epub.add_or_replace_buffer "OEBPS/#{i}.xhtml", page_xhtml(nname, nname, image.columns, image.rows)
            $log.debug "added #{nname}"
            xhtmls << ["#{i}.xhtml", nname]
          else
            # split if landscape image
            image.resize_to_fit!(COL * 2, ROW)
            # half 0 is cropped from the middle (right half), half 1 from x = 0
            # (left half). The page index i is deliberately left untouched so
            # both halves are always emitted as <i>_0.xhtml and <i>_1.xhtml.
            [0, 1].each do |a|
              d = image.crop(a > 0 ? 0 : (image.columns / 2).floor, 0, image.columns / 2, image.rows, true)
              d = trim_margins(d)
              d.resize_to_fit!(COL, ROW)
              dname = nname.sub(/(\.[^\.]+)$/) { "_#{a + 1}#$1" }
              epub.add_or_replace_buffer "OEBPS/#{dname}", d.to_blob
              epub.add_or_replace_buffer "OEBPS/#{i}_#{a}.xhtml", page_xhtml(dname, dname, d.columns, d.rows)
              $log.debug dname
              xhtmls << ["#{i}_#{a}.xhtml", dname]
              d.destroy!
            end
          end
          image.destroy!
          run_gc
        end
      end # open zipFile
    end # open path
    epub.add_or_replace_buffer "OEBPS/content.opf", content_opf(xhtmls)
    epub.add_or_replace_buffer "OEBPS/toc.ncx", toc_ncx(xhtmls)
  end # open epub
end

# Trims the border of +image+ using a 60% fuzz and returns the image to keep
# using: the trimmed copy, or +image+ itself when the trimmed result is only
# one row high. The image that is not returned is destroyed.
def trim_margins(image)
  image.fuzz = '60%'
  trimmed = image.trim
  if trimmed.rows == 1
    trimmed.destroy!
    image
  else
    image.destroy!
    trimmed
  end
end
private :trim_margins
# Runs a garbage-collection pass even when GC is currently disabled, then
# puts GC back into the enabled/disabled state it was in before the call.
def run_gc
  was_disabled = GC.enable # GC.enable returns true if GC had been disabled
  GC.start
  GC.disable if was_disabled
end
# Returns the contents of META-INF/container.xml, which names
# OEBPS/content.opf as the root file of the EPUB.
def container_xml
  <<__XML__
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml" />
</rootfiles>
</container>
__XML__
end
# Returns the OPF package document (OEBPS/content.opf).
#
# files - array of [xhtml file name, image file name] pairs; only the first
#         element of each pair is used here, once in the manifest and once in
#         the spine, in the given order.
#
# Reads @title and @author (both must respond to #toutf8). They are
# HTML-escaped like every other interpolated value so that a title such as
# "Tom & Jerry" still yields well-formed XML.
def content_opf(files)
  title = CGI.escapeHTML(@title.toutf8)
  author = CGI.escapeHTML(@author.toutf8)
  manifest_items = files.map do |f|
    ' <item id="%s" href="%s" media-type="application/xhtml+xml"/>' % [CGI.escapeHTML(f[0]), CGI.escapeHTML(f[0])]
  end.join("\n")
  spine_items = files.map { |f| ' <itemref idref="%s"/>' % CGI.escapeHTML(f[0]) }.join("\n")
  <<__XML__
<?xml version="1.0"?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookID" version="2.0">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:title>#{title}</dc:title>
<dc:creator opf:role="aut">#{author}</dc:creator>
<dc:language>ja</dc:language>
<dc:identifier id="BookID" opf:scheme="URL">http://example.com/epub/sample/sample1.epub</dc:identifier>
</metadata>
<manifest>
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
#{manifest_items}
</manifest>
<spine toc="ncx">
#{spine_items}
</spine>
</package>
__XML__
end
# Returns the NCX table of contents (OEBPS/toc.ncx).
#
# files - array of [xhtml file name, label] pairs. Each pair becomes one
#         navPoint, numbered from 1 in the given order; the label is converted
#         with #to_s and both values are HTML-escaped.
def toc_ncx(files)
  head = <<__XML__
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"
"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" dir="rtl">
<head>
<meta name="dtb:uid"
content="http://example.com/epub/sample/sample1.epub"/>
<meta name="dtb:depth" content="1"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>sample</text>
</docTitle>
<navMap>
__XML__
  # Collect the fragments and join once instead of rebuilding the whole
  # document string for every page.
  nav_points = []
  files.each_with_index do |f, i|
    nav_points << <<__XML__
<navPoint id="navPoint-#{i + 1}" playOrder="#{i + 1}">
<navLabel>
<text>#{CGI.escapeHTML(f[1].to_s)}</text>
</navLabel>
<content src="#{CGI.escapeHTML(f[0])}"/>
</navPoint>
__XML__
  end
  tail = <<__XML__
</navMap>
</ncx>
__XML__
  head + nav_points.join + tail
end
# Returns an XHTML page that shows a single image.
#
# title - text for the <title> element.
# img   - value of the image's src attribute.
# w, h  - width and height attribute values (converted with #to_s).
#
# All four values are HTML-escaped before being inserted.
def page_xhtml(title, img, w, h)
  <<__XML__
<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<title>#{CGI.escapeHTML(title)}</title>
<style>
img { vertical-align: middle; }
</style>
</head>
<body><img src="#{CGI.escapeHTML(img)}" width="#{CGI.escapeHTML(w.to_s)}" height="#{CGI.escapeHTML(h.to_s)}"/></body>
</html>
__XML__
end
# Converts the zip at +zip_path+ into <title>.epub and <title>.mobi inside
# MOBI_DIR, where the title is the zip's base name without ".zip".
#
# Sets @title and @author (via get_author) for the metadata writers. The EPUB
# is built only when it does not exist yet, and kindlegen is run only when the
# .mobi does not exist yet; kindlegen's standard output is written to the log.
def makemobi(zip_path)
  @title = File.basename(zip_path, ".zip")
  epub_path = "#{MOBI_DIR}#{@title}.epub"
  mobi_path = "#{MOBI_DIR}#{@title}.mobi"
  $log.debug "title: #{@title}"
  @author = get_author(@title)
  $log.debug "convert to #{epub_path}"
  $log.debug "convert to #{mobi_path}"
  convertZip(zip_path, epub_path) unless File.exist?(epub_path)
  $log.debug "#{SCRIPTS_DIR}kindlegen \"#{epub_path}\" -unicode"
  return if File.exist?(mobi_path)
  # Run kindlegen with an argument vector instead of a shell command line:
  # the path is derived from the input file name, and quotes, backticks or
  # $(...) in it must not be interpreted by a shell.
  kindlegen = ["#{SCRIPTS_DIR}kindlegen", epub_path, "-unicode"]
  $log.debug IO.popen(kindlegen) { |io| io.read }
end
# Looks up the author of the book called +title+ with an Amazon ItemSearch
# in the 'Books' index (locale 'jp').
#
# A trailing volume marker is removed from the title before searching:
# "第<n>巻" (volume n), "第<n>部" (part n), "part <n>" or a bare number,
# together with the whitespace in front of it.
#
# Returns the author attribute of the first result item that has one, or the
# string "noname" when no item has an author.
def get_author(title)
  $log.debug "get author: #{title}"
  search_title = title.sub(/\s*(第\d+(巻|部)|(part\s*)?\d+)$/i, '')
  $log.debug "search: #{search_title}"
  request = Request.new(AWS_KEY_ID, ASSOCIATES_ID, 'jp', false)
  request.config['secret_key_id'] = AWS_SECRET
  response = request.search(ItemSearch.new('Books', {'Title' => search_title}))
  response.item_search_response[0].items.item.each do |item|
    author = item.item_attributes.author
    $log.debug author.to_s
    return author if author
  end
  "noname"
end
end | |
# Command-line entry point: convert the zip named by the first argument and
# log how long the run took.
if ARGV.length > 0
  start = Time.now
  zip = ARGV[0]
  $log.debug zip
  begin
    Zip2mobi.new.makemobi(zip)
  rescue => error
    $log.error error
    # Remove whatever was produced so far: makemobi skips a step when its
    # output file already exists, so a partial file would never be rebuilt.
    base = File.basename(zip, ".zip")
    ["#{MOBI_DIR}#{base}.epub", "#{MOBI_DIR}#{base}.mobi"].each do |output|
      File.delete(output) if File.exist?(output)
    end
  end
  min, sec = (Time.now - start).divmod(60)
  $log.debug "zip2mobi cost: #{min.to_i}min #{sec.to_i}sec"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment