Skip to content

Instantly share code, notes, and snippets.

@oquno
Created November 22, 2010 08:53
Show Gist options
  • Save oquno/709699 to your computer and use it in GitHub Desktop.
Save oquno/709699 to your computer and use it in GitHub Desktop.
zip2mobi
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
require 'cgi'
require 'kconv'
require 'logger'
require 'open-uri'
require 'shellwords'
require 'rubygems'
require 'zipruby'
require 'RMagick'
require 'amazon/aws'
require 'amazon/aws/search'
# Presumably read by the amazon/aws library to locate its rc file; points it
# at the current directory.
ENV['AMAZONRCDIR'] = './'
# settings to get author of the book
ASSOCIATES_ID = "YOUR_ID"
AWS_SECRET = 'YOUR_SECRET'
AWS_KEY_ID = "YOUR_AWS_KEY_ID"
# includes zip2mobi.rb, log/*, kindlegen (must end with a slash: paths are
# built by plain string concatenation)
SCRIPTS_DIR = "/path/to/scripts/dir/"
# mobi and epub files are saved here (must end with a slash, same reason)
MOBI_DIR = "/path/to/mobidir/"
# image file names to convert: matched case-insensitively so "001.JPG" is
# picked up, accepts ".jpeg" as well as ".jpg", and is anchored with \z so the
# extension must be the very end of the entry name
SUPPORT_FORMAT = /\.(png|jpe?g|gif)\z/i
# image size: pages are scaled to fit within COL x ROW pixels
ROW = 1024
COL = 768
# log setting: one global logger shared by the whole script, writing to
# log/zip2mobi.log under SCRIPTS_DIR with the 'daily' rotation argument,
# at DEBUG verbosity
log_path = "#{SCRIPTS_DIR}log/zip2mobi.log"
$log = Logger.new(log_path, 'daily')
$log.level = Logger::DEBUG
# Converts a zip archive of page images into an EPUB file and then runs the
# external kindlegen binary on that EPUB to produce a MOBI file.
#
# Relies on file-level configuration (SCRIPTS_DIR, MOBI_DIR, SUPPORT_FORMAT,
# ROW, COL, the AWS credentials) and on the global logger $log.
class Zip2mobi
  include Amazon::AWS
  include Amazon::AWS::Search

  # Builds the EPUB at +epubname+ from the images found in the zip at +path+.
  #
  # Every entry whose name matches SUPPORT_FORMAT becomes one page (portrait
  # image) or two pages (landscape image, split down the middle). Uses @title
  # and @author (via content_opf) for the book metadata.
  def convertZip(path, epubname)
    # First pass: create the archive with only the "mimetype" entry, opened
    # with Zip::NO_COMPRESSION.
    Zip::Archive.open(epubname, Zip::CREATE, Zip::NO_COMPRESSION) do |epub|
      epub.add_or_replace_buffer "mimetype", "application/epub+zip"
    end
    # Second pass: reopen with default settings and add everything else.
    Zip::Archive.open(epubname) do |epub|
      epub.add_or_replace_buffer "META-INF/container.xml", container_xml
      xhtmls = []
      # NOTE(review): Kernel#open treats a path starting with "|" as a command
      # and (with open-uri, on older Rubies) accepts URLs; switch to File.open
      # if only local files are ever passed in. 'rb' keeps the zip bytes intact.
      open(path, 'rb') do |zip_file|
        Zip::Archive.open_buffer(zip_file.read) do |zip|
          sorted_image_names(zip).each_with_index do |name, i|
            add_image_pages(epub, zip, name, i, xhtmls)
            run_gc
          end
        end
      end
      epub.add_or_replace_buffer "OEBPS/content.opf", content_opf(xhtmls)
      epub.add_or_replace_buffer "OEBPS/toc.ncx", toc_ncx(xhtmls)
    end
  end

  # Forces a garbage-collection run, then restores the previous state:
  # GC.enable returns true when GC had been disabled, in which case it is
  # disabled again afterwards.
  def run_gc
    was_disabled = GC.enable
    GC.start
    GC.disable if was_disabled
  end

  # Returns the META-INF/container.xml document, which names
  # OEBPS/content.opf as the package root file.
  def container_xml
    <<-XML
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
<rootfiles>
<rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml" />
</rootfiles>
</container>
    XML
  end

  # Returns the OPF package document.
  #
  # files:: array of [xhtml_name, image_name] pairs in reading order; only
  #         the xhtml names are listed (manifest and spine).
  #
  # @title and @author are converted to UTF-8 and XML-escaped, so a title
  # containing "&" or "<" no longer produces a malformed document. The author
  # goes through to_s first because get_author may return a non-String value
  # from the search response.
  def content_opf(files)
    title = CGI.escapeHTML(@title.to_s.toutf8)
    author = CGI.escapeHTML(@author.to_s.toutf8)
    manifest_items = files.map { |f|
      href = CGI.escapeHTML(f[0])
      ' <item id="%s" href="%s" media-type="application/xhtml+xml"/>' % [href, href]
    }.join("\n")
    spine_items = files.map { |f|
      ' <itemref idref="%s"/>' % CGI.escapeHTML(f[0])
    }.join("\n")
    <<-XML
<?xml version="1.0"?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookID" version="2.0">
<metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
<dc:title>#{title}</dc:title>
<dc:creator opf:role="aut">#{author}</dc:creator>
<dc:language>ja</dc:language>
<dc:identifier id="BookID" opf:scheme="URL">http://example.com/epub/sample/sample1.epub</dc:identifier>
</metadata>
<manifest>
<item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
#{manifest_items}
</manifest>
<spine toc="ncx">
#{spine_items}
</spine>
</package>
    XML
  end

  # Returns the NCX table of contents: one navPoint per page, labelled with
  # the page's image name and numbered from 1.
  #
  # files:: array of [xhtml_name, image_name] pairs in reading order.
  def toc_ncx(files)
    xml = <<-XML
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"
"http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" dir="rtl">
<head>
<meta name="dtb:uid"
content="http://example.com/epub/sample/sample1.epub"/>
<meta name="dtb:depth" content="1"/>
<meta name="dtb:totalPageCount" content="0"/>
<meta name="dtb:maxPageNumber" content="0"/>
</head>
<docTitle>
<text>sample</text>
</docTitle>
<navMap>
    XML
    files.each_with_index do |f, i|
      # Append in place instead of reallocating the whole document per page.
      xml << <<-XML
<navPoint id="navPoint-#{i+1}" playOrder="#{i+1}">
<navLabel>
<text>#{CGI.escapeHTML(f[1].to_s)}</text>
</navLabel>
<content src="#{CGI.escapeHTML(f[0])}"/>
</navPoint>
      XML
    end
    xml << <<-XML
</navMap>
</ncx>
    XML
  end

  # Returns the XHTML document for a single page: one <img> element.
  #
  # title:: text for the <title> element
  # img::   image path used as the img src
  # w, h::  image width and height, written as the img attributes
  def page_xhtml(title, img, w, h)
    <<-XML
<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<title>#{CGI.escapeHTML title}</title>
<style>
img { vertical-align: middle; }
</style>
</head>
<body><img src="#{CGI.escapeHTML img}" width="#{CGI.escapeHTML w.to_s}" height="#{CGI.escapeHTML h.to_s}"/></body>
</html>
    XML
  end

  # Converts the zip at +zip_path+ into "<MOBI_DIR><title>.epub" and then
  # "<MOBI_DIR><title>.mobi", where title is the zip's base name without
  # ".zip". Each step is skipped when its output file already exists.
  # Exceptions raised while building the EPUB propagate to the caller.
  def makemobi(zip_path)
    @title = File.basename(zip_path, ".zip")
    epub_path = "#{MOBI_DIR}#{@title}.epub"
    mobi_path = "#{MOBI_DIR}#{@title}.mobi"
    $log.debug "title: #{@title}"
    @author = get_author(@title)
    $log.debug "convert to #{epub_path}"
    $log.debug "convert to #{mobi_path}"
    convertZip(zip_path, epub_path) unless File.exist?(epub_path)
    # The command runs through the shell (backticks), so both paths are
    # shell-escaped: titles containing quotes, "$" or backticks would
    # otherwise break the command or be interpreted by the shell.
    kindlegen = Shellwords.escape("#{SCRIPTS_DIR}kindlegen")
    command = "#{kindlegen} #{Shellwords.escape(epub_path)} -unicode"
    $log.debug command
    $log.debug `#{command}` unless File.exist?(mobi_path)
  end

  # Looks up the author of +title+ with an Amazon 'Books' ItemSearch ('jp'
  # locale) and returns the first author attribute found, or "noname".
  #
  # A trailing volume/part marker is stripped from the title before searching.
  # The lookup is best-effort metadata: any StandardError raised while
  # searching is logged as a warning and "noname" is returned, so a failed
  # lookup no longer aborts the whole conversion.
  def get_author(title)
    $log.debug "get author: #{title}"
    title = title.sub(/\s*(第\d+(巻|部)|(part\s*)?\d+)$/i, '')
    $log.debug "search: #{title}"
    request = Request.new(AWS_KEY_ID, ASSOCIATES_ID, 'jp', false)
    request.config['secret_key_id'] = AWS_SECRET
    response = request.search(ItemSearch.new('Books', {'Title' => title}))
    response.item_search_response[0].items.item.each do |item|
      author = item.item_attributes.author
      $log.debug author.to_s
      return author if author
    end
    "noname"
  rescue StandardError => e
    $log.warn "author lookup failed, using noname: #{e.class}: #{e.message}"
    "noname"
  end

  private

  # Returns the names of the zip entries matching SUPPORT_FORMAT, ordered by
  # the numbers embedded in their names.
  #
  # Each run of digits (plus one optional following word character) becomes a
  # fixed-width key: the number left-padded with zeros to 16 characters, then
  # the letter or a space. Names are ordered by their key lists; ties (for
  # example one key list being a prefix of another) are broken by key count
  # and then by name, so the order is always deterministic.
  def sorted_image_names(zip)
    keyed = []
    zip.each do |entry|
      name = entry.name
      next unless name.match(SUPPORT_FORMAT)
      keys = name.scan(/(\d+)(\w?)/).map { |num, letter|
        "0000000000000000#{num}"[-16, 16] + (letter.empty? ? " " : letter)
      }
      keyed << [keys, name]
    end
    keyed.sort.map { |pair| pair.last }
  end

  # Reads image +name+ from +zip+ and adds it to +epub+ as one page
  # (portrait) or as two half pages (landscape), appending the
  # [xhtml_name, image_name] pairs to +xhtmls+. +index+ is the image's
  # position in reading order and is used in the xhtml file names.
  def add_image_pages(epub, zip, name, index, xhtmls)
    $log.debug name
    # Anything other than word characters, "/", "." and "-" becomes "_".
    safe_name = name.gsub(/[^\w\/\.\-]/, '_')
    # Block form so the entry handle is closed as soon as it has been read.
    blob = zip.fopen(name) { |entry| entry.read }
    image = Magick::Image.from_blob(blob)[0]
    if image.rows > image.columns
      # comment out if like to change contrast
      # image.contrast#.contrast.contrast
      image = trim_margins(image)
      image.resize_to_fit!(COL, ROW)
      add_page(epub, "#{index}.xhtml", safe_name, image, xhtmls)
      $log.debug "added #{safe_name}"
    else
      # split if landscape image
      image.resize_to_fit!(COL * 2, ROW)
      half_width = image.columns / 2
      # Side 0 is cropped from the right half and side 1 from the left half,
      # so the right half comes first in reading order. The page index is NOT
      # advanced here: doing so used to drop the second half of the image at
      # index 1 and misnumber the xhtml files.
      [0, 1].each do |side|
        x = side > 0 ? 0 : half_width
        half = trim_margins(image.crop(x, 0, half_width, image.rows, true))
        half.resize_to_fit!(COL, ROW)
        half_name = safe_name.sub(/(\.[^\.]+)$/) { "_#{side + 1}#{$1}" }
        add_page(epub, "#{index}_#{side}.xhtml", half_name, half, xhtmls)
        $log.debug half_name
        half.destroy!
      end
    end
    image.destroy!
  end

  # Stores +image+ at "OEBPS/<image_name>" and its page document at
  # "OEBPS/<xhtml_name>", then records the pair in +xhtmls+.
  def add_page(epub, xhtml_name, image_name, image, xhtmls)
    epub.add_or_replace_buffer "OEBPS/#{image_name}", image.to_blob
    epub.add_or_replace_buffer "OEBPS/#{xhtml_name}",
                               page_xhtml(image_name, image_name, image.columns, image.rows)
    xhtmls << [xhtml_name, image_name]
  end

  # Trims the border of +image+ with a fuzz of 60% and returns the image to
  # keep using. A trim result that is only one row high is rejected and the
  # untrimmed image is returned instead. Whichever of the two images is not
  # returned is destroyed, so the caller owns exactly one image afterwards.
  def trim_margins(image)
    image.fuzz = '60%'
    trimmed = image.trim
    if trimmed.rows == 1
      trimmed.destroy!
      image
    else
      image.destroy!
      trimmed
    end
  end
end
# Formats a duration given in seconds as "<m>min <s>sec"; fractional seconds
# are truncated.
def elapsed_text(seconds)
  min, sec = seconds.divmod(60)
  "#{min.to_i}min #{sec.to_i}sec"
end

# Command-line entry point: converts the zip given as the first argument.
# Nothing happens when no argument is given.
if ARGV.length > 0
  start = Time.now
  zip = ARGV[0]
  $log.debug zip
  begin
    Zip2mobi.new.makemobi(zip)
  rescue => error
    $log.error error
    # Remove the output files after a failed conversion so the next run
    # starts from scratch (makemobi skips steps whose output already exists).
    base = File.basename(zip, ".zip")
    [".epub", ".mobi"].each do |ext|
      output = "#{MOBI_DIR}#{base}#{ext}"
      # File.exist? matches the class's usage; File.exists? is deprecated.
      File.delete(output) if File.exist?(output)
    end
  end
  $log.debug "zip2mobi cost: #{elapsed_text(Time.now - start)}"
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment