oquno · November 22, 2010 08:53
diff --git a/zip2mobi.rb b/zip2mobi.rb
 #!/usr/bin/env ruby
 # -*- coding: utf-8 -*-
 require 'cgi'
 require 'open-uri'
 require 'rubygems'
 require 'zipruby'
 require 'RMagick'
 require 'kconv'
 require 'logger'
 require 'amazon/aws'
 require 'amazon/aws/search'

 # NOTE(review): presumably tells the amazon/aws library where to look for its
 # rc file -- confirm against that library's documentation.
 ENV['AMAZONRCDIR'] = './'
 # settings to get author of the book
 ASSOCIATES_ID = "YOUR_ID"
 AWS_SECRET = 'YOUR_SECRET'
 AWS_KEY_ID = "YOUR_AWS_KEY_ID"
 # includes zip2mobi.rb, log/*, kindlegen
 SCRIPTS_DIR = "/path/to/scripts/dir/"
 # mobi and epub files are saved
 MOBI_DIR = "/path/to/mobidir/"
 # image file format to convert; matched case-insensitively against the end
 # of the entry name so that ".JPG" and ".jpeg" entries are picked up too
 SUPPORT_FORMAT = /\.(png|jpe?g|gif)\z/i
 # image size (pixels): ROW is the maximum height, COL the maximum width
 ROW = 1024
 COL = 768
 # Global logger: writes to log/zip2mobi.log under SCRIPTS_DIR, rotated daily,
 # with DEBUG verbosity.
 $log = Logger.new("#{SCRIPTS_DIR}log/zip2mobi.log", 'daily').tap do |logger|
   logger.level = Logger::DEBUG
 end

 # Builds an EPUB from a zip archive of page images and runs kindlegen on it
 # to produce a MOBI file. Relies on the file-level settings ($log,
 # SCRIPTS_DIR, MOBI_DIR, SUPPORT_FORMAT, ROW, COL and the AWS credentials).
 class Zip2mobi
   include Amazon::AWS
   include Amazon::AWS::Search

   # Writes the EPUB at +epubname+ from the images found in the zip at +path+.
   #
   # Entries whose names match SUPPORT_FORMAT are ordered by the numbers in
   # their names. An image taller than it is wide becomes one page; any other
   # image is resized and split into two pages, right half first.
   #
   # path     - location handed to Kernel#open to read the source zip
   #            (NOTE(review): open-uri is loaded, so URLs were presumably
   #            accepted as well -- confirm on the Ruby version in use)
   # epubname - path of the EPUB file to create or update
   def convertZip(path, epubname)
     # The mimetype entry is written in a separate pass with NO_COMPRESSION.
     Zip::Archive.open(epubname, Zip::CREATE, Zip::NO_COMPRESSION) do |epub|
       epub.add_or_replace_buffer "mimetype", "application/epub+zip"
     end
     Zip::Archive.open(epubname) do |epub|
       epub.add_or_replace_buffer "META-INF/container.xml", container_xml
       xhtmls = []
       open(path) do |zipFile|
         Zip::Archive.open_buffer(zipFile.read) do |zip|
           sorted_image_names(zip).each_with_index do |name, i|
             nname = name.gsub(/[^\w\/\.\-]/, '_')
             $log.debug name
             image = Magick::Image.from_blob(zip.fopen(name).read)[0]
             if image.rows > image.columns
               # comment out if like to change contrast
               # image.contrast#.contrast.contrast
               image = trim_margins(image)
               image.resize_to_fit!(COL, ROW)
               add_page(epub, xhtmls, "#{i}.xhtml", nname, image)
               $log.debug "added #{nname}"
             else
               # split if landscape image
               image.resize_to_fit!(COL * 2, ROW)
               half = image.columns / 2
               # a == 0 takes the right half, a == 1 the left half. Both
               # halves are always emitted; the page index i is not touched.
               [0, 1].each do |a|
                 d = trim_margins(image.crop(a > 0 ? 0 : half, 0, half, image.rows, true))
                 d.resize_to_fit!(COL, ROW)
                 dname = nname.sub(/(\.[^\.]+)$/) { "_#{a + 1}#{$1}" }
                 add_page(epub, xhtmls, "#{i}_#{a}.xhtml", dname, d)
                 $log.debug dname
                 d.destroy!
               end
             end
             image.destroy!
             run_gc
           end
         end # open zipFile
       end # open path
       epub.add_or_replace_buffer "OEBPS/content.opf", content_opf(xhtmls)
       epub.add_or_replace_buffer "OEBPS/toc.ncx", toc_ncx(xhtmls)
     end # open epub
   end

   # Forces a garbage-collection pass. GC.enable reports whether GC had been
   # disabled; if so it is disabled again afterwards.
   def run_gc
     was_disabled = GC.enable
     GC.start
     GC.disable if was_disabled
   end

   # Returns the META-INF/container.xml document pointing at OEBPS/content.opf.
   # Heredoc bodies in this class are flush-left on purpose: the terminator
   # must start at column 0 and the XML declaration must open the document.
   def container_xml
     <<__XML__
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml" />
  </rootfiles>
</container>
__XML__
   end

   # Returns the OPF package document.
   #
   # files - array of [xhtml_file_name, label] pairs in reading order
   #
   # Reads @title and @author; both are XML-escaped before being embedded.
   def content_opf(files)
     title = CGI.escapeHTML(@title.toutf8)
     author = CGI.escapeHTML(@author.to_s.toutf8)
     manifest_items = files.map do |f|
       '        <item id="%s" href="%s" media-type="application/xhtml+xml"/>' % [CGI.escapeHTML(f[0]), CGI.escapeHTML(f[0])]
     end.join("\n")
     spine_items = files.map do |f|
       '        <itemref idref="%s"/>' % CGI.escapeHTML(f[0])
     end.join("\n")

     <<__XML__
<?xml version="1.0"?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookID" version="2.0">
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
        <dc:title>#{title}</dc:title>
        <dc:creator opf:role="aut">#{author}</dc:creator>
        <dc:language>ja</dc:language>
        <dc:identifier id="BookID" opf:scheme="URL">http://example.com/epub/sample/sample1.epub</dc:identifier>
    </metadata>
    <manifest>
        <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
#{manifest_items}
    </manifest>
    <spine toc="ncx">
#{spine_items}
    </spine>
</package>
__XML__
   end

   # Returns the NCX table of contents with one navPoint per page.
   #
   # files - array of [xhtml_file_name, label] pairs in reading order
   def toc_ncx(files)
     xml = <<__XML__
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"
   "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" dir="rtl">
    <head>
        <meta name="dtb:uid"
              content="http://example.com/epub/sample/sample1.epub"/>
        <meta name="dtb:depth" content="1"/>
        <meta name="dtb:totalPageCount" content="0"/>
        <meta name="dtb:maxPageNumber" content="0"/>
    </head>
    <docTitle>
        <text>sample</text>
    </docTitle>
    <navMap>
__XML__
     files.each_with_index do |f, i|
       xml << <<__XML__
        <navPoint id="navPoint-#{i+1}" playOrder="#{i+1}">
            <navLabel>
                <text>#{CGI.escapeHTML(f[1].to_s)}</text>
            </navLabel>
            <content src="#{CGI.escapeHTML(f[0])}"/>
        </navPoint>
__XML__
     end
     xml << <<__XML__
    </navMap>
</ncx>
__XML__
   end

   # Returns the XHTML page that displays a single image.
   #
   # title - text for the <title> element
   # img   - image file name used as the <img> src
   # w, h  - image width and height written into the <img> attributes
   def page_xhtml(title, img, w, h)
     <<__XML__
<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<title>#{CGI.escapeHTML title}</title>
<style>
img { vertical-align: middle; }
</style>
</head>
<body><img src="#{CGI.escapeHTML img}" width="#{CGI.escapeHTML w.to_s}" height="#{CGI.escapeHTML h.to_s}"/></body>
</html>
__XML__
   end

   # Converts the zip at +zip_path+ into MOBI_DIR/<title>.epub and then runs
   # kindlegen on it. The title is the zip's base name without ".zip". The
   # EPUB step is skipped when the .epub already exists, and kindlegen is
   # skipped when the .mobi already exists.
   def makemobi(zip_path)
     @title = File.basename(zip_path, ".zip")
     epub_path = "#{MOBI_DIR}#{@title}.epub"
     mobi_path = "#{MOBI_DIR}#{@title}.mobi"
     $log.debug "title: #{@title}"
     @author = get_author(@title)
     $log.debug "convert to #{epub_path}"
     $log.debug "convert to #{mobi_path}"
     convertZip(zip_path, epub_path) unless File.exist?(epub_path)
     $log.debug "#{SCRIPTS_DIR}kindlegen \"#{epub_path}\" -unicode"
     return if File.exist?(mobi_path)
     # Argument-array form: no shell is involved, so quotes or other shell
     # metacharacters in the title cannot alter the command.
     output = IO.popen(["#{SCRIPTS_DIR}kindlegen", epub_path, "-unicode"]) { |io| io.read }
     $log.debug output
   end

   # Looks up the author of +title+ with an Amazon 'Books' item search.
   # A trailing volume/part marker is stripped from the title first.
   # Returns the first author found, or "noname" when no item has one.
   def get_author(title)
     $log.debug "get author: #{title}"
     title = title.sub(/\s*(第\d+(巻|部)|(part\s*)?\d+)$/i, '')
     $log.debug "search: #{title}"
     request = Request.new(AWS_KEY_ID, ASSOCIATES_ID, 'jp', false)
     request.config['secret_key_id'] = AWS_SECRET
     il = ItemSearch.new('Books', {'Title' => title})
     response = request.search(il)
     response.item_search_response[0].items.item.each do |i|
       author = i.item_attributes.author
       $log.debug author.to_s
       return author if author
     end
     "noname"
   end

   private

   # Returns the names of the zip entries matching SUPPORT_FORMAT, ordered by
   # the zero-padded numbers (plus an optional following character) in each name.
   def sorted_image_names(zip)
     keyed = zip.map do |f|
       next unless f.name.match(SUPPORT_FORMAT)
       keys = f.name.scan(/(\d+)(\w?)/).map do |num, letter|
         "0000000000000000#{num}"[-16, 16] + (letter.empty? ? " " : letter)
       end
       [f.name, keys]
     end
     keyed.compact.sort { |a, b| compare_sort_keys(a[1], b[1]) }.map { |entry| entry[0] }
   end

   # Compares two key lists position by position; the first differing pair
   # decides. Lists that agree over their common length compare as equal.
   def compare_sort_keys(a, b)
     a.zip(b).each do |x, y|
       next if y.nil? || x == y
       return x <=> y
     end
     0
   end

   # Trims +image+ with a fuzz of 60% and returns the image to keep, destroying
   # the one that is discarded. A trimmed result that is one row high is
   # rejected in favour of the untrimmed image (presumably trim left nothing
   # usable -- TODO confirm).
   def trim_margins(image)
     image.fuzz = '60%'
     trimmed = image.trim
     if trimmed.rows == 1
       trimmed.destroy!
       image
     else
       image.destroy!
       trimmed
     end
   end

   # Stores +image+ and its XHTML wrapper page under OEBPS/ in +epub+ and
   # records the page in +xhtmls+ as [xhtml_name, image_name].
   def add_page(epub, xhtmls, xhtml_name, image_name, image)
     epub.add_or_replace_buffer "OEBPS/#{image_name}", image.to_blob
     epub.add_or_replace_buffer "OEBPS/#{xhtml_name}", page_xhtml(image_name, image_name, image.columns, image.rows)
     xhtmls << [xhtml_name, image_name]
   end
 end

 # Command-line entry point: converts the zip named by the first argument and
 # logs how long the conversion took.
 if ARGV.length>0
   start = Time.now
   zip = ARGV[0]
   $log.debug zip
   begin
     Zip2mobi.new.makemobi(zip)
   rescue => error
     $log.error error
     # Remove the epub/mobi outputs for this zip after a failure.
     # File.exist? is used because File.exists? is gone from current Rubies.
     base = File.basename(zip, ".zip")
     ["#{MOBI_DIR}#{base}.epub", "#{MOBI_DIR}#{base}.mobi"].each do |out_path|
       File.delete(out_path) if File.exist?(out_path)
     end
   end
   min, sec = (Time.now - start).divmod(60)
   $log.debug "zip2mobi cost: #{min.to_i}min #{sec.to_i}sec"
 end
	#!/usr/bin/env ruby
	# -*- coding: utf-8 -*-
	require 'cgi'
	require 'open-uri'
	require 'rubygems'
	require 'zipruby'
	require 'RMagick'
	require 'kconv'
	require 'logger'
	require 'amazon/aws'
	require 'amazon/aws/search'

	# NOTE(review): presumably tells the amazon/aws library where to look for its
	# rc file -- confirm against that library's documentation.
	ENV['AMAZONRCDIR'] = './'
	# settings to get author of the book
	ASSOCIATES_ID = "YOUR_ID"
	AWS_SECRET = 'YOUR_SECRET'
	AWS_KEY_ID = "YOUR_AWS_KEY_ID"
	# includes zip2mobi.rb, log/*, kindlegen
	SCRIPTS_DIR = "/path/to/scripts/dir/"
	# mobi and epub files are saved
	MOBI_DIR = "/path/to/mobidir/"
	# image file format to convert; the alternation uses plain "|" (an escaped
	# "\|" would only match a literal pipe) and is matched case-insensitively
	# against the end of the entry name, so ".JPG" and ".jpeg" are accepted too
	SUPPORT_FORMAT = /\.(png|jpe?g|gif)\z/i
	# image size (pixels): ROW is the maximum height, COL the maximum width
	ROW = 1024
	COL = 768
	# Global logger: writes to log/zip2mobi.log under SCRIPTS_DIR, rotated daily,
	# with DEBUG verbosity.
	$log = Logger.new("#{SCRIPTS_DIR}log/zip2mobi.log", 'daily').tap do |logger|
	  logger.level = Logger::DEBUG
	end

	# Builds an EPUB from a zip archive of page images and runs kindlegen on it
	# to produce a MOBI file. Relies on the file-level settings ($log,
	# SCRIPTS_DIR, MOBI_DIR, SUPPORT_FORMAT, ROW, COL and the AWS credentials).
	class Zip2mobi
	  include Amazon::AWS
	  include Amazon::AWS::Search

	  # Writes the EPUB at +epubname+ from the images found in the zip at +path+.
	  #
	  # Entries whose names match SUPPORT_FORMAT are ordered by the numbers in
	  # their names. An image taller than it is wide becomes one page; any other
	  # image is resized and split into two pages, right half first.
	  #
	  # path     - location handed to Kernel#open to read the source zip
	  #            (NOTE(review): open-uri is loaded, so URLs were presumably
	  #            accepted as well -- confirm on the Ruby version in use)
	  # epubname - path of the EPUB file to create or update
	  def convertZip(path, epubname)
	    # The mimetype entry is written in a separate pass with NO_COMPRESSION.
	    Zip::Archive.open(epubname, Zip::CREATE, Zip::NO_COMPRESSION) do |epub|
	      epub.add_or_replace_buffer "mimetype", "application/epub+zip"
	    end
	    Zip::Archive.open(epubname) do |epub|
	      epub.add_or_replace_buffer "META-INF/container.xml", container_xml
	      xhtmls = []
	      open(path) do |zipFile|
	        Zip::Archive.open_buffer(zipFile.read) do |zip|
	          sorted_image_names(zip).each_with_index do |name, i|
	            nname = name.gsub(/[^\w\/\.\-]/, '_')
	            $log.debug name
	            image = Magick::Image.from_blob(zip.fopen(name).read)[0]
	            if image.rows > image.columns
	              # comment out if like to change contrast
	              # image.contrast#.contrast.contrast
	              image = trim_margins(image)
	              image.resize_to_fit!(COL, ROW)
	              add_page(epub, xhtmls, "#{i}.xhtml", nname, image)
	              $log.debug "added #{nname}"
	            else
	              # split if landscape image
	              image.resize_to_fit!(COL * 2, ROW)
	              half = image.columns / 2
	              # a == 0 takes the right half, a == 1 the left half. Both
	              # halves are always emitted; the page index i is not touched.
	              [0, 1].each do |a|
	                d = trim_margins(image.crop(a > 0 ? 0 : half, 0, half, image.rows, true))
	                d.resize_to_fit!(COL, ROW)
	                dname = nname.sub(/(\.[^\.]+)$/) { "_#{a + 1}#{$1}" }
	                add_page(epub, xhtmls, "#{i}_#{a}.xhtml", dname, d)
	                $log.debug dname
	                d.destroy!
	              end
	            end
	            image.destroy!
	            run_gc
	          end
	        end # open zipFile
	      end # open path
	      epub.add_or_replace_buffer "OEBPS/content.opf", content_opf(xhtmls)
	      epub.add_or_replace_buffer "OEBPS/toc.ncx", toc_ncx(xhtmls)
	    end # open epub
	  end

	  # Forces a garbage-collection pass. GC.enable reports whether GC had been
	  # disabled; if so it is disabled again afterwards.
	  def run_gc
	    was_disabled = GC.enable
	    GC.start
	    GC.disable if was_disabled
	  end

	  # Returns the META-INF/container.xml document pointing at OEBPS/content.opf.
	  # Heredoc bodies in this class are flush-left on purpose: the terminator
	  # must start at column 0 and the XML declaration must open the document.
	  def container_xml
	    <<__XML__
<?xml version="1.0"?>
<container version="1.0" xmlns="urn:oasis:names:tc:opendocument:xmlns:container">
  <rootfiles>
    <rootfile full-path="OEBPS/content.opf" media-type="application/oebps-package+xml" />
  </rootfiles>
</container>
__XML__
	  end

	  # Returns the OPF package document.
	  #
	  # files - array of [xhtml_file_name, label] pairs in reading order
	  #
	  # Reads @title and @author; both are XML-escaped before being embedded.
	  def content_opf(files)
	    title = CGI.escapeHTML(@title.toutf8)
	    author = CGI.escapeHTML(@author.to_s.toutf8)
	    manifest_items = files.map do |f|
	      '        <item id="%s" href="%s" media-type="application/xhtml+xml"/>' % [CGI.escapeHTML(f[0]), CGI.escapeHTML(f[0])]
	    end.join("\n")
	    spine_items = files.map do |f|
	      '        <itemref idref="%s"/>' % CGI.escapeHTML(f[0])
	    end.join("\n")

	    <<__XML__
<?xml version="1.0"?>
<package xmlns="http://www.idpf.org/2007/opf" unique-identifier="BookID" version="2.0">
    <metadata xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:opf="http://www.idpf.org/2007/opf">
        <dc:title>#{title}</dc:title>
        <dc:creator opf:role="aut">#{author}</dc:creator>
        <dc:language>ja</dc:language>
        <dc:identifier id="BookID" opf:scheme="URL">http://example.com/epub/sample/sample1.epub</dc:identifier>
    </metadata>
    <manifest>
        <item id="ncx" href="toc.ncx" media-type="application/x-dtbncx+xml"/>
#{manifest_items}
    </manifest>
    <spine toc="ncx">
#{spine_items}
    </spine>
</package>
__XML__
	  end

	  # Returns the NCX table of contents with one navPoint per page.
	  #
	  # files - array of [xhtml_file_name, label] pairs in reading order
	  def toc_ncx(files)
	    xml = <<__XML__
<!DOCTYPE ncx PUBLIC "-//NISO//DTD ncx 2005-1//EN"
   "http://www.daisy.org/z3986/2005/ncx-2005-1.dtd">
<ncx xmlns="http://www.daisy.org/z3986/2005/ncx/" version="2005-1" dir="rtl">
    <head>
        <meta name="dtb:uid"
              content="http://example.com/epub/sample/sample1.epub"/>
        <meta name="dtb:depth" content="1"/>
        <meta name="dtb:totalPageCount" content="0"/>
        <meta name="dtb:maxPageNumber" content="0"/>
    </head>
    <docTitle>
        <text>sample</text>
    </docTitle>
    <navMap>
__XML__
	    files.each_with_index do |f, i|
	      xml << <<__XML__
        <navPoint id="navPoint-#{i+1}" playOrder="#{i+1}">
            <navLabel>
                <text>#{CGI.escapeHTML(f[1].to_s)}</text>
            </navLabel>
            <content src="#{CGI.escapeHTML(f[0])}"/>
        </navPoint>
__XML__
	    end
	    xml << <<__XML__
    </navMap>
</ncx>
__XML__
	  end

	  # Returns the XHTML page that displays a single image.
	  #
	  # title - text for the <title> element
	  # img   - image file name used as the <img> src
	  # w, h  - image width and height written into the <img> attributes
	  def page_xhtml(title, img, w, h)
	    <<__XML__
<?xml version="1.0"?>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en" xml:lang="en">
<head>
<title>#{CGI.escapeHTML title}</title>
<style>
img { vertical-align: middle; }
</style>
</head>
<body><img src="#{CGI.escapeHTML img}" width="#{CGI.escapeHTML w.to_s}" height="#{CGI.escapeHTML h.to_s}"/></body>
</html>
__XML__
	  end

	  # Converts the zip at +zip_path+ into MOBI_DIR/<title>.epub and then runs
	  # kindlegen on it. The title is the zip's base name without ".zip". The
	  # EPUB step is skipped when the .epub already exists, and kindlegen is
	  # skipped when the .mobi already exists.
	  def makemobi(zip_path)
	    @title = File.basename(zip_path, ".zip")
	    epub_path = "#{MOBI_DIR}#{@title}.epub"
	    mobi_path = "#{MOBI_DIR}#{@title}.mobi"
	    $log.debug "title: #{@title}"
	    @author = get_author(@title)
	    $log.debug "convert to #{epub_path}"
	    $log.debug "convert to #{mobi_path}"
	    convertZip(zip_path, epub_path) unless File.exist?(epub_path)
	    $log.debug "#{SCRIPTS_DIR}kindlegen \"#{epub_path}\" -unicode"
	    return if File.exist?(mobi_path)
	    # Argument-array form: no shell is involved, so quotes or other shell
	    # metacharacters in the title cannot alter the command.
	    output = IO.popen(["#{SCRIPTS_DIR}kindlegen", epub_path, "-unicode"]) { |io| io.read }
	    $log.debug output
	  end

	  # Looks up the author of +title+ with an Amazon 'Books' item search.
	  # A trailing volume/part marker is stripped from the title first.
	  # Returns the first author found, or "noname" when no item has one.
	  def get_author(title)
	    $log.debug "get author: #{title}"
	    title = title.sub(/\s*(第\d+(巻|部)|(part\s*)?\d+)$/i, '')
	    $log.debug "search: #{title}"
	    request = Request.new(AWS_KEY_ID, ASSOCIATES_ID, 'jp', false)
	    request.config['secret_key_id'] = AWS_SECRET
	    il = ItemSearch.new('Books', {'Title' => title})
	    response = request.search(il)
	    response.item_search_response[0].items.item.each do |i|
	      author = i.item_attributes.author
	      $log.debug author.to_s
	      return author if author
	    end
	    "noname"
	  end

	  private

	  # Returns the names of the zip entries matching SUPPORT_FORMAT, ordered by
	  # the zero-padded numbers (plus an optional following character) in each name.
	  def sorted_image_names(zip)
	    keyed = zip.map do |f|
	      next unless f.name.match(SUPPORT_FORMAT)
	      keys = f.name.scan(/(\d+)(\w?)/).map do |num, letter|
	        "0000000000000000#{num}"[-16, 16] + (letter.empty? ? " " : letter)
	      end
	      [f.name, keys]
	    end
	    keyed.compact.sort { |a, b| compare_sort_keys(a[1], b[1]) }.map { |entry| entry[0] }
	  end

	  # Compares two key lists position by position; the first differing pair
	  # decides. Lists that agree over their common length compare as equal.
	  def compare_sort_keys(a, b)
	    a.zip(b).each do |x, y|
	      next if y.nil? || x == y
	      return x <=> y
	    end
	    0
	  end

	  # Trims +image+ with a fuzz of 60% and returns the image to keep, destroying
	  # the one that is discarded. A trimmed result that is one row high is
	  # rejected in favour of the untrimmed image (presumably trim left nothing
	  # usable -- TODO confirm).
	  def trim_margins(image)
	    image.fuzz = '60%'
	    trimmed = image.trim
	    if trimmed.rows == 1
	      trimmed.destroy!
	      image
	    else
	      image.destroy!
	      trimmed
	    end
	  end

	  # Stores +image+ and its XHTML wrapper page under OEBPS/ in +epub+ and
	  # records the page in +xhtmls+ as [xhtml_name, image_name].
	  def add_page(epub, xhtmls, xhtml_name, image_name, image)
	    epub.add_or_replace_buffer "OEBPS/#{image_name}", image.to_blob
	    epub.add_or_replace_buffer "OEBPS/#{xhtml_name}", page_xhtml(image_name, image_name, image.columns, image.rows)
	    xhtmls << [xhtml_name, image_name]
	  end
	end

	# Command-line entry point: converts the zip named by the first argument and
	# logs how long the conversion took.
	if ARGV.length>0
	  start = Time.now
	  zip = ARGV[0]
	  $log.debug zip
	  begin
	    Zip2mobi.new.makemobi(zip)
	  rescue => error
	    $log.error error
	    # Remove the epub/mobi outputs for this zip after a failure.
	    # File.exist? is used because File.exists? is gone from current Rubies.
	    base = File.basename(zip, ".zip")
	    ["#{MOBI_DIR}#{base}.epub", "#{MOBI_DIR}#{base}.mobi"].each do |out_path|
	      File.delete(out_path) if File.exist?(out_path)
	    end
	  end
	  min, sec = (Time.now - start).divmod(60)
	  $log.debug "zip2mobi cost: #{min.to_i}min #{sec.to_i}sec"
	end