Skip to content

Instantly share code, notes, and snippets.

@dkam
Last active April 10, 2025 01:09
Show Gist options
  • Save dkam/35a5197e79bc69047f0eb3a2f083588d to your computer and use it in GitHub Desktop.
Save dkam/35a5197e79bc69047f0eb3a2f083588d to your computer and use it in GitHub Desktop.
Read and write ePub metadata
#!/usr/bin/env ruby
require 'bundler/inline'
#gemfile do
# source 'https://rubygems.org'
# gem 'zip'
# gem 'nokogiri'
# gem 'tty-prompt'
# gem 'debug'
#end
require 'zip'
require 'nokogiri'
require 'tty-prompt'
require 'debug'
# Reads and edits the Calibre series metadata stored in an ePub's OPF
# package document.
#
# Every accessor re-opens the archive passed to the constructor, locates the
# OPF entry through META-INF/container.xml and works on the parsed XML.
class Epub
  # Raised when the archive lacks an entry or element that is required to
  # locate or edit the OPF package document.
  class InvalidEpubError < StandardError; end

  CONTAINER_PATH = 'META-INF/container.xml'.freeze
  OPF_NAMESPACE = 'http://www.idpf.org/2007/opf'.freeze
  SERIES_META = 'calibre:series'.freeze
  SERIES_INDEX_META = 'calibre:series_index'.freeze

  # @param file [String] path to the ePub archive
  def initialize(file)
    @file = file
  end

  # Opens the archive, parses the OPF document and hands both to the block.
  #
  # @yield [zip_file, opf_doc, opf_name] the open archive, the parsed OPF
  #   document and the name of the OPF entry inside the archive
  # @return whatever Zip::File.open returns for the block; the accessors
  #   below rely on that being the block's own value
  # @raise [InvalidEpubError] if the OPF entry cannot be located
  def read_epub
    Zip::File.open(@file) do |zip_file|
      opf_name = find_opf_file(zip_file)
      opf_entry = zip_file.find_entry(opf_name)
      raise InvalidEpubError, "#{@file}: OPF file #{opf_name.inspect} not found in archive" unless opf_entry

      opf_doc = Nokogiri::XML(opf_entry.get_input_stream.read)
      yield zip_file, opf_doc, opf_name if block_given?
    end
  end

  # Like #read_epub, but writes the (possibly modified) OPF document back to
  # its entry after the block has run.
  #
  # @yield [zip_file, opf_doc, opf_name] same arguments as #read_epub
  # @raise [InvalidEpubError] if the OPF entry cannot be located
  def write_epub
    read_epub do |zip_file, opf_doc, opf_name|
      yield zip_file, opf_doc, opf_name if block_given?
      zip_file.get_output_stream(opf_name) { |stream| stream.write(opf_doc.to_xml) }
    end
  end

  # @return the parsed OPF document
  def opf
    read_epub { |_zip_file, opf_doc, _opf_name| opf_doc }
  end

  # @return [String, nil] content of the calibre:series tag, nil when absent
  def series
    read_meta(SERIES_META)
  end

  # Sets, creates or (when given nil) removes the calibre:series tag.
  #
  # @param new_series_name [String, nil] new series name, nil to remove the tag
  def series=(new_series_name)
    write_meta(SERIES_META, new_series_name)
  end

  # @return [String, nil] content of the calibre:series_index tag, nil when absent
  def series_index
    read_meta(SERIES_INDEX_META)
  end

  # Sets, creates or (when given nil) removes the calibre:series_index tag.
  #
  # @param new_si [String, nil] new series index, nil to remove the tag
  def series_index=(new_si)
    write_meta(SERIES_INDEX_META, new_si)
  end

  # Looks up the path of the OPF document declared in META-INF/container.xml.
  #
  # @param zip_file [#find_entry] the open archive
  # @return [String] the rootfile's 'full-path' attribute
  # @raise [InvalidEpubError] if container.xml is missing, has no root
  #   element, or declares no rootfile path
  def find_opf_file(zip_file)
    container_entry = zip_file.find_entry(CONTAINER_PATH)
    raise InvalidEpubError, "#{@file}: #{CONTAINER_PATH} not found in archive" unless container_entry

    doc = Nokogiri::XML(container_entry.get_input_stream.read)
    root = doc.root
    raise InvalidEpubError, "#{@file}: #{CONTAINER_PATH} has no root element" unless root

    ns = root.namespaces
    # When the root declares a default namespace it is listed under the
    # 'xmlns' key, and the XPath step has to carry that prefix to match.
    prefix = ns.key?('xmlns') ? 'xmlns:' : ''
    rootfile = doc.at_xpath("//#{prefix}rootfile", ns)
    full_path = rootfile && rootfile['full-path']
    raise InvalidEpubError, "#{@file}: #{CONTAINER_PATH} declares no rootfile path" unless full_path

    full_path
  end

  private

  # Returns the content attribute of <meta name="NAME">, or nil when absent.
  def read_meta(name)
    read_epub do |_zip_file, opf_doc, _opf_name|
      element = find_meta(opf_doc, name)
      element['content'] if element
    end
  end

  # Updates, creates or removes <meta name="NAME"> and saves the OPF document.
  def write_meta(name, value)
    write_epub do |_zip_file, opf_doc, _opf_name|
      element = find_meta(opf_doc, name)
      if value.nil?
        # Clearing a tag that is not there must not create an empty one.
        element&.remove
      elsif element
        element['content'] = value
      else
        append_meta(opf_doc, name, value)
      end
    end
  end

  # Finds the first <meta> element whose name attribute equals NAME.
  def find_meta(opf_doc, name)
    opf_doc.at_css(%(meta[name="#{name}"]))
  end

  # Appends a new <meta name="NAME" content="VALUE"> to the <metadata> element.
  def append_meta(opf_doc, name, value)
    metadata = opf_doc.at_css('metadata, opf|metadata', 'opf' => OPF_NAMESPACE)
    raise InvalidEpubError, "#{@file}: OPF document has no <metadata> element" unless metadata

    meta = Nokogiri::XML::Node.new('meta', opf_doc)
    meta['name'] = name
    meta['content'] = value
    metadata.add_child(meta)
  end
end
# Splits the command line into [action, attribute, new_value, candidate_paths].
#
# The first 'get' or 'set' argument is the action. The argument after it is
# the attribute and, for 'set', the one after that is the new value. Those
# consumed arguments are never treated as file paths, so a value that happens
# to name an existing file (or equals 'get'/'set') is handled correctly.
def parse_cli_args(argv)
  action_index = argv.index { |arg| %w[get set].include?(arg) }
  return [nil, nil, nil, argv.dup] unless action_index

  action = argv[action_index].to_sym
  attribute = argv[action_index + 1]
  new_value = argv[action_index + 2] if action == :set
  consumed = action == :set ? 3 : 2
  paths = argv.take(action_index) + argv.drop(action_index + consumed)
  [action, attribute, new_value, paths]
end

# Command-line entry point: prints or changes the series / series_index of
# every ePub named on the command line.
#
# Usage: [FILES...] get ATTRIBUTE [FILES...]
#        [FILES...] set ATTRIBUTE [VALUE] [FILES...]
# Leaving VALUE out (so that new_value is nil) removes the tag.
#
# @param argv [Array<String>] command-line arguments
# @raise [ArgumentError] if the action or attribute is missing or unknown,
#   or if none of the remaining arguments is an existing file
def run(argv)
  action, attribute, new_value, paths = parse_cli_args(argv)

  raise ArgumentError, 'Action must be :get or :set' unless %i[get set].include?(action)
  unless %w[series series_index].include?(attribute)
    raise ArgumentError, "Attribute must be 'series' or 'series_index'"
  end

  files, missing = paths.partition { |path| File.exist?(path) }
  missing.each { |path| warn "Skipping #{path}: no such file" }
  raise ArgumentError, 'No files provided' if files.empty?

  files.sort.each do |file|
    epub = Epub.new(file)
    # attribute was validated against the whitelist above, so dispatching on
    # it cannot reach arbitrary methods.
    current = epub.public_send(attribute)
    if action == :get
      puts "#{file} : #{current}"
    else
      epub.public_send("#{attribute}=", new_value)
      puts "#{file} : #{current} => #{new_value}"
    end
  end
end
# Run the command-line interface only when this file is executed directly,
# not when it is loaded via require.
run(ARGV) if __FILE__ == $PROGRAM_NAME
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment