mkuendig · December 31, 2016 02:36
diff --git a/Gemfile b/Gemfile
 # Gem dependencies for polly_synthesizer.rb.
 source 'https://rubygems.org'

 # Required by the script, although no Nokogiri call is visible in it.
 gem 'nokogiri', '~>1.6'
 # Provides Aws::Polly::Client, which performs the speech synthesis.
 gem 'aws-sdk', '~> 2'
 # Required by the script before it reads the input text with `open`.
 gem 'open-uri'
 # Provides ProgressBar, used to report progress while chunks are synthesized.
 gem 'ruby-progressbar'
 # Provides Mp3Info, used to read the length of each generated MP3 part.
 gem "mp3info"
 # Provides FFMPEG::Movie, used to transcode the combined MP3 to AAC/MP4.
 gem 'streamio-ffmpeg'
diff --git a/polly_synthesizer.rb b/polly_synthesizer.rb
 # This Ruby script feeds plain text (for example, a text file exported from an EPUB with Calibre) into AWS Polly
 # and builds an audiobook with chapters and subtitles.
 # Subtitle generation does not work correctly yet.
 # It is run on OS X and shells out to the ffmpeg and MP4Box tools (as well as mp4tags and mp4file).

 require 'aws-sdk'
 require 'nokogiri'
 require 'open-uri'
 require 'ruby-progressbar'
 require "mp3info"
 require 'streamio-ffmpeg'


 # Audiobook metadata handed to `mp4tags` when the finished .m4b file is tagged.
 # These are placeholder values.
 album_title  = 'albumtitle'
 album_artist = 'albumartist'
 album_genre  = 'albumgenre'
 album_year   = 'albumyear'

 # Thin wrapper around the AWS Polly client that submits SSML text for
 # synthesis as MP3.
 class Synthesizer
   # region - AWS region name the Polly client is created for.
   def initialize(region = 'us-west-2')
     @polly = Aws::Polly::Client.new(region: region)
   end

   # Submits +text+ (treated as SSML) to Polly and requests MP3 output.
   #
   # text      - SSML document to synthesize.
   # file_name - Path passed to the client as the response target.
   # voice_id  - Polly voice ID, see
   #             http://docs.aws.amazon.com/polly/latest/dg/API_Voice.html
   #             (for a Japanese voice you can use "Mizuki").
   #
   # Returns whatever the client's synthesize_speech call returns.
   def synthesize(text, file_name = "./tmp.mp3", voice_id = "Nicole")
     request = {
       text_type: "ssml",
       text: text,
       voice_id: voice_id,
       output_format: "mp3",
       response_target: file_name
     }
     @polly.synthesize_speech(request)
   end
 end

 # Formats a duration given in seconds as "HH:MM:SS", optionally followed by a
 # fixed-width fractional part (e.g. "HH:MM:SS.mmm" for decimals = 3).
 #
 # seconds  - Numeric duration in seconds (expected to be non-negative).
 # decimals - Number of fractional digits to append, capped at 8. With 0 (the
 #            default) the fraction is dropped and the seconds are floored.
 #
 # Returns the formatted String.
 def hms(seconds, decimals = 0)
   decs = [decimals, 8].min
   if decs > 0
     # Work in integer units of 10**-decs: the fraction is then always
     # zero-padded to exactly `decs` digits (also for Integer input), and a
     # rounding carry such as 59.9996 at three decimals rolls over into the
     # seconds instead of being rendered as ".0".
     scale = 10**decs
     whole, fraction = (seconds * scale).round.divmod(scale)
   else
     whole = seconds.floor
   end
   stamp = [whole / 3600, (whole / 60) % 60, whole % 60].map { |t| t.to_s.rjust(2, '0') }.join(':')
   stamp << ".#{fraction.to_s.rjust(decs, '0')}" if decs > 0
   stamp
 end

 # Reads a text document and splits it into chunks that are each sent to the
 # speech synthesizer as one request.
 module TextFetcher
   # Default upper bound (in characters) for the text collected into one chunk.
   MAX_CHUNK_CHARS = 1490

   # Replacement for every run of five or more newlines: the run is collapsed
   # to five newlines and tagged with the text the main script later searches
   # for to build the table of contents.
   CHAPTER_MARKER = "\n\n\n\n\nBook Chapter "

   # Loads the text at +url+ and returns it as an Array of String chunks.
   #
   # url       - Local file path, or an http(s)/ftp URL, of the text to read.
   # xpath     - Unused; kept so existing callers keep working.
   # max_chars - Chunk size limit in characters (default MAX_CHUNK_CHARS). A
   #             single sentence longer than the limit still becomes its own
   #             (oversized) chunk.
   #
   # Side effect: stores the number of chunks in the global $total.
   #
   # Returns an Array of Strings; empty when the document contains no text.
   def self.fetch_text_from(url, xpath, max_chars: MAX_CHUNK_CHARS)
     txt = read_source(url)

     # One regex instead of stacked fixed-length substitutions, so newline runs
     # of any length collapse to exactly one marker with the chapter title
     # directly after it.
     txt = txt.gsub(/\n{5,}/, CHAPTER_MARKER)

     txt = txt.gsub("&", 'and')

     puts "\n \n txt variable #{txt.inspect}" if $DEBUG
     # Split after sentence/clause punctuation that is followed by a space.
     node_texts = txt.split(/(?<=[,?.!] )\s*/)
     p node_texts.inspect if $DEBUG

     combined_texts = []
     tmp_string = ""

     node_texts.each do |sentence|
       # Start a new chunk when this sentence would overflow the current one;
       # never emit an empty chunk.
       if !tmp_string.empty? && tmp_string.size + sentence.size > max_chars
         combined_texts << tmp_string
         tmp_string = ""
       end
       tmp_string << " #{sentence}"
     end
     combined_texts << tmp_string unless tmp_string.empty?

     $total = combined_texts.count
     puts "\n \n total variable #{$total}" if $DEBUG
     combined_texts
   end

   # Returns the full content of +url+ as a String. Local paths are read with
   # File.read so an arbitrary string is never handed to Kernel#open; remote
   # URLs go through open-uri.
   def self.read_source(url)
     if url =~ %r{\A(?:https?|ftp)://}i
       # URI.open is the public open-uri entry point where it exists; older
       # open-uri versions only hook Kernel#open.
       URI.respond_to?(:open) ? URI.open(url, &:read) : open(url, &:read)
     else
       File.read(url)
     end
   end
   private_class_method :read_source
 end

 # Script entry point: synthesizes every text chunk with Polly, writes an SRT
 # subtitle file and a chapter (TOC) file next to the audio, then muxes
 # everything into an .m4b audiobook and opens it.
 if __FILE__ == $0

   toc = ""

   synthesizer = Synthesizer.new

   url = "/Users/xxx/Documents/divers/scripts/aws_polly_txt/test.txt"

   # This XPath assumes any contents
   xpath = '//text()'

   filename = url.split(File::SEPARATOR)
   # Base name shared by every file this run creates in the current directory.
   base = filename[-1]
   srt_path = "./#{base}.srt"
   toc_path = "./#{base}.toc"
   m4b_path = "./#{base}.m4b"

   input_texts = TextFetcher.fetch_text_from(url, xpath)
   progressbar = ProgressBar.create(:title => "Progress", :starting_at => 0, :total => input_texts.count, :format => "%a %e %P% Processed: %c from %C")

   # create srt file for subtitles
   File.delete(srt_path) if File.exist?(srt_path)
   File.open(srt_path, 'a+') { |f| f.write("\n0 \n00:00:00,000 --> 00:00:10,000 \n Book Start \n\n0\n00:00:10,000") }
   srt_position = 0

   # Paths of the generated MP3 parts, in playback order.
   part_files = []
   # end_positions[i] is the cumulative play time (seconds) at the end of part i.
   end_positions = []

   input_texts.each.with_index do |text, i|
     puts "\n \n Status index variable #{i} \n#{text}\n" if $DEBUG

     progressbar.increment

     if text.size > 1505
       # Cut at the last whitespace at or before index 1497. Fall back to a hard
       # cut when the only whitespace is the leading one (or there is none), so
       # the chunk is not reduced to a lone ".".
       # NOTE(review): the text after the cut is dropped, as before.
       cut = text.rindex(/\s/, 1497)
       cut = 1497 unless cut && cut > 0
       text = text[0, cut].rstrip + '.'
       puts "Length error fired"
       puts text.size
       puts "\n \n Status index variable #{i} \n#{text}\n" if $DEBUG
     end

     # Remember which part contains a chapter marker, together with its title word.
     toc << "#{i} #{text[/Book Chapter \b\w*/]} \n" if text =~ /Book Chapter /

     puts "\n \n Status index variable #{i}\n#{text}\n" if $DEBUG

     part_path = "./#{base}_tmp_#{i}.mp3"
     synthesizer.synthesize("<speak><prosody rate='slow'><p>" + text + "</p></prosody></speak>", part_path)
     part_files << part_path

     #sleep(0.05)

     Mp3Info.open(part_path) { |mp3info| srt_position += mp3info.length }
     end_positions << srt_position

     File.open(srt_path, 'a+') { |f| f.write(" --> #{hms(srt_position,3).sub("." , ",")}\n#{text}\n\n#{i}\n#{hms(srt_position,3).sub("." , ",")}" ) }
   end

   File.open(srt_path, 'a+') { |f| f.write(" --> #{hms(srt_position,3).sub("." , ",")}\nEND\n" ) }

   puts "\n \n toc variable #{toc}" if $DEBUG

   # create chapter file from the MP3 lengths collected above
   puts "\nCreate Table of Contents (TOC)\n"

   File.delete(toc_path) if File.exist?(toc_path)
   File.open(toc_path, 'a+') { |f| f.write("00:00:00.000 Book Start \n") }

   toc_ary = toc.split(/\s*?\n\s*/)

   p toc_ary if $DEBUG

   toc_ary.each do |toc_line|
     toc_line_number = toc_line.split.first.to_i
     puts toc_line_number if $DEBUG

     # Same value as summing the lengths of parts 0..toc_line_number, but
     # without re-opening every MP3 for every chapter.
     toc_line_number_position = end_positions[toc_line_number]
     File.open(toc_path, 'a+') { |f| f.write("#{hms(toc_line_number_position,3)} #{toc_line.split(' ')[1..-1].join(' ')} \n") }
   end

   # create final mp4 file

   # Concatenate the parts in synthesis order. A shell glob (`cat ..._tmp_*.mp3`)
   # sorts lexically, placing _tmp_10 before _tmp_2, and would also pick up
   # stale parts left over from earlier runs.
   File.open("./#{base}.mp3", 'wb') do |combined|
     part_files.each { |part| combined.write(File.binread(part)) }
   end

   movie = FFMPEG::Movie.new("./#{base}.mp3")

   options = {
     audio_codec: "aac", audio_bitrate: 48, audio_sample_rate: 22050, audio_channels: 1,
     threads: 4, custom: %w( )
   }

   movie.transcode("./#{base}.mp4", options)

   `ffmpeg -i "./#{base}.mp4" -f srt -i "./#{base}.srt" -c:a copy -c:s mov_text "./#{base}_sub.mp4"`
   `MP4Box -chap ./"#{base}.toc" -add ./"#{base}_sub.mp4" -new ./"#{base}.m4b"`
   `mp4tags -album "#{album_title}" -artist "#{album_artist}" -genre "#{album_genre}" -year "#{album_year}" "./#{base}.m4b"`
   `mp4file --optimize "./#{base}.m4b"`

   # File.size? returns nil when the file is missing or empty; to_i keeps the
   # comparison from raising in that case.
   # NOTE(review): with this threshold the cleanup effectively never runs --
   # presumably disabled on purpose while debugging; confirm before lowering it.
   if File.size?(m4b_path).to_i >= 1000000000000000000
     File.delete(toc_path) if File.exist?(toc_path)
     File.delete("./#{base}.mp4") if File.exist?("./#{base}.mp4")
     File.delete("./#{base}_sub.mp4") if File.exist?("./#{base}_sub.mp4")
     File.delete("./#{base}.mp3") if File.exist?("./#{base}.mp3")
     File.delete(srt_path) if File.exist?(srt_path)
     part_files.each { |part| File.delete(part) if File.exist?(part) }
   end

   `open ./"#{base}.m4b"`

 end
	# Gem dependencies for the Polly audiobook script.
	source 'https://rubygems.org'

	# Required by the script, although no Nokogiri call is visible in it.
	gem 'nokogiri', '~>1.6'
	# Provides Aws::Polly::Client, which performs the speech synthesis.
	gem 'aws-sdk', '~> 2'
	# Required by the script before it reads the input text with `open`.
	gem 'open-uri'
	# Provides ProgressBar, used to report progress while chunks are synthesized.
	gem 'ruby-progressbar'
	# Provides Mp3Info, used to read the length of each generated MP3 part.
	gem "mp3info"
	# Provides FFMPEG::Movie, used to transcode the combined MP3 to AAC/MP4.
	gem 'streamio-ffmpeg'
	# This Ruby script feeds plain text (for example, a text file exported from an EPUB with Calibre) into AWS Polly
	# and builds an audiobook with chapters and subtitles.
	# Subtitle generation does not work correctly yet.
	# It is run on OS X and shells out to the ffmpeg and MP4Box tools (as well as mp4tags and mp4file).

	require 'aws-sdk'
	require 'nokogiri'
	require 'open-uri'
	require 'ruby-progressbar'
	require "mp3info"
	require 'streamio-ffmpeg'


	# Audiobook metadata handed to `mp4tags` when the finished .m4b file is tagged.
	# These are placeholder values.
	album_title  = 'albumtitle'
	album_artist = 'albumartist'
	album_genre  = 'albumgenre'
	album_year   = 'albumyear'

	# Thin wrapper around the AWS Polly client that submits SSML text for
	# synthesis as MP3.
	class Synthesizer
	  # region - AWS region name the Polly client is created for.
	  def initialize(region = 'us-west-2')
	    @polly = Aws::Polly::Client.new(region: region)
	  end

	  # Submits +text+ (treated as SSML) to Polly and requests MP3 output.
	  #
	  # text      - SSML document to synthesize.
	  # file_name - Path passed to the client as the response target.
	  # voice_id  - Polly voice ID, see
	  #             http://docs.aws.amazon.com/polly/latest/dg/API_Voice.html
	  #             (for a Japanese voice you can use "Mizuki").
	  #
	  # Returns whatever the client's synthesize_speech call returns.
	  def synthesize(text, file_name = "./tmp.mp3", voice_id = "Nicole")
	    request = {
	      text_type: "ssml",
	      text: text,
	      voice_id: voice_id,
	      output_format: "mp3",
	      response_target: file_name
	    }
	    @polly.synthesize_speech(request)
	  end
	end

	# Formats a duration given in seconds as "HH:MM:SS", optionally followed by a
	# fixed-width fractional part (e.g. "HH:MM:SS.mmm" for decimals = 3).
	#
	# seconds  - Numeric duration in seconds (expected to be non-negative).
	# decimals - Number of fractional digits to append, capped at 8. With 0 (the
	#            default) the fraction is dropped and the seconds are floored.
	#
	# Returns the formatted String.
	def hms(seconds, decimals = 0)
	  decs = [decimals, 8].min
	  if decs > 0
	    # Work in integer units of 10**-decs: the fraction is then always
	    # zero-padded to exactly `decs` digits (also for Integer input), and a
	    # rounding carry such as 59.9996 at three decimals rolls over into the
	    # seconds instead of being rendered as ".0".
	    scale = 10**decs
	    whole, fraction = (seconds * scale).round.divmod(scale)
	  else
	    whole = seconds.floor
	  end
	  stamp = [whole / 3600, (whole / 60) % 60, whole % 60].map { |t| t.to_s.rjust(2, '0') }.join(':')
	  stamp << ".#{fraction.to_s.rjust(decs, '0')}" if decs > 0
	  stamp
	end

	# Reads a text document and splits it into chunks that are each sent to the
	# speech synthesizer as one request.
	module TextFetcher
	  # Default upper bound (in characters) for the text collected into one chunk.
	  MAX_CHUNK_CHARS = 1490

	  # Replacement for every run of five or more newlines: the run is collapsed
	  # to five newlines and tagged with the text the main script later searches
	  # for to build the table of contents.
	  CHAPTER_MARKER = "\n\n\n\n\nBook Chapter "

	  # Loads the text at +url+ and returns it as an Array of String chunks.
	  #
	  # url       - Local file path, or an http(s)/ftp URL, of the text to read.
	  # xpath     - Unused; kept so existing callers keep working.
	  # max_chars - Chunk size limit in characters (default MAX_CHUNK_CHARS). A
	  #             single sentence longer than the limit still becomes its own
	  #             (oversized) chunk.
	  #
	  # Side effect: stores the number of chunks in the global $total.
	  #
	  # Returns an Array of Strings; empty when the document contains no text.
	  def self.fetch_text_from(url, xpath, max_chars: MAX_CHUNK_CHARS)
	    txt = read_source(url)

	    # One regex instead of stacked fixed-length substitutions, so newline runs
	    # of any length collapse to exactly one marker with the chapter title
	    # directly after it.
	    txt = txt.gsub(/\n{5,}/, CHAPTER_MARKER)

	    txt = txt.gsub("&", 'and')

	    puts "\n \n txt variable #{txt.inspect}" if $DEBUG
	    # Split after sentence/clause punctuation that is followed by a space.
	    node_texts = txt.split(/(?<=[,?.!] )\s*/)
	    p node_texts.inspect if $DEBUG

	    combined_texts = []
	    tmp_string = ""

	    node_texts.each do |sentence|
	      # Start a new chunk when this sentence would overflow the current one;
	      # never emit an empty chunk.
	      if !tmp_string.empty? && tmp_string.size + sentence.size > max_chars
	        combined_texts << tmp_string
	        tmp_string = ""
	      end
	      tmp_string << " #{sentence}"
	    end
	    combined_texts << tmp_string unless tmp_string.empty?

	    $total = combined_texts.count
	    puts "\n \n total variable #{$total}" if $DEBUG
	    combined_texts
	  end

	  # Returns the full content of +url+ as a String. Local paths are read with
	  # File.read so an arbitrary string is never handed to Kernel#open; remote
	  # URLs go through open-uri.
	  def self.read_source(url)
	    if url =~ %r{\A(?:https?|ftp)://}i
	      # URI.open is the public open-uri entry point where it exists; older
	      # open-uri versions only hook Kernel#open.
	      URI.respond_to?(:open) ? URI.open(url, &:read) : open(url, &:read)
	    else
	      File.read(url)
	    end
	  end
	  private_class_method :read_source
	end

	# Script entry point: synthesizes every text chunk with Polly, writes an SRT
	# subtitle file and a chapter (TOC) file next to the audio, then muxes
	# everything into an .m4b audiobook and opens it.
	if __FILE__ == $0

	  toc = ""

	  synthesizer = Synthesizer.new

	  url = "/Users/xxx/Documents/divers/scripts/aws_polly_txt/test.txt"

	  # This XPath assumes any contents
	  xpath = '//text()'

	  filename = url.split(File::SEPARATOR)
	  # Base name shared by every file this run creates in the current directory.
	  base = filename[-1]
	  srt_path = "./#{base}.srt"
	  toc_path = "./#{base}.toc"
	  m4b_path = "./#{base}.m4b"

	  input_texts = TextFetcher.fetch_text_from(url, xpath)
	  progressbar = ProgressBar.create(:title => "Progress", :starting_at => 0, :total => input_texts.count, :format => "%a %e %P% Processed: %c from %C")

	  # create srt file for subtitles
	  File.delete(srt_path) if File.exist?(srt_path)
	  File.open(srt_path, 'a+') { |f| f.write("\n0 \n00:00:00,000 --> 00:00:10,000 \n Book Start \n\n0\n00:00:10,000") }
	  srt_position = 0

	  # Paths of the generated MP3 parts, in playback order.
	  part_files = []
	  # end_positions[i] is the cumulative play time (seconds) at the end of part i.
	  end_positions = []

	  input_texts.each.with_index do |text, i|
	    puts "\n \n Status index variable #{i} \n#{text}\n" if $DEBUG

	    progressbar.increment

	    if text.size > 1505
	      # Cut at the last whitespace at or before index 1497. Fall back to a hard
	      # cut when the only whitespace is the leading one (or there is none), so
	      # the chunk is not reduced to a lone ".".
	      # NOTE(review): the text after the cut is dropped, as before.
	      cut = text.rindex(/\s/, 1497)
	      cut = 1497 unless cut && cut > 0
	      text = text[0, cut].rstrip + '.'
	      puts "Length error fired"
	      puts text.size
	      puts "\n \n Status index variable #{i} \n#{text}\n" if $DEBUG
	    end

	    # Remember which part contains a chapter marker, together with its title word.
	    toc << "#{i} #{text[/Book Chapter \b\w*/]} \n" if text =~ /Book Chapter /

	    puts "\n \n Status index variable #{i}\n#{text}\n" if $DEBUG

	    part_path = "./#{base}_tmp_#{i}.mp3"
	    synthesizer.synthesize("<speak><prosody rate='slow'><p>" + text + "</p></prosody></speak>", part_path)
	    part_files << part_path

	    #sleep(0.05)

	    Mp3Info.open(part_path) { |mp3info| srt_position += mp3info.length }
	    end_positions << srt_position

	    File.open(srt_path, 'a+') { |f| f.write(" --> #{hms(srt_position,3).sub("." , ",")}\n#{text}\n\n#{i}\n#{hms(srt_position,3).sub("." , ",")}" ) }
	  end

	  File.open(srt_path, 'a+') { |f| f.write(" --> #{hms(srt_position,3).sub("." , ",")}\nEND\n" ) }

	  puts "\n \n toc variable #{toc}" if $DEBUG

	  # create chapter file from the MP3 lengths collected above
	  puts "\nCreate Table of Contents (TOC)\n"

	  File.delete(toc_path) if File.exist?(toc_path)
	  File.open(toc_path, 'a+') { |f| f.write("00:00:00.000 Book Start \n") }

	  # Entries are written as "<index> <title> \n", so split on the newline and
	  # swallow the whitespace around it.
	  toc_ary = toc.split(/\s*?\n\s*/)

	  p toc_ary if $DEBUG

	  toc_ary.each do |toc_line|
	    toc_line_number = toc_line.split.first.to_i
	    puts toc_line_number if $DEBUG

	    # Same value as summing the lengths of parts 0..toc_line_number, but
	    # without re-opening every MP3 for every chapter.
	    toc_line_number_position = end_positions[toc_line_number]
	    File.open(toc_path, 'a+') { |f| f.write("#{hms(toc_line_number_position,3)} #{toc_line.split(' ')[1..-1].join(' ')} \n") }
	  end

	  # create final mp4 file

	  # Concatenate the parts in synthesis order. A shell glob (`cat ..._tmp_*.mp3`)
	  # sorts lexically, placing _tmp_10 before _tmp_2, and would also pick up
	  # stale parts left over from earlier runs.
	  File.open("./#{base}.mp3", 'wb') do |combined|
	    part_files.each { |part| combined.write(File.binread(part)) }
	  end

	  movie = FFMPEG::Movie.new("./#{base}.mp3")

	  options = {
	    audio_codec: "aac", audio_bitrate: 48, audio_sample_rate: 22050, audio_channels: 1,
	    threads: 4, custom: %w( )
	  }

	  movie.transcode("./#{base}.mp4", options)

	  `ffmpeg -i "./#{base}.mp4" -f srt -i "./#{base}.srt" -c:a copy -c:s mov_text "./#{base}_sub.mp4"`
	  `MP4Box -chap ./"#{base}.toc" -add ./"#{base}_sub.mp4" -new ./"#{base}.m4b"`
	  `mp4tags -album "#{album_title}" -artist "#{album_artist}" -genre "#{album_genre}" -year "#{album_year}" "./#{base}.m4b"`
	  `mp4file --optimize "./#{base}.m4b"`

	  # File.size? returns nil when the file is missing or empty; to_i keeps the
	  # comparison from raising in that case.
	  # NOTE(review): with this threshold the cleanup effectively never runs --
	  # presumably disabled on purpose while debugging; confirm before lowering it.
	  if File.size?(m4b_path).to_i >= 1000000000000000000
	    File.delete(toc_path) if File.exist?(toc_path)
	    File.delete("./#{base}.mp4") if File.exist?("./#{base}.mp4")
	    File.delete("./#{base}_sub.mp4") if File.exist?("./#{base}_sub.mp4")
	    File.delete("./#{base}.mp3") if File.exist?("./#{base}.mp3")
	    File.delete(srt_path) if File.exist?(srt_path)
	    part_files.each { |part| File.delete(part) if File.exist?(part) }
	  end

	  `open ./"#{base}.m4b"`

	end