-
-
Save mkuendig/075f7f672f37bdb589747b91a4bf2192 to your computer and use it in GitHub Desktop.
Example Ruby code using Amazon Polly
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
source 'https://rubygems.org' | |
gem 'nokogiri', '~>1.6' | |
gem 'aws-sdk', '~> 2' | |
gem 'open-uri' | |
gem 'ruby-progressbar' | |
gem "mp3info" | |
gem 'streamio-ffmpeg' |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Goal of this Ruby script: feed text (e.g. a text file exported from an EPUB with Calibre) into AWS Polly
# and create an audiobook with chapters and subtitles.
# Subtitles are not working correctly yet.
# This is run on OS X with the ffmpeg and MP4Box tools.
require 'aws-sdk' | |
require 'nokogiri' | |
require 'open-uri' | |
require 'ruby-progressbar' | |
require "mp3info" | |
require 'streamio-ffmpeg' | |
# Metadata placeholders; the main script passes them to mp4tags to tag the
# finished audiobook file.
album_title  = 'albumtitle'
album_artist = 'albumartist'
album_genre  = 'albumgenre'
album_year   = 'albumyear'
# Small wrapper around Aws::Polly::Client that submits SSML text for speech
# synthesis as MP3.
class Synthesizer
  # region - AWS region name passed to Aws::Polly::Client.new.
  def initialize(region='us-west-2')
    @polly = Aws::Polly::Client.new(region: region)
  end

  # Submits one synthesize_speech request to Polly.
  #
  # text      - the text to speak; sent with text_type "ssml", so it must be
  #             an SSML document.
  # file_name - value passed as response_target (the path the client is asked
  #             to write the audio to).
  # voice_id  - Polly voice to use.
  #
  # Returns whatever the client's synthesize_speech call returns.
  def synthesize(text, file_name="./tmp.mp3", voice_id="Nicole")
    request = {
      text_type: "ssml",
      output_format: "mp3",
      # You can use voice IDs http://docs.aws.amazon.com/polly/latest/dg/API_Voice.html
      # If you want to synthesize Japanese voice, you can use "Mizuki"
      voice_id: voice_id,
      text: text,
      response_target: file_name
    }
    @polly.synthesize_speech(request)
  end
end
# Formats a duration in seconds as a zero-padded "HH:MM:SS" string, optionally
# followed by a fixed-width decimal fraction ("HH:MM:SS.mmm").
#
# seconds  - non-negative Numeric duration in seconds.
# decimals - number of fractional digits to append (capped at 8). With 0 or
#            less, no fraction is appended and the value is truncated to
#            whole seconds.
#
# Returns a String. When a fraction is requested it always has exactly
# `decimals` digits, and rounding carries into the seconds/minutes/hours
# fields (59.9996 with 3 decimals gives "00:01:00.000").
def hms(seconds, decimals = 0)
  places = [decimals, 8].min
  if places > 0
    # Round once on an integer scale so the fraction can never round up to a
    # full second without the clock part seeing the carry.
    scale = 10**places
    whole, fraction = (seconds * scale).round.divmod(scale)
  else
    whole = seconds.floor
    fraction = nil
  end
  clock = format('%02d:%02d:%02d', whole / 3600, (whole / 60) % 60, whole % 60)
  fraction ? "#{clock}.#{fraction.to_s.rjust(places, '0')}" : clock
end
# Loads a text document and splits it into sentence-aligned chunks that are
# small enough to be sent to Polly one request at a time.
module TextFetcher
  # Sentences are packed into a chunk until adding the next one would exceed
  # this many characters.
  MAX_CHUNK_CHARS = 1490

  # Sources with one of these schemes are fetched through open-uri; anything
  # else is treated as a local file path.
  REMOTE_SOURCE = %r{\A(?:https?|ftp)://}i

  # Reads the document at +url+ and returns it as an Array of String chunks.
  #
  # url   - local file path, or an http(s)/ftp URL.
  # xpath - unused; kept so existing callers keep working.
  #
  # Every run of five or more newlines is treated as a chapter break and is
  # replaced by five newlines followed by the marker "Book Chapter ". Every
  # "&" is replaced by "and". The text is then split after ", ", "? ", ". "
  # and "! " and the pieces are packed into chunks of roughly
  # MAX_CHUNK_CHARS characters (a single piece longer than the limit becomes
  # its own, oversized chunk).
  #
  # Side effect: sets the global $total to the number of chunks.
  # Empty input yields a single empty chunk.
  def self.fetch_text_from(url, xpath)
    txt = read_source(url)
    txt = txt.gsub(/\n{5,}/, "\n\n\n\n\nBook Chapter ").gsub("&", 'and')
    puts "\n \n txt variable #{txt.inspect}" if $DEBUG
    node_texts = txt.split(/(?<=[,?.!] )\s*/)
    p node_texts.inspect if $DEBUG

    combined_texts = []
    buffer = ""
    node_texts.each do |sentence|
      # Flush only a non-empty buffer, so an over-long first sentence does not
      # produce an empty leading chunk.
      if buffer.size + sentence.size > MAX_CHUNK_CHARS && !buffer.empty?
        combined_texts << buffer
        buffer = ""
      end
      buffer << " #{sentence}"
    end
    combined_texts << buffer

    $total = combined_texts.count
    puts "\n \n total variable #{$total}" if $DEBUG
    combined_texts
  end

  # Returns the full contents of +source+ as a String. Avoids Kernel#open,
  # which would run a command for a string starting with "|" and no longer
  # opens URLs on current Ruby versions.
  def self.read_source(source)
    if source =~ REMOTE_SOURCE
      URI.parse(source).open(&:read)
    else
      File.read(source)
    end
  end
  private_class_method :read_source
end
# Script entry point: synthesizes every text chunk with Polly, writes an SRT
# subtitle file and a chapter (TOC) file from the measured MP3 durations,
# joins the chunks into one MP3, transcodes it to AAC and muxes everything
# into an .m4b audiobook using ffmpeg, MP4Box, mp4tags and mp4file.
# All artifacts are written to the current directory and named after the
# input file.
if __FILE__ == $0
  synthesizer = Synthesizer.new
  # An optional first command-line argument overrides the default input path.
  url = ARGV[0] || "/Users/xxx/Documents/divers/scripts/aws_polly_txt/test.txt"
  # This XPath assumes any contents
  xpath = '//text()'

  base = File.basename(url)
  srt_path = "./#{base}.srt"
  toc_path = "./#{base}.toc"
  mp3_path = "./#{base}.mp3"
  mp4_path = "./#{base}.mp4"
  sub_mp4_path = "./#{base}_sub.mp4"
  m4b_path = "./#{base}.m4b"
  chunk_path = ->(index) { "./#{base}_tmp_#{index}.mp3" }
  srt_time = ->(seconds) { hms(seconds, 3).sub(".", ",") }
  # Runs an external tool with an argument vector (no shell, so file names
  # need no quoting) and reports a failure instead of ignoring it.
  run = lambda do |*command|
    system(*command) || warn("Command failed: #{command.join(' ')}")
  end

  input_texts = TextFetcher.fetch_text_from(url, xpath)
  progressbar = ProgressBar.create(:title => "Progress", :starting_at => 0, :total => input_texts.size, :format => "%a %e %P% Processed: %c from %C")

  chapters = []    # [chunk index, chapter label] for every chunk holding a chapter marker
  chunk_ends = []  # cumulative end time (seconds) of each chunk, indexed by chunk
  position = 0

  # Synthesize each chunk and write one SRT cue per chunk:
  # cue number (1-based), "start --> end", then the text.
  File.open(srt_path, 'w') do |srt|
    input_texts.each_with_index do |text, i|
      progressbar.increment
      if text.size > 1505
        # Over-long chunk: cut at the last whitespace at or before index 1497,
        # or hard-cut there when the chunk contains no whitespace at all.
        # NOTE(review): the remainder of the chunk is discarded, as before.
        cut = text.rindex(/\s/, 1497) || 1497
        text = text[0, cut].rstrip + '.'
        puts "Length error fired"
        puts text.size
      end
      if text =~ /Book Chapter /
        chapters << [i, text[/Book Chapter \b\w*/].to_s.strip]
      end
      puts "\n \n Status index variable #{i}\n#{text}\n" if $DEBUG
      synthesizer.synthesize("<speak><prosody rate='slow'><p>" + text + "</p></prosody></speak>", chunk_path.call(i))
      #sleep(0.05)
      cue_start = position
      Mp3Info.open(chunk_path.call(i)) do |mp3info|
        position += mp3info.length
      end
      chunk_ends << position
      # A blank line ends an SRT cue, so collapse blank lines inside the text.
      caption = text.gsub(/\s*\n\s*/, "\n").strip
      srt.write("#{i + 1}\n#{srt_time.call(cue_start)} --> #{srt_time.call(position)}\n#{caption}\n\n")
    end
  end
  p chapters if $DEBUG

  # Create the chapter file from the durations collected above.
  # NOTE(review): as before, a chapter is stamped with the END time of the
  # chunk that contains its marker - confirm that this is the intended offset.
  puts "\nCreate Table of Contents (TOC)\n"
  File.open(toc_path, 'w') do |toc_file|
    toc_file.write("00:00:00.000 Book Start \n")
    chapters.each do |index, label|
      toc_file.write("#{hms(chunk_ends[index], 3)} #{label} \n")
    end
  end

  # Join the chunk MP3s in numeric order. A shell glob would sort
  # "_tmp_10" before "_tmp_2" and scramble the audio relative to the timings.
  File.open(mp3_path, 'wb') do |combined|
    input_texts.each_index { |i| IO.copy_stream(chunk_path.call(i), combined) }
  end

  # create final mp4 file
  movie = FFMPEG::Movie.new(mp3_path)
  options = {
    audio_codec: "aac", audio_bitrate: 48, audio_sample_rate: 22050, audio_channels: 1,
    threads: 4, custom: []
  }
  movie.transcode(mp4_path, options)
  run.call('ffmpeg', '-i', mp4_path, '-f', 'srt', '-i', srt_path, '-c:a', 'copy', '-c:s', 'mov_text', sub_mp4_path)
  run.call('MP4Box', '-chap', toc_path, '-add', sub_mp4_path, '-new', m4b_path)
  run.call('mp4tags', '-album', album_title, '-artist', album_artist, '-genre', album_genre, '-year', album_year, m4b_path)
  run.call('mp4file', '--optimize', m4b_path)

  # File.size? returns nil when the file is missing or empty, hence to_i.
  # NOTE(review): the threshold is so large that this cleanup effectively
  # never runs - confirm whether that is intentional.
  if File.size?(m4b_path).to_i >= 1_000_000_000_000_000_000
    [toc_path, mp4_path, sub_mp4_path, mp3_path, srt_path].each do |path|
      File.delete(path) if File.exist?(path)
    end
    Dir.glob("./#{base}_tmp_*.mp3").each { |path| File.delete(path) }
  end
  run.call('open', m4b_path)
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment