Created
July 28, 2016 06:10
-
-
Save matthewjackowski/9c86cf9a61f3184e2f1f4fe85cda53e3 to your computer and use it in GitHub Desktop.
For XLIFF files - remove segmentation markup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby
# frozen_string_literal: true

# For XLIFF files - remove segmentation markup.
#
# Reads the XML file given by --infile, and for every trans-unit element:
#   * replaces the children of its <source> and <target> with their plain
#     text content (dropping any inline markup nested inside them), and
#   * removes its <seg-source> child.
# The resulting document is written, UTF-8 encoded, to --outfile.
#
# Usage: script -i INFILE -o OUTFILE
require 'optparse'
require 'nokogiri'

# This will hold the options we parse
options = {}

# Build command line parser
parser = OptionParser.new do |p|
  # Take a filename for the input
  p.on('-i', '--infile INFILE', 'The name of the file to process') do |v|
    options[:infile] = v
  end
  # Take a filename for the output file
  p.on('-o', '--outfile OUTFILE', 'The name of the file to save') do |v|
    options[:outfile] = v
  end
end
parser.parse!

# Both paths are mandatory; fail with the usage text instead of letting
# File.open(nil) / File.write(nil, ...) raise an opaque TypeError later.
missing = %i[infile outfile].reject { |key| options[key] }
unless missing.empty?
  flags = missing.map { |key| "--#{key}" }.join(', ')
  abort("Missing required option(s): #{flags}\n\n#{parser}")
end

# Open file with Nokogiri and set file encoding to UTF-8
doc = File.open(options[:infile]) { |f| Nokogiri::XML(f) }
doc.encoding = 'utf-8'

# Elements are matched by local-name() so the queries work both for documents
# that declare a default namespace on the root element and for documents with
# no namespace at all; an un-prefixed name test only matches the latter.
doc.xpath('//*[local-name()="trans-unit"]').each do |unit|
  # Flatten <source> and <target> to plain text. Either may be absent from a
  # given trans-unit, so skip whatever is not there.
  %w[source target].each do |name|
    node = unit.at_xpath("./*[local-name()='#{name}']")
    node.content = node.content if node
  end

  # Drop the segmented copy of the source, when the unit has one.
  seg_source = unit.at_xpath('./*[local-name()="seg-source"]')
  seg_source.remove if seg_source
end

# Write the final file
File.write(options[:outfile], doc.to_xml)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment