Created
February 9, 2021 12:32
-
-
Save nnslvp/bfa29787d8058e244579b1621fce48ad to your computer and use it in GitHub Desktop.
A tool for concatenating large text files
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# frozen_string_literal: true | |
require 'optparse' | |
require 'sys/filesystem' # gem install sys-filesystem | |
# Parses the command line of the concat tool.
#
# After construction the parsed values are exposed through readers:
# * +parts+  - the positional arguments left after option parsing (the part files),
# * +output+ - the value given to -o/--output,
# * +keep+   - true when -k/--keep was given, false otherwise.
#
# When the command line is invalid (unknown option, missing -o, or no part
# paths), an error is written to stderr and the process exits with status 1.
class ConcatOptionsParser
  attr_reader :parts, :output, :keep

  # @param argv [Array<String>] arguments to parse; defaults to the process ARGV.
  #   Recognised options are removed from this array in place, exactly as
  #   OptionParser#parse! does, and the remainder becomes +parts+.
  def initialize(argv = ARGV)
    @keep = false
    @script_name = File.basename($PROGRAM_NAME)

    begin
      build_parser.parse!(argv)
    rescue OptionParser::ParseError => e
      # Unknown flags or a missing -o argument: report it like our own errors
      # instead of letting the exception surface as a stack trace.
      fail_with(e.message)
    end

    @parts = argv
    fail_with('-o --output is required') unless @output
    # argv is always an Array, so a truthiness check can never detect
    # "no parts given"; it has to be an emptiness check.
    fail_with('<parts_paths> is required') if @parts.empty?
  end

  private

  # Builds the OptionParser that fills @keep and @output.
  def build_parser
    OptionParser.new do |parser|
      parser.banner = "\nUsage: ruby #{@script_name} <parts_paths> -o <output_path>"
      parser.separator ''
      parser.separator 'Options:'

      parser.on('-k', '--keep', TrueClass, 'The keep parts files flag.') do |keep_flag|
        @keep = keep_flag
      end

      parser.on('-o', '--output STR', String, 'The output path.') do |output|
        @output = output
      end

      parser.on_tail('-h', '--help', '--usage', 'Show this usage message and quit.') do
        puts parser.help
        puts ''
        exit
      end
    end
  end

  # Prints a usage error to stderr and terminates with a failure status so
  # that calling shells and scripts can detect the problem.
  def fail_with(message)
    warn ''
    warn "Error: #{message}"
    warn "For help: ruby #{@script_name} -h"
    warn ''
    exit 1
  end
end
# Concatenates a list of part files into one new output file.
#
# Parts are appended shortest path first, with equal-length paths in
# lexicographic order (so "part2" comes before "part10"). Unless +keep+ is
# truthy, each part is deleted once it has been appended.
class Concat
  # @param parts_paths [Array<String>] paths of the files to concatenate
  # @param result_path [String] path of the output file; it must not exist yet
  # @param keep [Boolean, nil] when truthy the part files are left in place
  def initialize(parts_paths, result_path, keep)
    @parts_paths = parts_paths
    @result_path = result_path
    @keep = keep
  end

  # Creates the output file and appends every part to it, reporting progress
  # on stdout.
  #
  # @raise [RuntimeError] if the output file already exists, or if a part is
  #   larger than the free space reported for the output directory
  # @return [nil]
  def call
    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    raise 'Output file already exist' if File.exist?(@result_path)

    # Create the (empty) output up front so it exists even with no parts.
    File.open(@result_path, 'wb') {}
    output_dir = File.dirname(File.expand_path(@result_path))

    ordered_parts.each do |part_path|
      print part_path
      check_free_space!(output_dir, part_path)
      append_part(part_path)
      puts ' Ready.'
    end

    puts 'Output: '
    puts @result_path
  end

  # Raises when +part_path+ is larger than the space available on the
  # filesystem that holds +folder_path+.
  #
  # Available space is computed as block_size * blocks_available from
  # Sys::Filesystem.stat (sys-filesystem gem).
  #
  # @param folder_path [String] a path on the target filesystem
  # @param part_path [String] the part file about to be appended
  # @raise [RuntimeError] when the part does not fit
  def check_free_space!(folder_path, part_path)
    info = Sys::Filesystem.stat(folder_path)
    available = info.block_size * info.blocks_available
    return if File.size(part_path) <= available

    raise "Not enough free space for concat part: #{part_path}"
  end

  private

  # Part paths ordered by length, then lexicographically. A single composite
  # key is used because sort_by is not a stable sort, so chaining
  # sort.sort_by(&:length) does not guarantee the order of equal-length paths.
  def ordered_parts
    @parts_paths.sort_by { |path| [path.length, path] }
  end

  # Appends one part to the output without going through a shell, so paths
  # containing spaces, quotes or shell metacharacters are handled verbatim.
  # The part is removed only after the copy finished without raising.
  def append_part(part_path)
    File.open(@result_path, 'ab') do |result_file|
      IO.copy_stream(part_path, result_file)
    end
    File.delete(part_path) unless @keep
  end
end
# Script entry point: parse the command line, then run the concatenation
# with the parsed part paths, output path and keep flag.
cli = ConcatOptionsParser.new
parts, output, keep = cli.parts, cli.output, cli.keep
Concat.new(parts, output, keep).call
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Example usage: ruby concat.rb parts_folder/* -o merged_data.json