Created
February 1, 2021 13:15
-
-
Save reizist/346ec9b37e347c602ba1fcb89dc94f74 to your computer and use it in GitHub Desktop.
Output bigquery schema from TD
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# TODO: | |
# 1. gem install td-client | |
# 2. Set TD_API_KEY, TD_DATABASE | |
# 3. Prepare table_list.txt including table name list | |
# 4. Execute like this: ruby generate-schema.rb table_list.txt | |
require 'td-client' | |
DB = ENV["TD_DATABASE"] | |
TYPE_TABLE = { | |
"boolean" => "BOOLEAN", | |
"tinyint" => "INTEGER", | |
"integer" => "INTEGER", | |
"bigint" => "INTEGER", | |
"long" => "INTEGER", | |
"double" => "FLOAT", | |
"string" => "STRING", | |
"timestamp" => "STRING" | |
} | |
def get_bq_type(type) | |
TYPE_TABLE[type] || raise("detect unknown column type: #{type}. Add type to TYPE_TABLE.") | |
end | |
def generate_string_from_columns(columns) | |
JSON.pretty_generate(columns) | |
end | |
def execute | |
apikey = ENV['TD_API_KEY'] | |
cli = TreasureData::Client.new(apikey) | |
columns = [] | |
begin | |
table_list = File.open(ARGV[0]).readlines.map(&:chomp) | |
rescue => e | |
puts "Just specify valid table list file.\n\n #{e}" | |
end | |
table_list.each do |table| | |
puts "print table: #{table}\n\n" | |
if table.include? "_masked" | |
puts "Skip #{table} because it has sensitive data.\n\n" | |
next | |
end | |
begin | |
schema = cli.table(DB, table).schema | |
rescue => e | |
puts "Encountered error while loadign the schema.\n#{e}\n\n" | |
next | |
end | |
columns = [] | |
schema.fields.each do |field| | |
columns << { "name" => field.name, "type" => get_bq_type(field.type) } | |
end | |
if ENV['FILE'] | |
table_name = ENV['TRIM'] ? table.gsub(ENV['TRIM'], "") : table | |
File.open("#{table_name}.json", mode: "w") do |f| | |
f.puts generate_string_from_columns(columns) | |
end | |
else | |
puts generate_string_from_columns(columns) | |
end | |
end | |
end | |
execute |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment