Last active
July 11, 2022 14:47
-
-
Save pcreux/7a5ce5f8ee0467ec3e882d26fc78a2ec to your computer and use it in GitHub Desktop.
Parse dbt logs and turn them into a CSV
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
# Usage: ruby dbt-log-to-csv.rb PATH_TO_DBT_LOGS | |
# One "created table" entry parsed from a dbt log line.
#
# The struct stores the raw captured strings (+rows_str+, +size_str+,
# +duration_str+) and derives numeric values from them on demand.
Model = Struct.new(:schema, :table, :rows_str, :size_str, :duration_str) do
  # Row count as an Integer, expanding the abbreviated suffix
  # (k/m/b/t = thousand/million/billion/trillion).
  #
  # @return [Integer]
  # @raise [RuntimeError] when +rows_str+ ends in neither a digit nor a known suffix
  def rows
    number = rows_str.to_f
    scaled =
      case rows_str
      when /\d$/ then number
      when /k$/  then number * 1_000
      when /m$/  then number * 1_000_000
      when /b$/  then number * 1_000_000_000
      when /t$/  then number * 1_000_000_000_000
      else raise "unexpected rows format: #{rows_str.inspect}"
      end
    scaled.to_i
  end

  # Processed size in bytes as an Integer.
  #
  # Accepts a bare number, or a number followed by B/Bytes, KB, MB, GB, TB or PB.
  # Units are expanded with decimal (powers of 1000) multipliers.
  # NOTE(review): dbt may format these with 1024-based steps; confirm against
  # the adapter before relying on exact byte counts.
  #
  # @return [Integer]
  # @raise [RuntimeError] when +size_str+ has no recognised unit
  def size
    number = size_str.to_f
    scaled =
      case size_str
      # The byte-unit pattern requires a digit right before the unit so that
      # "KB", "MB", ... fall through to their own branches below.
      when /\d$/, /\d\s*(?:B|Bytes?)$/ then number
      when /KB$/ then number * 1_000
      when /MB$/ then number * 1_000_000
      when /GB$/ then number * 1_000_000_000
      when /TB$/ then number * 1_000_000_000_000
      when /PB$/ then number * 1_000_000_000_000_000
      else raise "unexpected size format: #{size_str.inspect}"
      end
    scaled.to_i
  end

  # Duration truncated to whole units (seconds, presumably, given the "s"
  # that follows the number in the log line).
  #
  # @return [Integer]
  def duration
    duration_str.to_f.to_i
  end

  # Estimated cost: 5 per 10^12 processed bytes, rounded to two decimals.
  #
  # @return [Float]
  def cost
    (size.to_f / 1000 / 1000 / 1000 / 1000 * 5).round(2)
  end

  # Struct's default inspect output with the derived values appended.
  #
  # @return [String]
  def inspect
    super.gsub(/>$/, ", rows=#{rows}, size=#{size}, duration=#{duration}, cost=$#{cost}>")
  end

  # Values for one CSV row; overrides Struct#to_a to include derived fields.
  #
  # @return [Array]
  def to_a
    [schema, table, cost, rows_str, rows, size_str, size, duration]
  end
end
# Read the log from the files named on the command line (ARGF falls back to
# STDIN when no file is given) and keep only the "created table" lines.
content = ARGF.read
lines = content.split("\n")
statements = lines.select { |line| line.include?("OK created table model") }

# Captures, in order: schema, table, rows, size, duration, from lines such as
#   ... analytics.orders..... [CREATE TABLE (1.2k rows, 3.4 MB processed) in 5.6s]
regex = /(?<schema>\w+)\.(?<table>\w+)\.* \[CREATE TABLE \((?<rows>.+) rows, (?<size>.+) processed\) in (?<duration>.+)s\]/

# Lines that mention a created table but do not carry the expected
# "[CREATE TABLE (...)]" summary are reported on stderr and skipped, rather
# than producing an all-nil Model that fails later with an unrelated message.
models = statements.filter_map do |line|
  match = line.match(regex)
  if match
    Model.new(*match.captures)
  else
    warn "skipping unparseable line: #{line.inspect}"
    nil
  end
end

# The header mirrors Model#to_a: each rows/size pair is the raw string
# followed by its numeric expansion.
puts "schema,table,cost,rows,rows,size,size,duration"
models.each do |model|
  puts model.to_a.join(',')
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment