Created
July 2, 2022 21:53
-
-
Save andynu/88cf6e14908ae0eb256e640ec8d21d79 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env ruby | |
require 'chronic' | |
require 'set' | |
# With no file arguments, fall back to the default log path so that ARGF
# (used when loading transactions) reads that file.
ARGV.replace(['log/production.log']) if ARGV.empty?
# Adapted from the facets gem.
# NOTE: this reopens the core Enumerable module, so every Enumerable in the
# process (Array, Range, Hash, ...) gains #squeeze. String#squeeze is a
# separate built-in method and is not affected.
module Enumerable
  # Collapses runs of adjacent equal elements into a single element.
  #
  #   [1, 1, 2, 2, 1].squeeze           #=> [1, 2, 1]
  #   [1, 2, 2, 3, 3, 2, 1].squeeze(3)  #=> [1, 2, 2, 3, 2, 1]
  #
  # @param limited_to [Array] when given, only these values are collapsed;
  #   runs of any other value are kept in full
  # @return [Array] a new array; the receiver is not modified
  def squeeze(*limited_to)
    result = []
    previous = nil
    seen_any = false # distinguishes "no previous element" from a previous nil

    each do |element|
      repeat = seen_any && previous == element
      squeezable = limited_to.empty? || limited_to.include?(element)
      result << element unless repeat && squeezable

      # Always remember the element just visited, so only *adjacent*
      # duplicates are collapsed even when non-squeezable values intervene.
      previous = element
      seen_any = true
    end

    result
  end
end
# One logged request: every message that shares the same tag list, plus the
# timestamps of the first and last line seen for it.
Transaction = Struct.new(:start_time, :end_time, :tags, :messages) do
  # Extracts the request from the first message containing "Started".
  #
  # The text between "Started " and " for ..." is taken, double quotes are
  # removed, and the result is split at the first "?".
  #
  # @return [Array(String, String), Array(String, nil), Array(nil, nil)]
  #   [url, querystring]; querystring is nil when the request has none, and
  #   both are nil when no "Started" message was captured
  def split_url
    started = messages.find { |message| message.include?('Started') }
    return [nil, nil] unless started

    # Strip the quotes before splitting so neither half keeps a stray '"'.
    request = started.gsub(/Started /, '').gsub(/ for .*/, '').delete('"')
    # Limit of 2 keeps any further "?" inside the query string.
    url, querystring = request.split('?', 2)
    [url, querystring]
  end

  # @return [String, nil] the request without its query string
  def url
    split_url.first
  end

  # @return [String, nil] first tag of the log line
  def user
    tags[0]
  end

  # @return [String, nil] second tag of the log line
  def session_id
    tags[1]
  end

  # @return [String, nil] third tag of the log line
  def request_id
    tags[2]
  end
end
# A labelled tree node with a set of child nodes, a hit counter and the most
# recent time recorded for it.
class Node
  attr_reader :value, :children, :count
  attr_accessor :latest_time

  # Returns the shared node registered for +value+, creating it on first use.
  # The registry is keyed by value only, so every caller asking for the same
  # value receives the same object.
  #
  # @param value [Object] label of the node
  # @return [Node]
  def self.instance(value)
    # Class-level instance variable instead of @@nodes: class variables are
    # shared across the whole inheritance tree.
    @nodes ||= {}
    @nodes[value] ||= Node.new(value)
  end

  # @param value [Object] label of the node
  def initialize(value)
    @value = value
    @children = Set.new
    @count = 0
  end

  # Adds +node+ as a child; adding the same node twice has no further effect
  # because children are kept in a Set.
  #
  # @param node [Node]
  # @return [Set] the updated set of children
  def add(node)
    @children << node
  end

  # Bumps the hit counter by one.
  #
  # @return [Integer] the new count
  def increment
    @count += 1
  end
end
# An ordered list of transactions treated as one session.
Sess = Struct.new(:transactions) do
  # @return [Object, nil] start time of the first transaction, or nil when the
  #   session holds no transactions
  def start_time
    transactions.first&.start_time
  end

  # @return [Object, nil] user of the first transaction, or nil when the
  #   session holds no transactions
  def user
    transactions.first&.user
  end

  # URLs of the session's transactions, in order.
  #
  # @param squeeze [Boolean] collapse runs of adjacent identical URLs into one
  # @param scrub [Boolean] replace every run of digits with a single "#"
  # @return [Array<String>]
  def urls(squeeze: true, scrub: true)
    # Transactions without a URL (nil) are left out.
    list = transactions.map(&:url).compact
    list = list.map { |url| url.gsub(/\d+/, '#').squeeze('#') } if scrub
    # Standard-library run collapsing; no core-class extension required.
    list = list.chunk_while { |left, right| left == right }.map(&:first) if squeeze
    list
  end
end
# Builds a tree of URL path segments from the given transactions.
#
# Each transaction's URL ("METHOD /path") is normalised: runs of digits become
# "#", repeated "#" and "/" are collapsed, and a trailing "/" is dropped. The
# path is then split on "/" and the HTTP method is appended to the last
# segment ("edit-GET"), so the final node of a path identifies one endpoint.
# That node's count is incremented and its latest_time is raised to the
# transaction's start_time when that is later.
#
# Nodes are created per parent, so equal segment names under different
# parents stay separate and the result is always a tree.
#
# @param transactions [Enumerable] objects responding to #url and #start_time
# @return [Node, nil] the root node (value ""), or nil when no transaction
#   contributed a URL
def url_trie(transactions)
  root = nil

  transactions.each do |trans|
    url = trans.url
    next if url.nil? # nothing to place in the tree

    meth, path = url.split(' ', 2)
    path = path.to_s.gsub(/\d+/, '#').squeeze('#').squeeze('/')
    # Keep a lone "/" so the site root still hangs below the root node.
    path = path.chomp('/') if path.length > 1

    labels = "#{path}-#{meth}".split('/')
    # The empty string before the leading "/" is the root itself.
    labels.shift if labels.first == ''

    root ||= Node.new('')
    node = root
    labels.each do |label|
      child = node.children.find { |candidate| candidate.value == label }
      unless child
        child = Node.new(label)
        node.add(child)
      end
      node = child
    end

    node.increment
    # compact tolerates a missing time on either side.
    node.latest_time = [node.latest_time, trans.start_time].compact.max
  end

  root
end
# Reads every line from ARGF (the files named in ARGV, or standard input) and
# groups the lines into transactions by their tag list.
#
# A line is expected to look like
#   <time>| [tag] [tag] ... message
# Lines that do not match are ignored. For each distinct tag list the first
# line's time becomes start_time, the last line's time becomes end_time, and
# every message is appended in order of appearance.
#
# @return [Array<Transaction>] one transaction per distinct tag list, in order
#   of first appearance
def load_transactions
  transactions = {}

  ARGF.each_line do |line|
    # Split on the first "|" only, so a "|" inside the message is preserved.
    time, log = line.split('|', 2)
    match = /\A ((?:\[[^\]]+\] )+)(.*)\Z/.match(log.to_s)
    next unless match

    # Take the text between each pair of brackets; a tag may contain spaces.
    tags = match[1].scan(/\[([^\]]+)\]/).flatten
    timestamp = Chronic.parse(time) # parsed once, used for start and end

    transaction = (transactions[tags] ||= Transaction.new(timestamp, nil, tags, []))
    transaction.messages << match[2]
    transaction.end_time = timestamp
  end

  transactions.values
end
# Load every transaction from the input and fold the URLs into a tree.
transactions = load_transactions
# pp transactions.map(&:url).tally
root = url_trie(transactions)

# Disabled experiment: group transactions by session id and dump a summary
# for each session.
# sessions = {}
# transactions.each do |trans|
#   sessions[trans.session_id] ||= Sess.new([])
#   sessions[trans.session_id].transactions << trans
# end
# pp sessions.values.map{|s| [s.start_time, s.user, s.urls] }
# pp sessions.values.map{|s| [s.url_trie] }
# puts requests
# Prints +node+ and its descendants to standard output, one node per line,
# indented with one tab per level. Nodes without children are followed by
# " (<count> - <date of latest_time>)"; the date part is blank when
# latest_time is nil.
#
# @param node [#value, #children, #count, #latest_time] node to print
# @param depth [Integer] indentation level of +node+
# @param ancestors [Array] nodes on the path from the starting node down to
#   +node+; used internally to stop when the graph loops back on itself
# @return [void]
def print_tree(node, depth = 0, ancestors = [])
  indent = "\t" * depth

  # A node that is its own ancestor would otherwise recurse forever.
  if ancestors.any? { |ancestor| ancestor.equal?(node) }
    puts "#{indent}#{node.value} (cycle)"
    return
  end

  summary = node.children.empty? ? " (#{node.count} - #{node.latest_time&.to_date})" : ''
  puts "#{indent}#{node.value}#{summary}"

  node.children.each do |child|
    print_tree(child, depth + 1, ancestors + [node])
  end
end
# url_trie returns nil when the input produced no transactions; print nothing
# in that case instead of failing on nil.
print_tree(root) if root
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment