Created
September 10, 2025 05:31
-
-
Save niaeashes/d4e0db9d9316706d6a1f811d810592a9 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| require 'date' | |
| require 'optparse' | |
| require 'pathname' | |
| require 'json' | |
| require 'fileutils' | |
| # --- | |
| # 1. Option Parsing and Help | |
| # --- | |
# Collects the command-line switches into `options`:
#   :start_time - raw value of --start (required)
#   :end_time   - raw value of --end (optional)
#   :raw_mode   - true when --raw was given
#   :flush      - true when --flush was given
options = {}
option_parser = OptionParser.new do |opts|
  opts.banner = "Usage: #{$0} --start <time> [--end <time>] [--raw] [--flush]"

  opts.on("--start TIME", "Start time of the log range (e.g., '7 days ago').") do |time|
    options[:start_time] = time
  end

  opts.on("--end TIME", "End time of the log range (e.g., '1 day ago').") do |time|
    options[:end_time] = time
  end

  opts.on("--raw", "Outputs the unzipped log content to stdout, suppressing all other logs.") do
    options[:raw_mode] = true
  end

  opts.on("--flush", "Force a re-download and remove existing log files.") do
    options[:flush] = true
  end

  opts.on("-h", "--help", "Prints this help") do
    puts opts
    exit
  end
end

# Unknown switches or a switch missing its argument are reported as a short
# message plus the usage text instead of an uncaught exception backtrace.
begin
  option_parser.parse!
rescue OptionParser::ParseError => e
  STDERR.puts "Error: #{e.message}"
  STDERR.puts option_parser
  exit 1
end

# The banner documents --start as mandatory, so enforce it here rather than
# letting a nil value reach the date parsing step.
unless options[:start_time]
  STDERR.puts "Error: --start is required."
  STDERR.puts option_parser
  exit 1
end
| # --- | |
| # 2. Environment Variable Checks | |
| # --- | |
# Both variables must be present and non-empty; otherwise the script stops
# with exit status 1. The explanatory message is omitted in --raw mode.
required_vars = %w[S3_PREFIX DISTRIBUTION_ID]
missing_vars = required_vars.select { |name| ENV.fetch(name, '').empty? }

if missing_vars.any?
  quiet = options[:raw_mode]
  STDERR.puts "Error: The following environment variables are not set: #{missing_vars.join(', ')}" unless quiet
  STDERR.puts "Please set them before running the script." unless quiet
  exit 1
end
| # --- | |
| # 3. Date Parsing Logic | |
| # --- | |
# Converts a user-supplied time expression into a Date.
#
# Accepted forms (surrounding whitespace and letter case are ignored):
#   "now"                      -> today
#   "<N> day(s)|d ago"         -> today minus N days
#   "<N> week(s)|w ago"        -> today minus N * 7 days
#   "<N> month(s)|m ago"       -> today shifted back N months
#   "<N> year(s)|y ago"        -> today shifted back N * 12 months
#   "YYYY-MM-DD" + anything    -> that calendar date (trailing text is ignored)
#
# @param time_str [String, nil] the expression to interpret
# @return [Date] the resolved date
#
# This method does not raise to its caller: on unsupported or invalid input it
# prints "Error parsing time: ..." to STDERR and terminates the process with
# exit status 1.
def parse_time(time_str)
  text = time_str.to_s.strip
  today = Date.today

  case text
  when /\Anow\z/i
    today
  # \A and \z anchor the whole string; ^ and $ would accept any single
  # matching line inside a multi-line value. The optional extra "s" after the
  # unit keeps every spelling the earlier per-unit patterns accepted.
  when /\A(?<count>[0-9]+)\s+(?<unit>days|day|d|weeks|week|w|months|month|m|years|year|y)s? ago\z/i
    match = Regexp.last_match
    count = match[:count].to_i
    case match[:unit][0].downcase
    when 'd' then today - count
    when 'w' then today - count * 7
    when 'm' then today << count
    else today << count * 12
    end
  when /\A(?<year>[0-9]{4})-(?<month>[0-9]{2})-(?<day>[0-9]{2})/
    # Build the date from the digits that were actually validated instead of
    # handing the whole string to a heuristic parser.
    year, month, day = Regexp.last_match.values_at(:year, :month, :day).map(&:to_i)
    Date.new(year, month, day)
  else
    raise ArgumentError, "Unsupported time format: '#{time_str}'"
  end
rescue StandardError => e
  STDERR.puts "Error parsing time: #{e.message}"
  exit 1
end
# Resolve the requested range into concrete dates; --end defaults to 'now'.
begin
  start_date = parse_time(options[:start_time])
  end_date = parse_time(options[:end_time] || 'now')
rescue StandardError
  STDERR.puts "Error: Invalid date format provided."
  exit 1
end

# With the start after the end, the download step would visit no days and the
# run would appear to succeed without fetching anything, so reject it here.
if start_date > end_date
  STDERR.puts "Error: Start date (#{start_date}) is after end date (#{end_date})."
  exit 1
end
| # --- | |
| # 4. AWS CLI Command Execution with Caching | |
| # --- | |
s3_prefix = ENV['S3_PREFIX']
distribution_id = ENV['DISTRIBUTION_ID']
CACHE_FILE = '.cloudfront_log_cache.json'

# Load the record of the previous run. An unreadable or corrupt cache file, or
# one whose top level is not a JSON object, is treated as "no cache" instead
# of aborting the script.
cache_data = {}
if File.exist?(CACHE_FILE)
  begin
    parsed_cache = JSON.parse(File.read(CACHE_FILE), symbolize_names: true)
    cache_data = parsed_cache if parsed_cache.is_a?(Hash)
  rescue JSON::ParserError, SystemCallError
    cache_data = {}
  end
end

# Parameters that identify a run; compared against the cached :options entry.
current_options = {
  start_time: options[:start_time],
  end_time: options[:end_time] || 'now',
  s3_prefix: s3_prefix,
  distribution_id: distribution_id,
  raw_mode: options[:raw_mode]
}

# The cache matches only when the parameters are identical and the recorded
# run is less than 15 minutes old. A missing or unparsable timestamp counts
# as a miss.
cache_match = false
if cache_data[:options] == current_options
  begin
    last_run_time = DateTime.parse(cache_data[:timestamp])
    # DateTime subtraction yields days; convert to minutes.
    cache_match = (DateTime.now - last_run_time) * 24 * 60 < 15
  rescue ArgumentError, TypeError
    cache_match = false
  end
end
# Human-readable summary of the run; omitted entirely in --raw mode.
unless options[:raw_mode]
  banner_lines = [
    "---",
    "Starting CloudFront log download process.",
    "S3 Prefix: #{s3_prefix}",
    "Distribution ID: #{distribution_id}",
    "Requested time range: start='#{options[:start_time]}', end='#{options[:end_time] || 'now'}'",
    "Converted absolute date range: #{start_date.strftime('%Y-%m-%d')} to #{end_date.strftime('%Y-%m-%d')}"
  ]
  banner_lines.each { |line| puts line }
end
# Download step: skipped on a cache hit unless --flush was given; otherwise
# syncs one day at a time and records the run in CACHE_FILE afterwards.
if cache_match && !options[:flush]
  unless options[:raw_mode]
    puts "Cache hit! Skipping download. Last run was less than 15 minutes ago with the same parameters."
    puts "Use --flush to force a re-download."
    puts "---"
  end
else
  unless options[:raw_mode]
    puts "---"
    puts "Downloading logs..."
    puts "---"
  end

  # Cleanup
  if options[:flush]
    unless options[:raw_mode]
      puts "Flushing existing log files (--flush option is active)."
      puts "---"
    end
    # Removes the same *.gz files in the current directory as before, but
    # without a shell and without an error when nothing matches.
    FileUtils.rm_f(Dir.glob('*.gz'))
  end

  # In --raw mode stdout is reserved for the log content, so whatever the AWS
  # CLI writes to stdout is discarded; its stderr is left untouched.
  sync_redirects = options[:raw_mode] ? { out: File::NULL } : {}

  (start_date..end_date).each do |current_date|
    date_str = current_date.strftime('%Y-%m-%d')
    puts "Syncing logs for #{date_str}..." unless options[:raw_mode]

    # Argument-vector form: the prefix and distribution id are passed to the
    # AWS CLI verbatim instead of being interpreted by a shell.
    synced = system(
      'aws', 's3', 'sync', "#{s3_prefix}/", '.',
      '--exclude', '*',
      '--include', "#{distribution_id}.#{date_str}-*",
      sync_redirects
    )
    next if synced

    STDERR.puts "Error: Failed to sync logs for date #{date_str}." unless options[:raw_mode]
    exit 1
  end

  # Update cache file
  cache_data = {
    options: current_options,
    timestamp: DateTime.now.to_s
  }
  File.write(CACHE_FILE, JSON.pretty_generate(cache_data))
end
| # --- | |
| # 5. Output and Post-Processing | |
| # --- | |
# Final step: --raw streams the decompressed contents of every downloaded
# archive for this distribution to stdout; otherwise the working directory is
# listed for the user.
raw_output = options[:raw_mode]

system("gunzip -c #{distribution_id}.*.gz") if raw_output

unless raw_output
  puts "---"
  system("ls -la")
  puts "The log files are now available in the current directory."
  puts "---"
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment