pvdb · February 16, 2025 17:40
diff --git a/README.md b/README.md
diff --git a/gloc b/gloc
 #!/usr/bin/env -S ruby -s
 # frozen_string_literal: true

 #
 # INSTALLATION
 #
 #   ln -s ${PWD}/gloc $(brew --prefix)/bin/
 #   sudo ln -s ${PWD}/gloc /usr/local/bin/
 #
 # DEPENDENCIES
 #
 #   brew install cmark-gfm
 #

 # With the -help switch (`ruby -s` exposes it as $help): render README.md to
 # HTML with cmark-gfm and hand the generated file to `open`.
 if $help
  require 'tempfile'

  readme = File.join(__dir__, 'README.md')
  html   = Tempfile.new(['gloc.', '.html'])

  # Arguments are passed as separate words, so a checkout path containing
  # spaces or shell metacharacters is never re-parsed by a shell.
  system('cmark-gfm', readme, out: html.path) ||
    abort('gloc: could not render README.md with cmark-gfm')

  # exec replaces the current process; nothing after this line runs.
  exec 'open', html.path
 end

 #
 # rubocop:disable Layout/IndentationWidth
 # rubocop:disable Layout/HashAlignment
 # rubocop:disable Layout/ElseAlignment
 # rubocop:disable Layout/EndAlignment
 #
 # rubocop:disable Style/TrailingCommaInHashLiteral
 # rubocop:disable Style/TrailingCommaInArguments
 # rubocop:disable Style/EmptyCaseCondition
 # rubocop:disable Style/BlockDelimiters
 # rubocop:disable Style/Documentation
 # rubocop:disable Style/RegexpLiteral
 # rubocop:disable Style/GlobalVars
 #

 require 'English'
 require 'ostruct'

 # Build the list of candidate files:
 #   * stdin is a terminal (or -tty was given): ask git for tracked files, or
 #     fall back to find(1) when not inside a git work tree
 #   * otherwise: read one path per line from stdin
 #
 # ARGV is handed to git/find as an argument vector (no shell involved), so
 # paths containing spaces, quotes or glob characters reach the tool verbatim.
 source_files = if $stdin.tty? || $tty
  if `git rev-parse --is-inside-work-tree`.chomp == 'true'
    # we're inside a git repo so
    # get list of files from git
    IO.popen(['git', 'ls-files', '-z', *ARGV], &:read).split("\0")
  else
    # we are not inside a git repo:
    # find all files in current dir
    search_roots = ARGV.empty? ? [Dir.pwd] : ARGV
    IO.popen(['find', *search_roots, '-print0'], &:read).split("\0")
  end
 else
  # assume we're running it in a pipeline
  # and read list of filenames from $stdin
  $stdin.read.split($RS).map(&:chomp)
 end

 # exclude binary files from stats
 # (files with NUL in file header)
 #
 # much slower alternative:
 #
 # `egrep -q '\\x00' #{file}` ; $? == 0
 #
 # note: git itself uses the first
 # 8,000 characters of a file, but
 # looking at the first 16 is fine
 # for our purposes... for now :-)
 # see buffer_is_binary() function
 # in the "git" source repository!
 # Drop every path that should not be counted. The checks short-circuit in
 # the order written, so File.read is only reached for paths that exist, are
 # regular files and are not empty.
 source_files.delete_if do |path|
  skip = File.extname(path) == '.pdf'      || # skip bl**dy PDF documents
         File.basename(path) =~ /\A\..*\z/ || # skip hidden ".*" files
         !File.exist?(path)                || # skip non-existent paths
         !File.file?(path)                 || # skip directories
         !File.size?(path)                 || # skip empty files
         !File.read(path, 16)["\0"].nil?      # skip binary files

  # Only announce skipped paths when -verbose was given.
  warn("SKIPPING #{path}...") if skip && $verbose
  skip
 end

 # Regex that recognises a blank line, looked up by extension key. No
 # per-extension entries are defined, so every lookup returns the default
 # (a line made up of whitespace only).
 BLANKS = Hash.new(%r{\A\s*\Z}.freeze) # TODO: ext-specific regex for blanks?

 # Regex that recognises a comment-only line, keyed by '*' + file extension.
 # Each pattern has to match the whole line (surrounding whitespace aside), so
 # a comment trailing some code is not counted. Extensions without an entry
 # look up as nil.
 COMMENTS = {
  # FIXME: does not work for multi-line comments
  #        (for the languages that support them)
  '*.rb'   => %r{\A\s*(#.*)\s*\Z},
  '*.sh'   => %r{\A\s*(#.*)\s*\Z},
  '*.xml'  => %r{\A\s*(<!--.*-->)\s*\Z},
  '*.html' => %r{\A\s*(<!--.*-->)\s*\Z},
  '*.css'  => %r{\A\s*(/\*.*\*/)\s*\Z},
  '*.js'   => %r{\A\s*(//.*|/\*.*\*/)\s*\Z},
  '*.php'  => %r{\A\s*(//.*|#.*)\s*\Z},
 }.freeze

 # Lazily computed per-file statistics. Looking up [path, blank_re, comment_re]
 # reads the file, stores an OpenStruct with the four counters under that key
 # and returns it; later lookups of the same key reuse the stored value.
 STATS_FOR_FILE = Hash.new do |cache, key|
  path, blank_pattern, comment_pattern = key

  text = File.read(path, encoding: 'UTF-8')
  # FIXME: what about file encodings other than these two???
  text = File.read(path, encoding: 'ISO-8859-1') unless text.valid_encoding?

  rows     = text.lines
  total    = rows.size
  blanks   = rows.grep(blank_pattern).size
  comments = rows.grep(comment_pattern).size

  cache[[path, blank_pattern, comment_pattern]] = OpenStruct.new(
    line_count:    total,
    blank_count:   blanks,
    comment_count: comments,
    code_count:    total - blanks - comments,
  )
 end

 # Per-extension running totals. The first lookup of an extension key creates
 # and stores an entry with every counter at zero.
 STATS_FOR = Hash.new do |totals, ext|
  zeroed = %i[file_count line_count blank_count comment_count code_count].map { |field| [field, 0] }.to_h
  totals[ext] = OpenStruct.new(zeroed)
 end

 # Fold each file's statistics into the running totals for its extension.
 source_files.each do |path|
  ext_key = "*#{File.extname(path)}" # e.g. '*.rb' or '*' if no ext!

  # The per-file lookup runs first: if reading the file raises, no totals
  # entry is created for its extension.
  per_file = STATS_FOR_FILE[[path, BLANKS[ext_key], COMMENTS[ext_key]]]
  per_ext  = STATS_FOR[ext_key]

  per_ext.file_count += 1
  %i[line_count blank_count comment_count code_count].each do |field|
    per_ext[field] += per_file[field]
  end
 end

 # Pick the column to sort by: the first switch that is set wins, checked in
 # the order -files, -lines, -blank, -comment, -code; default is code count.
 sort_metric = {
  file_count:    $files,
  line_count:    $lines,
  blank_count:   $blank,
  comment_count: $comment,
  code_count:    $code,
 }.find { |_metric, enabled| enabled }&.first || :code_count

 # Order a stats table from the largest to the smallest value of sort_metric.
 descending_by_metric = lambda do |table|
  table.sort_by { |_key, stats| stats[sort_metric] }.reverse.to_h
 end

 file_stats   = descending_by_metric.call(STATS_FOR_FILE)
 source_stats = descending_by_metric.call(STATS_FOR)

 # Append a grand-total row. The sums are computed before the 'TOTAL' key is
 # inserted, so the row never counts itself; an empty table sums to zero.
 grand_total = %i[file_count line_count blank_count comment_count code_count].map { |field|
  [field, source_stats.each_value.sum { |stats| stats[field] }]
 }.to_h

 source_stats['TOTAL'] = OpenStruct.new(grand_total)

 #
 # JSON formatting for non-TTY output
 #

 unless $stdout.tty? || $tty || $visual
  require 'json'

  # Teach the json library to serialise an OpenStruct as a plain JSON object.
  class OpenStruct
    # Serialise through the underlying Hash. The generator arguments (usually
    # the generator state) are splatted through unchanged; handing them over
    # as one Array would give Hash#to_json an Array where it expects a state
    # or options object, losing the caller's formatting options.
    def to_json(*args)
      to_h.to_json(*args)
    end
  end

  puts source_stats.to_json

  exit
 end

 unless $visual
  class String
    # Insert thousands separators into the digit run that ends a line, e.g.
    # '1234567' => '1,234,567'. Digits after a decimal point are left alone,
    # so '1234.56789' becomes '1,234.56789' and not '1,234.56,789'.
    def commify
      gsub(/(\d)(?=(\d{3})+(\..*)?$)/) do
        match = Regexp.last_match
        # Text ending in ".<digits>" before the match means we are inside the
        # fractional part: keep the digit, add no comma.
        match.pre_match.match?(/\.\d*\z/) ? match[1] : "#{match[1]},"
      end
    end
  end

  class Numeric
    # The number's to_s form with thousands separators inserted.
    def commify
      to_s.commify
    end
  end

  #
  # fancy formatting for TTY output
  #

  # Replace every counter (including the TOTAL row) with its comma-grouped
  # string form; from here on the stats hold display strings, not numbers.
  columns = %i[file_count line_count blank_count comment_count code_count]
  source_stats.each_value do |row|
    columns.each { |column| row[column] = row[column].commify }
  end

  DIVIDER  = ('-' * 80) # because loc uses 80 columns
  TEMPLATE = ' %-13s %12s %12s %12s %12s %12s'

  # A single table row framed by a divider above and below.
  framed = "#{DIVIDER}\n#{TEMPLATE}\n#{DIVIDER}"

  puts format(framed, 'Language', 'Files', 'Lines', 'Blank', 'Comment', 'Code')

  # One row per extension; the TOTAL entry is printed separately as the footer.
  source_stats.each_pair do |label, row|
    puts format(TEMPLATE, label, *row.to_h.values_at(*columns)) unless label == 'TOTAL'
  end

  puts format(framed, 'Total', *source_stats.fetch('TOTAL').to_h.values)
 end

 if $visual
  require 'io/console'

  class String
    # Wrap the string in the escape sequence selecting color_code, followed by
    # the reset sequence.
    def colorize(color_code)
      "\e[#{color_code}m#{self}\e[0m"
    end

    # One zero-argument helper per supported colour.
    { red: '31', green: '32', yellow: '33', blue: '34' }.each do |color_name, color_code|
      define_method(color_name) { colorize(color_code) }
    end
  end

  # With no files left after filtering there is nothing to draw, and the
  # width calculations below would fail on nil maxima.
  exit if file_stats.empty?

  max_line_count   = file_stats.values.map(&:line_count).max
  longest_filename = file_stats.keys.map(&:first).map(&:length).max
  # IO.console is nil when no console is available (e.g. no controlling
  # terminal); fall back to a conventional 80-column width in that case.
  _, console_width = IO.console&.winsize || [24, 80]
  # 5 columns are taken by the leading space, ' | ' and one column of slack.
  available_width  = Float(console_width - longest_filename - 5)

  abort 'Terminal not wide enough... aborting!' if available_width.negative?

  # One bar per file: code, comment and blank segments are scaled so that the
  # file with the most lines spans the full available width.
  file_stats.each_pair do |(file, _, _), stats|
    code_width    = (available_width * stats.code_count / max_line_count).to_i
    comment_width = (available_width * stats.comment_count / max_line_count).to_i
    blank_width   = (available_width * stats.blank_count / max_line_count).to_i

    puts format(
      " %-#{longest_filename}<file>s | %<code>s%<comment>s%<blank>s",
      file: file,
      code:    ('+' * code_width).green,
      comment: ('-' * comment_width).red,
      blank:   ('_' * blank_width).blue
    )
  end
 end

 #
 # rubocop:enable Style/GlobalVars
 # rubocop:enable Style/RegexpLiteral
 # rubocop:enable Style/Documentation
 # rubocop:enable Style/BlockDelimiters
 # rubocop:enable Style/EmptyCaseCondition
 # rubocop:enable Style/TrailingCommaInArguments
 # rubocop:enable Style/TrailingCommaInHashLiteral
 #
 # rubocop:enable Layout/EndAlignment
 # rubocop:enable Layout/ElseAlignment
 # rubocop:enable Layout/HashAlignment
 # rubocop:enable Layout/IndentationWidth
 #

 # That's all Folks!
	#!/usr/bin/env -S ruby -s
	# frozen_string_literal: true

	#
	# INSTALLATION
	#
	# ln -s ${PWD}/gloc $(brew --prefix)/bin/
	# sudo ln -s ${PWD}/gloc /usr/local/bin/
	#
	# DEPENDENCIES
	#
	# brew install cmark-gfm
	#

	# With the -help switch (`ruby -s` exposes it as $help): render README.md to
	# HTML with cmark-gfm and hand the generated file to `open`.
	if $help
	  require 'tempfile'

	  readme = File.join(__dir__, 'README.md')
	  html   = Tempfile.new(['gloc.', '.html'])

	  # Arguments are passed as separate words, so a checkout path containing
	  # spaces or shell metacharacters is never re-parsed by a shell.
	  system('cmark-gfm', readme, out: html.path) ||
	    abort('gloc: could not render README.md with cmark-gfm')

	  # exec replaces the current process; nothing after this line runs.
	  exec 'open', html.path
	end

	#
	# rubocop:disable Layout/IndentationWidth
	# rubocop:disable Layout/HashAlignment
	# rubocop:disable Layout/ElseAlignment
	# rubocop:disable Layout/EndAlignment
	#
	# rubocop:disable Style/TrailingCommaInHashLiteral
	# rubocop:disable Style/TrailingCommaInArguments
	# rubocop:disable Style/EmptyCaseCondition
	# rubocop:disable Style/BlockDelimiters
	# rubocop:disable Style/Documentation
	# rubocop:disable Style/RegexpLiteral
	# rubocop:disable Style/GlobalVars
	#

	require 'English'
	require 'ostruct'

	# Build the list of candidate files:
	#   * stdin is a terminal (or -tty was given): ask git for tracked files, or
	#     fall back to find(1) when not inside a git work tree
	#   * otherwise: read one path per line from stdin
	#
	# ARGV is handed to git/find as an argument vector (no shell involved), so
	# paths containing spaces, quotes or glob characters reach the tool verbatim.
	source_files = if $stdin.tty? || $tty
	  if `git rev-parse --is-inside-work-tree`.chomp == 'true'
	    # we're inside a git repo so
	    # get list of files from git
	    IO.popen(['git', 'ls-files', '-z', *ARGV], &:read).split("\0")
	  else
	    # we are not inside a git repo:
	    # find all files in current dir
	    search_roots = ARGV.empty? ? [Dir.pwd] : ARGV
	    IO.popen(['find', *search_roots, '-print0'], &:read).split("\0")
	  end
	else
	  # assume we're running it in a pipeline
	  # and read list of filenames from $stdin
	  $stdin.read.split($RS).map(&:chomp)
	end

	# exclude binary files from stats
	# (files with NUL in file header)
	#
	# much slower alternative:
	#
	# `egrep -q '\\x00' #{file}` ; $? == 0
	#
	# note: git itself uses the first
	# 8,000 characters of a file, but
	# looking at the first 16 is fine
	# for our purposes... for now :-)
	# see buffer_is_binary() function
	# in the "git" source repository!
	# Drop every path that should not be counted. The checks short-circuit in
	# the order written, so File.read is only reached for paths that exist, are
	# regular files and are not empty.
	source_files.delete_if do |path|
	  skip = File.extname(path) == '.pdf'      || # skip bl**dy PDF documents
	         File.basename(path) =~ /\A\..*\z/ || # skip hidden ".*" files
	         !File.exist?(path)                || # skip non-existent paths
	         !File.file?(path)                 || # skip directories
	         !File.size?(path)                 || # skip empty files
	         !File.read(path, 16)["\0"].nil?      # skip binary files

	  # Only announce skipped paths when -verbose was given.
	  warn("SKIPPING #{path}...") if skip && $verbose
	  skip
	end

	# Regex that recognises a blank line, looked up by extension key. No
	# per-extension entries are defined, so every lookup returns the default
	# (a line made up of whitespace only).
	BLANKS = Hash.new(%r{\A\s*\Z}.freeze) # TODO: ext-specific regex for blanks?

	# Regex that recognises a comment-only line, keyed by '*' + file extension
	# (the same key shape the per-file loop builds). Each pattern has to match
	# the whole line, surrounding whitespace aside, so a comment trailing some
	# code is not counted. Extensions without an entry look up as nil.
	COMMENTS = {
	  # FIXME: does not work for multi-line comments
	  #        (for the languages that support them)
	  '*.rb'   => %r{\A\s*(#.*)\s*\Z},
	  '*.sh'   => %r{\A\s*(#.*)\s*\Z},
	  '*.xml'  => %r{\A\s*(<!--.*-->)\s*\Z},
	  '*.html' => %r{\A\s*(<!--.*-->)\s*\Z},
	  '*.css'  => %r{\A\s*(/\*.*\*/)\s*\Z},
	  '*.js'   => %r{\A\s*(//.*|/\*.*\*/)\s*\Z},
	  '*.php'  => %r{\A\s*(//.*|#.*)\s*\Z},
	}.freeze

	# Lazily computed per-file statistics. Looking up [path, blank_re, comment_re]
	# reads the file, stores an OpenStruct with the four counters under that key
	# and returns it; later lookups of the same key reuse the stored value.
	STATS_FOR_FILE = Hash.new do |cache, key|
	  path, blank_pattern, comment_pattern = key

	  text = File.read(path, encoding: 'UTF-8')
	  # FIXME: what about file encodings other than these two???
	  text = File.read(path, encoding: 'ISO-8859-1') unless text.valid_encoding?

	  rows     = text.lines
	  total    = rows.size
	  blanks   = rows.grep(blank_pattern).size
	  comments = rows.grep(comment_pattern).size

	  cache[[path, blank_pattern, comment_pattern]] = OpenStruct.new(
	    line_count:    total,
	    blank_count:   blanks,
	    comment_count: comments,
	    code_count:    total - blanks - comments,
	  )
	end

	# Per-extension running totals. The first lookup of an extension key creates
	# and stores an entry with every counter at zero.
	STATS_FOR = Hash.new do |totals, ext|
	  totals[ext] = OpenStruct.new(
	    file_count:    0,
	    line_count:    0,
	    blank_count:   0,
	    comment_count: 0,
	    code_count:    0,
	  )
	end

	# Fold each file's statistics into the running totals for its extension.
	source_files.each do |path|
	  # The '*' prefix is what makes the key line up with the '*.ext' keys used
	  # for the comment regex table.
	  ext_key = "*#{File.extname(path)}" # e.g. '*.rb' or '*' if no ext!

	  # The per-file lookup runs first: if reading the file raises, no totals
	  # entry is created for its extension.
	  per_file = STATS_FOR_FILE[[path, BLANKS[ext_key], COMMENTS[ext_key]]]
	  per_ext  = STATS_FOR[ext_key]

	  per_ext.file_count += 1
	  %i[line_count blank_count comment_count code_count].each do |field|
	    per_ext[field] += per_file[field]
	  end
	end

	# Pick the column to sort by: the first switch that is set wins, checked in
	# the order -files, -lines, -blank, -comment, -code; default is code count.
	sort_metric = {
	  file_count:    $files,
	  line_count:    $lines,
	  blank_count:   $blank,
	  comment_count: $comment,
	  code_count:    $code,
	}.find { |_metric, enabled| enabled }&.first || :code_count

	# Order a stats table from the largest to the smallest value of sort_metric.
	descending_by_metric = lambda do |table|
	  table.sort_by { |_key, stats| stats[sort_metric] }.reverse.to_h
	end

	file_stats   = descending_by_metric.call(STATS_FOR_FILE)
	source_stats = descending_by_metric.call(STATS_FOR)

	# Append a grand-total row. The sums are computed before the 'TOTAL' key is
	# inserted, so the row never counts itself; an empty table sums to zero.
	grand_total = %i[file_count line_count blank_count comment_count code_count].map { |field|
	  [field, source_stats.each_value.sum { |stats| stats[field] }]
	}.to_h

	source_stats['TOTAL'] = OpenStruct.new(grand_total)

	#
	# JSON formatting for non-TTY output
	#

	# stdout is not a terminal (and neither -tty nor -visual was given): print
	# the per-extension stats as JSON and stop.
	unless $stdout.tty? || $tty || $visual
	  require 'json'

	  # Teach the json library to serialise an OpenStruct as a plain JSON object.
	  class OpenStruct
	    # Serialise through the underlying Hash. The generator arguments are
	    # splatted through unchanged; handing them over as one Array would give
	    # Hash#to_json an Array where it expects a state or options object.
	    def to_json(*args)
	      to_h.to_json(*args)
	    end
	  end

	  puts source_stats.to_json

	  exit
	end

	unless $visual
	class String
	  # Insert thousands separators into the digit run that ends a line, e.g.
	  # '1234567' => '1,234,567'. Digits after a decimal point are left alone,
	  # so '1234.56789' becomes '1,234.56789' and not '1,234.56,789'.
	  def commify
	    gsub(/(\d)(?=(\d{3})+(\..*)?$)/) do
	      match = Regexp.last_match
	      # Text ending in ".<digits>" before the match means we are inside the
	      # fractional part: keep the digit, add no comma.
	      match.pre_match.match?(/\.\d*\z/) ? match[1] : "#{match[1]},"
	    end
	  end
	end

	class Numeric
	  # The number's to_s form with thousands separators inserted.
	  def commify
	    to_s.commify
	  end
	end

	#
	# fancy formatting for TTY output
	#

	# Replace every counter (including the TOTAL row) with its comma-grouped
	# string form; from here on the stats hold display strings, not numbers.
	columns = %i[file_count line_count blank_count comment_count code_count]
	source_stats.each_value do |row|
	  columns.each { |column| row[column] = row[column].commify }
	end

	DIVIDER  = ('-' * 80) # because loc uses 80 columns
	TEMPLATE = ' %-13s %12s %12s %12s %12s %12s'

	# A single table row framed by a divider above and below.
	framed = "#{DIVIDER}\n#{TEMPLATE}\n#{DIVIDER}"

	puts format(framed, 'Language', 'Files', 'Lines', 'Blank', 'Comment', 'Code')

	# One row per extension; the TOTAL entry is printed separately as the footer.
	source_stats.each_pair do |label, row|
	  puts format(TEMPLATE, label, *row.to_h.values_at(*columns)) unless label == 'TOTAL'
	end

	puts format(framed, 'Total', *source_stats.fetch('TOTAL').to_h.values)
	end

	if $visual
	require 'io/console'

	class String
	  # Wrap the string in the escape sequence selecting color_code, followed by
	  # the reset sequence.
	  def colorize(color_code)
	    "\e[#{color_code}m#{self}\e[0m"
	  end

	  # One zero-argument helper per supported colour.
	  { red: '31', green: '32', yellow: '33', blue: '34' }.each do |color_name, color_code|
	    define_method(color_name) { colorize(color_code) }
	  end
	end

	# With no files left after filtering there is nothing to draw, and the
	# width calculations below would fail on nil maxima.
	exit if file_stats.empty?

	max_line_count   = file_stats.values.map(&:line_count).max
	longest_filename = file_stats.keys.map(&:first).map(&:length).max
	# IO.console is nil when no console is available (e.g. no controlling
	# terminal); fall back to a conventional 80-column width in that case.
	_, console_width = IO.console&.winsize || [24, 80]
	# 5 columns are taken by the leading space, ' | ' and one column of slack.
	available_width  = Float(console_width - longest_filename - 5)

	abort 'Terminal not wide enough... aborting!' if available_width.negative?

	# One bar per file: code, comment and blank segments are scaled so that the
	# file with the most lines spans the full available width.
	file_stats.each_pair do |(file, _, _), stats|
	  code_width    = (available_width * stats.code_count / max_line_count).to_i
	  comment_width = (available_width * stats.comment_count / max_line_count).to_i
	  blank_width   = (available_width * stats.blank_count / max_line_count).to_i

	  puts format(
	    " %-#{longest_filename}<file>s | %<code>s%<comment>s%<blank>s",
	    file: file,
	    code:    ('+' * code_width).green,
	    comment: ('-' * comment_width).red,
	    blank:   ('_' * blank_width).blue
	  )
	end
	end

	#
	# rubocop:enable Style/GlobalVars
	# rubocop:enable Style/RegexpLiteral
	# rubocop:enable Style/Documentation
	# rubocop:enable Style/BlockDelimiters
	# rubocop:enable Style/EmptyCaseCondition
	# rubocop:enable Style/TrailingCommaInArguments
	# rubocop:enable Style/TrailingCommaInHashLiteral
	#
	# rubocop:enable Layout/EndAlignment
	# rubocop:enable Layout/ElseAlignment
	# rubocop:enable Layout/HashAlignment
	# rubocop:enable Layout/IndentationWidth
	#

	# That's all Folks!