Created
April 25, 2026 02:10
-
-
Save ericboehs/01124f74d9bc04f808c6b7d46dbf8e68 to your computer and use it in GitHub Desktop.
vtt-to-md: Convert WebVTT transcripts to anchored markdown with audio-fragment-linked timestamps
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| # Convert WebVTT to anchored markdown with HH:MM:SS audio-fragment links. | |
| require 'optparse' | |
VERSION = '0.1.0'

# Collected CLI state; every flag below only records a value here.
options = { audio: nil, output: nil, title: nil, no_audio: false }

# CLI definition. The banner documents the output contract (per-utterance
# anchors plus media-fragment timestamp links); all real work happens later.
cli = OptionParser.new do |o|
  o.banner = <<~BANNER
    Usage: vtt-to-md [options] <input.vtt>
    Converts a WebVTT transcript to markdown with per-utterance HTML anchors
    (`<a id="tHHMMSS"></a>`) and timestamps that link to the corresponding
    moment in the audio file via HTML5 media-fragment URIs (`audio.m4a#t=Xs`).
    Consecutive cues from the same speaker are merged.
    By default the audio sibling (same basename, .m4a) is auto-discovered and
    output is written to the matching .md next to the .vtt.
  BANNER
  o.separator ''
  o.separator 'Options:'
  o.on('-a', '--audio PATH', 'Audio file basename to link to (default: sibling .m4a)') { |v| options[:audio] = v }
  o.on('--no-audio', 'Skip audio linking even if a sibling .m4a exists') { options[:no_audio] = true }
  o.on('-o', '--output PATH', 'Output path; use - for stdout (default: sibling .md)') { |v| options[:output] = v }
  o.on('-t', '--title TITLE', 'Human title (default: Title-Cased filename without date)') { |v| options[:title] = v }
  o.on('-h', '--help', 'Show this help') { puts o; exit 0 }
  o.on('-v', '--version', 'Show version') { puts "vtt-to-md #{VERSION}"; exit 0 }
end
cli.parse!

# Exactly one positional argument (the .vtt path) is required.
if ARGV.empty?
  puts cli
  exit 1
end
input = ARGV[0]
abort "vtt-to-md: input not found: #{input}" unless File.exist?(input)

dir = File.dirname(input)
stem = File.basename(input, '.vtt')

# Audio resolution: an explicit --audio wins; otherwise auto-discover a
# sibling .m4a sharing the stem. --no-audio (applied last) suppresses
# linking even when a flag or sibling was found.
audio = options[:audio]
if audio.nil? && !options[:no_audio]
  candidate = File.join(dir, "#{stem}.m4a")
  if File.exist?(candidate)
    audio = "#{stem}.m4a"
  else
    warn "vtt-to-md: no sibling audio at #{candidate} -- emitting timestamps without audio links (use --audio PATH to override)"
  end
end
audio = nil if options[:no_audio]

# Title: drop a leading YYYY-MM-DD- prefix, then Title-Case the remainder.
title = options[:title] ||
        stem.sub(/^\d{4}-\d{2}-\d{2}-/, '').gsub('-', ' ').split.map(&:capitalize).join(' ')

# '-' routes output to stdout; otherwise write the sibling (or given) .md.
output_path = options[:output] || File.join(dir, "#{stem}.md")
out = output_path == '-' ? $stdout : File.open(output_path, 'w')

vtt = File.read(input)
# Date for the heading comes from the filename prefix when present.
date = File.basename(input)[/^\d{4}-\d{2}-\d{2}/] || 'unknown date'
# Document header: "# DATE — TITLE", a provenance note, and (when audio
# links are enabled) a short explanation of how the timestamp links work.
out.puts "# #{date} — #{title}"
out.puts
out.puts "> Source: Microsoft Teams meeting transcript (`.vtt`). Generated from `#{File.basename(input)}`."
unless audio.nil?
  out.puts ">"
  out.puts "> Each timestamp links to the corresponding moment in the meeting audio (`#{audio}`). The audio file is not committed; if you have a local copy alongside this markdown, the links will jump there directly in IINA / VLC / Safari."
end
out.puts
out.puts '---'
out.puts
# Parse the transcript into cue hashes: :anchor (t-prefixed HHMMSS id),
# :label (HH:MM:SS display), :seconds (offset for the #t= fragment),
# :speaker (nil when absent), and :text.
#
# Normalize first: Teams exports frequently use CRLF line endings, which
# would otherwise defeat the blank-line block split below (/\n\n+/ never
# matches "\r\n\r\n", yielding zero cues). A leading UTF-8 BOM is stripped
# for the same reason.
cues = []
normalized = vtt.sub(/\A\uFEFF/, '').gsub(/\r\n?/, "\n")
normalized.split(/\n\n+/).each do |chunk|
  lines = chunk.strip.split("\n")
  # The header line may carry trailing text ("WEBVTT - ..."), so match by prefix.
  next if lines.empty? || lines.first.start_with?('WEBVTT')
  lines.shift if lines.first =~ /^\d+$/ # optional numeric cue identifier
  # Cue timing line. Hours are optional per the WebVTT spec (MM:SS.mmm);
  # a missing hours capture becomes 0 via nil.to_i.
  next unless lines.first =~ /^(?:(\d{2,}):)?(\d{2}):(\d{2})\.\d{3}\s+-->/
  h = Regexp.last_match(1).to_i
  m = Regexp.last_match(2).to_i
  s = Regexp.last_match(3).to_i
  text = lines[1..].join(' ').strip
  speaker = nil
  # <v Speaker>...</v> voice span; the closing tag is optional in WebVTT
  # when the span covers the whole cue, so it is optional here too.
  if text =~ %r{\A<v\s*([^>]*)>(.*?)(?:</v>)?\z}m
    raw = Regexp.last_match(1).strip
    speaker = raw.empty? ? nil : raw
    text = Regexp.last_match(2).strip
  end
  next if text.empty?
  cues << {
    anchor: format('t%02d%02d%02d', h, m, s),
    label: format('%02d:%02d:%02d', h, m, s),
    seconds: h * 3600 + m * 60 + s,
    speaker: speaker,
    text: text
  }
end
# Collapse runs of consecutive cues from the same speaker (including runs
# where no speaker was detected, i.e. both nil) into one utterance. The
# first cue of each run keeps its anchor/label/seconds; later texts are
# appended. Cues are dup'd so the original array is never mutated.
merged = []
cues.each do |cue|
  previous = merged.last
  if previous && previous[:speaker] == cue[:speaker]
    previous[:text] = "#{previous[:text]} #{cue[:text]}"
  else
    merged << cue.dup
  end
end
# Emit one anchored entry per merged utterance:
#   <a id="tHHMMSS"></a>
#   **[HH:MM:SS](audio.m4a#t=SECS) — Speaker**
#   > utterance text
# When no audio target is set the timestamp is plain text instead of a link.
merged.each do |entry|
  out.puts %(<a id="#{entry[:anchor]}"></a>)
  stamp =
    if audio
      "[#{entry[:label]}](#{audio}#t=#{entry[:seconds]})"
    else
      entry[:label]
    end
  heading = +"**#{stamp}"
  heading << " — #{entry[:speaker]}" if entry[:speaker]
  out.puts "#{heading}**"
  out.puts
  out.puts "> #{entry[:text]}"
  out.puts
end
out.close unless out == $stdout
warn "vtt-to-md: wrote #{output_path}" unless output_path == '-'
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
vtt-to-md
A small Ruby script that converts WebVTT transcripts (e.g. Microsoft Teams meeting exports) into clean, anchored markdown with timestamps that deep-link into the corresponding moment of the audio recording.
Each utterance gets:
- An HTML anchor (`<a id="tHHMMSS"></a>`) you can link to from anywhere
- A timestamp that deep-links into the audio (`audio.m4a#t=109`) — clicking it in IINA / VLC / Safari jumps straight to that second
- Speaker attribution (parsed from `<v Speaker>...</v>` tags), with consecutive cues from the same speaker auto-merged

Why
Teams gives you a `.vtt` and a `.m4a`. Neither is fun to navigate or reference. This produces a third sibling file — the `.md` — that is searchable, copy-pasteable into notes/docs/PRs, and (when the `.m4a` is alongside it) one click away from the actual audio.

Install
Dependencies
Ruby standard library only (`optparse`).

Usage
Output sample
Flags
- `-a, --audio PATH` — audio file basename to link to (default: sibling `.m4a`)
- `--no-audio` — skip audio linking even if a sibling `.m4a` exists
- `-o, --output PATH` — output path; use `-` for stdout (default: sibling `.md`)
- `-t, --title TITLE` — human title (default: Title-Cased filename without date)
- `-h, --help` — show this help
- `-v, --version` — show version

Defaults
- Audio: if `<stem>.m4a` exists next to the `.vtt`, it is used automatically. Pass `--audio PATH` to override, or `--no-audio` to skip.
- Output: sibling `.md` (e.g. `foo.vtt` -> `foo.md`). Pass `-o PATH`, or `-o -` for stdout.
- Title: `2026-04-16-foo-sync.vtt` becomes `Foo Sync`. Pass `--title` to override.
- Date: taken from a `YYYY-MM-DD-` prefix on the filename, if present.