bshyong · February 20, 2025 09:42
diff --git a/wiki_description.rb b/wiki_description.rb
 require 'open-uri'
 require 'nokogiri'
 require 'csv'
 require 'cgi'

 # BASE_URL = 'http://en.wikipedia.org/w/api.php?format=json&action=query&prop=revisions&rvprop=content&titles='
 BASE_URL = 'http://en.wikipedia.org/wiki/'

 # Prints +message+ and returns the line typed in reply, without its line ending.
 #
 # Uses the non-bang String#chomp: chomp! returns nil when there is nothing to
 # strip (e.g. a final line with no trailing newline). Aborts the script when
 # input ends, since gets returns nil at EOF and there is nothing left to read.
 def prompt_for_line(message)
   puts message
   answer = gets
   abort 'No input received; exiting.' if answer.nil?
   answer.chomp
 end

 # The globals are only prompted for when they have not been set already.
 if $input_filename.nil?
   $input_filename = prompt_for_line("Enter input filename (should be a CSV)")
 end

 if $filename.nil?
   $filename = prompt_for_line("Enter a filename (output will be stored as a CSV)")
 end

 # For every row of the input CSV, fetch the page named by the first column and
 # append "<title>,<start of the first paragraph>" to the output CSV.
 # The block form of File.open closes (and flushes) the output file even when
 # the loop raises.
 File.open("#{$filename}.csv", "a+") do |output|
   CSV.foreach($input_filename) do |row|
     title = row[0]
     # Blank CSV lines yield a row with no first cell; skip them instead of
     # letting a NoMethodError on nil abort the whole run.
     next if title.nil? || title.strip.empty?

     puts "processing #{title}"
     url = BASE_URL + title.gsub(' ', '_')
     puts url
     begin
       # Kernel#open no longer opens URLs on Ruby 3.0+; URI.open is open-uri's
       # entry point. The block form closes the downloaded page after parsing.
       doc = URI.open(url) { |page| Nokogiri::HTML(page) }
       # Keep the first two '.'-separated pieces of the first <p> in the content area.
       first_paragraph = doc.css('#mw-content-text').css('p')[0]
       description = first_paragraph.content.split('.')[0, 2].join('. ')
       output.puts(CSV.generate_line([title, description]))
     rescue StandardError => e
       # Best effort: report the failure and carry on with the next row.
       puts "something went wrong!  skipping"
       puts e.message
     end
   end
 end
	require 'open-uri'
	require 'nokogiri'
	require 'csv'
	require 'cgi'

	# BASE_URL = 'http://en.wikipedia.org/w/api.php?format=json&action=query&prop=revisions&rvprop=content&titles='
	BASE_URL = 'http://en.wikipedia.org/wiki/'

	# Prints +message+ and returns the line typed in reply, without its line ending.
	#
	# Uses the non-bang String#chomp: chomp! returns nil when there is nothing to
	# strip (e.g. a final line with no trailing newline). Aborts the script when
	# input ends, since gets returns nil at EOF and there is nothing left to read.
	def prompt_for_line(message)
	  puts message
	  answer = gets
	  abort 'No input received; exiting.' if answer.nil?
	  answer.chomp
	end

	# The globals are only prompted for when they have not been set already.
	if $input_filename.nil?
	  $input_filename = prompt_for_line("Enter input filename (should be a CSV)")
	end

	if $filename.nil?
	  $filename = prompt_for_line("Enter a filename (output will be stored as a CSV)")
	end

	# For every row of the input CSV, fetch the page named by the first column and
	# append "<title>,<start of the first paragraph>" to the output CSV.
	# The block form of File.open closes (and flushes) the output file even when
	# the loop raises.
	File.open("#{$filename}.csv", "a+") do |output|
	  # Block parameters are written with plain pipes; a backslash-escaped pipe
	  # is not valid Ruby.
	  CSV.foreach($input_filename) do |row|
	    title = row[0]
	    # Blank CSV lines yield a row with no first cell; skip them instead of
	    # letting a NoMethodError on nil abort the whole run.
	    next if title.nil? || title.strip.empty?

	    puts "processing #{title}"
	    url = BASE_URL + title.gsub(' ', '_')
	    puts url
	    begin
	      # Kernel#open no longer opens URLs on Ruby 3.0+; URI.open is open-uri's
	      # entry point. The block form closes the downloaded page after parsing.
	      doc = URI.open(url) { |page| Nokogiri::HTML(page) }
	      # Keep the first two '.'-separated pieces of the first <p> in the content area.
	      first_paragraph = doc.css('#mw-content-text').css('p')[0]
	      description = first_paragraph.content.split('.')[0, 2].join('. ')
	      output.puts(CSV.generate_line([title, description]))
	    rescue StandardError => e
	      # Best effort: report the failure and carry on with the next row.
	      puts "something went wrong! skipping"
	      puts e.message
	    end
	  end
	end
No results found