monfresh · May 14, 2013 01:39
diff --git a/extract_tld_from_url.rb b/extract_tld_from_url.rb
 # Returns the last dot-separated label of the host part of +url+ (the TLD).
 #
 # Accepts URLs with or without a scheme. Any leading scheme ("http://",
 # "https://", ...) is ignored, as are the path, query string, fragment and a
 # trailing ":port". The argument is never modified, so frozen strings are safe.
 #
 # @param url [String] a URL or bare host name
 # @return [String, nil] the TLD exactly as written in +url+ (case is kept),
 #   or nil when +url+ contains no host at all (e.g. an empty string)
 def extract_tld(url)
   # Only strip a scheme at the very start; "://" later in the string belongs
   # to the path or query and must not be touched.
   without_scheme = url.sub(%r{\A[a-z][a-z0-9+.-]*://}i, "")
   # The host ends at the first "/", "?" or "#". This pattern always matches
   # (possibly the empty string), so +host+ is never nil.
   host = without_scheme[%r{\A[^/?#]*}]
   # Drop a trailing port so "example.org:8080" yields "org", not "org:8080".
   host = host.sub(/:\d+\z/, "")
   # An empty host splits to [], whose last element is nil.
   host.split(".").last
 end

 urls = [
   "http://brigade.codeforamerica.org/index.html",
   "brigade.codeforamerica.org",
   "http://codeforamerica.org",
   "codeforamerica.org/index.html",
   "brigade.codeforamerica.org/index.html"
 ]

 # print the TLD of every sample URL
 urls.each do |url|
   tld = extract_tld(url)
   puts tld
 end

 # the tld string can be checked for validity by comparing it to this list: http://data.iana.org/TLD/tlds-alpha-by-domain.txt
 # you can create an array that contains all the TLDs, then check if the array includes the string you're validating.
 # note that the list is published in uppercase (and begins with a "#" comment line), so normalize case before comparing.
 # for example, if the array is called "valid_tlds", and the string you're validating is called "tld", you would do:
 # valid_tlds.include?(tld.upcase)
	# Returns the last dot-separated label of the host part of +url+ (the TLD).
	#
	# Accepts URLs with or without a scheme. Any leading scheme ("http://",
	# "https://", ...) is ignored, as are the path, query string, fragment and a
	# trailing ":port". The argument is never modified, so frozen strings are safe.
	#
	# @param url [String] a URL or bare host name
	# @return [String, nil] the TLD exactly as written in +url+ (case is kept),
	#   or nil when +url+ contains no host at all (e.g. an empty string)
	def extract_tld(url)
	  # Only strip a scheme at the very start; "://" later in the string belongs
	  # to the path or query and must not be touched.
	  without_scheme = url.sub(%r{\A[a-z][a-z0-9+.-]*://}i, "")
	  # The host ends at the first "/", "?" or "#". This pattern always matches
	  # (possibly the empty string), so +host+ is never nil.
	  host = without_scheme[%r{\A[^/?#]*}]
	  # Drop a trailing port so "example.org:8080" yields "org", not "org:8080".
	  host = host.sub(/:\d+\z/, "")
	  # An empty host splits to [], whose last element is nil.
	  host.split(".").last
	end

	urls = [
	  "http://brigade.codeforamerica.org/index.html",
	  "brigade.codeforamerica.org",
	  "http://codeforamerica.org",
	  "codeforamerica.org/index.html",
	  "brigade.codeforamerica.org/index.html"
	]

	# print the TLD of every sample URL
	urls.each do |url|
	  tld = extract_tld(url)
	  puts tld
	end

	# the tld string can be checked for validity by comparing it to this list: http://data.iana.org/TLD/tlds-alpha-by-domain.txt
	# you can create an array that contains all the TLDs, then check if the array includes the string you're validating.
	# note that the list is published in uppercase (and begins with a "#" comment line), so normalize case before comparing.
	# for example, if the array is called "valid_tlds", and the string you're validating is called "tld", you would do:
	# valid_tlds.include?(tld.upcase)
No results found