Created
January 24, 2009 18:29
-
-
Save satococoa/51504 to your computer and use it in GitHub Desktop.
サイトマップ生成
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #!/usr/bin/env ruby | |
| require "rubygems" | |
| require "open-uri" | |
| require "mechanize" | |
| module Sitemap | |
# Crawls a web site starting from one URL and collects the absolute URLs of
# the pages it reaches on the same host.
class UriList
  # Absolute URLs matching this pattern are ignored: links ending in a
  # "#fragment" and URLs ending in a common image extension.
  SKIP_PATTERN = /(#.+|jpg|png|gif|jpeg)$/i

  def initialize
    @url_list = []
    # NOTE(review): WWW::Mechanize is the namespace used by old Mechanize
    # releases; newer releases appear to expose it as plain Mechanize --
    # confirm against the installed gem version.
    @agent = WWW::Mechanize.new
  end

  # Collects every URL reachable from +origin+ that lives on the same host.
  #
  # The crawl is iterative (a worklist instead of recursion) so deep sites
  # cannot overflow the stack, and every URL is fetched at most once.
  #
  # origin - the absolute URL (String or URI) where the crawl starts; it is
  #          recorded in the result as well.
  #
  # Returns an Array of unique absolute URL Strings. URLs collected by
  # earlier calls on the same instance are kept and returned too.
  # Aborts the process if a page cannot be fetched.
  def gather(origin)
    site_host = URI.parse(origin.to_s).host

    # Hash lookup keeps the "already visited?" test cheap on large sites.
    seen = {}
    @url_list.each { |known| seen[known] = true }

    pending = [origin.to_s]
    until pending.empty?
      url = pending.shift
      next if seen[url]

      seen[url] = true
      @url_list << url

      page = fetch(url)
      # Only documents that expose links (HTML pages) are followed further;
      # anything else is recorded but has no links to follow.
      next unless page.respond_to?(:links)

      page.links.each do |link|
        target = absolute_url(page, link)
        next if target.nil? || seen[target]

        pending << target if crawlable?(target, site_host)
      end
    end

    @url_list.uniq
  end

  private

  # Fetches +url+ with the agent; on any failure prints the error message
  # and the offending URL, then aborts the process.
  def fetch(url)
    @agent.get(url)
  rescue StandardError => e
    puts e.message
    puts url
    abort 'error...'
  end

  # Resolves a link against the page it appears on.
  # Returns nil for links without a target or with an unparsable one.
  def absolute_url(page, link)
    href = link.uri
    return nil if href.nil?

    (page.uri + href).to_s
  rescue URI::Error
    nil
  end

  # True when +url+ is not filtered by SKIP_PATTERN and points at
  # +site_host+. URLs without a host (mailto:, javascript:, ...) never match.
  def crawlable?(url, site_host)
    return false if url =~ SKIP_PATTERN

    URI.parse(url).host == site_host
  rescue URI::Error
    false
  end
end
# Serializes a list of URLs as a sitemap XML document.
class Writer
  include Nokogiri::XML

  # Prepares an empty document whose root is a <urlset> element carrying the
  # sitemap protocol namespace as its xmlns attribute.
  def initialize
    @doc = Document.new
    @doc.root = Element.new('urlset', @doc)
    @doc.root.set_attribute('xmlns', 'http://www.sitemaps.org/schemas/sitemap/0.9')
  end

  # Appends one <url><loc>...</loc></url> entry per item of +url_list+ to the
  # document and returns the whole document as an XML string.
  #
  # Entries are added to the document held by this instance, so calling the
  # method again appends to what earlier calls already added.
  def to_sitemap(url_list)
    urlset = @doc.root
    url_list.each do |address|
      location = Element.new('loc', @doc)
      location.content = address

      entry = Element.new('url', @doc)
      entry.add_child(location)
      urlset.add_child(entry)
    end
    @doc.to_xml
  end
end
| end | |
# Script entry point: crawl a site and print its sitemap XML to stdout.
# The start URL may be given as the first command-line argument; without one
# the original default URL is used.
if __FILE__ == $0
  start_url = ARGV.first || 'http://www.example.com/'
  uris = Sitemap::UriList.new
  writer = Sitemap::Writer.new
  puts writer.to_sitemap(uris.gather(start_url))
end
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment