zudochkin · January 30, 2012 20:09
diff --git a/gistfile1.rb b/gistfile1.rb
 # coding: utf-8
 
 require 'pp'
 require 'rubygems'
 require 'hpricot'
 require 'iconv'
 require 'net/http'
 
 # Read the catalogue feed exactly as it is stored on disk.
 raw = File.read('9001274.xml')
 # The feed is converted from Windows-1251 to UTF-8 before parsing.
 ic = Iconv.new('UTF-8', 'WINDOWS-1251')
 xml = ic.iconv(raw)
 # Parsed XML document; the rest of the script queries it.
 doc = Hpricot.XML(xml)
 
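 # Debugging aid, kept disabled: prints each <category> element's inner HTML
 # followed by its id and parentId attributes.
 #
 # (doc/'//category').each do |item|
 #   # title = (item/:id)
 #   print item.inner_html
 #   print "\n"
 #   print "id = #{item[:id]}, parentId = #{item[:parentId]}"
 #   print "\n"
 # end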
 
 # For every <offer> element: download its cover image into
 # ./book-images/<offer id>.jpg, then print the offer's fields to stdout,
 # one value per line, each followed by an empty line.
 (doc/'//offer').each do |book|
   # The download is the step that can raise SocketError, so it is the part
   # guarded by the rescue. A failed download is reported on stderr and the
   # offer's text is still printed. The image file is only created once
   # there is data to put in it.
   picture = book.at('picture') # nil when the offer has no <picture> child
   resp = nil
   if picture
     begin
       resp = Net::HTTP.get(URI.parse(picture.to_plain_text.strip))
     rescue SocketError, URI::InvalidURIError => e
       warn "offer #{book[:id]}: cover download failed (#{e.class}: #{e.message})"
     end
   end
   # File.open (not Kernel#open) so the path is always treated as a file name.
   File.open("./book-images/#{book[:id]}.jpg", 'wb') { |file| file.write(resp) } if resp
   %w[url price categoryId picture author name description year ISBN].each do |el|
     node = book.at(el)
     # A missing element prints as an empty value instead of raising
     # NoMethodError on nil, so every offer yields the same number of lines.
     puts(node ? node.to_plain_text.to_s : '')
     puts "\n"
   end
 end
	# coding: utf-8

	require 'pp'
	require 'rubygems'
	require 'hpricot'
	require 'iconv'
	require 'net/http'

	# Read the catalogue feed exactly as it is stored on disk.
	raw = File.read('9001274.xml')
	# The feed is converted from Windows-1251 to UTF-8 before parsing.
	ic = Iconv.new('UTF-8', 'WINDOWS-1251')
	xml = ic.iconv(raw)
	# Parsed XML document; the rest of the script queries it.
	doc = Hpricot.XML(xml)

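	# Debugging aid, kept disabled: prints each <category> element's inner HTML
	# followed by its id and parentId attributes.
	#
	# (doc/'//category').each do |item|
	#   # title = (item/:id)
	#   print item.inner_html
	#   print "\n"
	#   print "id = #{item[:id]}, parentId = #{item[:parentId]}"
	#   print "\n"
	# end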

	# For every <offer> element: download its cover image into
	# ./book-images/<offer id>.jpg, then print the offer's fields to stdout,
	# one value per line, each followed by an empty line.
	(doc/'//offer').each do |book|
	  # The download is the step that can raise SocketError, so it is the part
	  # guarded by the rescue. A failed download is reported on stderr and the
	  # offer's text is still printed. The image file is only created once
	  # there is data to put in it.
	  picture = book.at('picture') # nil when the offer has no <picture> child
	  resp = nil
	  if picture
	    begin
	      resp = Net::HTTP.get(URI.parse(picture.to_plain_text.strip))
	    rescue SocketError, URI::InvalidURIError => e
	      warn "offer #{book[:id]}: cover download failed (#{e.class}: #{e.message})"
	    end
	  end
	  # File.open (not Kernel#open) so the path is always treated as a file name.
	  File.open("./book-images/#{book[:id]}.jpg", 'wb') { |file| file.write(resp) } if resp
	  %w[url price categoryId picture author name description year ISBN].each do |el|
	    node = book.at(el)
	    # A missing element prints as an empty value instead of raising
	    # NoMethodError on nil, so every offer yields the same number of lines.
	    puts(node ? node.to_plain_text.to_s : '')
	    puts "\n"
	  end
	end