automata · April 20, 2012 23:27
diff --git a/scraper.py b/scraper.py
 # -*- coding: utf-8 -*-

 import urllib
 import urllib2
 import string
 import sys
 from bs4 import BeautifulSoup

 # Fetch the English Wikipedia "Music" article and print the text of each
 # paragraph (<p>) element it contains.

 # faking a browser: send a browser-like User-Agent header with the request
 user_agent = "Mozilla/5.0 (Linux x86_64) Gecko/20120324 Firefox/14.0a1"
 headers = { 'User-Agent' : user_agent }

 # making the HTTP request; data=None means no request body, i.e. a plain GET
 data = None
 request = urllib2.Request("http://en.wikipedia.org/wiki/Music", data, headers)
 response = urllib2.urlopen(request)

 # reading the whole response body; the connection is always released,
 # even if read() raises part-way through
 try:
     htmlpage = response.read()
 finally:
     response.close()

 # parsing with beautifulsoup; the parser is named explicitly so the resulting
 # tree does not depend on which optional parser libraries are installed
 soup = BeautifulSoup(htmlpage, "html.parser")

 # we want just the <p> elements
 ps = soup.find_all("p")

 # When stdout is piped or redirected, Python 2 reports no encoding and
 # printing non-ASCII unicode raises UnicodeEncodeError.  Encode explicitly:
 # use the terminal's encoding when known, UTF-8 otherwise, and replace any
 # character the target encoding cannot represent instead of crashing.
 output_encoding = getattr(sys.stdout, "encoding", None) or "utf-8"

 # printing just the text inside <p> elements already found
 for p in ps:
     print(p.text.encode(output_encoding, "replace"))
	# -*- coding: utf-8 -*-

	import urllib
	import urllib2
	import string
	import sys
	from bs4 import BeautifulSoup

	# Fetch the English Wikipedia "Music" article and print the text of each
	# paragraph (<p>) element it contains.

	# faking a browser: send a browser-like User-Agent header with the request
	user_agent = "Mozilla/5.0 (Linux x86_64) Gecko/20120324 Firefox/14.0a1"
	headers = { 'User-Agent' : user_agent }

	# making the HTTP request; data=None means no request body, i.e. a plain GET
	data = None
	request = urllib2.Request("http://en.wikipedia.org/wiki/Music", data, headers)
	response = urllib2.urlopen(request)

	# reading the whole response body; the connection is always released,
	# even if read() raises part-way through
	try:
	    htmlpage = response.read()
	finally:
	    response.close()

	# parsing with beautifulsoup; the parser is named explicitly so the resulting
	# tree does not depend on which optional parser libraries are installed
	soup = BeautifulSoup(htmlpage, "html.parser")

	# we want just the <p> elements
	ps = soup.find_all("p")

	# When stdout is piped or redirected, Python 2 reports no encoding and
	# printing non-ASCII unicode raises UnicodeEncodeError.  Encode explicitly:
	# use the terminal's encoding when known, UTF-8 otherwise, and replace any
	# character the target encoding cannot represent instead of crashing.
	output_encoding = getattr(sys.stdout, "encoding", None) or "utf-8"

	# printing just the text inside <p> elements already found
	# (the print call must sit inside the loop body, one level deeper than `for`)
	for p in ps:
	    print(p.text.encode(output_encoding, "replace"))