Jack2 · November 22, 2014 09:29
diff --git a/skt_wrapper.py b/skt_wrapper.py
 #-*- coding: utf-8 -*-
 import urllib
 import csv
 from bs4 import BeautifulSoup
 #----------------------------------------------------------------------
 ## save all links from URL
 def grap_type_from_url(url,tag1,tag2):
 	"""Fetch *url* and append one attribute value per matching tag to a CSV file.

 	The page is parsed with BeautifulSoup. For every ``tag1`` element, the
 	``tag2`` attribute is lower-cased, encoded to ASCII (non-ASCII characters
 	are dropped) and appended, one value per line, to ``test_grap_url.csv``
 	in the current working directory.

 	Args:
 		url: Address handed to ``urllib.urlopen``.
 		tag1: Tag name to search for (e.g. ``'a'``).
 		tag2: Attribute to read from each matching tag (e.g. ``'href'``).

 	Returns:
 		None.

 	Raises:
 		Whatever ``urllib.urlopen`` or ``BeautifulSoup`` raise for *url*;
 		a failure on an individual tag is printed and that tag is skipped.
 	"""
 	page = urllib.urlopen(url)
 	try:
 		soup = BeautifulSoup(page)
 	finally:
 		# Close the response handle even when parsing fails.
 		page.close()

 	rows = []
 	for node in soup.findAll(tag1):
 		# Handle each tag on its own so that one problematic element does
 		# not discard every tag that follows it.
 		try:
 			# .get() yields None for a missing attribute, whereas
 			# node[tag2] raises KeyError.
 			value = node.get(tag2)
 			if value is None:
 				continue
 			rows.append(value.lower().encode('ascii','ignore')+"\n")
 		except Exception as e:
 			print(e)

 	# Open the output once, and only when there is something to append,
 	# so a page without matches does not create an empty file.
 	if rows:
 		with open('test_grap_url.csv','a') as out:
 			out.writelines(rows)

 ## save all links from a .html
 def save_type_from_file(fpath,tag1,tag2):
 	"""Extract attribute values from a saved HTML file and crawl each one.

 	*fpath* is read line by line and each line is parsed on its own with
 	BeautifulSoup. For every ``tag1`` element found, the ``tag2`` attribute
 	is lower-cased, encoded to ASCII (non-ASCII characters are dropped),
 	written as one line to ``fpath + '_convert.csv'`` and then passed as the
 	URL to ``grap_type_from_url`` together with the same ``tag1``/``tag2``.

 	Args:
 		fpath: Path of the HTML file to read.
 		tag1: Tag name to search for (e.g. ``'a'``).
 		tag2: Attribute to read from each matching tag (e.g. ``'href'``).

 	Returns:
 		None.

 	Raises:
 		IOError: If *fpath* cannot be opened for reading or the output
 			file cannot be opened for writing.
 	"""
 	# NOTE(review): because every line is parsed separately, a tag that is
 	# split across several lines is presumably not recognised - confirm
 	# whether parsing the whole file at once would be preferable.
 	with open(fpath,'r') as src:
 		with open(fpath+'_convert.csv','w') as out:
 			for line in src:
 				for node in BeautifulSoup(line).findAll(tag1):
 					# Guard each link separately: a missing attribute or
 					# a link that cannot be fetched must not stop the
 					# remaining links on this line from being handled.
 					try:
 						# .get() yields None for a missing attribute,
 						# whereas node[tag2] raises KeyError.
 						value = node.get(tag2)
 						if value is None:
 							continue
 						link = value.lower().encode('ascii','ignore')
 						# Record the link before crawling it so it is
 						# kept even when the fetch below fails.
 						out.write(link+"\n")
 						grap_type_from_url(link,tag1,tag2)
 					except Exception as e:
 						print(e)


 #----------------------------------------------------------------------
 if __name__ == "__main__":
 	# Script entry point: collect the href of every <a> tag in the saved
 	# page and crawl each collected link.
 	fpath = 'index_real.html'
 	save_type_from_file(fpath, 'a', 'href')
 	# Alternative: crawl a live page directly, e.g.
 	#   grap_type_from_url("http://www.skt-lte.co.kr/", 'a', 'href')
	#-- coding: utf-8 --
	import urllib
	import csv
	from bs4 import BeautifulSoup
	#----------------------------------------------------------------------
	## save all links from URL
	def grap_type_from_url(url,tag1,tag2):
		"""Fetch *url* and append one attribute value per matching tag to a CSV file.

		The page is parsed with BeautifulSoup. For every ``tag1`` element, the
		``tag2`` attribute is lower-cased, encoded to ASCII (non-ASCII
		characters are dropped) and appended, one value per line, to
		``test_grap_url.csv`` in the current working directory.

		Args:
			url: Address handed to ``urllib.urlopen``.
			tag1: Tag name to search for (e.g. ``'a'``).
			tag2: Attribute to read from each matching tag (e.g. ``'href'``).

		Returns:
			None.

		Raises:
			Whatever ``urllib.urlopen`` or ``BeautifulSoup`` raise for *url*;
			a failure on an individual tag is printed and that tag is skipped.
		"""
		page = urllib.urlopen(url)
		try:
			soup = BeautifulSoup(page)
		finally:
			# Close the response handle even when parsing fails.
			page.close()

		rows = []
		for node in soup.findAll(tag1):
			# Handle each tag on its own so that one problematic element
			# does not discard every tag that follows it.
			try:
				# .get() yields None for a missing attribute, whereas
				# node[tag2] raises KeyError.
				value = node.get(tag2)
				if value is None:
					continue
				rows.append(value.lower().encode('ascii','ignore')+"\n")
			except Exception as e:
				print(e)

		# Open the output once, and only when there is something to append,
		# so a page without matches does not create an empty file.
		if rows:
			with open('test_grap_url.csv','a') as out:
				out.writelines(rows)

	## save all links from a .html
	def save_type_from_file(fpath,tag1,tag2):
		"""Extract attribute values from a saved HTML file and crawl each one.

		*fpath* is read line by line and each line is parsed on its own with
		BeautifulSoup. For every ``tag1`` element found, the ``tag2``
		attribute is lower-cased, encoded to ASCII (non-ASCII characters are
		dropped), written as one line to ``fpath + '_convert.csv'`` and then
		passed as the URL to ``grap_type_from_url`` together with the same
		``tag1``/``tag2``.

		Args:
			fpath: Path of the HTML file to read.
			tag1: Tag name to search for (e.g. ``'a'``).
			tag2: Attribute to read from each matching tag (e.g. ``'href'``).

		Returns:
			None.

		Raises:
			IOError: If *fpath* cannot be opened for reading or the output
				file cannot be opened for writing.
		"""
		# NOTE(review): because every line is parsed separately, a tag that
		# is split across several lines is presumably not recognised -
		# confirm whether parsing the whole file at once would be preferable.
		with open(fpath,'r') as src:
			with open(fpath+'_convert.csv','w') as out:
				for line in src:
					for node in BeautifulSoup(line).findAll(tag1):
						# Guard each link separately: a missing attribute
						# or a link that cannot be fetched must not stop
						# the remaining links on this line.
						try:
							# .get() yields None for a missing attribute,
							# whereas node[tag2] raises KeyError.
							value = node.get(tag2)
							if value is None:
								continue
							link = value.lower().encode('ascii','ignore')
							# Record the link before crawling it so it is
							# kept even when the fetch below fails.
							out.write(link+"\n")
							grap_type_from_url(link,tag1,tag2)
						except Exception as e:
							print(e)


	#----------------------------------------------------------------------
	if __name__ == "__main__":
		# Script entry point: collect the href of every <a> tag in the saved
		# page and crawl each collected link.
		fpath = 'index_real.html'
		save_type_from_file(fpath,'a','href')
		# Alternative: crawl a live page directly, e.g.
		#   grap_type_from_url("http://www.skt-lte.co.kr/", 'a', 'href')