Skip to content

Instantly share code, notes, and snippets.

@sarathlal-old
Last active July 2, 2016 12:48
Show Gist options
  • Save sarathlal-old/c3361f9ea2443085ac103b415044ee78 to your computer and use it in GitHub Desktop.
Save sarathlal-old/c3361f9ea2443085ac103b415044ee78 to your computer and use it in GitHub Desktop.
import csv
import urllib

from bs4 import BeautifulSoup
# Scrape the listing pages numbered 120-129 from the demo server and write
# one CSV row per page to jan.csv.
#
# Row layout: title, author, phone, email, postdate, qty, desc, img, price,
# district, fulladdress.
#
# Rows go through csv.writer so that a field containing a comma (the
# description, or the comma-joined full address) is quoted instead of
# silently shifting the columns that follow it.
#
# NOTE(review): BeautifulSoup is called without an explicit parser, so bs4
# picks whichever parser is installed; pin one (e.g. "html.parser") once the
# expected parse output has been confirmed against the real pages.
with open('jan.csv', 'w') as f:
    # lineterminator='\n' keeps the row endings this script has always
    # produced (csv.writer would otherwise emit '\r\n').
    writer = csv.writer(f, lineterminator='\n')

    for i in range(120, 130):
        openurl = 'http://192.168.1.64/demo/test/fold/' + str(i) + '.html'

        # Close the HTTP response even if reading it fails.
        response = urllib.urlopen(openurl)
        try:
            r = response.read()
        finally:
            response.close()
        soup = BeautifulSoup(r)

        # Look each container up once instead of re-searching the whole
        # document for every field.
        contact = soup.find("div", class_="row1 content conAddres")
        sidebar = soup.find("div", class_="conAddressRight")
        quantity_span = sidebar.find("div", class_="row1 quantity").find("span")
        heading = contact.find("h4")

        title = contact.find("h3").string
        author = heading.find("a").string
        phone = contact.find("div", class_="row1 phoneNo").find("p").string
        email = contact.find("div", class_="row1 email").find("p").string

        # Fixed-offset slices below strip what is presumably a constant label
        # prefix/suffix around each value -- verify against the page markup.
        postdate = sidebar.find("span", class_="postDate1").string
        postdate = postdate[14:-1]
        qty1 = quantity_span.find("p").string
        qty = qty1[23:-1]

        desc = soup.find("div", class_="row1 detailContent Dmain").find("p", id='long_desc').string
        img = soup.find("div", class_="row1 detailPic").find("img")['src']

        # The quantity <p> must be read (above) before it is removed here:
        # once it is gone, the span's remaining text is the price.
        quantity_span.find('p').replaceWith('')
        price2 = quantity_span.text
        price = price2[11:-3]

        # Likewise the author link is read above and then removed, leaving
        # the address text in the <h4>.
        heading.find('a').replaceWith('')
        addstring = heading.text
        wholeadd = addstring[2:]

        # Drop the first character of every comma-separated part; the last
        # part is the district, the rest form the full address.
        address = [part[1:] for part in wholeadd.split(",")]
        district = address.pop()
        fulladdress = ",".join(str(part) for part in address)

        fields = [title, author, phone, email, postdate, qty, desc, img,
                  price, district, fulladdress]
        # str() is kept so values are rendered exactly as before (e.g. a
        # missing .string still shows up as "None").
        writer.writerow([str(field) for field in fields])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment