@imweijh
Last active November 20, 2020 06:57
Dirty web scraping: szfdc huarun4
Usage: install Anaconda (it bundles Python plus the BeautifulSoup and lxml packages used below), then run:

python szfdc.py > hr4.txt

This scrapes one building at a time; to scrape other buildings, edit the urlz link in the script yourself.
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re

# Listing page for one building; the id and presellid query parameters select the building.
urlz = "http://zjj.sz.gov.cn/ris/bol/szfdc/building.aspx?id=38063&presellid=49373"
htmlz = urlopen(urlz)
soupz = BeautifulSoup(htmlz, 'lxml')

# Collect every link that points to a unit detail page.
all_links = soupz.find_all("a", href=re.compile('housedetail'))

# By default only the first 12 links are scraped.
# for link in all_links[:]:
for link in all_links[:12]:
    theurl = "http://zjj.sz.gov.cn/ris/bol/szfdc/" + link.get("href")
    # print(theurl)
    thehtml = urlopen(theurl)
    thesoup = BeautifulSoup(thehtml, 'lxml')
    tds = thesoup.find_all('td')
    # Pick the table cells of interest and emit one tab-separated row.
    mystr = "\t".join([tds[1].text.strip(), tds[11].text.strip(), tds[7].text.strip(),
                       tds[15].text.strip(), tds[17].text.strip(), tds[19].text.strip(),
                       tds[13].text.strip()])
    print(mystr)

print(urlz)
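
The building is hard-coded in urlz. As a minimal sketch of the "edit the urlz link yourself" step, the variant below takes id and presellid as command-line arguments instead; the scrape_building helper and the argument handling are illustrative assumptions, not part of the original gist.

# Sketch: parameterize the building instead of editing urlz by hand.
# scrape_building and the CLI handling are hypothetical additions.
import re
import sys
from urllib.request import urlopen

from bs4 import BeautifulSoup

BASE = "http://zjj.sz.gov.cn/ris/bol/szfdc/"

def scrape_building(building_id, presell_id, limit=12):
    """Print tab-separated rows for the first `limit` units of one building."""
    listing = BASE + "building.aspx?id=%s&presellid=%s" % (building_id, presell_id)
    soup = BeautifulSoup(urlopen(listing), 'lxml')
    for link in soup.find_all("a", href=re.compile('housedetail'))[:limit]:
        detail = BeautifulSoup(urlopen(BASE + link.get("href")), 'lxml')
        tds = detail.find_all('td')
        # Same table cells, in the same order, as the original script.
        print("\t".join(tds[i].text.strip() for i in (1, 11, 7, 15, 17, 19, 13)))
    print(listing)

if __name__ == "__main__":
    # e.g. python szfdc.py 38063 49373 > hr4.txt
    scrape_building(sys.argv[1], sys.argv[2])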