Skip to content

Instantly share code, notes, and snippets.

@imweijh
Last active November 26, 2020 14:06
Show Gist options
  • Save imweijh/54c79fa815c39f1e5bfb5f249c68b82e to your computer and use it in GitHub Desktop.
szfdc project web scraping huarun4
from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
# Scrape the Shenzhen szfdc real-estate site: starting from one project-detail
# page, follow two levels of "building" links, then every "housedetail" link,
# and print one numbered CSV row per house.
#
# Fixes over the original flat script:
#   * the loop variable `link` and collection `all_links` were reused across
#     all three nesting levels (it only worked because `all_links[:]` copied
#     the list before rebinding) -- each level now has its own names;
#   * every urlopen() response was left unclosed (socket leak over many
#     requests) -- responses are now closed via `with`;
#   * the URL-join + find_all + loop pattern was duplicated three times.

BASE_URL = "http://zjj.sz.gov.cn/ris/bol/szfdc/"
PROJECT_URL = BASE_URL + "projectdetail.aspx?id=49916"
# Other project ids previously scraped: 49818, 49633, 49574


def _fetch_soup(url):
    """Fetch *url* and return its parsed BeautifulSoup tree.

    Uses `with` so the HTTP response is closed as soon as parsing is done.
    """
    with urlopen(url) as response:
        return BeautifulSoup(response, 'lxml')


def _matching_links(soup, href_pattern):
    """Absolute URLs of every <a> whose href matches *href_pattern*."""
    anchors = soup.find_all("a", href=re.compile(href_pattern))
    return [BASE_URL + a.get("href") for a in anchors]


def _house_csv_row(house_soup, seq):
    """One CSV line for a house-detail page.

    Column order matches the original scrape: a zero-padded sequence
    number followed by the <td> cells at indices 1,3,9,11,7,15,17,19,13.
    """
    tds = house_soup.find_all('td')
    cells = [format(seq, '04d')]
    cells += [tds[i].text.strip() for i in (1, 3, 9, 11, 7, 15, 17, 19, 13)]
    return ",".join(cells)


count = 1
for building_url in _matching_links(_fetch_soup(PROJECT_URL), 'building'):
    for floor_url in _matching_links(_fetch_soup(building_url), 'building'):
        for house_url in _matching_links(_fetch_soup(floor_url), 'housedetail'):
            print(_house_csv_row(_fetch_soup(house_url), count))
            count += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment