https://github.com/xgqfrms/Python
https://libraries.io/github/xgqfrms-GitHub/servo
https://www.cnblogs.com/xgqfrms/tag/python/
https://abc.xgqfrms.xyz/Python/
https://www.python.org/dev/peps/pep-0008/
https://github.com/xgqfrms/Python/blob/gh-pages/ebooks/CodeSchool-TryDjango.pdf
https://www.cnblogs.com/xgqfrms/p/5828910.html
https://www.pythonanywhere.com/
在云中托管,运行和编码Python!
https://xgqfrms.pythonanywhere.com/
https://www.pythonanywhere.com/pricing/
https://github.com/FideoJ/get_stjszx/blob/master/get_stjszx.py
__author__ = 'Lin Jian'
# -*- coding: utf-8 -*-
import urllib2
import re
import sys
def get_page(page_index, filename):
    """Fetch one listing page and append its table rows to *filename*.

    Downloads ``http://www.stjszx.net/jszxgk/gk.asp?page=<page_index>``,
    extracts every table row matching the six-column pattern
    (ID, Name, College, Major, ArriveDate, ArriveID) and appends each row
    to *filename* as one UTF-8 line, each field left-aligned and padded to
    15 characters.

    Args:
        page_index: Page number placed in the ``page`` query parameter.
        filename: Path of the output file; opened in append mode.

    Returns:
        True if the page was downloaded and its rows were written,
        False if the download failed (the caller is expected to retry).
    """
    page_url = 'http://www.stjszx.net/jszxgk/gk.asp?page=' + str(page_index)
    try:
        request = urllib2.Request(page_url)
        response = urllib2.urlopen(request, timeout=10)
        print('Page {0}: Connected'.format(page_index))
    except urllib2.URLError as e:
        print('Page {0}: {1}. Retry!'.format(page_index, e.reason))
        return False

    try:
        # Only lines with index > 1090 are kept; presumably everything
        # before that is page boilerplate -- TODO confirm against the site.
        # The body is GBK-encoded and is decoded line by line.
        content = u''.join(
            line.decode('gbk')
            for i, line in enumerate(response)
            if i > 1090
        )
    except IOError as e:
        # A timeout or socket error while reading the body is not a
        # URLError; report it the same way so the caller retries the page
        # instead of the whole script crashing.
        print('Page {0}: {1}. Retry!'.format(page_index, e))
        return False
    finally:
        # Always release the connection, whether or not the read succeeded.
        response.close()

    pattern = re.compile(
        r'<tr>[\s ]*<td><div align="center">(?P<ID>\d+)</div></td>'
        r'[\s ]*<td><div align="center">(?P<Name>.*?)</div></td>'
        r'[\s ]*<td><div align="center">(?P<College>.*?)</div></td>'
        r'[\s ]*<td><div align="center">(?P<Major>.*?)</div></td>'
        r'[\s ]*<td><div align="center">(?P<ArriveDate>.*?) </div></td>'
        r'[\s ]*<td><div align="center">(?P<ArriveID>.*?) </div></td>'
        r'[\s ]*</tr>', re.S)
    # findall returns one tuple of the six captured groups per matched row.
    persons = pattern.findall(content)

    with open(filename, 'a') as f:
        for person in persons:
            # Build the whole row first so each record is a single write.
            row = u''.join(u'{0:15}'.format(item) for item in person) + u'\n'
            f.write(row.encode('utf-8'))
    print('Page {0}: Done'.format(page_index))
    return True
# Script driver: scrape pages 1 through 11 in order, appending every page's
# rows to the output file named by the first command-line argument.
if len(sys.argv) < 2:
    # Fail with a usage message rather than a bare IndexError.
    sys.exit('Usage: {0} <output_file>'.format(sys.argv[0]))
output_file = sys.argv[1]
for page_index in range(1, 12):
    # get_page returns False when the download fails; keep retrying the same
    # page until it succeeds so no page is skipped.
    while not get_page(page_index, output_file):
        pass
爬取汕头市金山中学官网上2016级高考录取信息 网址:http://www.stjszx.net/jszxgk/gk.asp
svg logos
https://rollbar.com/docs/
https://rollbar.com/assets/shared/logos/react-icon.svg
https://rollbar.com/assets/shared/logos/angular-icon.svg
https://rollbar.com/assets/shared/logos/javascript-icon.png
??? How to get the `::before` pseudo-element and its `background-image` with a JS spider ???
// TODO list