Skip to content

Instantly share code, notes, and snippets.

@svetlyak40wt
Created April 14, 2015 13:00
Show Gist options
  • Save svetlyak40wt/c2c318ac78dd06b68a05 to your computer and use it in GitHub Desktop.
Save svetlyak40wt/c2c318ac78dd06b68a05 to your computer and use it in GitHub Desktop.
Пример работы с PostgreSQL из Python + gevent и пулом коннектов от SQLAlchemy
#!/usr/bin/env python
# SOME DOCS:
# PostgreSQL driver: http://initd.org/psycopg/docs/
# How to set up PostgreSQL in Docker: https://registry.hub.docker.com/_/postgres/
# Apply gevent's patch to the thread module first.
# NOTE(review): this is done ahead of the remaining imports, presumably so
# that the modules imported below already see the patched version - confirm.
import gevent.monkey
gevent.monkey.patch_thread()
# Install psycogreen's gevent hook into psycopg2.
# NOTE(review): per the psycogreen docs this is what lets database calls
# yield to other greenlets instead of blocking the whole process - confirm.
import psycogreen.gevent
psycogreen.gevent.patch_psycopg()
import os
import re
import urlparse
import grequests
import datetime
import contextlib
# run:
# sudo apt-get install libpq-dev
# pip install psycopg2
from sqlalchemy import create_engine
# disable ssl warnings on old 2.7 python
# https://urllib3.readthedocs.org/en/latest/security.html#insecureplatformwarning
from requests.packages import urllib3
urllib3.disable_warnings()
# Engine shared by the whole script and used as the connection pool.
# Host and port are read from the POSTGRES_PORT_5432_TCP_* environment
# variables (a missing variable raises KeyError at import time); user,
# password and database name are hard-coded.
# NOTE(review): the variable names look like Docker container-link
# variables - confirm against the deployment.
pool = create_engine(
    'postgresql://{user}:{password}@{host}:{port}/{dbname}'.format(**{
        'host': os.environ['POSTGRES_PORT_5432_TCP_ADDR'],
        'port': os.environ['POSTGRES_PORT_5432_TCP_PORT'],
        'user': 'postgres',
        'password': 'mysecretpassword',
        'dbname': 'async',
    }),
    pool_size=10,
)
@contextlib.contextmanager
def connection():
    """Yield a connection obtained from the module-level ``pool``.

    Intended for use in a ``with`` statement::

        with connection() as conn:
            conn.execute(...)

    The connection is closed when the ``with`` block exits, whether it
    finishes normally or raises, so that it is handed back to the pool
    instead of staying checked out.
    """
    conn = pool.connect()
    try:
        yield conn
    finally:
        # Without this every call would keep its connection checked out.
        conn.close()
# URLs that have already been handed to ``queue``; consulted there so the
# same page is never scheduled twice.
visited = set()
# Number of pages that may still be stored.  ``fetch_page`` decrements it
# after each stored page and ``main`` waits until it is no longer positive.
download_limit = 100
def queue(url):
    """Schedule ``url`` for fetching unless it was scheduled before.

    A URL seen for the first time is recorded in ``visited`` and passed to
    a newly spawned greenlet running ``fetch_page``; a repeated URL is
    ignored.

    :param url: URL of the page to fetch.
    """
    if url in visited:
        return
    visited.add(url)
    gevent.spawn(fetch_page, url)
def fetch_page(url):
    """Download ``url``, store its text and queue the links found in it.

    Nothing happens once ``download_limit`` is used up.  A page answering
    with HTTP 200 is inserted into the ``docs`` table (url, today's date,
    body text) and ``download_limit`` is decremented.  Every
    ``<a href="/...">`` link in the body is then resolved against the
    response URL and passed to ``queue``.  Any other status code is only
    reported on stdout.

    The limit is checked both before the request and again before the
    insert.  The counter is decremented only after the insert, so a few
    pages whose inserts overlap can still exceed the limit.

    :param url: absolute URL of the page to fetch.
    """
    global download_limit

    if download_limit <= 0:
        return

    # Single pre-formatted argument: prints the same text as the original
    # print statement on Python 2 and is also valid Python 3 syntax.
    print('Fetching page %s' % url)
    # NOTE(review): this code expects ``send()`` to return the response
    # object; some grequests releases may return the request wrapper
    # instead - confirm against the installed version.
    result = grequests.get(url).send()

    if result.status_code != 200:
        print('ERROR, status code %s for page %s' % (result.status_code, url))
        return

    # While the request was in flight other greenlets may have used up the
    # budget, so check again before storing anything.
    if download_limit <= 0:
        return

    content = result.text
    with connection() as conn:
        conn.execute(
            'INSERT INTO docs (url, created, text) VALUES(%s, %s, %s)',
            (url, datetime.date.today(), content))
    # Decrement only after the row is written: ``main`` stops waiting as
    # soon as the counter is no longer positive.
    download_limit -= 1

    # Only site-relative links (href starting with "/") are followed.
    # The pattern is unchanged; a plain raw string replaces ``ur''``.
    for match in re.finditer(r'<a href="(?P<url>/.*?)"', content):
        # A separate name keeps the ``url`` argument from being clobbered.
        link = urlparse.urljoin(result.url, match.group('url'))
        queue(link)
def main(start_url='https://yaca.yandex.ru', poll_interval=1):
    """Crawl from ``start_url`` until the download limit is used up.

    :param start_url: first page handed to ``queue``; defaults to the URL
        that was previously hard-coded.
    :param poll_interval: seconds to sleep between checks of
        ``download_limit``.
    """
    queue(start_url)
    # Sleeping through gevent gives the spawned greenlets a chance to run.
    # NOTE(review): the loop ends only when ``download_limit`` is no longer
    # positive, so a crawl that runs out of links earlier never returns.
    while download_limit > 0:
        gevent.sleep(poll_interval)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment