Skip to content

Instantly share code, notes, and snippets.

@codeperfectplus
Last active April 24, 2021 15:17
Show Gist options
  • Save codeperfectplus/01338ac2c0b05f1a15863263fe36bf5a to your computer and use it in GitHub Desktop.
Save codeperfectplus/01338ac2c0b05f1a15863263fe36bf5a to your computer and use it in GitHub Desktop.
Scrape GitHub user data using an async API and save it in an SQLite database
'''
Script that fetches GitHub user data through an asynchronous API, logs its progress, and saves the output in an SQLite database.
'''
import logging
import sqlite3
import aiohttp
import asyncio
from sqlite3 import Error
async def fetch_github(url):
    """Fetch *url* and return its JSON-decoded response body.

    A new ``aiohttp.ClientSession`` is opened for the request and closed
    again before the function returns.

    Args:
        url: Address the GET request is sent to.

    Returns:
        The decoded JSON payload of the response.

    Raises:
        aiohttp.ClientResponseError: If the server answers with an HTTP
            status of 400 or higher (e.g. a 404 for an unknown user).
    """
    async with aiohttp.ClientSession() as session:
        async with session.get(url) as response:
            # Fail loudly on error statuses instead of handing an error
            # payload back to the caller as if it were user data.
            response.raise_for_status()
            return await response.json()
# Table that stores the fetched profiles, and the SQLite file that holds it.
table_name = "ghuser"
db_file = "database.db"

# Send log records both to debug.log and to the console.
logging.basicConfig(
    format="%(levelname)s - %(asctime)s - %(name)s- %(message)s",
    # Day/month/year followed by a 12-hour clock, e.g. "24/04/2021 03:17:05 PM".
    datefmt="%d/%m/%Y %I:%M:%S %p",
    level=logging.INFO,
    handlers=[
        logging.FileHandler('debug.log'),
        logging.StreamHandler(),
    ],
)
def create_connection(db_file):
    """Open a connection to the SQLite database stored at *db_file*.

    Args:
        db_file: Path of the database file to open.

    Returns:
        An open ``sqlite3.Connection``, or ``None`` when the database could
        not be opened. The failure is logged so it does not pass unnoticed.
    """
    try:
        # Connect directly: using the connection as a context manager only
        # scopes a transaction, it does not close the connection.
        return sqlite3.connect(db_file)
    except sqlite3.Error:
        logging.exception("could not open database %r", db_file)
        return None
def create_table(table_name):
    """Ensure the user table exists and return a connection and cursor for it.

    The database named by the module-level ``db_file`` is opened through
    ``create_connection``.

    Args:
        table_name: Name of the table to create when it is missing. It is
            interpolated into the SQL text (identifiers cannot be bound as
            parameters), so it must come from trusted code.

    Returns:
        A ``(connection, cursor)`` tuple for the opened database.

    Raises:
        sqlite3.OperationalError: If the database could not be opened.
    """
    conn = create_connection(db_file)
    if conn is None:
        # Without a connection there is no cursor to hand back; raise a
        # clear error rather than returning a half-built result.
        raise sqlite3.OperationalError(f"could not open database {db_file!r}")

    cur = conn.cursor()
    # ``hireable`` is declared boolean: it keeps NUMERIC affinity, so the
    # flag is stored as an integer (or NULL) rather than as text.
    cur.execute(f''' CREATE TABLE IF NOT EXISTS {table_name}(
        login text,
        name text,
        company text,
        location text,
        email text,
        hireable boolean,
        bio text,
        twitter_username text,
        public_repos int,
        followers int,
        following int)
    ''')
    return conn, cur
# Create the module's event loop explicitly and register it as the current
# one; relying on asyncio.get_event_loop() to create a loop implicitly is
# deprecated and fails on newer Python versions.
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)
def insert_data(username):
    """Fetch one GitHub profile and store it as a row in the database.

    The table named by the module-level ``table_name`` is created on demand,
    the profile is requested from the GitHub users endpoint, and the row is
    committed. The database connection is closed before returning.

    Args:
        username: GitHub login whose profile is requested.

    Raises:
        KeyError: If the response carries no ``login`` field, i.e. it is
            not a user profile (for example an error payload).
    """
    # Column order must match the table definition in create_table().
    fields = (
        "login",
        "name",
        "company",
        "location",
        "email",
        "hireable",
        "bio",
        "twitter_username",
        "public_repos",
        "followers",
        "following",
    )

    conn, cur = create_table(table_name)
    try:
        loop = asyncio.get_event_loop()
        result = loop.run_until_complete(
            fetch_github(f"https://api.github.com/users/{username}")
        )

        if not isinstance(result, dict) or "login" not in result:
            raise KeyError(
                f"GitHub returned no user profile for {username!r}: {result!r}"
            )

        # Optional fields that are absent from the response are stored as NULL.
        row = tuple(result.get(field) for field in fields)
        placeholders = ", ".join("?" for _ in fields)

        # Insert into the same table that create_table() just ensured exists.
        cur.execute(f"INSERT INTO {table_name} VALUES({placeholders})", row)
        logging.info("saving %s to database", username)
        conn.commit()
    finally:
        # Every call opens its own connection, so release it here.
        conn.close()
if __name__ == '__main__':
    # Store the profile of each listed account, one after another.
    for account in ("python", "google"):
        insert_data(account)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment