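# Stardox-style GitHub scraper: collects every stargazer, watcher and forker
# of a repository, visits each user's profile to scrape their details
# (real name, website, location, repo/star/follower/following counts, and the
# e-mail address from their latest commit), then writes the results to a CSV
# file and a MySQL table. A small bottle web server accepts the repository
# URL via POST. Requires the local `colors` and `data` helper modules from
# the stardox project.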
import sys
import os
import csv
import requests
from bs4 import BeautifulSoup
import mysql.connector
import colors  # local module: colored console output
import data    # local module: shared lists that accumulate scrape results
# MySQL connection; the credentials below match a default local setup.
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="",
    database="test"
)
mycursor = mydb.cursor()
mycursor.execute(
    "CREATE TABLE IF NOT EXISTS `allinfo` ("
    "id INT AUTO_INCREMENT PRIMARY KEY, "
    "name TEXT, location TEXT, username TEXT, website TEXT, "
    "followers INT, stars INT, following INT, email TEXT, "
    "type VARCHAR(10), repositories INT)"
)
def getting_header(soup_text):
    # The page title looks like "GitHub - username/repository: description";
    # the slice between "/" and ":" yields the repository name.
    title = soup_text.title.get_text()
    start = title.find('/')
    stop = title.find(':')
    return title[start + 1: stop]
def format_url(url):
    # Normalize the repository URL to the https:// form.
    if url.startswith('http://'):
        url = url.replace('http', 'https')
    elif url.startswith('www.'):
        url = url.replace('www.', 'https://')
    elif url.startswith('https://') or url.startswith('https://www.'):
        pass
    else:
        colors.error("Enter the repository URL in the given format "
                     "[ https://github.com/username/repository_name ]")
    return url
def verify_url(page_data):
    # Return True when the fetched page looks like a repository page.
    text = str(page_data)
    if "Popular repositories" in text:  # A profile page, not a repository
        return False
    if "Page not found" in text:        # GitHub's 404 page
        return False
    return True
def get_latest_commit(repo_name, username):
    # Scrape the author's e-mail address from the newest non-merge commit.
    email = ""
    commit_data = requests.get(
        "https://github.com"
        "/{}/{}/commits?author={}".format(username, repo_name, username)).text
    soup = BeautifulSoup(commit_data, "lxml")
    for a_tag in soup.findAll("a"):
        URL = a_tag.get("href")
        if URL and URL.startswith("/{}/{}/commit/".format(username, repo_name)):
            label = str(a_tag.get("aria-label"))
            if "Merge" not in label and label != "None":
                # The .patch view embeds the author as "Name <email>".
                patch_data = requests.get("https://github.com{}{}".format(
                    URL, ".patch")).text
                try:
                    start = patch_data.index("<")
                    stop = patch_data.index(">")
                    email = patch_data[start + 1: stop]
                except ValueError:
                    return "Not enough information."
                break
    if email != "":
        return email
    return "Not enough information."
def save_info(dat='stardox'):
    # Start from a clean table on every run.
    mycursor.execute("TRUNCATE TABLE allinfo")
    mydb.commit()
    if dat == 'stardox':
        fields = ["Realname", "Username", "Website", "Location",
                  "Repositories", "Stars", "Followers", "Following",
                  "Email", "Type"]
        rows = [[0 for x in range(10)] for y in range(len(data.username_list))]
        for row in range(len(data.username_list)):
            rows[row][0] = data.realname_list[row]
            rows[row][1] = '@' + data.username_list[row]
            rows[row][2] = data.url_list[row]
            rows[row][3] = data.location_list[row]
            rows[row][4] = data.repo_list[row]
            rows[row][5] = data.star_list[row].strip()
            rows[row][6] = data.followers_list[row].strip()
            rows[row][7] = data.following_list[row].strip()
            rows[row][8] = data.email_list[row]
            rows[row][9] = data.type_list[row]
        csv_file = data.header + '.csv'  # Name of the CSV file
        file_path = os.path.join("./", csv_file)
        with open(file_path, 'w', encoding="utf-8", newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(fields)
            csvwriter.writerows(rows)
        colors.success("Saved the data into " + file_path, True)
        # Use a parameterized query so quotes in scraped values cannot
        # break (or inject into) the SQL statement.
        sql = ("INSERT INTO allinfo (name, username, website, location, "
               "repositories, stars, followers, following, email, type) "
               "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
        for arr in rows:
            mycursor.execute(sql, tuple(arr))
        mydb.commit()
def stardox(repo_link):
    repository_link = repo_link
    try:
        html = requests.get(repository_link, timeout=8).text
    except (requests.exceptions.RequestException,
            requests.exceptions.HTTPError):
        colors.error(
            "Enter the repository URL in the given format "
            "[ https://github.com/username/repository_name ]")
        sys.exit(1)
    if not verify_url(html):
        colors.error("Repository not found: " + repository_link)
        sys.exit(1)
    soup1 = BeautifulSoup(html, "lxml")
    title = getting_header(soup1)  # Getting the title of the page
    data.header = title            # Stored as the project title / CSV name
    star_value = watch_value = fork_value = 0
    for a_tag in soup1.findAll("a"):  # Totals shown on the repository page
        string = a_tag.get("href")
        if string is None:
            continue
        if string.endswith("/watchers"):    # Total watchers
            watch_value = a_tag.get_text().strip()
        if string.endswith("/stargazers"):  # Total stargazers
            star_value = a_tag.get_text().strip()
        if string.endswith("/members"):     # Total forks
            fork_value = a_tag.get_text().strip()
            break
stargazer_link = repository_link + "/stargazers"
watchers_link = repository_link + "/watchers"
forkers_link = repository_link + "/network/members"
while (stargazer_link is not None):
stargazer_html = requests.get(stargazer_link).text
soup2 = BeautifulSoup(stargazer_html, "lxml")
a_next = soup2.findAll("a")
for a in a_next:
if a.get_text() == "Next":
stargazer_link = a.get('href')
break
else:
stargazer_link = None
follow_names = soup2.findAll("h3", {"class": "follow-list-name"})
for name in follow_names:
a_tag = name.findAll("a")
data.name_list.append(a_tag[0].get_text())
username = a_tag[0].get("href")
data.username_list.append(username[1:])
data.type_list.append("Stargazer")
    # Watchers: same pagination pattern; note the trailing break means
    # only the first page of watchers is collected.
    while watchers_link is not None:
        watchers_html = requests.get(watchers_link).text
        soup2 = BeautifulSoup(watchers_html, "lxml")
        for a in soup2.findAll("a"):
            if a.get_text() == "Next":
                watchers_link = a.get('href')
                break
        else:
            watchers_link = None
        follow_names = soup2.findAll("h3", {"class": "follow-list-name"})
        for name in follow_names:
            a_tag = name.findAll("a")
            data.name_list.append(a_tag[0].get_text())
            username = a_tag[0].get("href")
            data.username_list.append(username[1:])
            data.type_list.append("Watcher")
        break
    # The network/members page lists every fork on a single page, so it is
    # fetched once. (The original page-counter loop re-requested the same
    # page and appended duplicate entries.)
    if forkers_link is not None:
        forkers_html = requests.get(forkers_link).text
        soup2 = BeautifulSoup(forkers_html, "lxml")
        follow_names = soup2.findAll("div", {"class": "repo"})
        for name in follow_names:
            a_tag = name.findAll("a")
            data.name_list.append(a_tag[1].get_text())
            username = a_tag[1].get("href")
            data.username_list.append(username[1:])
            data.type_list.append("Forker")
    # Visit every collected user's profile and scrape their details.
    for username in data.username_list:
        starer_url = "https://github.com/" + username
        user_html = requests.get(starer_url).text
        soup3 = BeautifulSoup(user_html, "lxml")
        # Source repositories only, so the latest commit is the user's own.
        repo_data = requests.get(
            "https://github.com/{}?tab=repositories&type=source"
            .format(username)).text
        repo_soup = BeautifulSoup(repo_data, "lxml")
        repositories_list = []
        for a_tag in repo_soup.findAll("a"):
            if a_tag.get("itemprop") == "name codeRepository":
                repositories_list.append(a_tag.get_text().strip())
        if repositories_list:
            # Getting the user's e-mail from their newest commit patch.
            email = get_latest_commit(repositories_list[0], username)
            data.email_list.append(str(email))
        else:
            data.email_list.append("Not enough information.")
        users_name = soup3.findAll("span", {"class": "p-name"})
        if users_name:
            data.realname_list.append(users_name[0].get_text())
        else:
            data.realname_list.append("Nothing")
        user_url = soup3.findAll("a", {"rel": "nofollow me"})
        if len(user_url) > 1:
            data.url_list.append(user_url[1].get_text())
        else:
            data.url_list.append("Nothing.")
        location = soup3.findAll("span", {"class": "p-label"})
        if location:
            data.location_list.append(location[0].get_text())
        else:
            data.location_list.append("Nothing.")
        # The profile's underline-nav tabs carry the per-tab counters
        # (repositories, stars, followers, following).
        items = soup3.findAll("a", {"class": "UnderlineNav-item"})
        for item in items[1:]:
            href = item.get("href")
            count_span = item.findAll("span")
            if href.endswith("repositories"):
                data.repo_list.append(count_span[0].get_text())
            elif href.endswith("stars"):
                data.star_list.append(count_span[0].get_text())
            elif href.endswith("followers"):
                data.followers_list.append(count_span[0].get_text())
            elif href.endswith("following"):
                data.following_list.append(count_span[0].get_text())
    save_info()
if __name__ == '__main__':
    from bottle import run, request, post

    @post('/')
    def index():
        # Expect the repository URL in the "repo" form field.
        repo = request.forms.get("repo")
        if repo:
            try:
                stardox(repo)
                return "Successful"
            except Exception:
                return "Scraping failed."
        return "Missing 'repo' form field."

    run(host='localhost', port=8080, debug=True)
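# Example usage (assumes MySQL is running locally with a `test` database,
# and that this file is saved as stardox_server.py -- the name is
# illustrative):
#   $ python stardox_server.py
#   $ curl -X POST -d "repo=https://github.com/username/repository_name" \
#          http://localhost:8080/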