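"""Scrape a GitHub repository's stargazers, watchers and forkers,
collect each user's profile details (real name, website, location,
repository/star/follower/following counts and the e-mail address taken
from their latest commit) and save the results to a MySQL table and a
CSV file. The scraper is exposed as an HTTP POST endpoint via Bottle."""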
import sys
import os
import csv

import requests
import mysql.connector
from bs4 import BeautifulSoup
from bottle import run, request, post

import colors  # project-local helper for coloured terminal output
import data    # project-local module used as a shared in-memory store
# Local MySQL connection; credentials are development defaults.
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="",
    database="test"
)
mycursor = mydb.cursor()
mycursor.execute(
    "CREATE TABLE IF NOT EXISTS `allinfo` ("
    "id INT AUTO_INCREMENT PRIMARY KEY, name TEXT, location TEXT, "
    "username TEXT, website TEXT, followers INT, stars INT, "
    "following INT, email TEXT, type VARCHAR(10), repositories INT)"
)
def getting_header(soup_text):
    # The page title has the form "user/repo: description"; keep the
    # part between the '/' and the ':' as the header.
    title = soup_text.title.get_text()
    start = title.find('/')
    stop = title.find(':')
    return title[start + 1: stop]
def format_url(url):
    # Normalise the repository URL to the https:// form.
    if url.startswith('http://'):
        url = url.replace('http', 'https', 1)
    elif url.startswith('www.'):
        url = url.replace('www.', 'https://', 1)
    elif url.startswith('https://'):
        pass
    else:
        colors.error("Enter the repository's url in the given format "
                     "[ https://github.com/username/repository_name ]")
    return url
def verify_url(page_data):
    # A profile page ("Popular repositories") or a 404 page means the
    # link does not point to an existing repository.
    page = str(page_data)
    if "Popular repositories" in page:
        return False
    if "Page not found" in page:
        return False
    return True
def get_latest_commit(repo_name, username):
    # Extract the user's e-mail address from the .patch view of their
    # latest non-merge commit in the given repository.
    email = ""
    commit_data = requests.get(
        "https://github.com/{}/{}/commits?author={}".format(
            username, repo_name, username)).text
    soup = BeautifulSoup(commit_data, "lxml")
    for a_tag in soup.findAll("a"):
        url = a_tag.get("href")
        if url and url.startswith("/{}/{}/commit/".format(username, repo_name)):
            label = str(a_tag.get("aria-label"))
            if "Merge" not in label and label != "None":
                patch_data = requests.get(
                    "https://github.com{}.patch".format(url)).text
                try:
                    # The patch header contains "From: Name <email>".
                    start = patch_data.index("<")
                    stop = patch_data.index(">")
                    email = patch_data[start + 1: stop]
                except ValueError:
                    return "Not enough information."
                break
    if email != "":
        return email
    return "Not enough information."
def save_info(dat='stardox'):
    # Start from an empty table on every run.
    mycursor.execute("TRUNCATE TABLE allinfo")
    mydb.commit()
    if dat == 'stardox':
        fields = ["Realname", "Username", "Website", "Location",
                  "Repositories", "Stars", "Followers", "Following",
                  "Email", "Type"]
        rows = [[0 for x in range(10)] for y in range(len(data.username_list))]
        for row in range(len(data.username_list)):
            rows[row][0] = data.realname_list[row]
            rows[row][1] = '@' + data.username_list[row]
            rows[row][2] = data.url_list[row]
            rows[row][3] = data.location_list[row]
            rows[row][4] = data.repo_list[row]
            rows[row][5] = data.star_list[row].strip()
            rows[row][6] = data.followers_list[row].strip()
            rows[row][7] = data.following_list[row].strip()
            rows[row][8] = data.email_list[row]
            rows[row][9] = data.type_list[row]
        csv_file = data.header + '.csv'  # Name of the csv file
        file_path = os.path.join("./", csv_file)
        with open(file_path, 'w', encoding="utf-8", newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(fields)
            csvwriter.writerows(rows)
        colors.success("Saved the data into " + file_path, True)
        # Parameterized insert keeps the quoting correct and avoids
        # SQL injection through scraped profile fields.
        sql = ("INSERT INTO allinfo (name, username, website, location, "
               "repositories, stars, followers, following, email, type) "
               "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
        for arr in rows:
            mycursor.execute(sql, arr)
        mydb.commit()
def stardox(repo_link):
    repository_link = repo_link
    try:
        html = requests.get(repository_link, timeout=8).text
    except (requests.exceptions.RequestException,
            requests.exceptions.HTTPError):
        colors.error("Enter the repository's url in the given format "
                     "[ https://github.com/username/repository_name ]")
        sys.exit(1)
    if not verify_url(html):
        colors.error("The given url does not point to a repository "
                     "[ https://github.com/username/repository_name ]")
        sys.exit(1)
    soup1 = BeautifulSoup(html, "lxml")
    title = getting_header(soup1)  # Getting the title of the page
    data.header = title  # Storing title of the page as Project Title
    star_value = watch_value = fork_value = 0
    # Read the star, watcher and fork counts off the repository page.
    for a_tag in soup1.findAll("a"):
        string = a_tag.get("href")
        if string is None:
            continue
        if string.endswith("/watchers"):
            watch_value = a_tag.get_text().strip()
        if string.endswith("/stargazers"):
            star_value = a_tag.get_text().strip()
        if string.endswith("/members"):
            fork_value = a_tag.get_text().strip()
            break
    stargazer_link = repository_link + "/stargazers"
    watchers_link = repository_link + "/watchers"
    forkers_link = repository_link + "/network/members"
    # Walk the paginated stargazers list, following the "Next" link
    # until it disappears.
    while stargazer_link is not None:
        stargazer_html = requests.get(stargazer_link).text
        soup2 = BeautifulSoup(stargazer_html, "lxml")
        for a in soup2.findAll("a"):
            if a.get_text() == "Next":
                stargazer_link = a.get('href')
                break
        else:
            stargazer_link = None
        follow_names = soup2.findAll("h3", {"class": "follow-list-name"})
        for name in follow_names:
            a_tag = name.findAll("a")
            data.name_list.append(a_tag[0].get_text())
            username = a_tag[0].get("href")
            data.username_list.append(username[1:])
            data.type_list.append("Stargazer")
    # Same pagination walk over the watchers list.
    while watchers_link is not None:
        watchers_html = requests.get(watchers_link).text
        soup2 = BeautifulSoup(watchers_html, "lxml")
        for a in soup2.findAll("a"):
            if a.get_text() == "Next":
                watchers_link = a.get('href')
                break
        else:
            watchers_link = None
        follow_names = soup2.findAll("h3", {"class": "follow-list-name"})
        for name in follow_names:
            a_tag = name.findAll("a")
            data.name_list.append(a_tag[0].get_text())
            username = a_tag[0].get("href")
            data.username_list.append(username[1:])
            data.type_list.append("Watcher")
    # The network/members page lists every fork on a single page, so a
    # single request is enough.
    forkers_html = requests.get(forkers_link).text
    soup2 = BeautifulSoup(forkers_html, "lxml")
    follow_names = soup2.findAll("div", {"class": "repo"})
    for name in follow_names:
        a_tag = name.findAll("a")
        data.name_list.append(a_tag[1].get_text())
        username = a_tag[1].get("href")
        data.username_list.append(username[1:])
        data.type_list.append("Forker")
    # Visit every collected user's profile and source repositories.
    for username in data.username_list:
        starer_url = "https://github.com/" + username
        user_html = requests.get(starer_url).text
        soup3 = BeautifulSoup(user_html, "lxml")
        repo_data = requests.get(
            "https://github.com/{}?tab=repositories&type=source"
            .format(username)).text
        repo_soup = BeautifulSoup(repo_data, "lxml")
        repositories_list = []
        for a_tag in repo_soup.findAll("a"):
            if a_tag.get("itemprop") == "name codeRepository":
                repositories_list.append(a_tag.get_text().strip())
        if len(repositories_list) > 0:
            # Getting the user's e-mail from their latest commit
            email = get_latest_commit(repositories_list[0], username)
            data.email_list.append(str(email))
        else:
            data.email_list.append("Not enough information.")
        users_name = soup3.findAll("span", {"class": "p-name"})
        if users_name:
            data.realname_list.append(users_name[0].get_text())
        else:
            data.realname_list.append("Nothing")
        user_url = soup3.findAll("a", {"rel": "nofollow me"})
        if user_url:
            data.url_list.append(user_url[0].get_text())
        else:
            data.url_list.append("Nothing.")
        location = soup3.findAll("span", {"class": "p-label"})
        if location:
            data.location_list.append(location[0].get_text())
        else:
            data.location_list.append("Nothing.")
        # Read the Repositories/Stars/Followers/Following counters from
        # the profile's navigation bar (the first item is "Overview").
        items = soup3.findAll("a", {"class": "UnderlineNav-item"})
        for item in items[1:]:
            if item.get("href").endswith("repositories"):
                data.repo_list.append(item.findAll("span")[0].get_text())
            elif item.get("href").endswith("stars"):
                data.star_list.append(item.findAll("span")[0].get_text())
            elif item.get("href").endswith("followers"):
                data.followers_list.append(item.findAll("span")[0].get_text())
            elif item.get("href").endswith("following"):
                data.following_list.append(item.findAll("span")[0].get_text())
    save_info()
if __name__ == '__main__':
    @post('/')
    def index():
        # Expects a form field "repo" holding the repository's URL.
        repo = request.forms.get("repo")
        if repo:
            try:
                stardox(repo)
                return "Successful"
            except Exception as exc:
                return "Failed: {}".format(exc)
        return "Missing 'repo' form field."

    run(host='localhost', port=8080, debug=True)
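# Example request against the running server (assumes a local MySQL
# instance with a `test` database is reachable):
#   curl -X POST -d "repo=https://github.com/username/repository_name" http://localhost:8080/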