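# Stardox-style GitHub scraper: collects every stargazer, watcher and forker
# of a repository, visits each user's profile to scrape their details
# (real name, website, location, repo/star/follower/following counts, and the
# e-mail address from their latest commit), then writes the results to a CSV
# file and a MySQL table. A small bottle web server accepts the repository
# URL via POST. Requires the local `colors` and `data` helper modules from
# the stardox project.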
import sys
import os
import csv
import requests
from bs4 import BeautifulSoup
import mysql.connector
import colors  # local module: colored console output
import data    # local module: shared lists that accumulate scrape results
# MySQL connection; the credentials below match a default local setup.
mydb = mysql.connector.connect(
    host="localhost",
    user="root",
    passwd="",
    database="test"
)
mycursor = mydb.cursor()
mycursor.execute(
    "CREATE TABLE IF NOT EXISTS `allinfo` ("
    "id INT AUTO_INCREMENT PRIMARY KEY, "
    "name TEXT, location TEXT, username TEXT, website TEXT, "
    "followers INT, stars INT, following INT, email TEXT, "
    "type VARCHAR(10), repositories INT)"
)
def getting_header(soup_text):
    # The page title looks like "GitHub - username/repository: description";
    # the slice between "/" and ":" yields the repository name.
    title = soup_text.title.get_text()
    start = title.find('/')
    stop = title.find(':')
    return title[start + 1: stop]
def format_url(url):
    # Normalize the repository URL to the https:// form.
    if url.startswith('http://'):
        url = url.replace('http', 'https')
    elif url.startswith('www.'):
        url = url.replace('www.', 'https://')
    elif url.startswith('https://') or url.startswith('https://www.'):
        pass
    else:
        colors.error("Enter the repository URL in the given format "
                     "[ https://github.com/username/repository_name ]")
    return url
def verify_url(page_data):
    # Return True when the fetched page looks like a repository page.
    text = str(page_data)
    if "Popular repositories" in text:  # A profile page, not a repository
        return False
    if "Page not found" in text:        # GitHub's 404 page
        return False
    return True
def get_latest_commit(repo_name, username):
    # Scrape the author's e-mail address from the newest non-merge commit.
    email = ""
    commit_data = requests.get(
        "https://github.com"
        "/{}/{}/commits?author={}".format(username, repo_name, username)).text
    soup = BeautifulSoup(commit_data, "lxml")
    for a_tag in soup.findAll("a"):
        URL = a_tag.get("href")
        if URL and URL.startswith("/{}/{}/commit/".format(username, repo_name)):
            label = str(a_tag.get("aria-label"))
            if "Merge" not in label and label != "None":
                # The .patch view embeds the author as "Name <email>".
                patch_data = requests.get("https://github.com{}{}".format(
                    URL, ".patch")).text
                try:
                    start = patch_data.index("<")
                    stop = patch_data.index(">")
                    email = patch_data[start + 1: stop]
                except ValueError:
                    return "Not enough information."
                break
    if email != "":
        return email
    return "Not enough information."
def save_info(dat='stardox'):
    # Start from a clean table on every run.
    mycursor.execute("TRUNCATE TABLE allinfo")
    mydb.commit()
    if dat == 'stardox':
        fields = ["Realname", "Username", "Website", "Location",
                  "Repositories", "Stars", "Followers", "Following",
                  "Email", "Type"]
        rows = [[0 for x in range(10)] for y in range(len(data.username_list))]
        for row in range(len(data.username_list)):
            rows[row][0] = data.realname_list[row]
            rows[row][1] = '@' + data.username_list[row]
            rows[row][2] = data.url_list[row]
            rows[row][3] = data.location_list[row]
            rows[row][4] = data.repo_list[row]
            rows[row][5] = data.star_list[row].strip()
            rows[row][6] = data.followers_list[row].strip()
            rows[row][7] = data.following_list[row].strip()
            rows[row][8] = data.email_list[row]
            rows[row][9] = data.type_list[row]
        csv_file = data.header + '.csv'  # Name of the CSV file
        file_path = os.path.join("./", csv_file)
        with open(file_path, 'w', encoding="utf-8", newline='') as csvfile:
            csvwriter = csv.writer(csvfile)
            csvwriter.writerow(fields)
            csvwriter.writerows(rows)
        colors.success("Saved the data into " + file_path, True)
        # Use a parameterized query so quotes in scraped values cannot
        # break (or inject into) the SQL statement.
        sql = ("INSERT INTO allinfo (name, username, website, location, "
               "repositories, stars, followers, following, email, type) "
               "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)")
        for arr in rows:
            mycursor.execute(sql, tuple(arr))
        mydb.commit()
def stardox(repo_link):
    repository_link = repo_link
    try:
        html = requests.get(repository_link, timeout=8).text
    except (requests.exceptions.RequestException,
            requests.exceptions.HTTPError):
        colors.error(
            "Enter the repository URL in the given format "
            "[ https://github.com/username/repository_name ]")
        sys.exit(1)
    if not verify_url(html):
        colors.error("Repository not found: " + repository_link)
        sys.exit(1)
    soup1 = BeautifulSoup(html, "lxml")
    title = getting_header(soup1)  # Getting the title of the page
    data.header = title            # Stored as the project title / CSV name
    star_value = watch_value = fork_value = 0
    for a_tag in soup1.findAll("a"):  # Totals shown on the repository page
        string = a_tag.get("href")
        if string is None:
            continue
        if string.endswith("/watchers"):    # Total watchers
            watch_value = a_tag.get_text().strip()
        if string.endswith("/stargazers"):  # Total stargazers
            star_value = a_tag.get_text().strip()
        if string.endswith("/members"):     # Total forks
            fork_value = a_tag.get_text().strip()
            break
stargazer_link = repository_link + "/stargazers"
watchers_link = repository_link + "/watchers"
forkers_link = repository_link + "/network/members"
while (stargazer_link is not None):
stargazer_html = requests.get(stargazer_link).text
soup2 = BeautifulSoup(stargazer_html, "lxml")
a_next = soup2.findAll("a")
for a in a_next:
if a.get_text() == "Next":
stargazer_link = a.get('href')
break
else:
stargazer_link = None
follow_names = soup2.findAll("h3", {"class": "follow-list-name"})
for name in follow_names:
a_tag = name.findAll("a")
data.name_list.append(a_tag[0].get_text())
username = a_tag[0].get("href")
data.username_list.append(username[1:])
data.type_list.append("Stargazer")
    # Watchers: same pagination pattern; note the trailing break means
    # only the first page of watchers is collected.
    while watchers_link is not None:
        watchers_html = requests.get(watchers_link).text
        soup2 = BeautifulSoup(watchers_html, "lxml")
        for a in soup2.findAll("a"):
            if a.get_text() == "Next":
                watchers_link = a.get('href')
                break
        else:
            watchers_link = None
        follow_names = soup2.findAll("h3", {"class": "follow-list-name"})
        for name in follow_names:
            a_tag = name.findAll("a")
            data.name_list.append(a_tag[0].get_text())
            username = a_tag[0].get("href")
            data.username_list.append(username[1:])
            data.type_list.append("Watcher")
        break
    # The network/members page lists every fork on a single page, so it is
    # fetched once. (The original page-counter loop re-requested the same
    # page and appended duplicate entries.)
    if forkers_link is not None:
        forkers_html = requests.get(forkers_link).text
        soup2 = BeautifulSoup(forkers_html, "lxml")
        follow_names = soup2.findAll("div", {"class": "repo"})
        for name in follow_names:
            a_tag = name.findAll("a")
            data.name_list.append(a_tag[1].get_text())
            username = a_tag[1].get("href")
            data.username_list.append(username[1:])
            data.type_list.append("Forker")
    # Visit every collected user's profile and scrape their details.
    for username in data.username_list:
        starer_url = "https://github.com/" + username
        user_html = requests.get(starer_url).text
        soup3 = BeautifulSoup(user_html, "lxml")
        # Source repositories only, so the latest commit is the user's own.
        repo_data = requests.get(
            "https://github.com/{}?tab=repositories&type=source"
            .format(username)).text
        repo_soup = BeautifulSoup(repo_data, "lxml")
        repositories_list = []
        for a_tag in repo_soup.findAll("a"):
            if a_tag.get("itemprop") == "name codeRepository":
                repositories_list.append(a_tag.get_text().strip())
        if repositories_list:
            # Getting the user's e-mail from their newest commit patch.
            email = get_latest_commit(repositories_list[0], username)
            data.email_list.append(str(email))
        else:
            data.email_list.append("Not enough information.")
        users_name = soup3.findAll("span", {"class": "p-name"})
        if users_name:
            data.realname_list.append(users_name[0].get_text())
        else:
            data.realname_list.append("Nothing")
        user_url = soup3.findAll("a", {"rel": "nofollow me"})
        if len(user_url) > 1:
            data.url_list.append(user_url[1].get_text())
        else:
            data.url_list.append("Nothing.")
        location = soup3.findAll("span", {"class": "p-label"})
        if location:
            data.location_list.append(location[0].get_text())
        else:
            data.location_list.append("Nothing.")
        # The profile's underline-nav tabs carry the per-tab counters
        # (repositories, stars, followers, following).
        items = soup3.findAll("a", {"class": "UnderlineNav-item"})
        for item in items[1:]:
            href = item.get("href")
            count_span = item.findAll("span")
            if href.endswith("repositories"):
                data.repo_list.append(count_span[0].get_text())
            elif href.endswith("stars"):
                data.star_list.append(count_span[0].get_text())
            elif href.endswith("followers"):
                data.followers_list.append(count_span[0].get_text())
            elif href.endswith("following"):
                data.following_list.append(count_span[0].get_text())
    save_info()
if __name__ == '__main__':
    from bottle import run, request, post

    @post('/')
    def index():
        # Expect the repository URL in the "repo" form field.
        repo = request.forms.get("repo")
        if repo:
            try:
                stardox(repo)
                return "Successful"
            except Exception:
                return "Scraping failed."
        return "Missing 'repo' form field."

    run(host='localhost', port=8080, debug=True)
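# Example usage (assumes MySQL is running locally with a `test` database,
# and that this file is saved as stardox_server.py -- the name is
# illustrative):
#   $ python stardox_server.py
#   $ curl -X POST -d "repo=https://github.com/username/repository_name" \
#          http://localhost:8080/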