|
import http.client |
|
import json |
|
import operator |
|
from collections import defaultdict |
|
from itertools import groupby |
|
import heapq |
|
|
|
from bs4 import BeautifulSoup |
|
from github import Github |
|
from pybitbucket.repository import Repository |
|
|
|
# --- Remote service handles ------------------------------------------------
# NOTE(review): the token literal looks like a redacted placeholder; a real
# GitHub token presumably has to be substituted before running -- confirm.
gh = Github('{MUH SECRIT}')
connection = http.client.HTTPSConnection('minecraft.curseforge.com')

# Listing path requested on the host above: mods filtered by one game-version
# id and sorted by downloads (see the query-string parameters).
baseUrl = '/mc-mods?filter-game-version=2020709689%3A6580&filter-sort=downloads'

# --- Pagination state ------------------------------------------------------
# ``nextUrl`` is the listing page to fetch next; the crawl loop stops once it
# becomes None.  ``page`` is only used for progress messages.
page = 1
nextUrl = baseUrl + '&page=' + str(page)

# --- Accumulators ----------------------------------------------------------
projectData = []          # one dict per inspected project
stats = defaultdict(int)  # language -> number of projects
repos = 0                 # repositories whose language was resolved
|
|
|
|
|
def inspect_github(repo_link):
    """Return the dominant language of a GitHub repository, lower-cased.

    The ``https://github.com/`` prefix is removed from *repo_link* and the
    remainder is looked up through the module-level ``gh`` client.  Among
    the entries returned by ``get_languages()`` the one with the largest
    value wins.

    Returns ``"unknown"`` when no languages are reported, and ``"failure"``
    when any exception is raised during the lookup (the link is printed in
    that case).  Every resolved repository increments the module-level
    ``repos`` counter.
    """
    global repos
    full_name = repo_link.replace('https://github.com/', '')
    try:
        by_language = gh.get_repo(full_name).get_languages()
        if len(by_language) > 0:
            dominant, _ = max(by_language.items(), key=lambda pair: pair[1])
            repos += 1
            return dominant.lower()
        print('No language found')
        return "unknown"
    except Exception:
        # Best effort: report which link could not be inspected and move on.
        print(repo_link)
        return "failure"
|
|
|
|
|
def inspect_bitbucket(repo_link):
    """Return the language Bitbucket reports for a repository, lower-cased.

    The ``https://bitbucket.org/`` prefix is removed from *repo_link* and the
    remainder is passed to ``Repository.find_repository_by_full_name``; the
    ``'language'`` entry of the result's ``data`` mapping is used.

    Returns:
        The lower-cased language name, ``"unknown"`` when the language entry
        is empty or ``None``, or ``"failure"`` when the lookup raises (the
        link is printed in that case).

    Every resolved repository increments the module-level ``repos`` counter.
    """
    global repos
    repo = repo_link.replace('https://bitbucket.org/', '')
    try:
        result = Repository.find_repository_by_full_name(repo)
        language = result.data['language']
    except Exception:
        # Best effort: report which link could not be inspected and move on.
        print(repo_link)
        return "failure"

    # Covers both an empty string and None (len(None) used to raise and be
    # misreported as a failure).
    if not language:
        print('No language found')
        return "unknown"

    repos += 1
    # Return the language that was just fetched.  The previous code returned
    # ``lang.lower()``, i.e. the caller's module-level variable, so every
    # Bitbucket project was recorded as "unknown".
    return language.lower()
|
|
|
|
|
# Walk the paginated mod listing.  For every project on a page, fetch its
# project page and record the repository language, total downloads and
# licence as one dict in ``projectData``.
while nextUrl is not None:
    connection.request('GET', nextUrl)
    response = connection.getresponse()
    soup = BeautifulSoup(response.read().decode(), 'html.parser')
    projects = soup.select('ul.project-listing .name-wrapper a')

    for project in projects:
        projectUrl = project['href']
        print('Inspecting ' + projectUrl)
        # The previous response has been read completely above, so the same
        # connection can be reused for the project page.
        connection.request('GET', projectUrl)
        response = connection.getresponse()
        projectSoup = BeautifulSoup(response.read().decode(), 'html.parser')

        # --- Language: taken from the "Source" entry of the project menu ---
        menu = projectSoup.select('.e-header-nav .e-menu a')
        lang = "unknown"
        repoLink = None
        for link in menu:
            # get_text() rather than .string: .string is None for an anchor
            # with more than one child, which made the membership test raise
            # TypeError and abort the whole crawl.
            if "Source" not in link.get_text():
                continue
            href = link.get('href')
            if not href:
                continue
            repoLink = href
            if "github" in repoLink:
                lang = inspect_github(repoLink)
            elif "bitbucket" in repoLink:
                lang = inspect_bitbucket(repoLink)

        # --- Downloads and licence: taken from the project details list ---
        licence = 'unknown'
        downloads = 0
        details = projectSoup.select('.cf-details.project-details li')
        for detail in details:
            label = detail.select_one('.info-label')
            if label is None:
                continue
            detail_label = label.get_text().strip()
            if detail_label == 'Total Downloads':
                data = detail.select_one('.info-data')
                if data is not None:
                    downloads = int(data.get_text().replace(',', ''))
            elif detail_label == 'License':
                licenceLink = detail.select_one('.info-data a')
                if licenceLink is None:
                    # No link to read the licence from; keep 'unknown'.
                    continue
                licence = licenceLink.string
                if licence is None:
                    # Fall back to the title attribute of a nested <span>.
                    span = licenceLink.select_one('span')
                    title = span.get('title') if span is not None else None
                    licence = title if title is not None else 'unknown'
                licence = licence.strip()

        # A failed repository lookup is downgraded to 'unknown', but the
        # offending link is kept in the record's notice.
        notice = 'none'
        if lang == 'failure':
            notice = 'Failed: ' + repoLink
            lang = 'unknown'

        projectData.append({
            'url': 'https://minecraft.curseforge.com' + projectUrl,
            'language': lang,
            'downloads': downloads,
            'license': licence,
            'notice': notice,
        })

    print("Done inspecting page " + str(page))
    # Follow the pager's rel="next" link; its absence ends the crawl.
    next_button = soup.select_one('.listing-header .paging-list a[rel="next"]')
    if next_button is None:
        nextUrl = None
    else:
        nextUrl = next_button["href"]
    page += 1
|
|
|
# Bucket the collected projects by language.  itertools.groupby only merges
# *adjacent* items with equal keys; projectData is in crawl order, not sorted
# by language, so the previous groupby-based code overwrote a language's
# bucket with each later run and under-counted it.  Plain dict bucketing has
# no ordering requirement.
grouped = {}
for entry in projectData:
    grouped.setdefault(entry['language'], []).append(entry)

# Per language: store the project count in ``stats`` and print the URLs of
# the three projects with the most downloads.
for language, entries in grouped.items():
    stats[language] = len(entries)
    top = heapq.nlargest(3, entries, key=operator.itemgetter('downloads'))
    print(language + ": " + str([entry['url'] for entry in top]))
|
|
|
# Persist every collected project record as JSON indented by four spaces.
with open('data-1.12.json', 'w') as report:
    report.write(json.dumps(projectData, indent=4))