Created
January 29, 2019 06:03
-
-
Save 648trindade/4b86d61b139e50a4c7ff4e173b7c636b to your computer and use it in GitHub Desktop.
GGJ 2019 Scraper
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
import time
from multiprocessing import Pool
from timeit import default_timer as timer

from bs4 import BeautifulSoup
from requests import RequestException, get
# (code, display name) pairs; the code is the value sent as the ``country``
# query parameter and the name is used as the row label in the CSV output.
countries = (
    ("AF", "Afghanistan"), ("AX", "Aland Islands"), ("AL", "Albania"), ("DZ", "Algeria"),
    ("AS", "American Samoa"), ("AD", "Andorra"), ("AO", "Angola"), ("AI", "Anguilla"),
    ("AQ", "Antarctica"), ("AG", "Antigua and Barbuda"), ("AR", "Argentina"), ("AM", "Armenia"),
    ("AW", "Aruba"), ("AU", "Australia"), ("AT", "Austria"), ("AZ", "Azerbaijan"),
    ("BS", "Bahamas"), ("BH", "Bahrain"), ("BD", "Bangladesh"), ("BB", "Barbados"),
    ("BY", "Belarus"), ("BE", "Belgium"), ("BZ", "Belize"), ("BJ", "Benin"),
    ("BM", "Bermuda"), ("BT", "Bhutan"), ("BO", "Bolivia"), ("BA", "Bosnia and Herzegovina"),
    ("BW", "Botswana"), ("BV", "Bouvet Island"), ("BR", "Brazil"),
    ("IO", "British Indian Ocean Territory"), ("VG", "British Virgin Islands"), ("BN", "Brunei"),
    ("BG", "Bulgaria"), ("BF", "Burkina Faso"), ("BI", "Burundi"), ("KH", "Cambodia"),
    ("CM", "Cameroon"), ("CA", "Canada"), ("CV", "Cape Verde"), ("BQ", "Caribbean Netherlands"),
    ("KY", "Cayman Islands"), ("CF", "Central African Republic"), ("TD", "Chad"), ("CL", "Chile"),
    ("CN", "China"), ("CX", "Christmas Island"), ("CC", "Cocos (Keeling) Islands"),
    ("CO", "Colombia"), ("KM", "Comoros"), ("CG", "Congo (Brazzaville)"),
    ("CD", "Congo (Kinshasa)"), ("CK", "Cook Islands"), ("CR", "Costa Rica"), ("HR", "Croatia"),
    ("CU", "Cuba"), ("CW", "Curaçao"), ("CY", "Cyprus"), ("CZ", "Czech Republic"),
    ("DK", "Denmark"), ("DJ", "Djibouti"), ("DM", "Dominica"), ("DO", "Dominican Republic"),
    ("EC", "Ecuador"), ("EG", "Egypt"), ("SV", "El Salvador"), ("GQ", "Equatorial Guinea"),
    ("ER", "Eritrea"), ("EE", "Estonia"), ("SZ", "Eswatini"), ("ET", "Ethiopia"),
    ("FK", "Falkland Islands"), ("FO", "Faroe Islands"), ("FJ", "Fiji"), ("FI", "Finland"),
    ("FR", "France"), ("GF", "French Guiana"), ("PF", "French Polynesia"),
    ("TF", "French Southern Territories"), ("GA", "Gabon"), ("GM", "Gambia"), ("GE", "Georgia"),
    ("DE", "Germany"), ("GH", "Ghana"), ("GI", "Gibraltar"), ("GR", "Greece"),
    ("GL", "Greenland"), ("GD", "Grenada"), ("GP", "Guadeloupe"), ("GU", "Guam"),
    ("GT", "Guatemala"), ("GG", "Guernsey"), ("GN", "Guinea"), ("GW", "Guinea-Bissau"),
    ("GY", "Guyana"), ("HT", "Haiti"), ("HM", "Heard Island and McDonald Islands"),
    ("HN", "Honduras"), ("HK", "Hong Kong S.A.R., China"), ("HU", "Hungary"), ("IS", "Iceland"),
    ("IN", "India"), ("ID", "Indonesia"), ("IR", "Iran"), ("IQ", "Iraq"),
    ("IE", "Ireland"), ("IM", "Isle of Man"), ("IL", "Israel"), ("IT", "Italy"),
    ("CI", "Ivory Coast"), ("JM", "Jamaica"), ("JP", "Japan"), ("JE", "Jersey"),
    ("JO", "Jordan"), ("KZ", "Kazakhstan"), ("KE", "Kenya"), ("KI", "Kiribati"),
    ("XK", "Kosovo"), ("KW", "Kuwait"), ("KG", "Kyrgyzstan"), ("LA", "Laos"),
    ("LV", "Latvia"), ("LB", "Lebanon"), ("LS", "Lesotho"), ("LR", "Liberia"),
    ("LY", "Libya"), ("LI", "Liechtenstein"), ("LT", "Lithuania"), ("LU", "Luxembourg"),
    ("MO", "Macao S.A.R., China"), ("MK", "Macedonia"), ("MG", "Madagascar"), ("MW", "Malawi"),
    ("MY", "Malaysia"), ("MV", "Maldives"), ("ML", "Mali"), ("MT", "Malta"),
    ("MH", "Marshall Islands"), ("MQ", "Martinique"), ("MR", "Mauritania"), ("MU", "Mauritius"),
    ("YT", "Mayotte"), ("MX", "Mexico"), ("FM", "Micronesia"), ("MD", "Moldova"),
    ("MC", "Monaco"), ("MN", "Mongolia"), ("ME", "Montenegro"), ("MS", "Montserrat"),
    ("MA", "Morocco"), ("MZ", "Mozambique"), ("MM", "Myanmar"), ("NA", "Namibia"),
    ("NR", "Nauru"), ("NP", "Nepal"), ("NL", "Netherlands"), ("AN", "Netherlands Antilles"),
    ("NC", "New Caledonia"), ("NZ", "New Zealand"), ("NI", "Nicaragua"), ("NE", "Niger"),
    ("NG", "Nigeria"), ("NU", "Niue"), ("NF", "Norfolk Island"),
    ("MP", "Northern Mariana Islands"), ("KP", "North Korea"), ("NO", "Norway"), ("OM", "Oman"),
    ("PK", "Pakistan"), ("PW", "Palau"), ("PS", "Palestinian Territory"), ("PA", "Panama"),
    ("PG", "Papua New Guinea"), ("PY", "Paraguay"), ("PE", "Peru"), ("PH", "Philippines"),
    ("PN", "Pitcairn"), ("PL", "Poland"), ("PT", "Portugal"), ("PR", "Puerto Rico"),
    ("QA", "Qatar"), ("RE", "Reunion"), ("RO", "Romania"), ("RU", "Russia"),
    ("RW", "Rwanda"), ("BL", "Saint Barthélemy"), ("SH", "Saint Helena"),
    ("KN", "Saint Kitts and Nevis"), ("LC", "Saint Lucia"), ("MF", "Saint Martin (French part)"),
    ("PM", "Saint Pierre and Miquelon"), ("VC", "Saint Vincent and the Grenadines"),
    ("WS", "Samoa"), ("SM", "San Marino"), ("ST", "Sao Tome and Principe"),
    ("SA", "Saudi Arabia"), ("SN", "Senegal"), ("RS", "Serbia"), ("SC", "Seychelles"),
    ("SL", "Sierra Leone"), ("SG", "Singapore"), ("SX", "Sint Maarten"), ("SK", "Slovakia"),
    ("SI", "Slovenia"), ("SB", "Solomon Islands"), ("SO", "Somalia"), ("ZA", "South Africa"),
    ("GS", "South Georgia and the South Sandwich Islands"), ("KR", "South Korea"),
    ("SS", "South Sudan"), ("ES", "Spain"), ("LK", "Sri Lanka"), ("SD", "Sudan"),
    ("SR", "Suriname"), ("SJ", "Svalbard and Jan Mayen"), ("SE", "Sweden"),
    ("CH", "Switzerland"), ("SY", "Syria"), ("TW", "Taiwan"), ("TJ", "Tajikistan"),
    ("TZ", "Tanzania"), ("TH", "Thailand"), ("TB", "Tibet"), ("TL", "Timor-Leste"),
    ("TG", "Togo"), ("TK", "Tokelau"), ("TO", "Tonga"), ("TT", "Trinidad and Tobago"),
    ("TN", "Tunisia"), ("TR", "Turkey"), ("TM", "Turkmenistan"),
    ("TC", "Turks and Caicos Islands"), ("TV", "Tuvalu"), ("VI", "U.S. Virgin Islands"),
    ("UG", "Uganda"), ("UA", "Ukraine"), ("AE", "United Arab Emirates"),
    ("GB", "United Kingdom"), ("US", "United States"),
    ("UM", "United States Minor Outlying Islands"), ("UY", "Uruguay"), ("UZ", "Uzbekistan"),
    ("VU", "Vanuatu"), ("VA", "Vatican"), ("VE", "Venezuela"), ("VN", "Vietnam"),
    ("WF", "Wallis and Futuna"), ("EH", "Western Sahara"), ("YE", "Yemen"), ("ZM", "Zambia"),
    ("ZW", "Zimbabwe"),
)
# (key, display name) pairs; the key is the value sent as the ``tools`` query
# parameter and the name becomes the matching CSV column header.
engines = (
    ("adventuregamestudio", "Adventure Game Studio"),
    ("aframe", "A-Frame"),
    ("lumberyard", "Amazon Lumberyard"),
    ("bitsy", "Bitsy game maker"),
    ("cocos2d", "Cocos 2D"),
    ("construct2", "Construct"),
    ("corona", "Corona SDK"),
    ("cry", "CryEngine"),
    ("defold", "Defold"),
    ("dotnet", ".Net"),
    ("enchant_js", "Enchant.JS"),
    ("eye_tracking", "Eye tracking equipment"),
    ("game_maker", "GameMaker"),
    ("game_salad", "Game Salad"),
    ("godot", "Godot Engine"),
    ("greenfoot", "Greenfoot"),
    ("haxe", "Haxe"),
    ("houdini", "Houdini"),
    ("inform", "Inform"),
    ("intel", "Intel XDK"),
    ("libgdx", "LibGDX"),
    ("lookingglass", "Looking Glass"),
    ("playcanvas", "Play Canvas"),
    ("playstation_mobile", "PlayStation Mobile"),
    ("processing", "Processing"),
    ("puzzlescript", "Puzzlescript"),
    ("rpgmaker", "RPG Maker"),
    ("renpy", "Ren/Py"),
    ("scratch", "Scratch"),
    ("sdl", "SDL"),
    ("stencyl", "Stencyl"),
    ("tabletoptech", "Tabletop Technology"),
    ("twine", "Twine"),
    ("unity", "Unity"),
    ("unreal_engine", "Unreal Engine"),
    ("vr", "VR head mount display"),
    ("sensoryx", "VRFree by Sensoryx"),
)

# Listing page queried for every country/engine combination.
URL = "https://globalgamejam.org/2019/games"
def make_request(params, timeout=30, retry_delay=1.0, max_attempts=None):
    """Return the game count shown on the listing page for the given filters.

    The page at ``URL`` is requested with ``params`` as the query string and
    the request is repeated until the server answers with HTTP 200. The count
    is then read from the element whose CSS class is ``view-header``.

    Args:
        params: Mapping of query-string filters passed to the request.
        timeout: Seconds to wait for a single HTTP request, so that a stalled
            connection cannot block the caller forever.
        retry_delay: Seconds to sleep between attempts, to avoid re-requesting
            in a tight loop while the server is failing.
        max_attempts: Upper bound on the number of attempts. ``None`` (the
            default) keeps retrying until a 200 response arrives.

    Returns:
        The integer parsed from the sixth whitespace-separated token of the
        header text, or 0 when the page has no ``view-header`` element.

    Raises:
        RuntimeError: If ``max_attempts`` is set and every attempt failed.
    """
    attempt = 0
    while True:
        attempt += 1
        try:
            page = get(URL, params=params, timeout=timeout)
        except RequestException:
            # Connection errors and timeouts are retried like a non-200 reply.
            page = None
        if page is not None and int(page.status_code) == 200:
            break
        if max_attempts is not None and attempt >= max_attempts:
            raise RuntimeError(
                'no HTTP 200 response from %s after %d attempts' % (URL, attempt)
            )
        time.sleep(retry_delay)

    soup = BeautifulSoup(page.text, 'html.parser')
    elem = soup.find(class_="view-header")
    # NOTE(review): index 5 presumably selects the total in the header
    # sentence -- confirm against the live page markup.
    return int(elem.getText().split()[5]) if elem else 0
def check_country(country_data):
    """Collect the per-engine game counts for one country.

    Args:
        country_data: A ``(country_key, country_name)`` pair, as stored in
            ``countries``.

    Returns:
        A list laid out as one CSV row: the country name, one count per entry
        of ``engines`` (in order), and finally the country total minus the sum
        of the per-engine counts.
    """
    start = timer()
    country_key, country = country_data
    params = {
        'title': '',
        'country': country_key,
        'city': '',
        'tools': 'All',
        'diversifier': 'All',
        'platforms': 'All',
        'other_tools': 'All',
    }
    # First ask for the country total with no engine filter applied.
    total = make_request(params)
    games = [country]
    count = 0
    if total:
        for key, _ in engines:
            params['tools'] = key
            number = make_request(params)
            count += number
            games.append(number)
    else:
        # No games at all for this country: skip the per-engine requests.
        games += [0 for _ in engines]
    # NOTE(review): if the site counts a game under several tools, this
    # remainder could be negative -- confirm how the filter behaves.
    games.append(total - count)
    print(country, ':', timer() - start, 'secs')
    return games
if __name__ == '__main__':
    # One task per country (chunksize=1), spread over a pool of 100 worker
    # processes; results come back in the same order as ``countries``.
    with Pool(100) as pool:
        data = pool.map(check_country, countries, chunksize=1)

    header = ['Country'] + [engine for _, engine in engines] + ['Other/Not Listed']

    # newline='' is what the csv module requires to avoid blank rows on
    # Windows; UTF-8 is set explicitly because some country names contain
    # non-ASCII characters.
    with open('jam_2019.csv', mode='w', newline='', encoding='utf-8') as jam_file:
        jam_writer = csv.writer(jam_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        jam_writer.writerow(header)
        jam_writer.writerows(data)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment