Skip to content

Instantly share code, notes, and snippets.

@648trindade
Created January 29, 2019 06:03
Show Gist options
  • Save 648trindade/4b86d61b139e50a4c7ff4e173b7c636b to your computer and use it in GitHub Desktop.
Save 648trindade/4b86d61b139e50a4c7ff4e173b7c636b to your computer and use it in GitHub Desktop.
GGJ 2019 Scraper
from bs4 import BeautifulSoup
from requests import get
from multiprocessing import Pool
from timeit import default_timer as timer
import csv
# (country code, display name) pairs. check_country() sends the code as the
# 'country' query filter and uses the display name as the first cell of the
# country's CSV row.
countries = (
    ("AF", "Afghanistan"),
    ("AX", "Aland Islands"),
    ("AL", "Albania"),
    ("DZ", "Algeria"),
    ("AS", "American Samoa"),
    ("AD", "Andorra"),
    ("AO", "Angola"),
    ("AI", "Anguilla"),
    ("AQ", "Antarctica"),
    ("AG", "Antigua and Barbuda"),
    ("AR", "Argentina"),
    ("AM", "Armenia"),
    ("AW", "Aruba"),
    ("AU", "Australia"),
    ("AT", "Austria"),
    ("AZ", "Azerbaijan"),
    ("BS", "Bahamas"),
    ("BH", "Bahrain"),
    ("BD", "Bangladesh"),
    ("BB", "Barbados"),
    ("BY", "Belarus"),
    ("BE", "Belgium"),
    ("BZ", "Belize"),
    ("BJ", "Benin"),
    ("BM", "Bermuda"),
    ("BT", "Bhutan"),
    ("BO", "Bolivia"),
    ("BA", "Bosnia and Herzegovina"),
    ("BW", "Botswana"),
    ("BV", "Bouvet Island"),
    ("BR", "Brazil"),
    ("IO", "British Indian Ocean Territory"),
    ("VG", "British Virgin Islands"),
    ("BN", "Brunei"),
    ("BG", "Bulgaria"),
    ("BF", "Burkina Faso"),
    ("BI", "Burundi"),
    ("KH", "Cambodia"),
    ("CM", "Cameroon"),
    ("CA", "Canada"),
    ("CV", "Cape Verde"),
    ("BQ", "Caribbean Netherlands"),
    ("KY", "Cayman Islands"),
    ("CF", "Central African Republic"),
    ("TD", "Chad"),
    ("CL", "Chile"),
    ("CN", "China"),
    ("CX", "Christmas Island"),
    ("CC", "Cocos (Keeling) Islands"),
    ("CO", "Colombia"),
    ("KM", "Comoros"),
    ("CG", "Congo (Brazzaville)"),
    ("CD", "Congo (Kinshasa)"),
    ("CK", "Cook Islands"),
    ("CR", "Costa Rica"),
    ("HR", "Croatia"),
    ("CU", "Cuba"),
    ("CW", "Curaçao"),
    ("CY", "Cyprus"),
    ("CZ", "Czech Republic"),
    ("DK", "Denmark"),
    ("DJ", "Djibouti"),
    ("DM", "Dominica"),
    ("DO", "Dominican Republic"),
    ("EC", "Ecuador"),
    ("EG", "Egypt"),
    ("SV", "El Salvador"),
    ("GQ", "Equatorial Guinea"),
    ("ER", "Eritrea"),
    ("EE", "Estonia"),
    ("SZ", "Eswatini"),
    ("ET", "Ethiopia"),
    ("FK", "Falkland Islands"),
    ("FO", "Faroe Islands"),
    ("FJ", "Fiji"),
    ("FI", "Finland"),
    ("FR", "France"),
    ("GF", "French Guiana"),
    ("PF", "French Polynesia"),
    ("TF", "French Southern Territories"),
    ("GA", "Gabon"),
    ("GM", "Gambia"),
    ("GE", "Georgia"),
    ("DE", "Germany"),
    ("GH", "Ghana"),
    ("GI", "Gibraltar"),
    ("GR", "Greece"),
    ("GL", "Greenland"),
    ("GD", "Grenada"),
    ("GP", "Guadeloupe"),
    ("GU", "Guam"),
    ("GT", "Guatemala"),
    ("GG", "Guernsey"),
    ("GN", "Guinea"),
    ("GW", "Guinea-Bissau"),
    ("GY", "Guyana"),
    ("HT", "Haiti"),
    ("HM", "Heard Island and McDonald Islands"),
    ("HN", "Honduras"),
    ("HK", "Hong Kong S.A.R., China"),
    ("HU", "Hungary"),
    ("IS", "Iceland"),
    ("IN", "India"),
    ("ID", "Indonesia"),
    ("IR", "Iran"),
    ("IQ", "Iraq"),
    ("IE", "Ireland"),
    ("IM", "Isle of Man"),
    ("IL", "Israel"),
    ("IT", "Italy"),
    ("CI", "Ivory Coast"),
    ("JM", "Jamaica"),
    ("JP", "Japan"),
    ("JE", "Jersey"),
    ("JO", "Jordan"),
    ("KZ", "Kazakhstan"),
    ("KE", "Kenya"),
    ("KI", "Kiribati"),
    ("XK", "Kosovo"),
    ("KW", "Kuwait"),
    ("KG", "Kyrgyzstan"),
    ("LA", "Laos"),
    ("LV", "Latvia"),
    ("LB", "Lebanon"),
    ("LS", "Lesotho"),
    ("LR", "Liberia"),
    ("LY", "Libya"),
    ("LI", "Liechtenstein"),
    ("LT", "Lithuania"),
    ("LU", "Luxembourg"),
    ("MO", "Macao S.A.R., China"),
    ("MK", "Macedonia"),
    ("MG", "Madagascar"),
    ("MW", "Malawi"),
    ("MY", "Malaysia"),
    ("MV", "Maldives"),
    ("ML", "Mali"),
    ("MT", "Malta"),
    ("MH", "Marshall Islands"),
    ("MQ", "Martinique"),
    ("MR", "Mauritania"),
    ("MU", "Mauritius"),
    ("YT", "Mayotte"),
    ("MX", "Mexico"),
    ("FM", "Micronesia"),
    ("MD", "Moldova"),
    ("MC", "Monaco"),
    ("MN", "Mongolia"),
    ("ME", "Montenegro"),
    ("MS", "Montserrat"),
    ("MA", "Morocco"),
    ("MZ", "Mozambique"),
    ("MM", "Myanmar"),
    ("NA", "Namibia"),
    ("NR", "Nauru"),
    ("NP", "Nepal"),
    ("NL", "Netherlands"),
    ("AN", "Netherlands Antilles"),
    ("NC", "New Caledonia"),
    ("NZ", "New Zealand"),
    ("NI", "Nicaragua"),
    ("NE", "Niger"),
    ("NG", "Nigeria"),
    ("NU", "Niue"),
    ("NF", "Norfolk Island"),
    ("MP", "Northern Mariana Islands"),
    ("KP", "North Korea"),
    ("NO", "Norway"),
    ("OM", "Oman"),
    ("PK", "Pakistan"),
    ("PW", "Palau"),
    ("PS", "Palestinian Territory"),
    ("PA", "Panama"),
    ("PG", "Papua New Guinea"),
    ("PY", "Paraguay"),
    ("PE", "Peru"),
    ("PH", "Philippines"),
    ("PN", "Pitcairn"),
    ("PL", "Poland"),
    ("PT", "Portugal"),
    ("PR", "Puerto Rico"),
    ("QA", "Qatar"),
    ("RE", "Reunion"),
    ("RO", "Romania"),
    ("RU", "Russia"),
    ("RW", "Rwanda"),
    # This literal was split across two physical lines in the original,
    # which is an unterminated-string SyntaxError; it is rejoined here.
    ("BL", "Saint Barthélemy"),
    ("SH", "Saint Helena"),
    ("KN", "Saint Kitts and Nevis"),
    ("LC", "Saint Lucia"),
    ("MF", "Saint Martin (French part)"),
    ("PM", "Saint Pierre and Miquelon"),
    ("VC", "Saint Vincent and the Grenadines"),
    ("WS", "Samoa"),
    ("SM", "San Marino"),
    ("ST", "Sao Tome and Principe"),
    ("SA", "Saudi Arabia"),
    ("SN", "Senegal"),
    ("RS", "Serbia"),
    ("SC", "Seychelles"),
    ("SL", "Sierra Leone"),
    ("SG", "Singapore"),
    ("SX", "Sint Maarten"),
    ("SK", "Slovakia"),
    ("SI", "Slovenia"),
    ("SB", "Solomon Islands"),
    ("SO", "Somalia"),
    ("ZA", "South Africa"),
    ("GS", "South Georgia and the South Sandwich Islands"),
    ("KR", "South Korea"),
    ("SS", "South Sudan"),
    ("ES", "Spain"),
    ("LK", "Sri Lanka"),
    ("SD", "Sudan"),
    ("SR", "Suriname"),
    ("SJ", "Svalbard and Jan Mayen"),
    ("SE", "Sweden"),
    ("CH", "Switzerland"),
    ("SY", "Syria"),
    ("TW", "Taiwan"),
    ("TJ", "Tajikistan"),
    ("TZ", "Tanzania"),
    ("TH", "Thailand"),
    ("TB", "Tibet"),
    ("TL", "Timor-Leste"),
    ("TG", "Togo"),
    ("TK", "Tokelau"),
    ("TO", "Tonga"),
    ("TT", "Trinidad and Tobago"),
    ("TN", "Tunisia"),
    ("TR", "Turkey"),
    ("TM", "Turkmenistan"),
    ("TC", "Turks and Caicos Islands"),
    ("TV", "Tuvalu"),
    ("VI", "U.S. Virgin Islands"),
    ("UG", "Uganda"),
    ("UA", "Ukraine"),
    ("AE", "United Arab Emirates"),
    ("GB", "United Kingdom"),
    ("US", "United States"),
    ("UM", "United States Minor Outlying Islands"),
    ("UY", "Uruguay"),
    ("UZ", "Uzbekistan"),
    ("VU", "Vanuatu"),
    ("VA", "Vatican"),
    ("VE", "Venezuela"),
    ("VN", "Vietnam"),
    ("WF", "Wallis and Futuna"),
    ("EH", "Western Sahara"),
    ("YE", "Yemen"),
    ("ZM", "Zambia"),
    ("ZW", "Zimbabwe"),
)
# (tool key, display label) pairs. check_country() sends the key as the
# 'tools' query filter; the script's entry point uses the labels as CSV
# column headers, in this order.
engines = (
    ("adventuregamestudio", "Adventure Game Studio"),
    ("aframe", "A-Frame"),
    ("lumberyard", "Amazon Lumberyard"),
    ("bitsy", "Bitsy game maker"),
    ("cocos2d", "Cocos 2D"),
    ("construct2", "Construct"),
    ("corona", "Corona SDK"),
    ("cry", "CryEngine"),
    ("defold", "Defold"),
    ("dotnet", ".Net"),
    ("enchant_js", "Enchant.JS"),
    ("eye_tracking", "Eye tracking equipment"),
    ("game_maker", "GameMaker"),
    ("game_salad", "Game Salad"),
    ("godot", "Godot Engine"),
    ("greenfoot", "Greenfoot"),
    ("haxe", "Haxe"),
    ("houdini", "Houdini"),
    ("inform", "Inform"),
    ("intel", "Intel XDK"),
    ("libgdx", "LibGDX"),
    ("lookingglass", "Looking Glass"),
    ("playcanvas", "Play Canvas"),
    ("playstation_mobile", "PlayStation Mobile"),
    ("processing", "Processing"),
    ("puzzlescript", "Puzzlescript"),
    ("rpgmaker", "RPG Maker"),
    ("renpy", "Ren/Py"),
    ("scratch", "Scratch"),
    ("sdl", "SDL"),
    ("stencyl", "Stencyl"),
    ("tabletoptech", "Tabletop Technology"),
    ("twine", "Twine"),
    ("unity", "Unity"),
    ("unreal_engine", "Unreal Engine"),
    ("vr", "VR head mount display"),
    ("sensoryx", "VRFree by Sensoryx"),
)

# Listing page that make_request() queries with the filter parameters.
URL = "https://globalgamejam.org/2019/games"
def make_request(params, timeout=30):
    """Return the game count shown on the listing page for the given filters.

    Sends a GET request to ``URL`` with ``params`` as the query string and
    keeps retrying until the server answers with HTTP 200. Transient network
    errors (connection failures, timeouts) are retried as well, and each
    retry waits a little longer than the previous one (capped) so that a
    struggling server is not hammered in a tight loop.

    Args:
        params: Mapping of query-string filters passed to ``requests.get``.
        timeout: Seconds to wait for the server before treating the attempt
            as failed. Without a timeout a stalled connection would block
            the worker forever.

    Returns:
        The integer parsed from the page's ``view-header`` element, or ``0``
        when the page has no such element.

    Raises:
        IndexError, ValueError: If the ``view-header`` text does not have an
            integer as its sixth whitespace-separated token.
    """
    # Imported locally so this function carries its own dependencies.
    from time import sleep

    from requests import RequestException

    delay = 1
    while True:
        try:
            page = get(URL, params, timeout=timeout)
        except RequestException:
            # Treat network-level failures like a non-200 answer: retry.
            page = None
        if page is not None and page.status_code == 200:
            break
        sleep(delay)
        delay = min(delay * 2, 30)

    soup = BeautifulSoup(page.text, 'html.parser')
    elem = soup.find(class_="view-header")
    if not elem:
        return 0
    # NOTE(review): presumably the header reads like
    # "Displaying 1 - 30 of N ..." so token 5 is the total - confirm against
    # the live page markup.
    return int(elem.getText().split()[5])
def check_country(country_data):
    """Build one CSV row of per-engine game counts for a single country.

    Args:
        country_data: A ``(country_key, country_name)`` pair; the key is sent
            as the ``country`` filter and the name becomes the row label.

    Returns:
        A list ``[name, count_for_engine_1, ..., count_for_engine_n, other]``
        where the engine columns follow the order of ``engines`` and
        ``other`` is the country's total minus the sum of the engine counts.
        When the country total is zero, no per-engine requests are made and
        every engine column is ``0``.
    """
    started_at = timer()
    code, name = country_data

    base_query = {
        'title': '',
        'country': code,
        'city': '',
        'tools': 'All',
        'diversifier': 'All',
        'platforms': 'All',
        'other_tools': 'All',
    }
    total = make_request(base_query)

    if total:
        # One request per engine, overriding only the 'tools' filter.
        per_engine = [
            make_request({**base_query, 'tools': engine_key})
            for engine_key, _ in engines
        ]
    else:
        # Nothing submitted for this country: skip the per-engine requests.
        per_engine = [0] * len(engines)

    row = [name, *per_engine, total - sum(per_engine)]
    print(name, ':', timer() - started_at, 'secs')
    return row
if __name__ == '__main__':
    # Fan the per-country scraping out over a pool of worker processes;
    # chunksize=1 hands countries to workers one at a time. pool.map returns
    # the rows in the same order as `countries`.
    with Pool(100) as pool:
        data = pool.map(check_country, countries, chunksize=1)

    # Column layout matches the row layout produced by check_country():
    # country name, one column per engine, then the remainder.
    header = ['Country'] + [engine for _, engine in engines] + ['Other/Not Listed']

    # newline='' is required by the csv module so that it controls line
    # endings itself (otherwise Windows output gets blank rows between
    # records). The explicit encoding keeps non-ASCII country names such as
    # "Curaçao" from depending on the platform's default locale encoding.
    with open('jam_2019.csv', mode='w', newline='', encoding='utf-8') as jam_file:
        jam_writer = csv.writer(jam_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
        jam_writer.writerow(header)
        jam_writer.writerows(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment