Generate ISO Code CSV from Wiki link
# original link found here: https://gis.stackexchange.com/questions/1047/seeking-full-list-of-iso-alpha-2-and-iso-alpha-3-country-codes
import csv
import urllib.request as urllib2
from bs4 import BeautifulSoup

# helper to join the text fragments of a table cell (including anchor text),
# dropping newlines and surrounding whitespace
def format_text(array):
    return ''.join(array).strip()

# fetch the Wikipedia ISO 3166-1 page with a browser-like User-Agent
opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
url = 'http://en.wikipedia.org/wiki/ISO_3166-1'
page = opener.open(url)
soup = BeautifulSoup(page.read())

# "Current codes" is the second sortable table on the page
t = soup.findAll('table', {'class': 'wikitable sortable'})[1]

# create a new CSV for the output (newline='' avoids blank rows on Windows)
iso_csv = csv.writer(open('wikipedia-iso-country-codes.csv', 'w', newline=''))

# get the header row and write it to the CSV
header_row = [format_text(th.findAll(text=True)) for th in t.findAll('th')]
iso_csv.writerow(header_row)

# iterate over the table rows, pulling out the country results; skip the first
# row, which holds the already-parsed header information
for row in t.findAll("tr")[1:]:
    tds = row.findAll('td')
    raw_cols = [td.findAll(text=True) for td in tds]
    formatted_cols = [format_text(c) for c in raw_cols]
    iso_csv.writerow(formatted_cols)
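To sanity-check the output, a minimal sketch of reading the generated CSV back with the standard library (the column names simply mirror whatever headers the Wikipedia table has at the time, so the print here is illustrative):

import csv

# read back the CSV written by the script above; the header row pulled from
# the Wikipedia table becomes the keys of each row dict
with open('wikipedia-iso-country-codes.csv', newline='') as f:
    reader = csv.DictReader(f)
    for country in reader:
        print(country)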
Updates today for BeautifulSoup 4 and a change to the page HTML:
soup = BeautifulSoup(page.read(), features="html.parser")
t = soup.findAll('table', {'class' : 'wikitable sortable'})[0]
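Putting those two updates in context, a sketch of the revised fetch-and-parse step (the rest of the script is unchanged; the explicit "html.parser" avoids the BeautifulSoup 4 missing-parser warning, and the current-codes table is now the first 'wikitable sortable' match on the page):

import urllib.request as urllib2
from bs4 import BeautifulSoup

opener = urllib2.build_opener()
opener.addheaders = [('User-agent', 'Mozilla/5.0')]
page = opener.open('http://en.wikipedia.org/wiki/ISO_3166-1')

# BeautifulSoup 4 wants an explicit parser; the built-in html.parser keeps
# the script free of extra dependencies
soup = BeautifulSoup(page.read(), features="html.parser")

# after the page-HTML change, the current-codes table is the first match
t = soup.findAll('table', {'class': 'wikitable sortable'})[0]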