Mindy McAdams (macloo)
macloo / headless_selenium.py
Last active April 9, 2019 23:46
Run Chrome headless with Selenium
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
chrome_options = Options()
chrome_options.add_argument("--headless")
# fill in your own path to installed chromedriver
driver = webdriver.Chrome(executable_path='/Users/dirname/dirname/dirname/chromedriver',
                          options=chrome_options)
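The preview stops once the driver is created; a minimal sketch of how it might continue, with a placeholder URL:
driver.get('https://example.com')  # placeholder URL
time.sleep(2)  # give the headless browser a moment to render
soup = BeautifulSoup(driver.page_source, 'html.parser')
print(soup.title.string)
driver.quit()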
macloo / get_wiki_extract.py
Last active April 7, 2019 14:08
Use Python Wikipedia-API to get text summary for any subject in a list of subjects
"""
Requires Wikipedia-API 0.5.1 or greater - and Python 3
https://pypi.org/project/Wikipedia-API/
"""
import wikipediaapi
w = wikipediaapi.Wikipedia('en')  # English-language Wikipedia
p = w.page('N._K._Jemisin')  # page title exactly as it appears in the article URL
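The preview shows a single page lookup; a sketch of the loop over a list of subjects that the description mentions (the list here is a placeholder):
subjects = ['N._K._Jemisin', 'Octavia_E._Butler']  # placeholder list
for subject in subjects:
    page = w.page(subject)
    if page.exists():  # skip titles that don't resolve to an article
        print(page.summary)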
macloo / get_all_agency_urls2.py
Created April 1, 2019 13:33
For Sarah April 2019 - part 2
from urllib.request import urlopen
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import csv
driver = webdriver.Chrome('/Users/mcadams/Documents/python/scraping2019/chromedriver')
# testing the 'C' page only
driver.get('https://www.usa.gov/federal-agencies/c')
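The rest of the gist isn't shown; a plausible continuation that collects every link on the 'C' page (the wait time and the use of all a tags are assumptions):
time.sleep(3)  # let the slow page finish loading
soup = BeautifulSoup(driver.page_source, 'html.parser')
for link in soup.find_all('a'):
    print(link.get('href'))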
macloo / get_all_agency_urls.py
Created April 1, 2019 13:23
For Sarah April 2019
from urllib.request import urlopen
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import csv
driver = webdriver.Chrome('/Users/mcadams/Documents/python/scraping2019/chromedriver')
driver.get('https://www.usa.gov/federal-agencies')
# pause because page is slow to load
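The preview cuts off at the pause; a sketch of a likely continuation (the sleep length and output filename are guesses):
time.sleep(5)
soup = BeautifulSoup(driver.page_source, 'html.parser')
with open('agency_urls.csv', 'w', newline='') as f:  # filename is an assumption
    writer = csv.writer(f)
    for link in soup.find_all('a'):
        writer.writerow([link.get_text(strip=True), link.get('href')])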
macloo / get_rep_deets.py
Created March 31, 2019 22:22
Collect deets for each FL rep from 2 pages
import requests
from bs4 import BeautifulSoup
import time
base_url = "https://www.myfloridahouse.gov"
# using a small list for testing
small_list = [
    "/Sections/Representatives/details.aspx?MemberId=4684&LegislativeTermId=88",
    "/Sections/Representatives/details.aspx?MemberId=4624&LegislativeTermId=88",
]  # closing bracket added; the gist preview is cut off mid-list
macloo / get_all_urls.py
Created March 31, 2019 21:31
For Madison March 2019
import requests
from bs4 import BeautifulSoup
url = "https://www.myfloridahouse.gov/Sections/Representatives/representatives.aspx"
hdr = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.86 Safari/537.36',
       'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
       'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
       'Accept-Encoding': 'none',
       'Accept-Language': 'en-US,en;q=0.8'}  # closing brace added; the preview is cut off mid-dict
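With the headers assembled, the request itself would look something like this:
page = requests.get(url, headers=hdr)
soup = BeautifulSoup(page.text, 'html.parser')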
macloo / read_db.php
Last active January 24, 2023 01:14
A read-only page from a MySQL database - no JavaScript, no form handling
<?php include 'database.php'; ?>
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1">
<title> Read the Shoutbox DB </title>
<link rel="stylesheet" href="css/main.css">
</head>
macloo / keys.js
Created December 12, 2018 00:16
For Lorenzo P
// old code
/*
$('#bebop-key').click(function(){
resetAll();
$('#bebop-key').hide();
$('#bebop').css('background-color', '#EC7063');
$('#hide3').show();
$('#bebop').animate({height: '510px'}, 400);
$('#bebop').animate({width: '400px'}, 400);
$('#bebop-text').delay(1100).slideDown();
});
*/
// handler and comment block closed; the gist preview is cut off here
macloo / clean_transcript.py
Created May 26, 2018 21:41
Clean any .sbv transcript file from YouTube - remove blank lines and timecode
# clean any .sbv transcript file from YouTube
# preserve linebreaks
filename = input('What is the filename? (include .sbv) ')
myfile = open(filename)
mylist = myfile.readlines()
myfile.close()
length = str(len(mylist))
new_length = str(int(len(mylist) / 3))  # roughly one line in three should survive the cleanup
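The preview stops before the cleaning itself; one way that step might work, assuming each .sbv cue is three lines (timecode, text, blank):
clean_lines = [line for i, line in enumerate(mylist) if i % 3 == 1]
with open('clean_' + filename, 'w') as out:  # output filename is an assumption
    out.writelines(clean_lines)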
macloo / govtrack.py
Last active February 25, 2025 15:51
Scraping a page - requires BeautifulSoup and Requests
from bs4 import BeautifulSoup
import requests
url = 'https://www.govtrack.us/congress/members/amy_klobuchar/412242'
page = requests.get(url)
soup = BeautifulSoup(page.text, 'html.parser')
# get a list of all the h2 elements
head_list = soup.find_all('h2')
# loop over the list to find the heading where we start to scrape
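A sketch of that loop (the heading text to match is an assumption):
for heading in head_list:
    if 'Committee Membership' in heading.get_text():
        print(heading.get_text(strip=True))
        break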