thisismattmiller’s gists

thisismattmiller / google.py

Created April 6, 2022 21:41

Code for video: https://youtu.be/57rmnt15qDw

	import requests
	import json


	source_data = json.load(open('data.json'))



	url = 'https://maps.googleapis.com/maps/api/geocode/json'

thisismattmiller / extract.py

Created April 6, 2022 21:11

Code for video demo: https://youtu.be/TDTmlGyeNp8

	import requests
	import shutil
	import camelot.io as camelot
	import os
	from PyPDF2 import PdfFileReader, PdfFileWriter


	for year in range(2011,2017):

	url = f'https://files.dep.state.pa.us/Waste/Recycling/RecyclingPortalFiles/Documents/{year}_Recycling_Report.pdf'

thisismattmiller / extract.py

Created March 30, 2022 18:31

Code for video: https://youtu.be/pwnIcJ9p2C4 (web scraping with Selenium)

	import glob
	from bs4 import BeautifulSoup
	import json

	all_files = list(glob.glob('html/*.html'))
	all_data = []
	for file_name in all_files:

	with open(file_name) as infile:

thisismattmiller / extract.py

Created March 13, 2022 05:27



	import glob
	import json
	urls = {}

	for file in glob.glob('data_sogb/*'):

	with open(file) as inf:

thisismattmiller / artgarfunkle.py

Created March 13, 2022 05:16

	import waybackpy

	urls = [
	"http://dmc.signourguestbook.com/?username=dmc&trail=25",
	"http://dmc.signourguestbook.com/?username=dmc&trail=50",
	"http://dmc.signourguestbook.com/?username=dmc&trail=75",
	"http://dmc.signourguestbook.com/?username=dmc&trail=100",
	"http://dmc.signourguestbook.com/?username=dmc&trail=125",
	"http://dmc.signourguestbook.com/?username=dmc&trail=150",
	"http://dmc.signourguestbook.com/?username=dmc&trail=175",

thisismattmiller / gist:a4410ce29c5e56621c9f2f6ca103bf84

Created January 26, 2022 14:48

	{
	"id": "lc:RT:bf2:MIBluRayDVD:Instance",
	"propertyTemplates": [
	{
	"mandatory": "false",
	"propertyLabel": "Instance Of",
	"propertyURI": "http://id.loc.gov/ontologies/bibframe/instanceOf",
	"repeatable": "false",
	"resourceTemplates": [],
	"type": "resource",

thisismattmiller / gist:bbd002901c8474db3be80cb22207fbd8

Created December 1, 2021 02:22

thisismattmiller / gist:18db2d2135a6fecd2619c6c922cb21e2

Created September 1, 2021 22:07

	<div><strong>imdb_id1</strong>: <a href="/movie/<%=imdb_id1%>"><%=imdb_id1%></a></div>
	<div><strong>color1</strong>: <%=color1%></div>
	<div><strong>director_name1</strong>: <%=director_name1%></div>
	<div><strong>num_critic_for_reviews1</strong>: <%=num_critic_for_reviews1%></div>
	<div><strong>duration1</strong>: <%=duration1%></div>
	<div><strong>director_facebook_likes1</strong>: <%=director_facebook_likes1%></div>
	<div><strong>actor_3_facebook_likes1</strong>: <%=actor_3_facebook_likes1%></div>
	<div><strong>actor_2_name1</strong>: <%=actor_2_name1%></div>
	<div><strong>actor_1_facebook_likes1</strong>: <%=actor_1_facebook_likes1%></div>
	<div><strong>gross1</strong>: <%=gross1%></div>

thisismattmiller / ch_sparql.py

Last active August 6, 2021 14:34

Example using Python to interact with Carnegie Hall's SPARQL endpoint

	import requests
	import json

	url = "http://data.carnegiehall.org/sparql/"

	sparql = """
	#Find works by string in the title (case-insensitive)
	PREFIX dcterms: <http://purl.org/dc/terms/>
	PREFIX foaf: <http://xmlns.com/foaf/0.1/>
	PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

thisismattmiller / get_links.py

Created April 26, 2021 14:50

Downloading PBCore records from https://americanarchive.org

	import requests
	from bs4 import BeautifulSoup
	import json

	all_item_urls = []

	counter = 1


	while counter <= 609:

Matt Miller thisismattmiller