Just ... some scripts. Nothing special.
Last active
August 29, 2015 13:56
-
-
Save amandabee/8969833 to your computer and use it in GitHub Desktop.
A handful of pretty random python scripts.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" Generate PDF bingo cards.""" | |
import os | |
from reportlab.pdfgen import canvas | |
from reportlab.lib.pagesizes import letter | |
from reportlab.lib.units import inch | |
from reportlab.lib.colors import Color | |
import random | |
""" TO DO: use better fonts. | |
from reportlab.pdfbase.ttfonts import TTFont | |
Then can register TTF with | |
pdfmetrics.registerFont(TTFont('Vera','Vera.ttf')) | |
""" | |
def set_canvas(filename):
    """Create a letter-sized canvas that will be written to *filename*.

    No font is configured here; draw_strings() chooses a font for every
    string at the moment it is drawn.
    """
    pdf = canvas.Canvas(filename)
    pdf.setPageSize(letter)
    return pdf
def get_box_width(word="bingo", page_size=letter, page_margin=1 * inch):
    """Return the width of one grid box for *word* on the given page.

    The horizontal space left after removing *page_margin* from both sides
    of the page is shared equally between the letters of *word*, so every
    letter gets one column.

    word -- the word whose letters head the columns.
    page_size -- (width, height) sequence; only the width is used.
    page_margin -- space kept free on the left and on the right.

    TODO: the base font size could be derived here as well.
    """
    both_margins = page_margin * 2
    columns = len(word)
    return (page_size[0] - both_margins) / columns
def get_coords(word="bingo"):
    """Compute where the strings and the grid lines for *word* are drawn.

    Returns a tuple ``(x_list, y_list, coords)``:

    x_list -- integer x positions of the grid lines, len(word) + 1 values.
    y_list -- integer y positions of the grid lines, len(word) + 2 values.
    coords -- (x, y) tuples, one per cell, ordered column by column
        (increasing x) and by decreasing y inside each column. Every
        column has len(word) + 1 cells.
    """
    columns = len(word)
    rows = columns + 1
    box = get_box_width(word)

    # Strings are drawn centred on x, so the first x sits half a box in
    # from the left edge of the horizontally centred grid.
    x = 0.5 * box + (letter[0] - box * columns) / 2
    y = 11 * inch - (0.5 * box + (letter[1] - box * rows) / 2)

    x_values = []
    y_values = []
    for _unused in range(rows):
        x_values.append(x)
        y_values.append(y)
        x = x + box
        y = y - box
    # The loop yields one x value more than there are columns.
    del x_values[-1]

    coords = [(col_x, row_y) for col_x in x_values for row_y in y_values]

    # Grid lines sit half a box to the left of each column centre, plus one
    # closing line to the right of the last column.
    half_box = box * 0.5
    x_list = [int(col_x - half_box) for col_x in x_values]
    x_list.append(int(x_values[-1] + half_box))

    y_list = [int(row_y + box * 0.65) for row_y in y_values]
    # The 13 is an empirical correction found by inspecting the output.
    y_list.append(int(y_values[-1] - half_box + 13))

    return x_list, y_list, coords
def set_ranges(word="bingo", i=15):
    """Build the number range belonging to every letter of *word*.

    Each letter gets the next block of *i* consecutive numbers: the first
    letter covers 1..i, the second i+1..2*i, and so on.

    Returns a dict that maps each letter to a ``[low, high]`` list and also
    stores *word* itself under the key ``'word'``. A letter occurring more
    than once keeps only the range of its last occurrence.
    """
    ranges = {'word': word}
    floor = 0
    for ltr in word:
        ceiling = floor + i
        ranges[ltr] = [floor + 1, ceiling]
        floor = ceiling
    return ranges
def set_strings(ranges):
    """Generate the strings for one card from a set_ranges() dictionary.

    For every letter of the word the result holds the upper-cased letter
    (the column header) followed by len(word) distinct random numbers drawn
    from that letter's [low, high] range, both ends included. When the word
    length is odd, the middle entry of the list is replaced by "FREE"; for
    an even length there is no middle cell and "No free cell" is printed.

    ranges -- dict as returned by set_ranges(): a 'word' key plus one
        [low, high] entry per letter.

    Returns a flat list mixing strings (headers, "FREE") and integers.
    Raises ValueError (from random.sample) when a range holds fewer than
    len(word) numbers.
    """
    # The word we are working with (probably "bingo").
    word = ranges['word']
    length = len(word)
    strings = []
    for ltr in word:
        # ltr.upper() also works for unicode letters on Python 2.
        strings.append(ltr.upper())
        bounds = ranges[ltr]
        strings.extend(
            random.sample(range(bounds[0], bounds[1] + 1), length))
    if length % 2 == 0:
        print("No free cell")
    else:
        # Floor division keeps the index an integer on Python 3 as well.
        strings[len(strings) // 2] = "FREE"
    return strings
def draw_grid(this_canvas, coords):
    """Draw the card grid on the canvas in half-transparent red.

    this_canvas -- canvas to draw on.
    coords -- the tuple returned by get_coords(); only its first two items,
        the x and y grid-line positions, are used.
    """
    this_canvas.setLineWidth(2.0)
    # ReportLab colour components run from 0 to 1, so full red is 1, not 100.
    red50transparent = Color(1, 0, 0, alpha=0.5)
    this_canvas.setStrokeColor(red50transparent)
    x_list = coords[0]
    y_list = coords[1]
    this_canvas.grid(x_list, y_list)
def draw_strings(this_canvas, coords, strings):
    """Draw every entry of *strings* centred on its matching coordinate.

    this_canvas -- canvas to draw on.
    coords -- list of (x, y) tuples; coords[n] positions strings[n].
    strings -- list of str and non-str entries, e.g. from set_strings().

    The font depends on the entry: "FREE" is set in a smaller Helvetica,
    any other str (the column headers) in Courier-Bold, and everything
    else (the numbers) in Helvetica-Bold.

    TODO: reduce the font sizes when the boxes are too small for them.
    """
    for position, entry in enumerate(strings):
        if entry == "FREE":
            font_name, font_size = 'Helvetica', 28
        elif isinstance(entry, str):
            font_name, font_size = 'Courier-Bold', 42
        else:
            font_name, font_size = 'Helvetica-Bold', 36
        this_canvas.setFont(font_name, font_size)
        label = str(entry)
        x, y = coords[position][0], coords[position][1]
        this_canvas.drawCentredString(x, y, label)
def draw_cards(path, filename="bingo.pdf", i=45, word="bingo"):
    """Write *i* bingo cards for *word* to path/filename, one card per page.

    path -- directory to write into; a leading "~" is expanded to the
        user's home directory. The process changes into this directory.
    filename -- name of the PDF file to create.
    i -- number of cards (pages) to generate.
    word -- word whose letters head the columns.
    """
    # os.chdir() does not expand "~" by itself.
    os.chdir(os.path.expanduser(path))
    # The canvas, ranges and coordinates only need to be set once.
    this_canvas = set_canvas(filename)
    this_ranges = set_ranges(word)
    this_coordinates = get_coords(word)
    this_coord_tuples = this_coordinates[2]
    for _unused in range(i):
        # Generate new random strings for each card.
        card_strings = set_strings(this_ranges)
        draw_strings(this_canvas, this_coord_tuples, card_strings)
        draw_grid(this_canvas, this_coordinates)
        # Finish the page so the next card starts on a fresh one instead of
        # being drawn on top of this card.
        this_canvas.showPage()
    this_canvas.save()
""" | |
draw_cards("~", "bingo.pdf", 13, "bingo") | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Feb 2014: I started trying to scrape this w/Beautiful Soup, but it turns out the data is all in JSON. | |
Extract all facility addresses from http://www.bop.gov/locations/list.jsp | |
""" | |
import urllib2 | |
import json | |
import csv | |
url = "http://www.bop.gov/PublicInfo/execute/locations?todo=query&output=json"
json_string = urllib2.urlopen(url).read()

# Load the string of JSON into a dict.
jsondata = json.loads(json_string)

# Review the top-level keys of the dict
# (or just use http://jsbeautifier.org/ to see what it looks like).
for item in jsondata:
    print(item)

# At the time of writing this showed three top-level items.
# List the field names of the first entry in "Locations".
for item in jsondata['Locations'][0]:
    print(item)

# Columns of the CSV, in output order.
fields = [
    'hasFsl',
    'code',
    'contactEmail',
    'special',
    'city',
    'privateFacl',
    'nameDisplay',
    'faclTypeDescription',
    'state',
    'phoneNumber',
    'latitude',
    'type',
    'locationtype',
    'zipCode',
    'hasCamp',
    'complexCode',
    'address',
    'securityLevel',
    'name',
    'gender',
    'region',
    'longitude',
    'hasFdc',
    'timeZone',
    'nameTitle',
]

# Open the CSV inside a with-block so it is flushed and closed once every
# row has been written ('wb' is the mode the Python 2 csv module expects).
with open('/tmp/locations.csv', 'wb') as csvfile:
    f = csv.writer(csvfile)
    for item in jsondata['Locations']:
        f.writerow([item[field] for field in fields])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
### Jan 2014 | |
### This is rough, but works. I wanted a spreadsheet of MLB Salaries for a | |
### basic lesson on means and medians and how wildly extravagant salaries | |
### distort the mean. So I scraped the data from Newsday's salary database. | |
#import scraperwiki | |
import urllib2 | |
from bs4 import BeautifulSoup | |
import csv | |
def get_soup(url):
    """Fetch *url* and return the response parsed as a BeautifulSoup tree."""
    response = urllib2.urlopen(url)
    return BeautifulSoup(response)
def get_salaries(soup, linewriter):
    """Write one row per player found in the "sdb-results" table of *soup*.

    soup -- parsed page, as returned by get_soup().
    linewriter -- object with a writerow() method that accepts a dict keyed
        by 'player', 'team', 'position', 'state', 'league', 'division',
        '2013_salary' and 'age'.

    The salary is written without "$" and without thousands separators.
    Rows that cannot be processed (for example rows with fewer than eight
    <td> cells) are reported on stdout and skipped. When the page has no
    results table, a message is printed and nothing is written.
    """
    table = soup.find("table", {"id": "sdb-results"})
    if table is None:
        print("No sdb-results table found")
        return
    for row in table.find_all('tr'):
        cells = row.find_all("td")
        try:
            texts = [cell.get_text().strip() for cell in cells[:8]]
            # Strip whitespace first, then remove "$" and every comma, so
            # " $1,000,000 " becomes "1000000".
            salary = texts[6].replace('$', '').replace(',', '').strip()
            data = {
                'player': texts[0],
                'team': texts[1],
                'position': texts[2],
                'state': texts[3],
                'league': texts[4],
                'division': texts[5],
                '2013_salary': salary,
                'age': texts[7],
            }
            linewriter.writerow(data)
            print("Saved " + data['player'])
        except Exception as e:
            # Best effort: report the problem and go on with the next row.
            print(str(e))
base_url = "http://data.newsday.com/long-island/data/baseball/mlb-salaries-2013/?currentRecord="

# Values for the currentRecord parameter, one per request.
# Presumably each results page lists 50 records -- verify against the site.
page_starts = range(1, 854, 50)
print(page_starts)

fieldorder = ['player', 'team', 'position', 'state',
              'league', 'division', '2013_salary', 'age']

# Rows are appended, so repeated runs add to the existing file.
with open('/home/amanda/Desktop/mlb_salaries_alt.csv', 'a+') as csvfile:
    linewriter = csv.DictWriter(csvfile, fieldorder, delimiter='|',
                                quotechar='"', quoting=csv.QUOTE_MINIMAL)
    for record in page_starts:
        print("starting...")
        url = base_url + str(record)
        soup = get_soup(url)
        get_salaries(soup, linewriter)
        print(url)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
Pull down a bunch of public mailman archives. | |
In this case, they're archived monthly and the URLs look something like this: | |
http://lists.example.net/pipermail/listname/2010-October.txt.gz | |
""" | |
import urllib2 | |
import calendar | |
import gzip | |
import os | |
def get_all_gzs(base, years):
    """Download and unpack the monthly mailman archives for *years*.

    For every year in *years* and every month name, fetches
    ``base + "<year>-<Month>.txt.gz"``, saves the .gz file in the current
    directory and writes its decompressed contents to
    ``list_archives/<year>-<Month>.txt``. A month that fails (for instance
    because no archive exists for it) is reported on stdout and skipped.

    base -- URL prefix of the list's archive (unique to your list).
    years -- iterable of years to fetch.

    NOTE: the month names come from calendar.month_name, which follows the
    current locale.
    """
    archive_folder = 'list_archives'
    if not os.path.exists(archive_folder):
        os.makedirs(archive_folder)
    # calendar.month_name[0] is an empty placeholder; the real months are
    # at indexes 1 to 12.
    months = calendar.month_name[1:]
    for year in years:
        print(year)
        for month in months:
            stem = str(year) + "-" + month
            filename = stem + ".txt.gz"
            print(filename)
            url = base + filename
            print(url)
            try:
                req = urllib2.urlopen(url)
                try:
                    payload = req.read()
                finally:
                    req.close()
                with open(filename, 'wb') as output:
                    output.write(payload)
                with gzip.open(filename, 'rb') as z:
                    file_content = z.read()
                textfile = archive_folder + "/" + stem + ".txt"
                # The with-block closes the file; the original "f.close"
                # without parentheses never did.
                with open(textfile, 'w') as f:
                    f.write(file_content)
            except Exception as e:
                # Best effort: report the failure and try the next month.
                print(e)
# Archive location of the list to download (unique to your list).
URLBASE = "http://lists.example.net/pipermail/listname/"
# Years to fetch: 2008 up to and including 2015 (range() excludes its end).
YEARS = range(2008, 2016)
get_all_gzs(base=URLBASE, years=YEARS)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment