|  | #!/usr/bin/env python | 
        
          |  | import re | 
        
          |  | import urllib2 | 
        
          |  | import argparse | 
        
          |  | from datetime import date, timedelta | 
        
          |  | from bs4 import BeautifulSoup | 
        
          |  | import pytumblr | 
        
          |  |  | 
        
          |  | client = pytumblr.TumblrRestClient( | 
        
          |  | '<consumer_key>', | 
        
          |  | '<consumer_secret>', | 
        
          |  | '<oauth_token>', | 
        
          |  | '<oauth_secret>', | 
        
          |  | ) | 
        
          |  |  | 
        
          |  | parser = argparse.ArgumentParser() | 
        
          |  | parser.add_argument('date', type=int, | 
        
          |  | nargs='?', | 
        
          |  | help='Number of days in the past. Leave blank for today.') | 
        
          |  | args = parser.parse_args() | 
        
          |  |  | 
        
          |  | if args.date: | 
        
          |  | apod_date = date.today() - timedelta(args.date) | 
        
          |  | else: | 
        
          |  | apod_date = date.today() | 
        
          |  |  | 
        
          |  | apod_home_url = 'http://apod.nasa.gov/apod/' | 
        
          |  | apod_today_url = apod_home_url + 'ap' \ | 
        
          |  | + apod_date.strftime('%y%m%d') \ | 
        
          |  | + '.html' | 
        
          |  | apod_clean_url = re.sub('http://', '', apod_today_url) | 
        
          |  | apod = urllib2.urlopen(apod_today_url).read() | 
        
          |  | soup = BeautifulSoup(apod, 'html5lib') | 
        
          |  |  | 
        
          |  | for a in soup.find_all('a'): | 
        
          |  | # Append `apod_home_url` to any url that doesn't start with `http`. | 
        
          |  | if re.match('^[^http]', a['href']): | 
        
          |  | a['href'] = apod_home_url + a['href'] | 
        
          |  | # Get rid of line breaks that show up inside an `href`. | 
        
          |  | a['href'] = re.sub('\n', '', a['href']) | 
        
          |  |  | 
        
          |  | # Strip all the leading and trailing whitespace inside tags. | 
        
          |  | for b in soup.find_all('b'): | 
        
          |  | if b.string: | 
        
          |  | b.string.replace_with(b.string.strip()) | 
        
          |  | for p in soup.find_all('p'): | 
        
          |  | if p.string: | 
        
          |  | p.string.replace_with(p.string.strip()) | 
        
          |  | for i in soup.find_all('i'): | 
        
          |  | if i.string: | 
        
          |  | i.string.replace_with(i.string.strip()) | 
        
          |  |  | 
        
          |  | title = soup.select('center + center > b:nth-of-type(1)')[0]\ | 
        
          |  | .get_text(strip=True) | 
        
          |  |  | 
        
          |  | # Markdown heading with the date of this entry. | 
        
          |  | dateheading = '# ' + soup.select('center:nth-of-type(1) p:nth-of-type(2)')[0]\ | 
        
          |  | .get_text(strip=True) | 
        
          |  | content1 = str(soup.select('center + center')[0])\ | 
        
          |  | .replace('<center>', '')\ | 
        
          |  | .replace('</center>', '')\ | 
        
          |  | .strip() | 
        
          |  | content1 = re.sub('\n+', ' ', content1) | 
        
          |  | content1 = re.sub(' +', ' ', content1) | 
        
          |  | content2 = str(soup.select('center + p')[0])\ | 
        
          |  | .replace('<p>', '')\ | 
        
          |  | .replace('</p>', '')\ | 
        
          |  | .strip() | 
        
          |  | content2 = re.sub('\n+', ' ', content2) | 
        
          |  | content2 = re.sub(' +', ' ', content2) | 
        
          |  | content3 = '∞ Source: <a href="' + apod_today_url + '">'\ | 
        
          |  | + apod_clean_url + '</a>' | 
        
          |  |  | 
        
          |  | caption = dateheading +\ | 
        
          |  | '\n\n' + content1.decode('utf-8') +\ | 
        
          |  | '\n\n' + content2.decode('utf-8') +\ | 
        
          |  | '\n\n' + content3.decode('utf-8') | 
        
          |  |  | 
        
          |  | if soup.select('center:nth-of-type(1) p:nth-of-type(2) a'): | 
        
          |  | # There's an image here. | 
        
          |  | image = soup.select('center:nth-of-type(1) p:nth-of-type(2) a')[0]['href'] | 
        
          |  |  | 
        
          |  | client.create_photo('apod', source=image.encode('utf-8'), | 
        
          |  | caption=caption.encode('utf-8'), | 
        
          |  | slug=title.encode('utf-8'), | 
        
          |  | format='markdown') | 
        
          |  |  | 
        
          |  | else: | 
        
          |  | # No image for this one. It's hopefully a YouTube video. | 
        
          |  | image = soup.select( | 
        
          |  | 'center:nth-of-type(1) p:nth-of-type(2) iframe' | 
        
          |  | )[0]['src'].replace('/embed/', '/watch?v=').replace('?rel=0', '') | 
        
          |  |  | 
        
          |  | client.create_video('apod', embed=image.encode('utf-8'), | 
        
          |  | caption=caption.encode('utf-8'), | 
        
          |  | slug=title.encode('utf-8'), | 
        
          |  | format='markdown') | 
        
          |  |  | 
        
# Debugging output — uncomment to inspect the intermediate values:
# print image + '\n'
# print title + '\n'
# print apod_today_url + '\n'
# print dateheading + '\n'
# print content1 + '\n'
# print content2
# print caption