|  | #!/usr/bin/env python | 
        
          |  | import re | 
        
          |  | import urllib2 | 
        
          |  | import argparse | 
        
          |  | from datetime import date, timedelta | 
        
          |  | from bs4 import BeautifulSoup | 
        
          |  | import pytumblr | 
        
          |  |  | 
        
          |  | client = pytumblr.TumblrRestClient( | 
        
          |  | '<consumer_key>', | 
        
          |  | '<consumer_secret>', | 
        
          |  | '<oauth_token>', | 
        
          |  | '<oauth_secret>', | 
        
          |  | ) | 
        
          |  |  | 
        
          |  | parser = argparse.ArgumentParser() | 
        
          |  | parser.add_argument('date', type=int, | 
        
          |  | nargs='?', | 
        
          |  | help='Number of days in the past. Leave blank for today.') | 
        
          |  | args = parser.parse_args() | 
        
          |  |  | 
        
          |  | if args.date: | 
        
          |  | apod_date = date.today() - timedelta(args.date) | 
        
          |  | else: | 
        
          |  | apod_date = date.today() | 
        
          |  |  | 
        
          |  | apod_home_url = 'http://apod.nasa.gov/apod/' | 
        
          |  | apod_today_url = apod_home_url + 'ap' \ | 
        
          |  | + apod_date.strftime('%y%m%d') \ | 
        
          |  | + '.html' | 
        
          |  | apod_clean_url = re.sub('http://', '', apod_today_url) | 
        
          |  | apod = urllib2.urlopen(apod_today_url).read() | 
        
          |  | soup = BeautifulSoup(apod, 'html5lib') | 
        
          |  |  | 
        
          |  | for a in soup.find_all('a'): | 
        
          |  | # Append `apod_home_url` to any url that doesn't start with `http`. | 
        
          |  | if re.match('^[^http]', a['href']): | 
        
          |  | a['href'] = apod_home_url + a['href'] | 
        
          |  | # Get rid of line breaks that show up inside an `href`. | 
        
          |  | a['href'] = re.sub('\n', '', a['href']) | 
        
          |  |  | 
        
          |  | # Strip all the leading and trailing whitespace inside tags. | 
        
          |  | for b in soup.find_all('b'): | 
        
          |  | if b.string: | 
        
          |  | b.string.replace_with(b.string.strip()) | 
        
          |  | for p in soup.find_all('p'): | 
        
          |  | if p.string: | 
        
          |  | p.string.replace_with(p.string.strip()) | 
        
          |  | for i in soup.find_all('i'): | 
        
          |  | if i.string: | 
        
          |  | i.string.replace_with(i.string.strip()) | 
        
          |  |  | 
        
          |  | title = soup.select('center + center > b:nth-of-type(1)')[0]\ | 
        
          |  | .get_text(strip=True) | 
        
          |  |  | 
        
          |  | # Markdown heading with the date of this entry. | 
        
          |  | dateheading = '# ' + soup.select('center:nth-of-type(1) p:nth-of-type(2)')[0]\ | 
        
          |  | .get_text(strip=True) | 
        
          |  | content1 = str(soup.select('center + center')[0])\ | 
        
          |  | .replace('<center>', '')\ | 
        
          |  | .replace('</center>', '')\ | 
        
          |  | .strip() | 
        
          |  | content1 = re.sub('\n+', ' ', content1) | 
        
          |  | content1 = re.sub(' +', ' ', content1) | 
        
          |  | content2 = str(soup.select('center + p')[0])\ | 
        
          |  | .replace('<p>', '')\ | 
        
          |  | .replace('</p>', '')\ | 
        
          |  | .strip() | 
        
          |  | content2 = re.sub('\n+', ' ', content2) | 
        
          |  | content2 = re.sub(' +', ' ', content2) | 
        
          |  | content3 = '∞ Source: <a href="' + apod_today_url + '">'\ | 
        
          |  | + apod_clean_url + '</a>' | 
        
          |  |  | 
        
          |  | caption = dateheading +\ | 
        
          |  | '\n\n' + content1.decode('utf-8') +\ | 
        
          |  | '\n\n' + content2.decode('utf-8') +\ | 
        
          |  | '\n\n' + content3.decode('utf-8') | 
        
          |  |  | 
        
          |  | if soup.select('center:nth-of-type(1) p:nth-of-type(2) a'): | 
        
          |  | # There's an image here. | 
        
          |  | image = soup.select('center:nth-of-type(1) p:nth-of-type(2) a')[0]['href'] | 
        
          |  |  | 
        
          |  | client.create_photo('apod', source=image.encode('utf-8'), | 
        
          |  | caption=caption.encode('utf-8'), | 
        
          |  | slug=title.encode('utf-8'), | 
        
          |  | format='markdown') | 
        
          |  |  | 
        
          |  | else: | 
        
          |  | # No image for this one. It's hopefully a YouTube video. | 
        
          |  | image = soup.select( | 
        
          |  | 'center:nth-of-type(1) p:nth-of-type(2) iframe' | 
        
          |  | )[0]['src'].replace('/embed/', '/watch?v=').replace('?rel=0', '') | 
        
          |  |  | 
        
          |  | client.create_video('apod', embed=image.encode('utf-8'), | 
        
          |  | caption=caption.encode('utf-8'), | 
        
          |  | slug=title.encode('utf-8'), | 
        
          |  | format='markdown') | 
        
          |  |  | 
        
# Debugging output — uncomment to inspect the intermediate values:
# print image + '\n'
# print title + '\n'
# print apod_today_url + '\n'
# print dateheading + '\n'
# print content1 + '\n'
# print content2
# print caption