Created
March 21, 2013 20:03
-
-
Save ClarkGoble/5216221 to your computer and use it in GitHub Desktop.
Reopens the NYT article shown in Safari's front window with the paywall removed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
## opennyt.py
##
## Given a NYT article open in Safari's front document, this reopens it without the paywall
import sys, os | |
import time, string, re | |
import urllib, json | |
from subprocess import PIPE, Popen | |
def getsource():
    """
    Return the HTML source of Safari's front document.

    The AppleScript is handed to ``osascript`` through the shell. Whatever
    the command writes to stderr is printed; its stdout is returned.
    """
    # The leading/trailing single quotes are shell quoting for the -e argument.
    script = """'
tell application "Safari"
get the source of the front document
end tell
'"""
    process = Popen(
        "osascript -e " + script,
        shell=True,
        close_fds=True,
        stdin=PIPE,
        stdout=PIPE,
        stderr=PIPE,
    )
    stdout_data, stderr_data = process.communicate(None)
    print(stderr_data)
    return stdout_data
def scansource():
    """
    Scan the source of the front Safari document for the NYT headline.

    Every source line mentioning ``NYT_HEADLINE`` is echoed. The first such
    line where a run of word/whitespace characters sits directly before a
    closing ``</NYT_`` tag yields the title, which is printed and returned.
    A marker line that does not match prints ``***``. Returns None when no
    title is found.
    """
    headline_re = re.compile(r"([\s\w]+)</NYT_")
    for line in getsource().split("\n"):
        if "NYT_HEADLINE" not in line:
            continue
        print(line)
        found = headline_re.search(line)
        if found is None:
            print("***")
            continue
        title = found.group(1)
        print(title)
        return title
    return None
def safariopen(url):
    """
    Open a URL in the current tab (front document) of Safari.

    Args:
        url: Address without the scheme, e.g. ``www.nytimes.com/...``;
            ``http://`` is prepended.

    Returns:
        Whatever ``osascript`` wrote to stdout, or None when ``url`` is
        None or empty (nothing is opened in that case).
    """
    if not url:
        # A failed lookup upstream yields None; never navigate to "http://None".
        return None
    # The URL comes from a search result, so escape the two characters that
    # could terminate or alter an AppleScript string literal.
    escaped = url.replace("\\", "\\\\").replace('"', '\\"')
    applescript = (
        'tell application "Safari"\n'
        'set the URL of the front document to "http://%s"\n'
        'end tell' % escaped
    )
    print(applescript)
    # Pass an argv list (no shell) so shell metacharacters in the URL are inert.
    process = Popen(
        ["osascript", "-e", applescript],
        stdin=PIPE,
        stdout=PIPE,
        stderr=PIPE,
        close_fds=True,
    )
    (out, err) = process.communicate(None)
    print(err)
    return out
def parsenewsdiffs(url):
    """
    Given a newsdiffs.org URL, extract the original NYT URL embedded in it.

    Args:
        url: A URL string; None or empty is tolerated.

    Returns:
        The substring starting at ``www.nytimes.com`` (no scheme), or None
        when ``url`` is not a newsdiffs.org URL or embeds no NYT address.
    """
    if not url or "newsdiffs.org" not in url:
        return None
    # Dots are escaped so only the literal host name matches.
    mobj = re.search(r"(www\.nytimes\.com.*)", url)
    return mobj.group(1) if mobj else None
def google(query):
    """
    Run a Google web search for ``query`` and return the NYT URL taken from
    a newsdiffs.org result.

    Args:
        query: Search terms; None, empty and whitespace-only values are
            rejected without touching the network.

    Returns:
        The NYT address (no scheme) parsed from a newsdiffs.org hit, or None
        when the query is blank or no hit could be parsed. When several hits
        parse, the last one wins.
    """
    if query is None or not query.strip():
        return None
    searchurl = urllib.urlencode({'q': query})
    url = 'http://ajax.googleapis.com/ajax/services/search/web?v=1.0&%s' % searchurl
    response = urllib.urlopen(url).read()
    data = json.loads(response)
    # NOTE(review): the service appears to send a null responseData on
    # errors; treat that (and a missing results list) as "no results".
    results = (data.get('responseData') or {}).get('results') or []
    found_url = None
    for r in results:
        link = r['url']
        print(link)
        if "http://www.newsdiffs.org" in link:
            parsed = parsenewsdiffs(link)
            # Keep an earlier good match if this hit has no NYT address.
            if parsed:
                found_url = parsed
    return found_url
def test():
    """
    Manual smoke test: look up a known headline, print the URL that was
    found, open it in Safari and print what safariopen returned.
    """
    found = google("to save a man's life a muddy tug of war with the earth itself")
    print(found)
    opened = safariopen(found)
    print(opened)
def main():
    """
    Reopen the NYT article in Safari's front document without the paywall.

    Reads the headline from the page source, searches for it, and points
    Safari at the resulting URL. If no headline or no search result is
    found, a message is written to stderr and Safari is left untouched.
    """
    title = scansource()
    if not title:
        sys.stderr.write("opennyt: no NYT headline found in the front Safari document\n")
        return
    result = google(title)
    if not result:
        # Without this guard Safari would be sent to "http://None".
        sys.stderr.write("opennyt: no newsdiffs result found for the headline\n")
        return
    safariopen(result)
# Run the lookup only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment