Skip to content

Instantly share code, notes, and snippets.

@chew-z
Created March 8, 2016 11:40
Show Gist options
  • Save chew-z/bbb06a9c9a5da3060e0f to your computer and use it in GitHub Desktop.
Save chew-z/bbb06a9c9a5da3060e0f to your computer and use it in GitHub Desktop.
Scrape a web page to Markdown (in Pythonista on iOS) and put the result into a new draft with Drafts4
# Scrapes text from a web page and puts it in a new draft
import sys
import requests
import html2text
import clipboard
import webbrowser
import urllib
def get_page(url):
    """Download *url* and return the response body as text.

    The request is sent with a Googlebot User-Agent and a Google Referer
    header.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body (``response.text``), or ``None`` when the
        request fails. Failures are reported on stderr rather than raised.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)',
        'Referer': 'https://www.google.com/',
    }
    try:
        # Without a timeout requests waits indefinitely on a stalled server.
        response = requests.get(url, headers=headers, timeout=30)
        return response.text
    except Exception as err:
        # Deliberate best-effort boundary: report the problem and let the
        # caller decide what to do with the None result.
        sys.stderr.write('ERROR: %s\n' % str(err))
        return None
def create_draft(text):
    """Open a ``drafts4://`` x-callback URL that creates a draft from *text*.

    Args:
        text: Draft content. Text strings are encoded as UTF-8 before being
            percent-encoded; byte strings are percent-encoded as they are.

    Returns:
        Whatever ``webbrowser.open`` returns for the generated URL.
    """
    # urllib.quote moved to urllib.parse.quote in Python 3; support both.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    # Only encode text strings: calling .encode() on a Python 2 byte string
    # containing non-ASCII data would raise UnicodeDecodeError.
    if not isinstance(text, bytes):
        text = text.encode('utf-8')

    # safe='' so that '/' is escaped too and cannot be read as URL structure.
    URL = 'drafts4://x-callback-url/create?text=' + quote(text, safe='')
    return webbrowser.open(URL)
# HTML -> Markdown converter, configured to ignore links and images.
h = html2text.HTML2Text()
h.ignore_links = True
h.ignore_images = True

# The page address is the first command-line argument if one was given,
# otherwise the current clipboard contents.
if len(sys.argv) > 1:
    adress = sys.argv[1]
else:
    adress = clipboard.get()
# Treat a missing value as empty and drop surrounding whitespace.
adress = (adress or '').strip()

if not adress:
    sys.stderr.write('ERROR: no URL given as argument or found in clipboard\n')
else:
    try:
        html = get_page(adress)
        # get_page() reports its own failure and returns None; skip the
        # conversion then instead of raising a second, misleading error.
        if html is not None:
            text = h.handle(html)
            create_draft(text)
    except Exception as err:
        sys.stderr.write('ERROR: %s\n' % str(err))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment