Last active
August 29, 2015 14:05
-
-
Save neoneo40/4e970028937603e1c86f to your computer and use it in GitHub Desktop.
get_title
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# -*- coding: utf-8 | |
import os | |
import requests | |
from bs4 import BeautifulSoup | |
import re | |
import sys | |
# Python 2 only: make UTF-8 the implicit str/unicode conversion codec so that
# non-ASCII page titles can be formatted and printed without a
# UnicodeEncodeError. Python 3 has neither a builtin `reload` nor
# `sys.setdefaultencoding`, and its default encoding is already UTF-8, so the
# hack is skipped there instead of raising NameError.
if sys.version_info[0] < 3:
    # setdefaultencoding is removed from `sys` during interpreter start-up;
    # reloading the module makes it available again.
    reload(sys)  # noqa: F821 (builtin on Python 2)
    sys.setdefaultencoding('utf-8')
def get_title(url):
    """Print the title of the page at ``url`` as a Markdown link.

    The page is fetched, the text of its ``<title>`` element is read, the
    characters ``[``, ``]``, ``*``, ``~`` and ``;`` are backslash-escaped,
    and ``[title](url)`` is written to standard output. Nothing is printed
    when the response has an error status or the page has no ``<title>``.

    The original author notes that this is bound to a keyboard shortcut so
    that a copied URL can be turned into a link conveniently.

    Example:
        input:  https://docs.python.org/2/tutorial/errors.html
        output: [8. Errors and Exceptions — Python v2.7.8 documentation](https://docs.python.org/2/tutorial/errors.html)

    Args:
        url: Address of the page whose title should be extracted.

    Raises:
        requests.exceptions.RequestException: If the request cannot be made
            or does not complete within the timeout.
    """
    # Without a timeout an unresponsive server would hang the script forever.
    resp = requests.get(url, timeout=10)
    if not resp.ok:
        return

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and bs4 does not warn).
    soup = BeautifulSoup(resp.text, 'html.parser')
    title_tag = soup.find('title')
    if title_tag is None:
        # Page has no <title>; there is nothing to build a link from.
        return

    # Titles often contain newlines or padding, which would break the
    # single-line Markdown link; collapse all whitespace runs to one space.
    title = ' '.join(title_tag.text.split())
    # Backslash-escape [, ], *, ~ and ; so they are not read as Markdown.
    title = re.sub(r'([\[\]*~;])', r'\\\1', title)
    # Call form of print works on both Python 2 and Python 3.
    print('[{title}]({url})'.format(title=title, url=url))
# The URL to process is read from the KMVAR_temp environment variable
# (presumably exported by Keyboard Maestro, judging by the KMVAR_ prefix --
# confirm against the macro that launches this script).
url = os.getenv('KMVAR_temp')
if not url:
    # os.getenv returns None when the variable is unset; fail with a clear
    # message instead of an "invalid URL" traceback from requests.
    sys.exit('KMVAR_temp is not set; no URL to fetch a title for')
get_title(url)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment