-
-
Save gwthompson/b4fd77a860c8fb0ee3caecdf7302bd6f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"from bs4 import BeautifulSoup\n", | |
"import requests" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"url = requests.get('https://bolajiolajide.github.io')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"source = BeautifulSoup(url.text, 'html.parser')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 4, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"post_feed = source.find('div', class_=\"post-feed\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 5, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"posts = post_feed.find_all('article', class_=\"post-card\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 6, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"single_post = posts[0]" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 7, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"title = single_post.find('h2', class_='post-card-title')" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 8, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"u'Recursion For Beginners'" | |
] | |
}, | |
"execution_count": 8, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"title.text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 9, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"excerpt = single_post.find('section', class_=\"post-card-excerpt\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 10, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"u'What is recursion? Recursion is a method where the solution to a problem depends on solutions to smaller instances of the same problem (as opposed to iteration). The approach can be applied to'" | |
] | |
}, | |
"execution_count": 10, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"excerpt.p.text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 11, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"author = single_post.find('span', class_=\"post-card-author\")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 12, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"u'Bolaji Olajide'" | |
] | |
}, | |
"execution_count": 12, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"author.a.text" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 13, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"def get_post_details(article):\n", | |
" title = article.find('h2', class_='post-card-title').text\n", | |
" excerpt = article.find('section', class_='post-card-excerpt').p.text\n", | |
" author = article.find('span', class_='post-card-author').a.text\n", | |
" return {\n", | |
" \"title\": title,\n", | |
" \"excerpt\": excerpt,\n", | |
" \"author\": author\n", | |
" }" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 14, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"# Making use of list comprehension instead of loops\n", | |
"blog_posts = [get_post_details(post) for post in posts ];" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 15, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'author': u'Bolaji Olajide',\n", | |
" 'excerpt': u'What is recursion? Recursion is a method where the solution to a problem depends on solutions to smaller instances of the same problem (as opposed to iteration). The approach can be applied to',\n", | |
" 'title': u'Recursion For Beginners'},\n", | |
" {'author': u'Bolaji Olajide',\n", | |
" 'excerpt': u'One of the amazing things about the Python language is the use of decorators to alter functionality. Decorators are used to extend functions without actually modifying them directly.',\n", | |
" 'title': u'Creating a Simple Python Decorator'},\n", | |
" {'author': u'Bolaji Olajide',\n", | |
" 'excerpt': u'Learning is the process of acquiring new or modifying existing knowledge, behaviors, skills, values, or preferences.',\n", | |
" 'title': u'The Art of Learning\\u200a\\u2014\\u200aMy Learning Process'}]" | |
] | |
}, | |
"execution_count": 15, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"blog_posts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 16, | |
"metadata": {}, | |
"outputs": [], | |
"source": [ | |
"blog_posts = []\n", | |
"for post in posts:\n", | |
" blog_posts.append(get_post_details(post))" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 17, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/plain": [ | |
"[{'author': u'Bolaji Olajide',\n", | |
" 'excerpt': u'What is recursion? Recursion is a method where the solution to a problem depends on solutions to smaller instances of the same problem (as opposed to iteration). The approach can be applied to',\n", | |
" 'title': u'Recursion For Beginners'},\n", | |
" {'author': u'Bolaji Olajide',\n", | |
" 'excerpt': u'One of the amazing things about the Python language is the use of decorators to alter functionality. Decorators are used to extend functions without actually modifying them directly.',\n", | |
" 'title': u'Creating a Simple Python Decorator'},\n", | |
" {'author': u'Bolaji Olajide',\n", | |
" 'excerpt': u'Learning is the process of acquiring new or modifying existing knowledge, behaviors, skills, values, or preferences.',\n", | |
" 'title': u'The Art of Learning\\u200a\\u2014\\u200aMy Learning Process'}]" | |
] | |
}, | |
"execution_count": 17, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"blog_posts" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": null, | |
"metadata": {}, | |
"outputs": [], | |
"source": [] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 2", | |
"language": "python", | |
"name": "python2" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 2 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython2", | |
"version": "2.7.14" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment