Skip to content

Instantly share code, notes, and snippets.

@filinvadim
Last active December 26, 2016 12:37
Show Gist options
  • Save filinvadim/d896492f6ec4852d84a515e7918e29b7 to your computer and use it in GitHub Desktop.
Save filinvadim/d896492f6ec4852d84a515e7918e29b7 to your computer and use it in GitHub Desktop.
# -*- coding: utf-8 -*-
import urllib3
import bs4
import requests
from lxml import html
import argparse
# Home page of python.org; presumably the page this scraper is meant to
# target -- TODO confirm against callers.
source_url = 'http://python.org'
def get_python_events_bysoup(url):
    """Collect event dates and titles from a page using BeautifulSoup.

    The page is downloaded with urllib3 and parsed with the 'html.parser'
    backend. Events are read from the ``<li>`` items inside the first
    element carrying the ``event-widget`` class.

    Args:
        url (str): URL to be parsed.

    Returns:
        dict: Maps each event date (the part of the ``datetime`` attribute
            before 'T') to the text of the event link. Empty when the page
            has no event widget. Events that share a date overwrite one
            another, so only the last one listed for a date is kept.
    """
    page = urllib3.PoolManager().request(method='GET', url=url).data
    soup = bs4.BeautifulSoup(page, 'html.parser')

    widget = soup.find(class_='event-widget')
    if widget is None:
        # find() returns None when nothing matches; report "no events"
        # instead of failing with AttributeError on the next call.
        return {}

    events = {}
    for item in widget.find_all('li'):
        # Skip list items without a <time> tag or an <a> link rather than
        # crashing on the attribute access.
        if item.time is None or item.a is None:
            continue
        stamp = item.time.get('datetime')
        if stamp is None:
            continue
        events[stamp.split('T')[0]] = item.a.text
    return events
def get_python_events_byxpath(url):
    """Collect event dates and titles from a page using XPath queries.

    The page is downloaded with requests and parsed with lxml. Events are
    read from the ``<li>`` items under the div whose class attribute is
    exactly "medium-widget event-widget last".

    Args:
        url (str): URL to be parsed.

    Returns:
        dict: Maps each event date (the part of the ``datetime`` attribute
            before 'T') to the text of the event link. Events that share a
            date overwrite one another, so only the last one listed for a
            date is kept.
    """
    response = requests.get(url)
    document = html.document_fromstring(response.text)
    items = document.xpath(
        '//div[@class="medium-widget event-widget last"]/div/ul/li')

    events = {}
    for item in items:
        # Read date and title from the same <li>. Collecting them in two
        # page-wide queries and zipping the results would pair the wrong
        # date with a title as soon as one item lacked either piece.
        stamps = item.xpath('time/@datetime')
        titles = item.xpath('a/text()')
        if not stamps or not titles:
            continue
        events[stamps[0].split('T')[0]] = titles[0]
    return events
def main():
    """Parse the command line, run both scrapers and print their results.

    The ``--seek`` option names the URL to scrape. When it is omitted the
    module-level ``source_url`` is used, so the script also works when
    started without arguments (previously ``None`` was handed to the
    scrapers in that case).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--seek', type=str, default=source_url, help='only_link')
    args = parser.parse_args()

    soup_events = get_python_events_bysoup(args.seek)
    xpath_events = get_python_events_byxpath(args.seek)
    print(soup_events, xpath_events)


if __name__ == '__main__':
    main()
@filinvadim
Copy link
Author

import unittest
import dom_parser

class TestForSoupGrabber(unittest.TestCase):
    """Checks for dom_parser.get_python_events_bysoup.

    Every test calls the scraper with a real URL, so the suite presumably
    needs network access to pass.
    """

    def setUp(self):
        """Store the URL that each test passes to the scraper."""
        self.url = 'http://python.org'

    def test_for_dicttype(self):
        """The scraper returns a dictionary."""
        self.assertIsInstance(
            dom_parser.get_python_events_bysoup(self.url), dict)

    def test_for_key_and_value(self):
        """No returned date key or event value is None."""
        for key, value in dom_parser.get_python_events_bysoup(self.url).items():
            self.assertIsNotNone(key)
            self.assertIsNotNone(value)

# Run the test suite when this file is executed directly. The double
# underscores are required: a bare `name` raises NameError.
if __name__ == '__main__':
    unittest.main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment