Last active
February 16, 2016 01:20
-
-
Save zelark/3433539c2d4e6f08ef87 to your computer and use it in GitHub Desktop.
Web scraping events of a VK group.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import bs4 | |
from requests import Session | |
# Base address of the VK site; group page URLs are built from it.
vk_url = "http://vk.com"
def get_events_urls(group_id, oid):
    """Return the event links listed for a VK group.

    Sends a HEAD request to the group's page, then POSTs the ``show_events``
    action to ``al_groups.php`` with the group page as ``Referer`` and
    collects the ``href`` of every link matched by ``.name a[href]`` in the
    returned markup.

    Args:
        group_id: Path component of the group's page (appended to ``vk_url``).
        oid: Value sent as the ``oid`` form field.

    Returns:
        A list of ``href`` strings, in document order.
    """
    group_url = ''.join([vk_url, '/', group_id])

    # The context manager releases the session's pooled connections even if
    # one of the requests raises.
    with Session() as session:
        # Presumably done so the session picks up cookies before the POST
        # -- TODO confirm.
        session.head(group_url)
        response = session.post(
            url=''.join([vk_url, '/al_groups.php']),
            data={
                'act': 'show_events',
                'al': '1',
                'oid': oid,
            },
            headers={
                'Referer': group_url,
            },
        )

    # NOTE(review): the fixed offsets presumably cut a non-HTML wrapper off
    # the payload; they are brittle -- verify against a current response.
    markup = response.text[59:-46]

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed on the machine.
    soup = bs4.BeautifulSoup(markup, 'html.parser')

    # The selector already requires an href attribute, so indexing is safe.
    return [a['href'] for a in soup.select('.name a[href]')]
def ltrim_event_url(event_url):
    """Strip the leading ``/event`` or ``/`` from a relative event link.

    Args:
        event_url: Relative link such as ``'/event123'`` or ``'/some_name'``.

    Returns:
        The digits following ``/event`` when the link has the form
        ``/event<digits>``; otherwise the link without its leading slash
        (returned unchanged when it has no leading slash).
    """
    prefix = '/event'
    if event_url.startswith(prefix):
        numeric_id = event_url[len(prefix):]
        # Only treat "/event" as a prefix when digits follow it; a name that
        # merely begins with "event" (e.g. "/eventum") must stay intact.
        # NOTE(review): assumes id-style links look like /event<digits>
        # -- confirm against VK's URL scheme.
        if numeric_id.isdigit():
            return numeric_id

    # Drop the leading slash only when it is really there, so a link without
    # one does not lose its first character.
    if event_url.startswith('/'):
        return event_url[1:]
    return event_url
def get_events_ids(events_urls):
    """Map each event link to its trimmed form via ``ltrim_event_url``.

    Args:
        events_urls: Iterable of relative event links.

    Returns:
        A new list with one trimmed entry per input link, in input order.
    """
    trimmed = []
    for link in events_urls:
        trimmed.append(ltrim_event_url(link))
    return trimmed
# Script body: fetch the event links of one group, then trim each link with
# get_events_ids. Both results stay available as module-level names.
events_urls = get_events_urls(group_id='chadaovyatka', oid='-5907489')
events_ids = get_events_ids(events_urls=events_urls)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment