Skip to content

Instantly share code, notes, and snippets.

@necroshine0
Last active August 5, 2022 12:16
Show Gist options
  • Save necroshine0/321ca44273b6cb04d9673ddb6ed35ddf to your computer and use it in GitHub Desktop.
Save necroshine0/321ca44273b6cb04d9673ddb6ed35ddf to your computer and use it in GitHub Desktop.
How to collect students' VK profiles via Python
import json
import os

import pandas as pd
import vk
# Use the VK API for data collection; an access token is needed.
# The token is read from the VK_TOKEN environment variable so that a real
# credential never has to be stored in the script itself; the 'yourtoken'
# placeholder is used when the variable is not set.
TOKEN = os.environ.get('VK_TOKEN', 'yourtoken')
vk_api = vk.API(access_token=TOKEN, v='5.131')
# Find people of a given age who have a photo, from Russia, Moscow.
# docs: https://dev.vk.com/method/users.search
def get_people(age, man=2):
    """Run one ``users.search`` request and return the found profiles.

    Args:
        age: Exact age to search for; sent as both ``age_from`` and ``age_to``.
        man: Value sent as the ``sex`` filter. The rest of the script treats
            2 as male and 1 as female.

    Returns:
        The ``'items'`` entry of the ``users.search`` response.
    """
    query = dict(
        sort=0,
        hometown='Москва',
        country=1,
        city=1,
        online=0,
        offset=15,
        sex=man,
        count=1000,
        has_photo=1,
        fields='education,sex,bdate,city',
        age_from=age,
        age_to=age,
    )
    response = vk_api.users.search(**query)
    return response['items']
# Collect the search results for every age from 18 to 24 inclusive. For each
# age the men are requested first (get_people's default sex code), then the
# women (sex code 1), and both result lists are appended to `people`.
people = []
for age in range(18, 25):
    people.extend(get_people(age))
    people.extend(get_people(age, man=1))
# Validate: keep only profiles that expose a city and a non-zero university id.
# Each kept profile is copied before its numeric id is replaced by a profile
# URL, so the dicts in `people` stay untouched and re-running this section
# cannot prefix the URL a second time.
edu = [
    {**p, 'id': 'https://vk.com/id' + str(p['id'])}
    for p in people
    if 'city' in p and 'university' in p and p['university'] > 0
]

df = pd.DataFrame(edu)
df['name'] = df.first_name + ' ' + df.last_name
df['city'] = 'Москва'

# Convert birthday (date) to age (number).
# VK reports bdate day-first ("D.M.YYYY"), or as "D.M" when the birth year is
# hidden. The explicit format keeps day and month from being swapped on
# ambiguous dates; values without a year (or missing entirely) become NaT.
df['bdate'] = pd.to_datetime(df['bdate'], format='%d.%m.%Y', errors='coerce')
today = pd.Timestamp('today')
born = df['bdate'].dt
# True where this year's birthday has not happened yet.
birthday_pending = (born.month > today.month) | (
    (born.month == today.month) & (born.day > today.day)
)
# Age in completed calendar years. The nullable Int64 dtype lets rows without
# a usable birth date stay empty instead of failing the integer cast.
df['age'] = (today.year - born.year - birthday_pending.astype(int)).astype('Int64')

df.replace({'sex': {2: 'М', 1: 'Ж'}}, inplace=True)

# Drop the raw fields that are not needed in the export. errors='ignore' is
# used because a column exists only if at least one returned profile had that
# field, so any of these may be absent.
df.drop(
    columns=[
        'university', 'track_code', 'can_access_closed', 'bdate', 'is_closed',
        'faculty', 'education_form', 'education_status', 'first_name', 'last_name',
    ],
    errors='ignore',
    inplace=True,
)
df.to_csv('edu.csv')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment