Skip to content

Instantly share code, notes, and snippets.

@klinkin
Last active August 29, 2015 14:19
Show Gist options
  • Save klinkin/2f1115de489aa1beb969 to your computer and use it in GitHub Desktop.
Save klinkin/2f1115de489aa1beb969 to your computer and use it in GitHub Desktop.
lab5.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import re
import arrow
import requests
import itertools
import collections
from pprint import pprint
from utils import get_date, get_age
# Endpoint that returns one page of group members; only the offset varies.
MEMBERS_URL = (
    "http://ec2-52-17-77-210.eu-west-1.compute.amazonaws.com"
    "/method/groups.getMembers"
    "?group_id=55747450&fields=sex,bdate,interests&count=1000&offset={offset}"
)
# The script reads two pages of up to 1000 members each.
PAGE_OFFSETS = (0, 1000)
# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30

# A phrase is a run of 4+ lowercase Cyrillic/Latin letters, whitespace and
# quote characters. The letter "yo" (U+0451) lies outside the a-ya range and
# must be listed explicitly, otherwise words containing it are cut in two.
# A u'' literal (not ur'') keeps the file valid on Python 2.7 and Python 3.
INTEREST_RE = re.compile(u'[а-яёa-z\\s\'"]{4,}', re.U)


def fetch_members():
    """Download every configured page and return the member records as a list.

    Raises:
        requests.HTTPError: if the server answers with an error status.
        KeyError: if the JSON payload has no ``response.items``.
    """
    members = []
    for offset in PAGE_OFFSETS:
        reply = requests.get(MEMBERS_URL.format(offset=offset),
                             timeout=REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx rather than trying to parse an error page.
        reply.raise_for_status()
        members.extend(reply.json()['response']['items'])
    return members


def count_by_sex(users):
    """Count members per gender bucket.

    ``sex == 2`` is counted as male and ``sex == 1`` as female; any other
    value, or a record without the field, lands in the "?" bucket.
    """
    counts = {"male": 0, "female": 0, "?": 0}
    for user in users:
        sex = user.get('sex')
        if sex == 2:
            counts["male"] += 1
        elif sex == 1:
            counts["female"] += 1
        else:
            counts["?"] += 1
    return counts


def age_bucket(age):
    """Return the label of the age range that ``age`` falls into.

    For whole-number ages the ranges are <=10, 11-20, 21-30 and >=31.
    NOTE(review): assumes ages are whole years; a fractional age is put in
    the next range up rather than being dropped -- confirm against get_age.
    """
    if age <= 10:
        return "<=10"
    if age <= 20:
        return "11-20"
    if age <= 30:
        return "21-30"
    return ">=31"


def count_by_age(users, parse_date, to_age):
    """Count members per age bucket, parsing each birth date only once.

    Args:
        users: iterable of member dicts; ``bdate`` may be missing.
        parse_date: callable taking the raw ``bdate`` string and returning a
            sequence whose item 0 is truthy when the date is usable and whose
            item 1 is the value to hand to ``to_age``.
        to_age: callable turning that parsed value into an age.

    Members whose date is not usable are counted under "?".
    """
    counts = {"<=10": 0, "11-20": 0, "21-30": 0, ">=31": 0, "?": 0}
    for user in users:
        parsed = parse_date(user.get('bdate', ''))
        if parsed[0]:
            counts[age_bucket(to_age(parsed[1]))] += 1
        else:
            counts["?"] += 1
    return counts


def extract_interests(text):
    """Return the set of distinct interest phrases found in ``text``.

    The text is lower-cased, split with ``INTEREST_RE`` and each phrase is
    stripped. Whitespace-only matches strip to an empty string and are
    discarded so that "" can never be counted as an interest.
    """
    phrases = (match.strip() for match in INTEREST_RE.findall(text.lower()))
    return {phrase for phrase in phrases if phrase}


def most_common_interest(users):
    """Return the phrase listed by the most members, or None if there is none.

    Each member contributes a phrase at most once, however often it is
    repeated in that member's ``interests`` string.
    """
    tally = collections.Counter()
    for user in users:
        text = user.get('interests')
        if text:
            # Counter.update adds in place; no list is rebuilt per member.
            tally.update(extract_interests(text))
    best = tally.most_common(1)
    return best[0][0] if best else None


def build_stat(users, parse_date, to_age):
    """Assemble the gender / age / top-interest summary for ``users``.

    ``parse_date`` and ``to_age`` are passed through to ``count_by_age``.
    """
    users = list(users)  # iterated three times below
    return {
        "gender": count_by_sex(users),
        "age": count_by_age(users, parse_date, to_age),
        "top_interest": most_common_interest(users),
    }


def main():
    """Fetch the group members and pretty-print their statistics."""
    stat = build_stat(fetch_members(), get_date, get_age)
    pprint(stat)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment