Last active
August 29, 2015 14:19
-
-
Save klinkin/2f1115de489aa1beb969 to your computer and use it in GitHub Desktop.
lab5.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
import re | |
import arrow | |
import requests | |
import itertools | |
import collections | |
from pprint import pprint | |
from utils import get_date, get_age | |
# Not referenced anywhere in this script; kept so the module-level name
# still exists.  NOTE(review): presumably a reference date for age
# calculation -- confirm whether utils.get_age should receive it.
FIRST_APRIL = arrow.Arrow(2015, 4, 1)

# The member list is fetched as two pages of up to 1000 entries each
# (offset=0 and offset=1000).
url1 = "http://ec2-52-17-77-210.eu-west-1.compute.amazonaws.com/method/groups.getMembers?group_id=55747450&fields=sex,bdate,interests&count=1000&offset=0"
url2 = "http://ec2-52-17-77-210.eu-west-1.compute.amazonaws.com/method/groups.getMembers?group_id=55747450&fields=sex,bdate,interests&count=1000&offset=1000"


def _fetch_items(url):
    """Return the ``response.items`` list from one groups.getMembers page.

    Raises ``requests.HTTPError`` on a non-2xx reply instead of failing
    later with an unrelated JSON/KeyError, and uses a timeout so an
    unresponsive server cannot hang the script forever.
    """
    reply = requests.get(url, timeout=30)
    reply.raise_for_status()
    return reply.json()['response']['items']


# All member dicts from both pages, in page order.
r = _fetch_items(url1) + _fetch_items(url2)

# --- Gender -----------------------------------------------------------------
# The script treats sex == 2 as male and sex == 1 as female; every other
# value is counted as unknown.  Real lists (not generators) so the groups can
# be inspected more than once.
male = [user for user in r if user['sex'] == 2]
female = [user for user in r if user['sex'] == 1]
undef = [user for user in r if user['sex'] not in (1, 2)]

# --- Age --------------------------------------------------------------------
# get_date() is indexed as (is_valid, date): element 0 decides whether the
# birth date is usable, element 1 is handed to get_age().
# NOTE(review): this calls get_date() once per user instead of several times;
# that assumes get_date() is a pure function -- confirm in utils.
bdate_users = []
undef_bdate_users = []
le_10 = []
le_20 = []
le_30 = []
ge_31 = []
for user in r:
    parsed = get_date(user.get('bdate', ''))
    if not parsed[0]:
        undef_bdate_users.append(user)
        continue
    bdate_users.append(user)
    age = get_age(parsed[1])
    # Same boundary tests as before, so an age that matched no bucket
    # previously (e.g. a non-integer between 10 and 11) still matches none.
    if age <= 10:
        le_10.append(user)
    elif 11 <= age <= 20:
        le_20.append(user)
    elif 21 <= age <= 30:
        le_30.append(user)
    elif 31 <= age:
        ge_31.append(user)

# --- Interests --------------------------------------------------------------
# Non-empty free-text "interests" fields only.
interests = [user['interests'] for user in r if user.get('interests', False)]

# Runs of 4+ Cyrillic/Latin lowercase letters, whitespace or quotes.  The
# pattern text is identical to the former ur'...' literal; it is spelled as a
# plain u'' string because the ``ur`` prefix is a SyntaxError on Python 3.
p = re.compile(u'[а-яa-z\\s\\\'\\"]{4,}', re.U)

interes = []
for interests_str in interests:
    # A set so each interest counts at most once per user.  Whitespace-only
    # matches strip down to '' and are dropped, otherwise the empty string
    # could win as the "top interest".
    user_i = set(chunk.strip() for chunk in p.findall(interests_str.lower()))
    user_i.discard(u'')
    # extend() instead of ``interes = interes + ...`` avoids re-copying the
    # whole accumulated list for every user.
    interes.extend(user_i)

# Either [(interest, count)] or [] when nobody listed any interests.
top_interest = collections.Counter(interes).most_common(1)

# --- Report -----------------------------------------------------------------
stat = {
    "gender": {
        "male": len(male),
        "female": len(female),
        "?": len(undef),
    },
    "age": {
        "<=10": len(le_10),
        "11-20": len(le_20),
        "21-30": len(le_30),
        ">=31": len(ge_31),
        "?": len(undef_bdate_users),
    },
    # None rather than an IndexError when there are no interests at all.
    "top_interest": top_interest[0][0] if top_interest else None,
}
pprint(stat)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment