Created
August 29, 2010 23:16
-
-
Save d33tah/556800 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
# -*- coding: utf-8 -*- | |
""" | |
Library for NK (nk.pl) for Python.
By d33tah, licensed under WTFPL.
""" | |
""" | |
TODO (toclean): | |
* JSON exception for NK_profile.get_friends() | |
TEST: double-get friends | |
TEST: NK_forum.__eq__ | |
""" | |
import os | |
import json | |
import mechanize | |
from lxml import html | |
def debug_get(pynk, url):
    """
    debug_get(pynk,url) - fetches url through pynk's browser and keeps a
    copy of the raw response on disk

    pynk - object exposing a mechanize-style browser as pynk.br
    url  - address to fetch

    The response body is written to debug/<url with every '/' replaced by '_'>
    and returned unchanged.
    """
    data = pynk.br.open(url).read()
    # The dump directory is created on demand so the first call does not fail.
    if not os.path.isdir('debug'):
        os.makedirs('debug')
    dump_path = 'debug/' + url.replace('/', '_')
    # Binary mode keeps the response byte-for-byte; the context manager
    # guarantees the handle is closed even if the write fails.
    with open(dump_path, 'wb') as dump_file:
        dump_file.write(data)
    return data
# Base address of the site; relative hrefs scraped from pages are appended to it.
nklink = "http://nk.pl"
def t(obj):
    """
    t(obj) - shorthand for obj.text_content(), used throughout the scrapers
    to keep the element-to-text conversions short
    """
    text = obj.text_content()
    return text
class NK_photo(object):
    """
    NK_photo - describes a single photo that belongs to a user
    """
    def __init__(self, nk, url, image_url=''):
        self.nk, self.url, self.image_url = nk, url, image_url

    def get_image_url(self):
        """
        get_image_url(self) - returns the address of the image file

        When the address is not known yet, the photo page (self.url) is
        fetched once and the src of the <img id="photo_img"> element is
        remembered for later calls.
        """
        if self.image_url:
            return self.image_url
        page_source = self.nk.br.open(self.url).read()
        photo_nodes = html.fromstring(page_source).xpath('//img [@id="photo_img"]')
        self.image_url = photo_nodes[0].get('src')
        return self.image_url
class NK_forum_post(object):
    """
    NK_forum_post - a single post of a forum thread: when it was written,
    what it says and who wrote it.
    """
    def __init__(self, date, contents, author):
        self.author = author
        self.contents = contents
        self.date = date

    def __repr__(self):
        shown = (self.date, self.author)
        return "<NK_forum_post: %s, %s>" % shown
class NK_forum_thread(object):
    """
    NK_forum_thread - carries information about a specific forum thread.
    """
    def __init__(
            self,
            pynk,
            title,
            url,
            started_author,
            started_time,
            posts_count,
            lastpost_summary,
            lastpost_author,
            lastpost_date):
        self.pynk = pynk
        self.title = title
        self.url = url
        self.started_author = started_author
        self.started_time = started_time
        self.posts_count = posts_count
        self.lastpost_summary = lastpost_summary
        self.lastpost_author = lastpost_author
        self.lastpost_date = lastpost_date

    def __repr__(self):
        if self.title and self.url and self.posts_count:
            return "<NK_forum_thread: title=%s, url=%s [%s]>" % (
                self.title.encode('utf-8'),
                self.url.replace(nklink, ''),
                self.posts_count)
        else:
            return "<NK_forum_thread [%d]>" % id(self)

    def __eq__(self, other):
        """
        Two threads are the same thread when they point at the same url.
        """
        if not isinstance(other, NK_forum_thread):
            return NotImplemented
        return self.url == other.url

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so it is spelled out here.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def get_posts(self):
        """
        get_posts(self) - fetches the list of posts of this thread

        Opens the thread page, turns every <div class="post"> into an
        NK_forum_post (with its author as an NK_profile) and keeps following
        the "next page" link until there is none. Returns the list of posts
        from all visited pages.
        """
        ret = []
        avatar = './/div[@class="avatar_no_js "]'
        self.pynk.br.open(self.url)
        while 1:
            thread_tree = html.fromstring(self.pynk.br.response().read())
            for post in thread_tree.xpath('//div [@class="post"]'):
                author = NK_profile(
                    pynk=self.pynk,
                    name=t(post.find(avatar + '/a/span[@class="avatar_user_name_txt"]')),
                    location=t(post.find(avatar + '/a/span[@class="avatar_user_city"]')),
                    url=nklink + post.find(avatar + '/a').get('href'),
                    friends_count=t(post.find(avatar + '/div[@class="avatar_bar"]')),
                )
                ret.append(NK_forum_post(
                    date=t(post.find('.//div[@class="datetime"]')),
                    contents=t(post.find('.//div[@class="post_content"]')),
                    author=author,
                ))
            # contains() and absolute paths need real XPath; ElementPath's
            # find() rejects both. xpath() also returns an empty list rather
            # than None when there is no further page.
            # NOTE(review): "pna" presumably matches the title of the
            # "next page" link - confirm against the live markup.
            next_links = thread_tree.xpath('//a [contains(@title, "pna")]')
            nextpage = next_links[0].get('href') if next_links else None
            if not nextpage:
                break
            self.pynk.br.open(nklink + nextpage)
        return ret
class NK_forum(object):
    """
    NK_forum - carries information about a specific forum
    """
    def __init__(self, pynk, url, school_name=''):
        self.pynk = pynk
        self.url = url
        self.school_name = school_name

    def __eq__(self, other):
        """
        Two forums are the same forum when they point at the same url.
        """
        if not isinstance(other, NK_forum):
            return NotImplemented
        return self.url == other.url

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so it is spelled out here.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __repr__(self):
        if self.school_name:
            return "<NK_forum: school_name=%s>" % self.school_name
        elif self.url:
            return "<NK_forum: url=%s>" % self.url
        else:
            return "<NK_forum [%d]>" % id(self)

    def get_threads(self):
        """
        get_threads(self) - fetches the list of threads of this forum

        Opens the forum page, turns every thread row into an NK_forum_thread
        and keeps following the "next page" link until there is none.
        Returns the list of threads from all visited pages.
        """
        ret = []
        self.pynk.br.open(self.url)
        while 1:
            forum_tree = html.fromstring(self.pynk.br.response().read())
            threads = forum_tree.xpath('//div [@id="threads"]//tr[contains(@class,"thread")]')
            for thread in threads:
                # find() returns a single element; xpath() would return a
                # list, which has neither .get() nor .text_content().
                title_link = thread.find('.//div[@class="name"]/a')
                author_link = thread.find('.//div[@class="author"]/a')
                ret.append(NK_forum_thread(
                    pynk=self.pynk,
                    title=t(title_link),
                    # NOTE(review): the first character of the href is
                    # dropped before it is appended to nklink, exactly as in
                    # the original code - confirm the href format.
                    url=nklink + title_link.get('href')[1:],
                    started_author=NK_profile(
                        pynk=self.pynk,
                        name=t(author_link),
                        url=author_link.get('href'),
                    ),
                    started_time=t(thread.find('.//div[@class="datetime"]')),
                    posts_count=int(t(thread.find('.//td[@class="count"]'))),
                    lastpost_summary=t(thread.find('.//span[@class="demo"]')),
                    # NOTE(review): the url below comes from the thread
                    # starter's link, as in the original code - verify
                    # whether the last poster has a link of their own.
                    lastpost_author=NK_profile(
                        pynk=self.pynk,
                        name=t(thread.find('.//td[@class="last_post"]//a//span[@class="author"]')).strip('\n '),
                        url=author_link.get('href'),
                    ),
                    lastpost_date=t(thread.find('.//span[@class="datetime"]')),
                ))
            # contains() needs real XPath; ElementPath's find() rejects it.
            # xpath() also yields an empty list instead of None when this is
            # the last page.
            next_links = forum_tree.xpath('.//a [contains(@title, "pna")]')
            nextpage = next_links[0].get('href') if next_links else None
            if not nextpage:
                break
            self.pynk.br.open(nklink + nextpage)
        return ret
class NK_profile_class(object):
    """
    NK_profile_class - carries information about a single class a given user belongs to
    """
    def __init__(self, pynk, url, name, school, year_start, year_finish):
        self.pynk = pynk
        self.url = url
        self.name = name
        self.school = school
        self.year_start = year_start
        self.year_finish = year_finish

    def __eq__(self, other):
        """
        Classes are equal when url, name, school and both years all match.
        """
        if not isinstance(other, NK_profile_class):
            return NotImplemented
        return (
            self.url == other.url and
            self.name == other.name and
            self.school == other.school and
            self.year_start == other.year_start and
            self.year_finish == other.year_finish)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so it is spelled out here.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def get_members(self):
        """
        get_members(self) - fetches the class page (self.url) and returns an
        NK_profile for every expanded student entry found in its register
        """
        ret = []
        tree = html.fromstring(self.pynk.br.open(self.url).read())
        members = tree.xpath('//div [@id="dziennik"]//div [@class="student student_expanded"]')
        for member in members:
            student_link = member.find('.//a[@class="student_link"]')
            ret.append(NK_profile(
                pynk=self.pynk,
                name=t(student_link),
                location=t(member.find('.//div[@class="city_content"]')),
                friends_count=t(member.find('.//div[@class="button"]/span')),
                url=student_link.get('href'),
            ))
        return ret
class NK_profile_shout(object):
    """
    NK_profile_shout - one entry of a user's shoutbox: its text and the
    date/time shown next to it
    """
    def __init__(self, contents='', datetime=''):
        self.contents, self.datetime = contents, datetime

    def __repr__(self):
        length = len(self.contents)
        return "<NK_profile_shout: len(contents)=%d, datetime=%s>" % (length, self.datetime)
class NK_profile_details(object):
    """
    NK_profile_details - the extra facts about a user that are read from the
    profile page: personal data, classes, shouts and avatar addresses
    """
    def __init__(self, age, nick, sex, phone, classes, shouts, avatar_thumb_url, avatar_url):
        # personal data
        self.age, self.nick, self.sex, self.phone = age, nick, sex, phone
        # collections gathered from the page
        self.classes, self.shouts = classes, shouts
        # avatar addresses
        self.avatar_thumb_url, self.avatar_url = avatar_thumb_url, avatar_url
class NK_profile(object):
    """
    NK_profile - carries information about people profiles.
    """
    def __init__(self, pynk,
                 name=None,
                 location=None,
                 url=None,
                 friends_count=None,
                 uid=None):
        """
        Either url or uid is enough to identify a profile; whichever one is
        missing is derived from the other.
        """
        self.pynk = pynk
        self.name = name
        self.location = location
        self.friends_count = friends_count
        self.friends = []
        self.details = None
        if uid and not url:
            self.uid = uid
            # %s (not %d) so that a uid given as text works as well
            self.url = nklink + '/profile/%s' % uid
        elif url and not uid:
            self.url = url
            # the uid is whatever follows the last '/' of the url
            self.uid = url[url.rfind('/') + 1:]
        else:
            self.url = url
            self.uid = uid

    def __repr__(self):
        if self.name and self.location and self.url:
            return "<NK_profile: name=%s, location=%s, url=%s>" % (unicode(self.name), unicode(self.location), self.url)
        else:
            return "<NK_profile [%d]>" % id(self)

    def __eq__(self, other):
        """
        Profiles are equal when their urls match or their uids match.

        A url or uid that is None identifies nothing, so two profiles that
        both lack it are not considered equal (unless they are the very same
        object). uids are compared as text because they may be stored as a
        number (constructor argument) or as a string (cut out of a url).
        """
        if not isinstance(other, NK_profile):
            return NotImplemented
        if self is other:
            return True
        if self.url is not None and self.url == other.url:
            return True
        if self.uid is None or other.uid is None:
            return False
        return str(self.uid) == str(other.uid)

    def __ne__(self, other):
        # Python 2 does not derive != from ==, so it is spelled out here.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def get_friends(self):
        """
        get_friends() - fetches friends list for a given user.

        The list is downloaded once and kept in self.friends; later calls
        return the stored list (an empty stored list triggers a new download).
        Raises Exception when the list cannot be downloaded or decoded.
        """
        if not self.friends:
            json_url = 'http://nk.pl/friends_list/%s/575/0/0?t=%s' % (self.uid, self.pynk.basic_auth)
            try:
                # the first three characters of the response are skipped
                # before decoding - presumably a non-JSON prefix (TODO confirm)
                json_data = json.loads(self.pynk.br.open(json_url).read()[3:])
            except Exception:
                # "except Exception" lets KeyboardInterrupt/SystemExit through
                raise Exception("NK_profile.get_friends() failed for '%s'" % json_url)
            # the response holds parallel lists, one entry per friend
            for i, uid in enumerate(json_data["UID"]):
                self.friends.append(NK_profile(
                    pynk=self.pynk,
                    name="%s %s" % (json_data["FIRST_NAME"][i], json_data["LAST_NAME"][i]),
                    location=json_data["CITY"][i],
                    url=nklink + "/profile/%s" % uid,
                    friends_count=json_data["FRIENDS_COUNT"][i],
                ))
        return self.friends

    @staticmethod
    def _split_class_label(label):
        """
        Splits a class label into (name, year_start, year_finish).

        A label of the form "name (start-finish)" yields the three parts as
        text; any other label yields the first ' ('-separated piece as the
        name and 0 for both years.
        """
        parts = label.split(' (')
        if len(parts) == 2:
            years = parts[1].split('-')
            # [:-1] drops the closing parenthesis after the finish year
            return parts[0], years[0], years[1][:-1]
        return parts[0], 0, 0

    def get_details(self):
        """
        get_details() - loads user's profile page and returns the details

        Besides building the NK_profile_details object, the page is also used
        to fill in self.name, self.location and self.friends_count. The page
        is read through pynk.get_url(); the result is kept in self.details
        and reused once all of those fields are known.
        """
        if self.details and all((self.name, self.location, self.friends_count)):
            return self.details

        tree = html.fromstring(self.pynk.get_url(self.url))

        def get_detail(param):
            # text of the cell that follows the label cell containing param
            label_cells = tree.xpath(
                '//table [@class="profile_info_box"]//td [contains(@class, "label")'
                'and contains(., "%s")]' % param)
            return t(label_cells[0].getnext())

        shouts = []
        for shout in tree.xpath('//div [@id="comments"]//table [@class="comment_table"]'):
            shouts.append(NK_profile_shout(
                contents=t(shout.find_class('comment_content')[0]),
                datetime=t(shout.find_class('datetime')[0]),
            ))

        classes = []
        for school in tree.xpath('//li [@class="school"]'):
            school_name = t(school.xpath('.//div//a')[0])
            for class_node in school.xpath('.//li//a [@class="user_class"]'):
                name, year_start, year_finish = self._split_class_label(t(class_node))
                classes.append(NK_profile_class(
                    self.pynk, class_node.get('href'), name, school_name, year_start, year_finish))

        self.name = "%s %s" % (get_detail(u'Imię'), get_detail('Nazwisko'))
        self.location = get_detail(u'Miejscowość')
        # the first two characters of the counter text are skipped, as in the
        # original code - presumably decoration around the number (TODO confirm)
        self.friends_count = t(tree.xpath('//div[@class="ikonki"]/a[contains(@title," znajomych")]/span')[0])[2:]

        avatar_box = '//div [@class="profil_avatar"]//div [@class="avatar "]//'
        avatar_url = nklink + tree.xpath(avatar_box + u'a [contains(@title,"Pokaż profil")]')[0].get('href')
        avatar_thumb_url = tree.xpath(avatar_box + u'img [contains(@alt,"Pokaż profil")]')[0].get('src')

        self.details = NK_profile_details(
            # strip() removes the characters ' ', 'l', 'a', 't' from both ends
            age=get_detail('Wiek').strip(' lat'),
            nick=get_detail('Pseudonim'),
            sex=get_detail(u'Płeć'),
            phone=get_detail('Telefon'),
            classes=classes,
            shouts=shouts,
            avatar_thumb_url=avatar_thumb_url,
            avatar_url=avatar_url,
        )
        return self.details
class PyNK(object):
    """
    PyNK - Nasza-Klasa Python API.
    """
    def __init__(self):
        self.br = mechanize.Browser()
        user_agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)'
        self.br.addheaders = [("User-agent", user_agent)]
        self.br.set_handle_robots(False)
        #self.br.viewing_html=lambda: True
        # filled in by login(); defined here so they always exist
        self.basic_auth = None
        self.my_profile = None
        self.watched_forums = []

    def login(self, username, password):
        """
        login(self,username,password) - logs in using passed parameters

        Submits the first form of the start page and then stores the
        basic_auth cookie value, the logged in user's profile and the list of
        watched forums on the object. Raises Exception when the page returned
        after submitting does not contain the greeting text.
        """
        self.br.open(nklink)
        self.br.select_form(nr=0)
        self.br.form["login"] = username
        self.br.form["password"] = password
        self.br.submit()
        dashboard_html = self.br.response().read()
        if 'Witaj' not in dashboard_html:
            raise Exception('Blad logowania, zle haslo?')
        self.basic_auth = self.br._ua_handlers['_cookies'].cookiejar._cookies[".nk.pl"]["/"]["basic_auth"].value
        self.my_profile = self.get_my_profile(dashboard_html)
        self.watched_forums = self.get_my_watched_forums(dashboard_html)

    def get_my_profile(self, dashboard_html):
        """
        get_my_profile(self,dashboard_html) - builds the NK_profile of the
        logged in user from the profile box of the dashboard page
        """
        box_tree = html.fromstring(dashboard_html)
        box = box_tree.xpath('.//div[contains(@class,"profile_box")]//div[@class="avatar_no_js "]')[0]
        return NK_profile(
            pynk=self,
            name=t(box.find('./a/span[@class="avatar_user_name_txt"]')),
            location=t(box.find('./a/span[@class="avatar_user_city"]')),
            url=nklink + box.find('./a').get('href'),
            friends_count=t(box.find('./div[@class="avatar_bar"]//span')),
        )

    def get_my_watched_forums(self, dashboard_html, unread_only=False):
        """
        get_my_watched_forums(self, dashboard_html, unread_only=False) -
        extracts the logged in user's watched forums from the dashboard page.

        With unread_only=True only entries whose <span> has a class
        containing "unread" are returned.
        """
        ret = []
        for entry in html.fromstring(dashboard_html).xpath('//ul [@id="forum_max"]//li'):
            if unread_only:
                marker = entry.find('span')
                css_class = marker.get('class') if marker is not None else None
                # a plain substring test: str.find() returns 0 only for a
                # match at the very start, so "not find(...)" missed classes
                # where "unread" is not the first token, and it crashed when
                # the span or its class attribute was absent
                if 'unread' not in (css_class or ''):
                    continue
            link = entry.find('a')
            ret.append(NK_forum(
                pynk=self,
                school_name=t(link),
                url=nklink + link.get('href'),
            ))
        return ret

    def get_url(self, url, cached=True):
        """
        get_url(self,url,cached=True) - fetches data either from the cache or from URL

        With cached=False the page is always downloaded. Otherwise the copy
        stored under cache/<url with every '/' replaced by '_'> is returned
        when it exists; if not, the page is downloaded, stored there and
        returned.
        """
        if not cached:
            return self.br.open(url).read()
        filename = 'cache/' + url.replace('/', '_')
        if os.path.exists(filename):
            # binary mode: the cache holds the raw response bytes
            with open(filename, 'rb') as cache_file:
                return cache_file.read()
        ret = self.br.open(url).read()
        # the cache directory is created on demand
        if not os.path.isdir('cache'):
            os.makedirs('cache')
        with open(filename, 'wb') as cache_file:
            cache_file.write(ret)
        return ret
if __name__ == '__main__':
    # Demo script: logs in, lists friends and saves every friend's avatar
    # thumbnail as a PNG under photos/. The quoted blocks below are further
    # demos that are switched off; remove the surrounding quotes to run one.
    from sys import exit
    # config.py is expected to define just the "login" and "password" variables
    from config import login, password

    nk = PyNK()
    nk.login(login, password)
    print(nk.basic_auth)
    my_friends = nk.my_profile.get_friends()

    """
    #his_friends = NK_profile(pynk=nk,url='http://nk.pl/profile/9766667').get_friends()
    for friend in my_friends:
        print friend.name
    #print my_friends.age
    """

    #uncomment the following for the simple mutual-friends demo
    """
    for first in my_friends:
        print "Sprawdzam "+first.name
        first_friends = first.get_friends()
        for second in first_friends:
            if second in my_friends:
                print "=> "+second.name
    """

    #checks out all the posts of the first thread in first watched forums
    """
    for forum in nk.watched_forums:
        print unicode(forum)
        threads = forum.get_threads()
        for thread in threads:
            if thread.posts_count < 1:
                continue
            print "Wybralem watek: %s" % thread
            posts = thread.get_posts()
            print "Znalazlem postow: %s" % thread.posts_count
            exit(0)
    """

    """
    my_classes = nk.my_profile.get_details().classes
    for my_friend in my_friends:
        print "=>Przetwarzam %s" % my_friend
        my_friend_classes = my_friend.get_details().classes
        for my_friend_class in my_friend_classes:
            if my_friend_class in my_classes:
                print "%s byl z %s w %s" % (my_friend.name, nk.my_profile.name, my_friend_class.name)
    """

    """
    for my_friend in my_friends:
        my_friend_classes = my_friend.get_details().classes
        for my_friend_class in my_friend_classes:
            if nk.my_profile in my_friend_class.get_members():
                print "Znalazlem %s w klasie %s z %s" % (my_friend.name, my_friend_class.name, nk.my_profile.name)
    """

    """
    for my_friend in my_friends:
        phone_no = my_friend.get_details().phone
        if phone_no and phone_no != u'\xa0' and phone_no != 'ukryty':
            print "%s => %s" % (my_friend.name, phone_no)
    """

    from PIL import Image

    # the target directory is created on demand so the first write succeeds
    if not os.path.isdir('photos'):
        os.makedirs('photos')
    for my_friend in my_friends:
        print('%s ( %s )' % (my_friend.name, my_friend.url))
        url = my_friend.get_details().avatar_thumb_url
        filename = 'photos/' + url.replace('/', '_')
        # the converted .png doubles as the "already downloaded" marker
        if not os.path.exists(filename + '.png'):
            # image data is binary; the handle is closed before PIL reopens it
            with open(filename, 'wb') as raw_file:
                raw_file.write(nk.br.open(url).read())
            Image.open(filename).save(filename + '.png')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment