Skip to content

Instantly share code, notes, and snippets.

@d33tah
Created August 29, 2010 23:16
Show Gist options
  • Save d33tah/556800 to your computer and use it in GitHub Desktop.
Save d33tah/556800 to your computer and use it in GitHub Desktop.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
NK (nk.pl) library for Python.
BY d33tah, LICENSED UNDER WTFPL.
"""
"""
TODO (toclean):
* JSON exception for NK_profile.get_friends()
TEST: double-get friends
TEST: NK_forum.__eq__
"""
import os
import json
import mechanize
from lxml import html
def debug_get(pynk,url):
    """
    debug_get(pynk,url) - fetches url through pynk.br and stores a copy of
    the response body under debug/

    The dump file is named after the URL with every '/' replaced by '_'.
    The debug/ directory is expected to exist already.
    Returns the fetched data.
    """
    # Uncomment to trace every fetch:
    #print "DEBUG: fetching %s" % url
    data = pynk.br.open(url).read()
    # Binary mode keeps the body exactly as received, and the context manager
    # closes the dump file immediately instead of leaking the handle.
    with open('debug/'+url.replace('/','_'),'wb') as dump:
        dump.write(data)
    return data
# Base URL of the NK site; links scraped from pages are appended to it and
# NK_forum_thread.__repr__ strips it from thread URLs.
nklink = "http://nk.pl"
def t(obj):
    """
    t(obj) - shorthand that returns whatever obj.text_content() returns
    """
    text = obj.text_content()
    return text
class NK_photo(object):
    """
    NK_photo - describes a single photo of a user

    nk        - object whose .br browser is used to fetch the photo page
    url       - address of the photo page
    image_url - address of the image itself; looked up lazily when empty
    """
    def __init__(self,nk,url,image_url=''):
        self.nk, self.url, self.image_url = nk, url, image_url
    def get_image_url(self):
        """
        get_image_url(self) - returns the image address, reading it from the
        'src' of the <img id="photo_img"> element on the photo page the
        first time it is needed
        """
        if self.image_url:
            return self.image_url
        page = self.nk.br.open(self.url).read()
        image_node = html.fromstring(page).xpath('//img [@id="photo_img"]')[0]
        self.image_url = image_node.get('src')
        return self.image_url
class NK_forum_post(object):
    """
    NK_forum_post - a single post of a forum thread: its date, its contents
    and its author.
    """
    def __init__(self,date,contents,author):
        self.author = author
        self.contents = contents
        self.date = date
    def __repr__(self):
        # Only the date and the author are shown; contents are left out.
        summary = (self.date, self.author)
        return "<NK_forum_post: %s, %s>" % summary
class NK_forum_thread(object):
    """
    NK_forum_thread - carries information about a specific forum thread.

    The attributes mirror the constructor arguments: the owning pynk object,
    the thread title and url, who started it and when, the number of posts,
    and the summary, author and date of the last post.
    Two threads are considered equal when their urls are equal.
    """
    def __init__(
            self,
            pynk,
            title,
            url,
            started_author,
            started_time,
            posts_count,
            lastpost_summary,
            lastpost_author,
            lastpost_date):
        self.pynk = pynk
        self.title = title
        self.url = url
        self.started_author = started_author
        self.started_time = started_time
        self.posts_count = posts_count
        self.lastpost_summary = lastpost_summary
        self.lastpost_author = lastpost_author
        self.lastpost_date = lastpost_date
    def __repr__(self):
        if self.title and self.url and self.posts_count:
            return "<NK_forum_thread: title=%s, url=%s [%s]>" % (
                self.title.encode('utf-8'),
                self.url.replace(nklink,''),
                self.posts_count)
        else:
            return "<NK_forum_thread [%d]>" % id(self)
    def __eq__(self,other):
        # The comparison needs both operands; anything that is not a thread
        # is handed back to Python via NotImplemented.
        if not isinstance(other,NK_forum_thread):
            return NotImplemented
        return self.url == other.url
    def __ne__(self,other):
        # Python 2 does not derive != from ==, so it is spelled out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
    def __hash__(self):
        # Kept consistent with __eq__: equal urls give equal hashes.
        return hash(self.url)
    def get_posts(self):
        """
        get_posts(self) - fetches a list of posts in this thread

        Opens self.url, collects an NK_forum_post for every
        <div class="post"> and keeps following the link whose title
        contains "pna" until no such link (or no href) is found.
        Returns the list of posts from all visited pages.
        """
        ret = []
        self.pynk.br.open(self.url)
        while 1:
            thread_tree = html.fromstring(self.pynk.br.response().read())
            for post in thread_tree.xpath('//div [@class="post"]'):
                ret.append(NK_forum_post(
                    date = t(post.find('.//div[@class="datetime"]')),
                    contents = t(post.find('.//div[@class="post_content"]')),
                    author = NK_profile(
                        pynk = self.pynk,
                        name = t(post.find('.//div[@class="avatar_no_js "]/a/span[@class="avatar_user_name_txt"]')),
                        location = t(post.find('.//div[@class="avatar_no_js "]/a/span[@class="avatar_user_city"]')),
                        url = nklink+post.find('.//div[@class="avatar_no_js "]/a').get('href'),
                        friends_count = t(post.find('.//div[@class="avatar_no_js "]/div[@class="avatar_bar"]')),
                    )
                ))
            # contains() and absolute paths need real XPath, hence xpath()
            # rather than find(); an empty result means this is the last page.
            next_links = thread_tree.xpath('//a [contains(@title, "pna")]')
            nextpage = next_links[0].get('href') if next_links else None
            if not nextpage:
                break
            self.pynk.br.open(nklink+nextpage)
        return ret
class NK_forum(object):
    """
    NK_forum - carries information about a specific forum

    pynk        - object whose .br browser is used for fetching pages
    url         - address of the forum
    school_name - optional name used by __repr__
    Two forums are considered equal when their urls are equal.
    """
    def __init__(self,pynk,url,school_name=''):
        self.pynk = pynk
        self.url = url
        self.school_name = school_name
    def __eq__(self,other):
        if not isinstance(other,NK_forum):
            return NotImplemented
        return self.url == other.url
    def __ne__(self,other):
        # Python 2 does not derive != from ==, so it is spelled out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
    def __hash__(self):
        # Kept consistent with __eq__: equal urls give equal hashes.
        return hash(self.url)
    def __repr__(self):
        if self.school_name:
            return "<NK_forum: school_name=%s>" % self.school_name
        elif self.url:
            return "<NK_forum: url=%s>" % self.url
        else:
            return "<NK_forum [%d]>" % id(self)
    def get_threads(self):
        """
        get_threads(self) - fetches a list of threads

        Opens self.url, builds an NK_forum_thread for every thread row found
        inside <div id="threads"> and keeps following the link whose title
        contains "pna" until no such link (or no href) is found.
        Returns the list of threads from all visited pages.
        """
        ret = []
        self.pynk.br.open(self.url)
        while 1:
            forum_tree = html.fromstring(self.pynk.br.response().read())
            threads = forum_tree.xpath('//div [@id="threads"]//tr[contains(@class,"thread")]')
            for thread in threads:
                # find() returns a single element (xpath() returns a list,
                # which has neither .get() nor .text_content()).
                title_link = thread.find('.//div[@class="name"]/a')
                author_link = thread.find('.//div[@class="author"]/a')
                ret.append(NK_forum_thread(
                    pynk = self.pynk,
                    title = t(title_link),
                    # NOTE(review): the first character of the href is dropped
                    # before it is appended to nklink - confirm against the
                    # real markup that this yields a valid URL.
                    url = nklink+title_link.get('href')[1:],
                    started_author = NK_profile(pynk=self.pynk,
                        name=t(author_link),
                        url=author_link.get('href')
                    ),
                    started_time = t(thread.find('.//div[@class="datetime"]')),
                    posts_count = int(t(thread.find('.//td[@class="count"]'))),
                    lastpost_summary = t(thread.find('.//span[@class="demo"]')),
                    # NOTE(review): the url below is read from the same
                    # "author" link as started_author - presumably it should
                    # come from the last_post cell; verify.
                    lastpost_author = NK_profile(pynk=self.pynk,
                        name=t(thread.find('.//td[@class="last_post"]//a//span[@class="author"]')).strip('\n '),
                        url=author_link.get('href')
                    ),
                    lastpost_date = t(thread.find('.//span[@class="datetime"]')),
                ))
            # contains() needs real XPath, hence xpath() rather than find();
            # an empty result means this is the last page.
            next_links = forum_tree.xpath('.//a [contains(@title, "pna")]')
            nextpage = next_links[0].get('href') if next_links else None
            if not nextpage:
                break
            self.pynk.br.open(nklink+nextpage)
        return ret
class NK_profile_class(object):
    """
    NK_profile_class - carries an information about a single class a given user belongs to

    Stores the class page url, the class name, the school name and the
    start/finish years. Two classes are equal when all five of these match.
    """
    def __init__(self,pynk,url,name,school,year_start,year_finish):
        self.pynk = pynk
        self.url = url
        self.name = name
        self.school = school
        self.year_start = year_start
        self.year_finish = year_finish
    def _key(self):
        # The fields that define identity; shared by __eq__ and __hash__.
        return (self.url,self.name,self.school,self.year_start,self.year_finish)
    def __eq__(self,other):
        if not isinstance(other,NK_profile_class):
            return NotImplemented
        return self._key() == other._key()
    def __ne__(self,other):
        # Python 2 does not derive != from ==, so it is spelled out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
    def __hash__(self):
        return hash(self._key())
    def get_members(self):
        """
        get_members(self) - fetches the class page (self.url) and returns an
        NK_profile for every expanded student entry inside <div id="dziennik">
        """
        ret = []
        tree = html.fromstring(self.pynk.br.open(self.url).read())
        members = tree.xpath('//div [@id="dziennik"]//div [@class="student student_expanded"]')
        for member in members:
            student_link = member.find('.//a[@class="student_link"]')
            ret.append(NK_profile(
                pynk = self.pynk,
                name = t(student_link),
                location = t(member.find('.//div[@class="city_content"]')),
                friends_count = t(member.find('.//div[@class="button"]/span')),
                url = student_link.get('href'),
            ))
        return ret
class NK_profile_shout(object):
    """
    NK_profile_shout - one entry of a user's shoutbox: the text and when it
    was posted
    """
    def __init__(self,contents='',datetime=''):
        self.datetime = datetime
        self.contents = contents
    def __repr__(self):
        # Only the length of the text is shown, not the text itself.
        length = len(self.contents)
        return "<NK_profile_shout: len(contents)=%d, datetime=%s>" % (length,self.datetime)
class NK_profile_details(object):
    """
    NK_profile_details - plain holder for the values read from a user's
    profile page; every constructor argument is stored under the same name
    """
    def __init__(self,age,nick,sex,phone,classes,shouts,avatar_thumb_url,avatar_url):
        # values from the profile info table
        self.age, self.nick, self.sex, self.phone = age, nick, sex, phone
        # collections gathered from the page
        self.classes, self.shouts = classes, shouts
        # avatar addresses
        self.avatar_thumb_url, self.avatar_url = avatar_thumb_url, avatar_url
class NK_profile(object):
    """
    NK_profile - carries information about people profiles.

    A profile can be created from a url, from a uid, or from both:
    * uid only  - url becomes nklink+'/profile/<uid>'
    * url only  - uid becomes the text after the last '/' of the url
    * otherwise - both values are stored as given
    friends and details start empty and are filled by get_friends() and
    get_details().
    """
    def __init__(self,pynk,
            name=None,
            location=None,
            url=None,
            friends_count=None,
            uid=None):
        self.pynk = pynk
        self.name = name
        self.location = location
        self.friends_count = friends_count
        self.friends = []
        self.details = None
        if uid and not url:
            self.uid = uid
            self.url = nklink+'/profile/%d' % uid
        elif url and not uid:
            self.url = url
            self.uid = url[url.rfind('/')+1:]
        else:
            self.url = url
            self.uid = uid
    def __repr__(self):
        if self.name and self.location and self.url:
            # Python 2 requires __repr__ to return a byte string; returning
            # unicode fails on non-ASCII names, so the text is built as
            # unicode and encoded once.
            text = u"<NK_profile: name=%s, location=%s, url=%s>" % (unicode(self.name),unicode(self.location),self.url)
            return text.encode('utf-8')
        else:
            return "<NK_profile [%d]>" % id(self)
    def __eq__(self,other):
        """
        Profiles match when their urls match or when their uids match.
        """
        if not isinstance(other,NK_profile):
            return NotImplemented
        if self is other:
            return True
        if self.url is not None and self.url == other.url:
            return True
        # A missing uid identifies nobody, so it never produces a match.
        if self.uid is None or other.uid is None:
            return False
        # The uid is an int when passed in directly and a string when cut out
        # of a url; compare the textual form so both kinds can match.
        return u'%s' % self.uid == u'%s' % other.uid
    def __ne__(self,other):
        # Python 2 does not derive != from ==, so it is spelled out.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result
    def get_friends(self):
        """
        get_friends() - fetches friends list for a given user.

        The list is downloaded only while self.friends is still empty and is
        then kept in self.friends. Raises Exception when the friends list
        cannot be fetched or decoded.
        """
        if not self.friends:
            json_url = nklink+'/friends_list/%s/575/0/0?t=%s' % (self.uid,self.pynk.basic_auth)
            try:
                # The first three characters of the response are skipped
                # before decoding - presumably a non-JSON prefix; TODO confirm.
                json_data = json.loads(self.pynk.br.open(json_url).read()[3:])
            except Exception:
                # "except Exception" lets KeyboardInterrupt and SystemExit
                # pass through instead of masking them.
                raise Exception("NK_profile.get_friends() failed for '%s'" % json_url)
            for i, uid in enumerate(json_data["UID"]):
                self.friends.append(NK_profile(
                    pynk=self.pynk,
                    name="%s %s" % (json_data["FIRST_NAME"][i],json_data["LAST_NAME"][i]),
                    location=json_data["CITY"][i],
                    url=nklink+"/profile/%s" % uid,
                    friends_count=json_data["FRIENDS_COUNT"][i],
                ))
        return self.friends
    def get_details(self):
        """
        get_details() - loads user's profile page and returns the details

        The page is fetched through pynk.get_url() when no details are stored
        yet or when name, location or friends_count is still missing. Besides
        building NK_profile_details, it also fills self.name, self.location
        and self.friends_count from the page.
        """
        if not self.details or not all((self.name,self.location,self.friends_count)):
            tree = html.fromstring(self.pynk.get_url(self.url))
            def get_detail(param):
                # The value is the element right after the "label" cell whose
                # text contains param.
                label_cells = tree.xpath('//table [@class="profile_info_box"]//td [contains(@class, "label")and contains(., "%s")]' % param)
                return t(label_cells[0].getnext())
            shouts = []
            for shout in tree.xpath('//div [@id="comments"]//table [@class="comment_table"]'):
                shouts.append(NK_profile_shout(
                    contents=t(shout.find_class('comment_content')[0]),
                    datetime=t(shout.find_class('datetime')[0]),
                ))
            classes = []
            for school in tree.xpath('//li [@class="school"]'):
                school_name = t(school.xpath('.//div//a')[0])
                for class_node in school.xpath('.//li//a [@class="user_class"]'):
                    # The link text is split on ' (' into the class name and
                    # a 'start-finish)' part.
                    class_details = t(class_node).split(' (')
                    year_start = year_finish = 0
                    if len(class_details)==2:
                        years = class_details[1].split('-')
                        # Without a '-' there is no year range to read; the
                        # years then stay 0 instead of raising IndexError.
                        if len(years) >= 2:
                            year_start = years[0]
                            year_finish = years[1][:-1]
                    classes.append(NK_profile_class(self.pynk,class_node.get('href'),class_details[0],school_name,year_start,year_finish))
            self.name="%s %s" % ( get_detail(u'Imię'), get_detail('Nazwisko') )
            self.location=get_detail(u'Miejscowość')
            self.friends_count=t(tree.xpath('//div[@class="ikonki"]/a[contains(@title," znajomych")]/span')[0])[2:]
            avatar_url = nklink+tree.xpath(u'//div [@class="profil_avatar"]//div [@class="avatar "]//a [contains(@title,"Pokaż profil")]')[0].get('href')
            avatar_thumb_url=tree.xpath('//div [@class="profil_avatar"]//div [@class="avatar "]//'+ \
                u'img [contains(@alt,"Pokaż profil")]')[0].get('src')
            self.details = NK_profile_details(
                age=get_detail('Wiek').strip(' lat'),
                nick=get_detail('Pseudonim'),
                sex=get_detail(u'Płeć'),
                phone=get_detail('Telefon'),
                classes=classes,
                shouts=shouts,
                avatar_thumb_url=avatar_thumb_url,
                avatar_url=avatar_url,
            )
        return self.details
class PyNK(object):
    """
    PyNK - Nasza-Klasa Python API.

    Wraps a mechanize browser (self.br). After login() the object also
    carries basic_auth, my_profile and watched_forums.
    """
    def __init__ (self):
        self.br = mechanize.Browser()
        user_agent = 'Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1; SV1; .NET CLR 1.1.4322; .NET CLR 2.0.50727)'
        self.br.addheaders = [("User-agent",user_agent)]
        self.br.set_handle_robots(False)
        #self.br.viewing_html=lambda: True
        # Filled in by login(); defined here so the attributes exist even
        # before a successful login.
        self.basic_auth = None
        self.my_profile = None
        self.watched_forums = []
    def login(self,username,password):
        """
        login(self,username,password) - logs in using passed parameters

        Submits the first form of the start page and raises Exception when
        the response does not contain 'Witaj'. On success stores the value of
        the "basic_auth" cookie, the logged in user's profile and the watched
        forums.
        """
        self.br.open(nklink)
        self.br.select_form(nr=0)
        self.br.form["login"]=username
        self.br.form["password"]=password
        self.br.submit()
        dashboard_html = self.br.response().read()
        if 'Witaj' not in dashboard_html:
            raise Exception('Blad logowania, zle haslo?')
        self.basic_auth = self.br._ua_handlers['_cookies'].cookiejar._cookies[".nk.pl"]["/"]["basic_auth"].value
        self.my_profile = self.get_my_profile(dashboard_html)
        self.watched_forums = self.get_my_watched_forums(dashboard_html)
    def get_my_profile(self,dashboard_html):
        """
        get_my_profile(self,dashboard_html) - fetches information about logged in user from his dashboard
        """
        box_tree = html.fromstring(dashboard_html)
        box = box_tree.xpath('.//div[contains(@class,"profile_box")]//div[@class="avatar_no_js "]')[0]
        return NK_profile(
            pynk = self,
            name = t(box.find('./a/span[@class="avatar_user_name_txt"]')),
            location = t(box.find('./a/span[@class="avatar_user_city"]')),
            url = nklink+box.find('./a').get('href'),
            friends_count = t(box.find('./div[@class="avatar_bar"]//span'))
        )
    def get_my_watched_forums(self, dashboard_html, unread_only=False):
        """
        get_my_watched_forums(self, dashboard_html, unread_only=False) -
        extracts information about logged in user's watched forums.

        With unread_only set, only entries whose <span> has a class
        containing 'unread' are returned.
        """
        ret = []
        for entry in html.fromstring(dashboard_html).xpath('//ul [@id="forum_max"]//li'):
            if unread_only:
                marker = entry.find('span')
                css_class = marker.get('class') if marker is not None else None
                # A substring test: str.find() returns 0 for a match at the
                # start and -1 for no match, so "not find()" is not a
                # containment check.
                if not css_class or 'unread' not in css_class:
                    continue
            link = entry.find('a')
            ret.append(NK_forum(
                pynk = self,
                school_name = t(link),
                url = nklink+link.get('href'))
            )
        return ret
    def get_url(self,url,cached=True):
        """
        get_url(self,url,cached=True) - fetches data either from the cache or from URL

        Cache files live under cache/ and are named after the URL with every
        '/' replaced by '_'. With cached=False the cache is neither read nor
        written.
        """
        if not cached:
            return self.br.open(url).read()
        filename = 'cache/'+url.replace('/','_')
        if os.path.exists(filename):
            with open(filename,'rb') as cache_file:
                return cache_file.read()
        ret = self.br.open(url).read()
        # Create the cache directory on first use instead of failing on open().
        if not os.path.isdir('cache'):
            os.makedirs('cache')
        with open(filename,'wb') as cache_file:
            cache_file.write(ret)
        return ret
if __name__ == '__main__':
    # Demo script: logs in, prints the basic_auth value, downloads the
    # friends list and stores every friend's avatar thumbnail under photos/.
    import os
    from sys import exit
    from PIL import Image
    from config import login, password #my config.py actually contains just "login" and "password" variables defined
    nk = PyNK()
    nk.login(login,password)
    print(nk.basic_auth)
    my_friends = nk.my_profile.get_friends()

    # The demos below are disabled; uncomment one to try it.
    # (Their indentation was reconstructed.)

    ##his_friends = NK_profile(pynk=nk,url='http://nk.pl/profile/9766667').get_friends()
    #for friend in my_friends:
    #    print friend.name
    ##print my_friends.age

    #uncomment the following for the simple mutual-friends demo
    #for first in my_friends:
    #    print "Sprawdzam "+first.name
    #    first_friends = first.get_friends()
    #    for second in first_friends:
    #        if second in my_friends:
    #            print "=> "+second.name

    #checks out all the posts of the first thread in first watched forums
    #for forum in nk.watched_forums:
    #    print unicode(forum)
    #    threads = forum.get_threads()
    #    for thread in threads:
    #        if thread.posts_count < 1:
    #            continue
    #        print "Wybralem watek: %s" % thread
    #        posts = thread.get_posts()
    #        print "Znalazlem postow: %s" % thread.posts_count
    #        exit(0)

    #my_classes = nk.my_profile.get_details().classes
    #for my_friend in my_friends:
    #    print "=>Przetwarzam %s" % my_friend
    #    my_friend_classes = my_friend.get_details().classes
    #    for my_friend_class in my_friend_classes:
    #        if my_friend_class in my_classes:
    #            print "%s byl z %s w %s" % (my_friend.name, nk.my_profile.name, my_friend_class.name)

    #for my_friend in my_friends:
    #    my_friend_classes = my_friend.get_details().classes
    #    for my_friend_class in my_friend_classes:
    #        if nk.my_profile in my_friend_class.get_members():
    #            print "Znalazlem %s w klasie %s z %s" % (my_friend.name, my_friend_class.name, nk.my_profile.name)

    #for my_friend in my_friends:
    #    phone_no = my_friend.get_details().phone
    #    if phone_no and phone_no != u'\xa0' and phone_no != 'ukryty':
    #        print "%s => %s" % (my_friend.name, phone_no)

    # Active demo: save each friend's avatar thumbnail and convert it to PNG.
    if not os.path.isdir('photos'):
        os.makedirs('photos')
    for my_friend in my_friends:
        print('%s ( %s )' % (my_friend.name, my_friend.url))
        url = my_friend.get_details().avatar_thumb_url
        filename = 'photos/'+url.replace('/','_')
        if not os.path.exists(filename+'.png'):
            # Image data is binary: write it in 'wb' mode and close the file
            # before PIL reopens it.
            with open(filename,'wb') as raw_image:
                raw_image.write(nk.br.open(url).read())
            Image.open(filename).save(filename+'.png')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment