"""Scrape a saved profile page and write an HTML table of suggested friends.

The page is searched with regular expressions for the profile's uid, display
name and profile photo, and for every ``aria-label="Add <name> as a fr..."``
entry (the markers look like Facebook timeline markup).  Pictures are read
from files stored next to the page and inlined into the report as base64
``data:`` URIs.

Runs on both Python 2 (the original target) and Python 3.
"""
import re
import codecs
import base64
import os

try:  # Python 3 module names
    from tkinter import *  # noqa: F401,F403 -- wildcard kept for compatibility
    from tkinter.messagebox import showinfo
    from tkinter.filedialog import askopenfilename
except ImportError:
    try:  # Python 2 module names
        from Tkinter import *  # noqa: F401,F403
        from tkMessageBox import showinfo
        from tkFileDialog import askopenfilename
    except ImportError:
        # No Tk on this interpreter: Person stays usable, main() will refuse
        # to start with a clear error instead of failing at import time.
        showinfo = askopenfilename = None


# The patterns deliberately use "." where the page has characters such as
# "?", "<" or "/" -- that keeps every look-behind fixed-width.
_FRIEND_RE = re.compile(
    r'(?<=aria-label="Add )(.*?)(?= as a fr)(.*?)(?<=profileid=")(.*?)(?=")')
_UID_RE = re.compile(r'(?<=URL./profile.php.id=)(.*?)(?=&)')
_NAME_RE = re.compile(
    r'(?<=.span id="fb-timeline-cover-name".)(.*?)(?=..span)')
_PHOTO_RE = re.compile(r'(?<=.Profile Photo" src="..)(.*?)(?=.jpg")')


class ProfileParseError(ValueError, IndexError):
    """The page does not contain the expected profile markers.

    Also an ``IndexError`` because that is what the unguarded ``[0]`` lookups
    used to raise, so existing ``except IndexError`` handlers keep working.
    """


def _report_row(parentUid, uid, name, picture):
    """Return one complete ``<tr>`` of the report as a single string."""
    # Values come straight from the HTML source, so they are already
    # HTML-escaped; escaping again would double-encode entities.
    return ''.join([
        '<tr>',
        '<td>', parentUid, '</td>',
        '<td>', uid, '</td>',
        '<td>', name, '</td>',
        '<td><img src="data:image/jpg;base64,', picture or '', '"/></td>',
        '</tr>',
    ])


class Person:
    """A profile (uid, name, picture) plus the friends suggested on its page.

    Passing ``file`` to the constructor runs the whole pipeline: read the
    page, parse the profile, parse the friends and write the report.
    Without ``file`` an empty instance is created (used for friend entries).
    """

    def __init__(self, file=None, baseDir=None):
        """Create a person, optionally parsing ``file`` located in ``baseDir``.

        Raises:
            ProfileParseError: ``file`` has no recognisable uid or name.
            IOError/OSError: ``file`` or the report cannot be read/written.
        """
        self.name = str()
        self.uid = str()
        self.picture = str()
        self.friends = []
        self.baseDir = baseDir
        self.friendsText = str()
        # Always defined so getFriends()/getProfile() can be called on an
        # empty instance without an AttributeError.
        self.text = str()
        if file:
            self.text = self.getText(file)
            self.getProfile()
            self.getFriends()
            self.report()

    def getBase64(self, fn):
        """Return the file ``fn`` (relative to ``baseDir``) as base64 text.

        Returns ``None`` when ``fn`` is empty/``None`` or the file cannot be
        read: a missing picture must not abort the whole report, and
        ``report`` already renders ``None`` as an empty image.  The result is
        a single line (no embedded newlines), which is what a ``data:`` URI
        needs.
        """
        if not fn:
            return None
        path = os.path.join(self.baseDir or '', fn)
        try:
            with open(path, 'rb') as img:
                raw = img.read()
        except (IOError, OSError):
            return None
        # b64encode exists on every Python version (encodestring was removed
        # in 3.9); decode so the value is text on Python 3 as well.
        return base64.b64encode(raw).decode('ascii')

    def getFriends(self):
        """Append a ``Person`` to ``self.friends`` for every friend entry.

        Entries whose uid equals this profile's own uid are skipped.  The
        picture is taken from the first ``src=`` that follows ``id=<uid>`` in
        the page; when there is none the friend's picture is ``None``.
        """
        text = self.text
        for entry in _FRIEND_RE.finditer(text):
            name = entry.group(1)
            uid = entry.group(3)
            if uid == self.uid:
                continue
            # The uid is page-controlled text: escape it so regex
            # metacharacters cannot change (or break) the pattern.
            pictureRe = ('(?<=id=' + re.escape(uid) +
                         ')(?:.*?)(?<=src=...)(.*?)(?=. alt)')
            # Only the first hit is used, so stop at it instead of
            # collecting every match in the page.
            found = re.search(pictureRe, text)
            friend = Person()
            friend.baseDir = self.baseDir
            friend.name = name
            friend.uid = uid
            friend.picture = friend.getBase64(
                found.group(1) if found else None)
            self.friends.append(friend)

    def getProfile(self):
        """Fill ``uid``, ``name`` and ``picture`` from ``self.text``.

        Raises:
            ProfileParseError: the uid or the name marker is missing.  A
                missing profile photo is not an error; ``picture`` becomes
                ``None``.
        """
        text = self.text
        uid = _UID_RE.search(text)
        name = _NAME_RE.search(text)
        if uid is None or name is None:
            raise ProfileParseError(
                'profile uid/name not found; is this a saved profile page?')
        photo = _PHOTO_RE.search(text)
        # The pattern captures the path without its extension.
        self.picture = self.getBase64(photo.group(1) + '.jpg') if photo else None
        self.name = name.group(1)
        self.uid = uid.group(1)

    def getText(self, file):
        """Return the UTF-8 decoded content of ``file`` inside ``baseDir``."""
        path = os.path.join(self.baseDir or '', file)
        with codecs.open(path, mode='r', encoding='utf-8') as f:
            return f.read()

    def report(self):
        """Write ``<uid>_<name>_friends.html`` into ``baseDir``.

        The table has one row for the profile itself followed by one row per
        friend, with columns parentUID, friendUID, nameUID and picture.
        """
        fileName = os.path.join(
            self.baseDir or '',
            self.uid + '_' + self.name + '_' + 'friends.html')
        parts = [
            '<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />',
            '<table>',
            '<tr>',
            '<th>parentUID</th>',
            '<th>friendUID</th>',
            '<th>nameUID</th>',
            '<th>picture</th>',
            '</tr>',
            _report_row(self.uid, self.uid, self.name, self.picture),
        ]
        for fr in self.friends:
            # Best effort per friend, as before, but the row is built
            # completely before it is kept so a bad value can never leave a
            # half-written <tr> in the output.
            try:
                parts.append(_report_row(self.uid, fr.uid, fr.name, fr.picture))
            except (TypeError, UnicodeError):
                continue
        parts.append('</table>')
        # A text-mode UTF-8 writer accepts the same str/unicode values on
        # Python 2 and 3, so no per-field .encode() is needed.
        with codecs.open(fileName, 'w', encoding='utf-8') as f:
            f.write(''.join(parts))


def main():
    """Show a one-button window that lets the user pick a page to process."""
    if askopenfilename is None:
        raise RuntimeError('Tkinter is not available; cannot open the file picker')

    errmsg = 'Error!'

    def callback():
        filename = askopenfilename()
        if not filename:
            # Dialog cancelled: nothing was processed, so do not claim "Done!".
            return
        baseDir, fileName = os.path.split(filename)
        showinfo("info", "Press OK To Start Processing Data")
        try:
            Person(file=fileName, baseDir=baseDir)
        except (EnvironmentError, ValueError) as exc:
            # Covers unreadable files, undecodable text and pages without
            # profile markers; repr() is safe for any message on Python 2.
            showinfo("info", '%s\n%r' % (errmsg, exc))
            return
        showinfo("info", "Done!")

    button = Button(text='File Open', command=callback)
    button.pack(fill=X)
    mainloop()


if __name__ == "__main__":
    main()